mirror of
https://github.com/torvalds/linux.git
synced 2024-11-24 21:21:41 +00:00
RDMA/nldev: Add support for RDMA monitoring
Introduce a new netlink command to allow rdma event monitoring.
The rdma events supported now are IB device registration/unregistration
and net device attachment/detachment.

Example output of rdma monitor and the commands which trigger the events:

$ rdma monitor
$ rmmod mlx5_ib
[UNREGISTER] dev 1 rocep8s0f1
[UNREGISTER] dev 0 rocep8s0f0

$ modprobe mlx5_ib
[REGISTER] dev 2 mlx5_0
[NETDEV_ATTACH] dev 2 mlx5_0 port 1 netdev 4 eth2
[REGISTER] dev 3 mlx5_1
[NETDEV_ATTACH] dev 3 mlx5_1 port 1 netdev 5 eth3

$ devlink dev eswitch set pci/0000:08:00.0 mode switchdev
[UNREGISTER] dev 2 rocep8s0f0
[REGISTER] dev 4 mlx5_0
[NETDEV_ATTACH] dev 4 mlx5_0 port 30 netdev 4 eth2

$ echo 4 > /sys/class/net/eth2/device/sriov_numvfs
[NETDEV_ATTACH] dev 4 rdmap8s0f0 port 2 netdev 7 eth4
[NETDEV_ATTACH] dev 4 rdmap8s0f0 port 3 netdev 8 eth5
[NETDEV_ATTACH] dev 4 rdmap8s0f0 port 4 netdev 9 eth6
[NETDEV_ATTACH] dev 4 rdmap8s0f0 port 5 netdev 10 eth7
[REGISTER] dev 5 mlx5_0
[NETDEV_ATTACH] dev 5 mlx5_0 port 1 netdev 11 eth8
[REGISTER] dev 6 mlx5_0
[NETDEV_ATTACH] dev 6 mlx5_0 port 1 netdev 12 eth9
[REGISTER] dev 7 mlx5_0
[NETDEV_ATTACH] dev 7 mlx5_0 port 1 netdev 13 eth10
[REGISTER] dev 8 mlx5_0
[NETDEV_ATTACH] dev 8 mlx5_0 port 1 netdev 14 eth11

$ echo 0 > /sys/class/net/eth2/device/sriov_numvfs
[UNREGISTER] dev 5 rocep8s0f0v0
[UNREGISTER] dev 6 rocep8s0f0v1
[UNREGISTER] dev 7 rocep8s0f0v2
[UNREGISTER] dev 8 rocep8s0f0v3
[NETDEV_DETACH] dev 4 rdmap8s0f0 port 2
[NETDEV_DETACH] dev 4 rdmap8s0f0 port 3
[NETDEV_DETACH] dev 4 rdmap8s0f0 port 4
[NETDEV_DETACH] dev 4 rdmap8s0f0 port 5

Signed-off-by: Chiara Meiohas <cmeiohas@nvidia.com>
Signed-off-by: Michael Guralnik <michaelgur@nvidia.com>
Link: https://patch.msgid.link/20240909173025.30422-7-michaelgur@nvidia.com
Signed-off-by: Leon Romanovsky <leon@kernel.org>
This commit is contained in:
parent
8d159eb211
commit
9cbed5aab5
@ -1351,6 +1351,29 @@ static void prevent_dealloc_device(struct ib_device *ib_dev)
|
|||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void ib_device_notify_register(struct ib_device *device)
|
||||||
|
{
|
||||||
|
struct net_device *netdev;
|
||||||
|
u32 port;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
ret = rdma_nl_notify_event(device, 0, RDMA_REGISTER_EVENT);
|
||||||
|
if (ret)
|
||||||
|
return;
|
||||||
|
|
||||||
|
rdma_for_each_port(device, port) {
|
||||||
|
netdev = ib_device_get_netdev(device, port);
|
||||||
|
if (!netdev)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
ret = rdma_nl_notify_event(device, port,
|
||||||
|
RDMA_NETDEV_ATTACH_EVENT);
|
||||||
|
dev_put(netdev);
|
||||||
|
if (ret)
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* ib_register_device - Register an IB device with IB core
|
* ib_register_device - Register an IB device with IB core
|
||||||
* @device: Device to register
|
* @device: Device to register
|
||||||
@ -1449,6 +1472,8 @@ int ib_register_device(struct ib_device *device, const char *name,
|
|||||||
dev_set_uevent_suppress(&device->dev, false);
|
dev_set_uevent_suppress(&device->dev, false);
|
||||||
/* Mark for userspace that device is ready */
|
/* Mark for userspace that device is ready */
|
||||||
kobject_uevent(&device->dev.kobj, KOBJ_ADD);
|
kobject_uevent(&device->dev.kobj, KOBJ_ADD);
|
||||||
|
|
||||||
|
ib_device_notify_register(device);
|
||||||
ib_device_put(device);
|
ib_device_put(device);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
@ -1491,6 +1516,7 @@ static void __ib_unregister_device(struct ib_device *ib_dev)
|
|||||||
goto out;
|
goto out;
|
||||||
|
|
||||||
disable_device(ib_dev);
|
disable_device(ib_dev);
|
||||||
|
rdma_nl_notify_event(ib_dev, 0, RDMA_UNREGISTER_EVENT);
|
||||||
|
|
||||||
/* Expedite removing unregistered pointers from the hash table */
|
/* Expedite removing unregistered pointers from the hash table */
|
||||||
free_netdevs(ib_dev);
|
free_netdevs(ib_dev);
|
||||||
@ -2159,6 +2185,7 @@ static void add_ndev_hash(struct ib_port_data *pdata)
|
|||||||
int ib_device_set_netdev(struct ib_device *ib_dev, struct net_device *ndev,
|
int ib_device_set_netdev(struct ib_device *ib_dev, struct net_device *ndev,
|
||||||
u32 port)
|
u32 port)
|
||||||
{
|
{
|
||||||
|
enum rdma_nl_notify_event_type etype;
|
||||||
struct net_device *old_ndev;
|
struct net_device *old_ndev;
|
||||||
struct ib_port_data *pdata;
|
struct ib_port_data *pdata;
|
||||||
unsigned long flags;
|
unsigned long flags;
|
||||||
@ -2190,6 +2217,14 @@ int ib_device_set_netdev(struct ib_device *ib_dev, struct net_device *ndev,
|
|||||||
spin_unlock_irqrestore(&pdata->netdev_lock, flags);
|
spin_unlock_irqrestore(&pdata->netdev_lock, flags);
|
||||||
|
|
||||||
add_ndev_hash(pdata);
|
add_ndev_hash(pdata);
|
||||||
|
|
||||||
|
/* Make sure that the device is registered before we send events */
|
||||||
|
if (xa_load(&devices, ib_dev->index) != ib_dev)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
etype = ndev ? RDMA_NETDEV_ATTACH_EVENT : RDMA_NETDEV_DETACH_EVENT;
|
||||||
|
rdma_nl_notify_event(ib_dev, port, etype);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL(ib_device_set_netdev);
|
EXPORT_SYMBOL(ib_device_set_netdev);
|
||||||
|
@ -311,6 +311,7 @@ int rdma_nl_net_init(struct rdma_dev_net *rnet)
|
|||||||
struct net *net = read_pnet(&rnet->net);
|
struct net *net = read_pnet(&rnet->net);
|
||||||
struct netlink_kernel_cfg cfg = {
|
struct netlink_kernel_cfg cfg = {
|
||||||
.input = rdma_nl_rcv,
|
.input = rdma_nl_rcv,
|
||||||
|
.flags = NL_CFG_F_NONROOT_RECV,
|
||||||
};
|
};
|
||||||
struct sock *nls;
|
struct sock *nls;
|
||||||
|
|
||||||
|
@ -170,6 +170,7 @@ static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
|
|||||||
[RDMA_NLDEV_ATTR_DEV_TYPE] = { .type = NLA_U8 },
|
[RDMA_NLDEV_ATTR_DEV_TYPE] = { .type = NLA_U8 },
|
||||||
[RDMA_NLDEV_ATTR_PARENT_NAME] = { .type = NLA_NUL_STRING },
|
[RDMA_NLDEV_ATTR_PARENT_NAME] = { .type = NLA_NUL_STRING },
|
||||||
[RDMA_NLDEV_ATTR_NAME_ASSIGN_TYPE] = { .type = NLA_U8 },
|
[RDMA_NLDEV_ATTR_NAME_ASSIGN_TYPE] = { .type = NLA_U8 },
|
||||||
|
[RDMA_NLDEV_ATTR_EVENT_TYPE] = { .type = NLA_U8 },
|
||||||
};
|
};
|
||||||
|
|
||||||
static int put_driver_name_print_type(struct sk_buff *msg, const char *name,
|
static int put_driver_name_print_type(struct sk_buff *msg, const char *name,
|
||||||
@ -2722,6 +2723,129 @@ static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
|
|||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/*
 * Add the attributes describing a port/netdev association to @msg.
 *
 * The device index, device name and port index are always added. The netdev
 * index and name are added only when the port currently has a net device.
 * If that net device lives in a namespace other than @net, nothing is added
 * and 0 is returned.
 *
 * Returns 0 on success or the error of the first attribute that could not
 * be added.
 */
static int fill_mon_netdev_association(struct sk_buff *msg,
				       struct ib_device *device, u32 port,
				       const struct net *net)
{
	struct net_device *ndev = ib_device_get_netdev(device, port);
	int err = 0;

	/* Do not describe a netdev that belongs to a different namespace. */
	if (ndev && !net_eq(dev_net(ndev), net))
		goto put_ndev;

	err = nla_put_u32(msg, RDMA_NLDEV_ATTR_DEV_INDEX, device->index);
	if (!err)
		err = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_NAME,
				     dev_name(&device->dev));
	if (!err)
		err = nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port);

	if (!err && ndev) {
		err = nla_put_u32(msg, RDMA_NLDEV_ATTR_NDEV_INDEX,
				  ndev->ifindex);
		if (!err)
			err = nla_put_string(msg, RDMA_NLDEV_ATTR_NDEV_NAME,
					     ndev->name);
	}

put_ndev:
	/* Called unconditionally, as in the original; ndev may be NULL here. */
	dev_put(ndev);
	return err;
}
|
||||||
|
|
||||||
|
/*
 * Emit a rate-limited warning naming the RDMA monitor event that could not
 * be sent.
 *
 * @device:   device the event was raised for
 * @port_num: port the event was raised for (only printed for netdev events)
 * @type:     event type that failed; unknown types are silently ignored
 */
static void rdma_nl_notify_err_msg(struct ib_device *device, u32 port_num,
				   enum rdma_nl_notify_event_type type)
{
	struct net_device *netdev;

	switch (type) {
	case RDMA_REGISTER_EVENT:
		dev_warn_ratelimited(&device->dev,
				     "Failed to send RDMA monitor register device event\n");
		break;
	case RDMA_UNREGISTER_EVENT:
		dev_warn_ratelimited(&device->dev,
				     "Failed to send RDMA monitor unregister device event\n");
		break;
	case RDMA_NETDEV_ATTACH_EVENT:
		netdev = ib_device_get_netdev(device, port_num);
		/*
		 * ib_device_get_netdev() can return NULL (the port may no
		 * longer have a net device by the time the failure is
		 * reported), so do not dereference it blindly.
		 */
		if (!netdev) {
			dev_warn_ratelimited(&device->dev,
					     "Failed to send RDMA monitor netdev attach event: port %d\n",
					     port_num);
			break;
		}
		dev_warn_ratelimited(&device->dev,
				     "Failed to send RDMA monitor netdev attach event: port %d netdev %d\n",
				     port_num, netdev->ifindex);
		dev_put(netdev);
		break;
	case RDMA_NETDEV_DETACH_EVENT:
		dev_warn_ratelimited(&device->dev,
				     "Failed to send RDMA monitor netdev detach event: port %d\n",
				     port_num);
		break;
	default:
		break;
	}
}
|
||||||
|
|
||||||
|
/*
 * Build an RDMA_NLDEV_CMD_MONITOR message for @type and multicast it to the
 * RDMA_NL_GROUP_NOTIFY group in the device's net namespace.
 *
 * @device:   the IB device which triggered the event
 * @port_num: the port which triggered the event - 0 if unused
 * @type:     the event type
 *
 * Register/unregister events carry the device handle; netdev attach/detach
 * events carry the port/netdev association. The event type attribute is
 * always appended last.
 *
 * Returns 0 on success (a multicast result of -ESRCH is also treated as
 * success - presumably it means nobody is listening), -EINVAL if the device
 * has no net namespace, -ENOMEM if the message cannot be allocated,
 * -EMSGSIZE if the netlink header does not fit, or the error returned while
 * filling or sending the message. A warning is logged on every failure that
 * happens after the message has been allocated.
 */
int rdma_nl_notify_event(struct ib_device *device, u32 port_num,
			 enum rdma_nl_notify_event_type type)
{
	struct sk_buff *skb;
	struct net *net;
	int ret = 0;
	void *nlh;

	net = read_pnet(&device->coredev.rdma_net);
	if (!net)
		return -EINVAL;

	skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!skb)
		return -ENOMEM;

	nlh = nlmsg_put(skb, 0, 0,
			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_MONITOR),
			0, 0);
	/* nlmsg_put() may fail; nlh must not reach nlmsg_end() as NULL. */
	if (!nlh) {
		ret = -EMSGSIZE;
		goto err_free;
	}

	switch (type) {
	case RDMA_REGISTER_EVENT:
	case RDMA_UNREGISTER_EVENT:
		ret = fill_nldev_handle(skb, device);
		if (ret)
			goto err_free;
		break;
	case RDMA_NETDEV_ATTACH_EVENT:
	case RDMA_NETDEV_DETACH_EVENT:
		ret = fill_mon_netdev_association(skb, device,
						  port_num, net);
		if (ret)
			goto err_free;
		break;
	default:
		break;
	}

	ret = nla_put_u8(skb, RDMA_NLDEV_ATTR_EVENT_TYPE, type);
	if (ret)
		goto err_free;

	nlmsg_end(skb, nlh);
	ret = rdma_nl_multicast(net, skb, RDMA_NL_GROUP_NOTIFY, GFP_KERNEL);
	if (ret && ret != -ESRCH) {
		skb = NULL; /* skb is freed in the netlink send-op handling */
		goto err_free;
	}
	return 0;

err_free:
	rdma_nl_notify_err_msg(device, port_num, type);
	nlmsg_free(skb);
	return ret;
}
|
||||||
|
|
||||||
void __init nldev_init(void)
|
void __init nldev_init(void)
|
||||||
{
|
{
|
||||||
rdma_nl_register(RDMA_NL_NLDEV, nldev_cb_table);
|
rdma_nl_register(RDMA_NL_NLDEV, nldev_cb_table);
|
||||||
|
@ -6,6 +6,8 @@
|
|||||||
#include <linux/netlink.h>
|
#include <linux/netlink.h>
|
||||||
#include <uapi/rdma/rdma_netlink.h>
|
#include <uapi/rdma/rdma_netlink.h>
|
||||||
|
|
||||||
|
struct ib_device;
|
||||||
|
|
||||||
enum {
|
enum {
|
||||||
RDMA_NLDEV_ATTR_EMPTY_STRING = 1,
|
RDMA_NLDEV_ATTR_EMPTY_STRING = 1,
|
||||||
RDMA_NLDEV_ATTR_ENTRY_STRLEN = 16,
|
RDMA_NLDEV_ATTR_ENTRY_STRLEN = 16,
|
||||||
@ -110,6 +112,16 @@ int rdma_nl_multicast(struct net *net, struct sk_buff *skb,
|
|||||||
*/
|
*/
|
||||||
bool rdma_nl_chk_listeners(unsigned int group);
|
bool rdma_nl_chk_listeners(unsigned int group);
|
||||||
|
|
||||||
|
/**
 * rdma_nl_notify_event - Prepare and send an event message
 * @ib: the IB device which triggered the event
 * @port_num: the port number which triggered the event - 0 if unused
 * @type: the event type
 *
 * Return: 0 on success or a negative error code
 */
|
||||||
|
int rdma_nl_notify_event(struct ib_device *ib, u32 port_num,
|
||||||
|
enum rdma_nl_notify_event_type type);
|
||||||
|
|
||||||
struct rdma_link_ops {
|
struct rdma_link_ops {
|
||||||
struct list_head list;
|
struct list_head list;
|
||||||
const char *type;
|
const char *type;
|
||||||
|
@ -15,6 +15,7 @@ enum {
|
|||||||
enum {
|
enum {
|
||||||
RDMA_NL_GROUP_IWPM = 2,
|
RDMA_NL_GROUP_IWPM = 2,
|
||||||
RDMA_NL_GROUP_LS,
|
RDMA_NL_GROUP_LS,
|
||||||
|
RDMA_NL_GROUP_NOTIFY,
|
||||||
RDMA_NL_NUM_GROUPS
|
RDMA_NL_NUM_GROUPS
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -305,6 +306,8 @@ enum rdma_nldev_command {
|
|||||||
|
|
||||||
RDMA_NLDEV_CMD_DELDEV,
|
RDMA_NLDEV_CMD_DELDEV,
|
||||||
|
|
||||||
|
RDMA_NLDEV_CMD_MONITOR,
|
||||||
|
|
||||||
RDMA_NLDEV_NUM_OPS
|
RDMA_NLDEV_NUM_OPS
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -574,6 +577,8 @@ enum rdma_nldev_attr {
|
|||||||
|
|
||||||
RDMA_NLDEV_ATTR_NAME_ASSIGN_TYPE, /* u8 */
|
RDMA_NLDEV_ATTR_NAME_ASSIGN_TYPE, /* u8 */
|
||||||
|
|
||||||
|
RDMA_NLDEV_ATTR_EVENT_TYPE, /* u8 */
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Always the end
|
* Always the end
|
||||||
*/
|
*/
|
||||||
@ -624,4 +629,14 @@ enum rdma_nl_name_assign_type {
|
|||||||
RDMA_NAME_ASSIGN_TYPE_USER = 1, /* Provided by user-space */
|
RDMA_NAME_ASSIGN_TYPE_USER = 1, /* Provided by user-space */
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/*
 * Supported rdma monitoring event types.
 *
 * The numeric values are written out explicitly; they are identical to the
 * implicit numbering of the original declaration.
 */
enum rdma_nl_notify_event_type {
	RDMA_REGISTER_EVENT = 0,	/* IB device registered */
	RDMA_UNREGISTER_EVENT = 1,	/* IB device unregistered */
	RDMA_NETDEV_ATTACH_EVENT = 2,	/* net device set on a port */
	RDMA_NETDEV_DETACH_EVENT = 3,	/* net device cleared from a port */
};
|
||||||
|
|
||||||
#endif /* _UAPI_RDMA_NETLINK_H */
|
#endif /* _UAPI_RDMA_NETLINK_H */
|
||||||
|
Loading…
Reference in New Issue
Block a user