RDMA/nldev: Add support for RDMA monitoring

Introduce a new netlink command to allow rdma event monitoring.
The rdma events supported now are IB device
registration/unregistration and net device attachment/detachment.

Example output of rdma monitor and the commands which trigger
the events:

$ rdma monitor
$ rmmod mlx5_ib
[UNREGISTER]	dev 1 rocep8s0f1
[UNREGISTER]	dev 0 rocep8s0f0

$ modprobe mlx5_ib
[REGISTER]	dev 2 mlx5_0
[NETDEV_ATTACH]	dev 2 mlx5_0 port 1 netdev 4 eth2
[REGISTER]	dev 3 mlx5_1
[NETDEV_ATTACH]	dev 3 mlx5_1 port 1 netdev 5 eth3

$ devlink dev eswitch set pci/0000:08:00.0 mode switchdev
[UNREGISTER]	dev 2 rocep8s0f0
[REGISTER]	dev 4 mlx5_0
[NETDEV_ATTACH]	dev 4 mlx5_0 port 30 netdev 4 eth2

$ echo 4 > /sys/class/net/eth2/device/sriov_numvfs
[NETDEV_ATTACH]	dev 4 rdmap8s0f0 port 2 netdev 7 eth4
[NETDEV_ATTACH]	dev 4 rdmap8s0f0 port 3 netdev 8 eth5
[NETDEV_ATTACH]	dev 4 rdmap8s0f0 port 4 netdev 9 eth6
[NETDEV_ATTACH]	dev 4 rdmap8s0f0 port 5 netdev 10 eth7
[REGISTER]	dev 5 mlx5_0
[NETDEV_ATTACH]	dev 5 mlx5_0 port 1 netdev 11 eth8
[REGISTER]	dev 6 mlx5_0
[NETDEV_ATTACH]	dev 6 mlx5_0 port 1 netdev 12 eth9
[REGISTER]	dev 7 mlx5_0
[NETDEV_ATTACH]	dev 7 mlx5_0 port 1 netdev 13 eth10
[REGISTER]	dev 8 mlx5_0
[NETDEV_ATTACH]	dev 8 mlx5_0 port 1 netdev 14 eth11

$ echo 0 > /sys/class/net/eth2/device/sriov_numvfs
[UNREGISTER]	dev 5 rocep8s0f0v0
[UNREGISTER]	dev 6 rocep8s0f0v1
[UNREGISTER]	dev 7 rocep8s0f0v2
[UNREGISTER]	dev 8 rocep8s0f0v3
[NETDEV_DETACH]	dev 4 rdmap8s0f0 port 2
[NETDEV_DETACH]	dev 4 rdmap8s0f0 port 3
[NETDEV_DETACH]	dev 4 rdmap8s0f0 port 4
[NETDEV_DETACH]	dev 4 rdmap8s0f0 port 5

Signed-off-by: Chiara Meiohas <cmeiohas@nvidia.com>
Signed-off-by: Michael Guralnik <michaelgur@nvidia.com>
Link: https://patch.msgid.link/20240909173025.30422-7-michaelgur@nvidia.com
Signed-off-by: Leon Romanovsky <leon@kernel.org>
This commit is contained in:
Chiara Meiohas 2024-09-09 20:30:24 +03:00 committed by Leon Romanovsky
parent 8d159eb211
commit 9cbed5aab5
5 changed files with 187 additions and 0 deletions

View File

@ -1351,6 +1351,29 @@ static void prevent_dealloc_device(struct ib_device *ib_dev)
{
}
/*
 * Announce a newly registered device on the RDMA monitor channel.
 *
 * A REGISTER event is sent first; then, for every port that currently has
 * a net device associated, a NETDEV_ATTACH event is sent.  The sequence is
 * abandoned as soon as any notification fails.
 */
static void ib_device_notify_register(struct ib_device *device)
{
	u32 port_num;

	if (rdma_nl_notify_event(device, 0, RDMA_REGISTER_EVENT))
		return;

	rdma_for_each_port(device, port_num) {
		struct net_device *ndev;
		int err;

		ndev = ib_device_get_netdev(device, port_num);
		if (!ndev)
			continue;

		err = rdma_nl_notify_event(device, port_num,
					   RDMA_NETDEV_ATTACH_EVENT);
		/* Drop the reference taken by ib_device_get_netdev() */
		dev_put(ndev);
		if (err)
			return;
	}
}
/**
* ib_register_device - Register an IB device with IB core
* @device: Device to register
@ -1449,6 +1472,8 @@ int ib_register_device(struct ib_device *device, const char *name,
dev_set_uevent_suppress(&device->dev, false);
/* Mark for userspace that device is ready */
kobject_uevent(&device->dev.kobj, KOBJ_ADD);
ib_device_notify_register(device);
ib_device_put(device);
return 0;
@ -1491,6 +1516,7 @@ static void __ib_unregister_device(struct ib_device *ib_dev)
goto out;
disable_device(ib_dev);
rdma_nl_notify_event(ib_dev, 0, RDMA_UNREGISTER_EVENT);
/* Expedite removing unregistered pointers from the hash table */
free_netdevs(ib_dev);
@ -2159,6 +2185,7 @@ static void add_ndev_hash(struct ib_port_data *pdata)
int ib_device_set_netdev(struct ib_device *ib_dev, struct net_device *ndev,
u32 port)
{
enum rdma_nl_notify_event_type etype;
struct net_device *old_ndev;
struct ib_port_data *pdata;
unsigned long flags;
@ -2190,6 +2217,14 @@ int ib_device_set_netdev(struct ib_device *ib_dev, struct net_device *ndev,
spin_unlock_irqrestore(&pdata->netdev_lock, flags);
add_ndev_hash(pdata);
/* Make sure that the device is registered before we send events */
if (xa_load(&devices, ib_dev->index) != ib_dev)
return 0;
etype = ndev ? RDMA_NETDEV_ATTACH_EVENT : RDMA_NETDEV_DETACH_EVENT;
rdma_nl_notify_event(ib_dev, port, etype);
return 0;
}
EXPORT_SYMBOL(ib_device_set_netdev);

View File

@ -311,6 +311,7 @@ int rdma_nl_net_init(struct rdma_dev_net *rnet)
struct net *net = read_pnet(&rnet->net);
struct netlink_kernel_cfg cfg = {
.input = rdma_nl_rcv,
.flags = NL_CFG_F_NONROOT_RECV,
};
struct sock *nls;

View File

@ -170,6 +170,7 @@ static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
[RDMA_NLDEV_ATTR_DEV_TYPE] = { .type = NLA_U8 },
[RDMA_NLDEV_ATTR_PARENT_NAME] = { .type = NLA_NUL_STRING },
[RDMA_NLDEV_ATTR_NAME_ASSIGN_TYPE] = { .type = NLA_U8 },
[RDMA_NLDEV_ATTR_EVENT_TYPE] = { .type = NLA_U8 },
};
static int put_driver_name_print_type(struct sk_buff *msg, const char *name,
@ -2722,6 +2723,129 @@ static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
},
};
/*
 * Fill a monitor message with the device/port/netdev association.
 *
 * Emits the device index, device name and port index, followed by the
 * netdev index and name when a net device is bound to @port.  If the bound
 * net device lives in a namespace other than @net, nothing is added and 0
 * is returned.
 *
 * Returns 0 on success or the error reported by the failing nla_put_*().
 */
static int fill_mon_netdev_association(struct sk_buff *msg,
				       struct ib_device *device, u32 port,
				       const struct net *net)
{
	struct net_device *ndev = ib_device_get_netdev(device, port);
	int err = 0;

	if (ndev && !net_eq(dev_net(ndev), net))
		goto put_ndev;

	err = nla_put_u32(msg, RDMA_NLDEV_ATTR_DEV_INDEX, device->index);
	if (!err)
		err = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_NAME,
				     dev_name(&device->dev));
	if (!err)
		err = nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port);

	/* Stop here on failure, or when there is no netdev to describe */
	if (err || !ndev)
		goto put_ndev;

	err = nla_put_u32(msg, RDMA_NLDEV_ATTR_NDEV_INDEX, ndev->ifindex);
	if (!err)
		err = nla_put_string(msg, RDMA_NLDEV_ATTR_NDEV_NAME,
				     ndev->name);

put_ndev:
	/* ndev may be NULL on this path; dev_put() is called regardless */
	dev_put(ndev);
	return err;
}
/*
 * Log a rate-limited warning describing which RDMA monitor event could not
 * be delivered.
 *
 * @device:   device the failed event was about
 * @port_num: port the failed event was about (only printed for netdev events)
 * @type:     the event type that failed; unknown types are silently ignored
 */
static void rdma_nl_notify_err_msg(struct ib_device *device, u32 port_num,
				   enum rdma_nl_notify_event_type type)
{
	struct net_device *netdev;

	switch (type) {
	case RDMA_REGISTER_EVENT:
		dev_warn_ratelimited(&device->dev,
				     "Failed to send RDMA monitor register device event\n");
		break;
	case RDMA_UNREGISTER_EVENT:
		dev_warn_ratelimited(&device->dev,
				     "Failed to send RDMA monitor unregister device event\n");
		break;
	case RDMA_NETDEV_ATTACH_EVENT:
		netdev = ib_device_get_netdev(device, port_num);
		/*
		 * The netdev may have been detached since the event was
		 * generated, so the lookup can come back empty; never
		 * dereference it blindly.
		 */
		if (netdev) {
			dev_warn_ratelimited(&device->dev,
					     "Failed to send RDMA monitor netdev attach event: port %u netdev %d\n",
					     port_num, netdev->ifindex);
			dev_put(netdev);
		} else {
			dev_warn_ratelimited(&device->dev,
					     "Failed to send RDMA monitor netdev attach event: port %u\n",
					     port_num);
		}
		break;
	case RDMA_NETDEV_DETACH_EVENT:
		dev_warn_ratelimited(&device->dev,
				     "Failed to send RDMA monitor netdev detach event: port %u\n",
				     port_num);
		break;
	default:
		break;
	}
}
/*
 * Build an RDMA_NLDEV_CMD_MONITOR message describing @type and multicast it
 * on RDMA_NL_GROUP_NOTIFY in the device's net namespace.
 *
 * @device:   the IB device which triggered the event
 * @port_num: the port which triggered the event - 0 if unused
 * @type:     the event type
 *
 * REGISTER/UNREGISTER events carry the device handle; NETDEV_ATTACH/DETACH
 * events carry the device/port/netdev association.  Every message also
 * carries RDMA_NLDEV_ATTR_EVENT_TYPE.
 *
 * Returns 0 on success or a negative error code.  A multicast result of
 * -ESRCH is not treated as a failure (presumably it means nobody is
 * subscribed to the group - confirm against the netlink core).
 */
int rdma_nl_notify_event(struct ib_device *device, u32 port_num,
			 enum rdma_nl_notify_event_type type)
{
	struct nlmsghdr *nlh;
	struct sk_buff *skb;
	struct net *net;
	int ret = 0;

	net = read_pnet(&device->coredev.rdma_net);
	if (!net)
		return -EINVAL;

	skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!skb)
		return -ENOMEM;

	nlh = nlmsg_put(skb, 0, 0,
			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_MONITOR),
			0, 0);
	/* nlmsg_put() can fail; nlmsg_end() below must not see a NULL header */
	if (!nlh) {
		ret = -EMSGSIZE;
		goto err_free;
	}

	switch (type) {
	case RDMA_REGISTER_EVENT:
	case RDMA_UNREGISTER_EVENT:
		ret = fill_nldev_handle(skb, device);
		if (ret)
			goto err_free;
		break;
	case RDMA_NETDEV_ATTACH_EVENT:
	case RDMA_NETDEV_DETACH_EVENT:
		ret = fill_mon_netdev_association(skb, device,
						  port_num, net);
		if (ret)
			goto err_free;
		break;
	default:
		break;
	}

	ret = nla_put_u8(skb, RDMA_NLDEV_ATTR_EVENT_TYPE, type);
	if (ret)
		goto err_free;

	nlmsg_end(skb, nlh);
	ret = rdma_nl_multicast(net, skb, RDMA_NL_GROUP_NOTIFY, GFP_KERNEL);
	if (ret && ret != -ESRCH) {
		skb = NULL; /* skb is freed in the netlink send-op handling */
		goto err_free;
	}
	return 0;

err_free:
	rdma_nl_notify_err_msg(device, port_num, type);
	/* skb is NULL here when the multicast path already consumed it */
	nlmsg_free(skb);
	return ret;
}
void __init nldev_init(void)
{
rdma_nl_register(RDMA_NL_NLDEV, nldev_cb_table);

View File

@ -6,6 +6,8 @@
#include <linux/netlink.h>
#include <uapi/rdma/rdma_netlink.h>
struct ib_device;
enum {
RDMA_NLDEV_ATTR_EMPTY_STRING = 1,
RDMA_NLDEV_ATTR_ENTRY_STRLEN = 16,
@ -110,6 +112,16 @@ int rdma_nl_multicast(struct net *net, struct sk_buff *skb,
*/
bool rdma_nl_chk_listeners(unsigned int group);
/**
 * rdma_nl_notify_event - Prepare and send an RDMA monitor event message
 * @ib: the IB device which triggered the event
 * @port_num: the port number which triggered the event - 0 if unused
 * @type: the event type
 *
 * Return: 0 on success or a negative error code
 */
int rdma_nl_notify_event(struct ib_device *ib, u32 port_num,
enum rdma_nl_notify_event_type type);
struct rdma_link_ops {
struct list_head list;
const char *type;

View File

@ -15,6 +15,7 @@ enum {
/* RDMA netlink multicast group identifiers. */
enum {
RDMA_NL_GROUP_IWPM = 2,
RDMA_NL_GROUP_LS,
/* Group on which RDMA monitor events (RDMA_NLDEV_CMD_MONITOR) are multicast */
RDMA_NL_GROUP_NOTIFY,
RDMA_NL_NUM_GROUPS
};
@ -305,6 +306,8 @@ enum rdma_nldev_command {
RDMA_NLDEV_CMD_DELDEV,
RDMA_NLDEV_CMD_MONITOR,
RDMA_NLDEV_NUM_OPS
};
@ -574,6 +577,8 @@ enum rdma_nldev_attr {
RDMA_NLDEV_ATTR_NAME_ASSIGN_TYPE, /* u8 */
RDMA_NLDEV_ATTR_EVENT_TYPE, /* u8 */
/*
* Always the end
*/
@ -624,4 +629,14 @@ enum rdma_nl_name_assign_type {
RDMA_NAME_ASSIGN_TYPE_USER = 1, /* Provided by user-space */
};
/*
 * Supported rdma monitoring event types.
 *
 * The value is carried to userspace in RDMA_NLDEV_ATTR_EVENT_TYPE (u8).
 * NOTE(review): as a UAPI enum the numeric values are presumably ABI -
 * append new types at the end only.
 */
enum rdma_nl_notify_event_type {
/* An IB device has been registered */
RDMA_REGISTER_EVENT,
/* An IB device is being unregistered */
RDMA_UNREGISTER_EVENT,
/* A net device has been associated with a port of an IB device */
RDMA_NETDEV_ATTACH_EVENT,
/* The net device association of a port has been removed */
RDMA_NETDEV_DETACH_EVENT,
};
#endif /* _UAPI_RDMA_NETLINK_H */