RDMA/counter: Add optional counter support

An optional counter is a driver-specific counter that may be dynamically
enabled/disabled.  This enhancement allows drivers to expose counters
which are, for example, mutually exclusive and cannot be enabled at the
same time, counters that might degrade performance, optional debug
counters, etc.

Optional counters are marked with the IB_STAT_FLAG_OPTIONAL flag. They are
not exported in sysfs, and must be placed at the end of all stats; otherwise
the attr->show() in sysfs would get wrong indexes for hwcounters that come
after optional counters.

Link: https://lore.kernel.org/r/20211008122439.166063-7-markzhang@nvidia.com
Signed-off-by: Aharon Landau <aharonl@nvidia.com>
Signed-off-by: Neta Ostrovsky <netao@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Mark Zhang <markzhang@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
This commit is contained in:
Aharon Landau 2021-10-08 15:24:32 +03:00 committed by Jason Gunthorpe
parent 0dc8968460
commit 5e2ddd1e59
5 changed files with 74 additions and 10 deletions

View File

@ -106,6 +106,38 @@ static int __rdma_counter_bind_qp(struct rdma_counter *counter,
return ret; return ret;
} }
/**
 * rdma_counter_modify - Enable or disable an optional hw counter
 * @dev: device the counter belongs to
 * @port: port number used to look up the hw stats
 * @index: index of the counter in the stats descriptor array
 * @enable: true to enable the counter, false to disable it
 *
 * Only counters flagged with IB_STAT_FLAG_OPTIONAL can be modified. A
 * request that matches the counter's current state is a successful no-op
 * and the driver is not called.
 *
 * Return: 0 on success, -EOPNOTSUPP if the driver does not implement
 * modify_hw_stat, -EINVAL if the port has no stats, @index is out of
 * range or the counter is not optional, otherwise the error returned by
 * the driver.
 */
int rdma_counter_modify(struct ib_device *dev, u32 port,
			unsigned int index, bool enable)
{
	struct rdma_hw_stats *stats;
	int err = 0;

	if (!dev->ops.modify_hw_stat)
		return -EOPNOTSUPP;

	stats = ib_get_hw_stats_port(dev, port);
	if (!stats)
		return -EINVAL;
	if (index >= stats->num_counters)
		return -EINVAL;
	if (!(stats->descs[index].flags & IB_STAT_FLAG_OPTIONAL))
		return -EINVAL;

	mutex_lock(&stats->lock);

	/*
	 * The bitmap tracks *disabled* counters, so work is needed only when
	 * the request equals the bit: enabling a disabled counter or
	 * disabling an enabled one.
	 */
	if (enable == test_bit(index, stats->is_disabled)) {
		err = dev->ops.modify_hw_stat(dev, port, index, enable);
		if (!err) {
			/* Record the new state only after the driver accepted it. */
			if (enable)
				clear_bit(index, stats->is_disabled);
			else
				set_bit(index, stats->is_disabled);
		}
	}

	mutex_unlock(&stats->lock);
	return err;
}
static struct rdma_counter *alloc_and_bind(struct ib_device *dev, u32 port, static struct rdma_counter *alloc_and_bind(struct ib_device *dev, u32 port,
struct ib_qp *qp, struct ib_qp *qp,
enum rdma_nl_counter_mode mode) enum rdma_nl_counter_mode mode)

View File

@ -2676,6 +2676,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
SET_DEVICE_OP(dev_ops, modify_cq); SET_DEVICE_OP(dev_ops, modify_cq);
SET_DEVICE_OP(dev_ops, modify_device); SET_DEVICE_OP(dev_ops, modify_device);
SET_DEVICE_OP(dev_ops, modify_flow_action_esp); SET_DEVICE_OP(dev_ops, modify_flow_action_esp);
SET_DEVICE_OP(dev_ops, modify_hw_stat);
SET_DEVICE_OP(dev_ops, modify_port); SET_DEVICE_OP(dev_ops, modify_port);
SET_DEVICE_OP(dev_ops, modify_qp); SET_DEVICE_OP(dev_ops, modify_qp);
SET_DEVICE_OP(dev_ops, modify_srq); SET_DEVICE_OP(dev_ops, modify_srq);

View File

@ -934,7 +934,8 @@ int ib_setup_device_attrs(struct ib_device *ibdev)
{ {
struct hw_stats_device_attribute *attr; struct hw_stats_device_attribute *attr;
struct hw_stats_device_data *data; struct hw_stats_device_data *data;
int i, ret; bool opstat_skipped = false;
int i, ret, pos = 0;
data = alloc_hw_stats_device(ibdev); data = alloc_hw_stats_device(ibdev);
if (IS_ERR(data)) { if (IS_ERR(data)) {
@ -955,16 +956,23 @@ int ib_setup_device_attrs(struct ib_device *ibdev)
data->stats->timestamp = jiffies; data->stats->timestamp = jiffies;
for (i = 0; i < data->stats->num_counters; i++) { for (i = 0; i < data->stats->num_counters; i++) {
attr = &data->attrs[i]; if (data->stats->descs[i].flags & IB_STAT_FLAG_OPTIONAL) {
opstat_skipped = true;
continue;
}
WARN_ON(opstat_skipped);
attr = &data->attrs[pos];
sysfs_attr_init(&attr->attr.attr); sysfs_attr_init(&attr->attr.attr);
attr->attr.attr.name = data->stats->descs[i].name; attr->attr.attr.name = data->stats->descs[i].name;
attr->attr.attr.mode = 0444; attr->attr.attr.mode = 0444;
attr->attr.show = hw_stat_device_show; attr->attr.show = hw_stat_device_show;
attr->show = show_hw_stats; attr->show = show_hw_stats;
data->group.attrs[i] = &attr->attr.attr; data->group.attrs[pos] = &attr->attr.attr;
pos++;
} }
attr = &data->attrs[i]; attr = &data->attrs[pos];
sysfs_attr_init(&attr->attr.attr); sysfs_attr_init(&attr->attr.attr);
attr->attr.attr.name = "lifespan"; attr->attr.attr.name = "lifespan";
attr->attr.attr.mode = 0644; attr->attr.attr.mode = 0644;
@ -972,7 +980,7 @@ int ib_setup_device_attrs(struct ib_device *ibdev)
attr->show = show_stats_lifespan; attr->show = show_stats_lifespan;
attr->attr.store = hw_stat_device_store; attr->attr.store = hw_stat_device_store;
attr->store = set_stats_lifespan; attr->store = set_stats_lifespan;
data->group.attrs[i] = &attr->attr.attr; data->group.attrs[pos] = &attr->attr.attr;
for (i = 0; i != ARRAY_SIZE(ibdev->groups); i++) for (i = 0; i != ARRAY_SIZE(ibdev->groups); i++)
if (!ibdev->groups[i]) { if (!ibdev->groups[i]) {
ibdev->groups[i] = &data->group; ibdev->groups[i] = &data->group;
@ -1027,7 +1035,8 @@ static int setup_hw_port_stats(struct ib_port *port,
{ {
struct hw_stats_port_attribute *attr; struct hw_stats_port_attribute *attr;
struct hw_stats_port_data *data; struct hw_stats_port_data *data;
int i, ret; bool opstat_skipped = false;
int i, ret, pos = 0;
data = alloc_hw_stats_port(port, group); data = alloc_hw_stats_port(port, group);
if (IS_ERR(data)) if (IS_ERR(data))
@ -1045,16 +1054,23 @@ static int setup_hw_port_stats(struct ib_port *port,
data->stats->timestamp = jiffies; data->stats->timestamp = jiffies;
for (i = 0; i < data->stats->num_counters; i++) { for (i = 0; i < data->stats->num_counters; i++) {
attr = &data->attrs[i]; if (data->stats->descs[i].flags & IB_STAT_FLAG_OPTIONAL) {
opstat_skipped = true;
continue;
}
WARN_ON(opstat_skipped);
attr = &data->attrs[pos];
sysfs_attr_init(&attr->attr.attr); sysfs_attr_init(&attr->attr.attr);
attr->attr.attr.name = data->stats->descs[i].name; attr->attr.attr.name = data->stats->descs[i].name;
attr->attr.attr.mode = 0444; attr->attr.attr.mode = 0444;
attr->attr.show = hw_stat_port_show; attr->attr.show = hw_stat_port_show;
attr->show = show_hw_stats; attr->show = show_hw_stats;
group->attrs[i] = &attr->attr.attr; group->attrs[pos] = &attr->attr.attr;
pos++;
} }
attr = &data->attrs[i]; attr = &data->attrs[pos];
sysfs_attr_init(&attr->attr.attr); sysfs_attr_init(&attr->attr.attr);
attr->attr.attr.name = "lifespan"; attr->attr.attr.name = "lifespan";
attr->attr.attr.mode = 0644; attr->attr.attr.mode = 0644;
@ -1062,7 +1078,7 @@ static int setup_hw_port_stats(struct ib_port *port,
attr->show = show_stats_lifespan; attr->show = show_stats_lifespan;
attr->attr.store = hw_stat_port_store; attr->attr.store = hw_stat_port_store;
attr->store = set_stats_lifespan; attr->store = set_stats_lifespan;
group->attrs[i] = &attr->attr.attr; group->attrs[pos] = &attr->attr.attr;
port->hw_stats_data = data; port->hw_stats_data = data;
return 0; return 0;

View File

@ -545,12 +545,18 @@ enum ib_port_speed {
IB_SPEED_NDR = 128, IB_SPEED_NDR = 128,
}; };
/**
 * enum ib_stat_flag - per-counter flags stored in rdma_stat_desc.flags
 * @IB_STAT_FLAG_OPTIONAL: the counter is optional; such counters are
 *	skipped when the sysfs hw stats attributes are set up.
 */
enum ib_stat_flag {
	IB_STAT_FLAG_OPTIONAL = (1 << 0),
};
/** /**
* struct rdma_stat_desc * struct rdma_stat_desc
* @name - The name of the counter * @name - The name of the counter
* @flags - Flags of the counter; For example, IB_STAT_FLAG_OPTIONAL
*/ */
struct rdma_stat_desc { struct rdma_stat_desc {
const char *name; const char *name;
unsigned int flags;
}; };
/** /**
@ -2562,6 +2568,13 @@ struct ib_device_ops {
int (*get_hw_stats)(struct ib_device *device, int (*get_hw_stats)(struct ib_device *device,
struct rdma_hw_stats *stats, u32 port, int index); struct rdma_hw_stats *stats, u32 port, int index);
/**
* modify_hw_stat - Modify the counter configuration
* @device: device whose counter is being modified
* @port: port number the counter belongs to
* @counter_index: index of the counter to modify
* @enable: true/false when enable/disable a counter
*
* Optional op; rdma_counter_modify() returns -EOPNOTSUPP when it is not
* set.
* Return codes - 0 on success or error code otherwise.
*/
int (*modify_hw_stat)(struct ib_device *device, u32 port,
unsigned int counter_index, bool enable);
/** /**
* Allows rdma drivers to add their own restrack attributes. * Allows rdma drivers to add their own restrack attributes.
*/ */

View File

@ -63,4 +63,6 @@ int rdma_counter_get_mode(struct ib_device *dev, u32 port,
enum rdma_nl_counter_mode *mode, enum rdma_nl_counter_mode *mode,
enum rdma_nl_counter_mask *mask); enum rdma_nl_counter_mask *mask);
/* Enable (@enable == true) or disable the counter at @index on @port of @dev. */
int rdma_counter_modify(struct ib_device *dev, u32 port,
unsigned int index, bool enable);
#endif /* _RDMA_COUNTER_H_ */ #endif /* _RDMA_COUNTER_H_ */