mirror of
https://github.com/torvalds/linux.git
synced 2024-11-28 23:21:31 +00:00
2b30f8291a
The MAC merge sublayer (IEEE 802.3-2018 clause 99) is one of 2 specifications (the other being Frame Preemption; IEEE 802.1Q-2018 clause 6.7.2), which work together to minimize latency caused by frame interference at TX. The overall goal of TSN is for normal traffic and traffic with a bounded deadline to be able to cohabitate on the same L2 network and not bother each other too much. The standards achieve this (partly) by introducing the concept of preemptible traffic, i.e. Ethernet frames that have a custom value for the Start-of-Frame-Delimiter (SFD), and these frames can be fragmented and reassembled at L2 on a link-local basis. The non-preemptible frames are called express traffic, they are transmitted using a normal SFD, and they can preempt preemptible frames, therefore having lower latency, which can matter at lower (100 Mbps) link speeds, or at high MTUs (jumbo frames around 9K). Preemption is not recursive, i.e. a P frame cannot preempt another P frame. Preemption also does not depend upon priority, or otherwise said, an E frame with prio 0 will still preempt a P frame with prio 7. In terms of implementation, the standards talk about the presence of an express MAC (eMAC) which handles express traffic, and a preemptible MAC (pMAC) which handles preemptible traffic, and these MACs are multiplexed on the same MII by a MAC merge layer. To support frame preemption, the definition of the SFD was generalized to SMD (Start-of-mPacket-Delimiter), where an mPacket is essentially an Ethernet frame fragment, or a complete frame. Stations unaware of an SMD value different from the standard SFD will treat P frames as error frames. To prevent that from happening, a negotiation process is defined. On RX, packets are dispatched to the eMAC or pMAC after being filtered by their SMD. On TX, the eMAC/pMAC classification decision is taken by the 802.1Q spec, based on packet priority (each of the 8 user priority values may have an admin-status of preemptible or express). 
The MAC Merge layer and the Frame Preemption parameters have some degree of independence in terms of how software stacks are supposed to deal with them. The activation of the MM layer is supposed to be controlled by an LLDP daemon (after it has been communicated that the link partner also supports it), after which a (hardware-based or not) verification handshake takes place, before actually enabling the feature. So the process is intended to be relatively plug-and-play. Whereas FP settings are supposed to be coordinated across a network using something approximating NETCONF. The support contained here is exclusively for the 802.3 (MAC Merge) portions and not for the 802.1Q (Frame Preemption) parts. This API is sufficient for an LLDP daemon to do its job. The FP adminStatus variable from 802.1Q is outside the scope of an LLDP daemon. I have taken a few creative licenses and augmented the Linux kernel UAPI compared to the standard managed objects recommended by IEEE 802.3. These are: - ETHTOOL_A_MM_PMAC_ENABLED: According to Figure 99-6: Receive Processing state diagram, a MAC Merge layer is always supposed to be able to receive P frames. However, this implies keeping the pMAC powered on, which will consume needless power in applications where FP will never be used. If LLDP is used, the reception of an Additional Ethernet Capabilities TLV from the link partner is sufficient indication that the pMAC should be enabled. So my proposal is that in Linux, we keep the pMAC turned off by default and that user space turns it on when needed. - ETHTOOL_A_MM_VERIFY_ENABLED: The IEEE managed object is called aMACMergeVerifyDisableTx. I opted for consistency (positive logic) in the boolean netlink attributes offered, so this is also positive here. Other than the meaning being reversed, they correspond to the same thing. 
- ETHTOOL_A_MM_MAX_VERIFY_TIME: I found it most reasonable for an LLDP daemon to maximize the verifyTime variable (delay between SMD-V transmissions), to maximize its chances that the LP replies. IEEE says that the verifyTime can range between 1 and 128 ms, but the NXP ENETC stupidly keeps this variable in a 7 bit register, so the maximum supported value is 127 ms. I could have chosen to hardcode this in the LLDP daemon to a lower value, but why not let the kernel expose its supported range directly. - ETHTOOL_A_MM_TX_MIN_FRAG_SIZE: the standard managed object is called aMACMergeAddFragSize, and expresses the "additional" fragment size (on top of ETH_ZLEN), whereas this expresses the absolute value of the fragment size. - ETHTOOL_A_MM_RX_MIN_FRAG_SIZE: there doesn't appear to exist a managed object mandated by the standard, but user space clearly needs to know what is the minimum supported fragment size of our local receiver, since LLDP must advertise a value no lower than that. Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Signed-off-by: David S. Miller <davem@davemloft.net>
256 lines
7.3 KiB
C
256 lines
7.3 KiB
C
// SPDX-License-Identifier: GPL-2.0-only
|
|
/*
|
|
* Copyright 2022-2023 NXP
|
|
*/
|
|
#include "common.h"
|
|
#include "netlink.h"
|
|
|
|
struct mm_req_info {
|
|
struct ethnl_req_info base;
|
|
};
|
|
|
|
struct mm_reply_data {
|
|
struct ethnl_reply_data base;
|
|
struct ethtool_mm_state state;
|
|
struct ethtool_mm_stats stats;
|
|
};
|
|
|
|
/* Convert a pointer to the embedded 'base' member into its mm_reply_data. */
#define MM_REPDATA(__base_ptr) \
	container_of(__base_ptr, struct mm_reply_data, base)
|
|
|
|
/*
 * Number of attribute IDs lying strictly between ETHTOOL_A_MM_STAT_PAD and
 * __ETHTOOL_A_MM_STAT_CNT; used to size the statistics nest of a reply.
 */
#define ETHTOOL_MM_STAT_CNT \
	(__ETHTOOL_A_MM_STAT_CNT - ETHTOOL_A_MM_STAT_PAD - 1)
|
|
|
|
const struct nla_policy ethnl_mm_get_policy[ETHTOOL_A_MM_HEADER + 1] = {
|
|
[ETHTOOL_A_MM_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy_stats),
|
|
};
|
|
|
|
static int mm_prepare_data(const struct ethnl_req_info *req_base,
|
|
struct ethnl_reply_data *reply_base,
|
|
struct genl_info *info)
|
|
{
|
|
struct mm_reply_data *data = MM_REPDATA(reply_base);
|
|
struct net_device *dev = reply_base->dev;
|
|
const struct ethtool_ops *ops;
|
|
int ret;
|
|
|
|
ops = dev->ethtool_ops;
|
|
|
|
if (!ops->get_mm)
|
|
return -EOPNOTSUPP;
|
|
|
|
ethtool_stats_init((u64 *)&data->stats,
|
|
sizeof(data->stats) / sizeof(u64));
|
|
|
|
ret = ethnl_ops_begin(dev);
|
|
if (ret < 0)
|
|
return ret;
|
|
|
|
ret = ops->get_mm(dev, &data->state);
|
|
if (ret)
|
|
goto out_complete;
|
|
|
|
if (ops->get_mm_stats && (req_base->flags & ETHTOOL_FLAG_STATS))
|
|
ops->get_mm_stats(dev, &data->stats);
|
|
|
|
out_complete:
|
|
ethnl_ops_complete(dev);
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int mm_reply_size(const struct ethnl_req_info *req_base,
|
|
const struct ethnl_reply_data *reply_base)
|
|
{
|
|
int len = 0;
|
|
|
|
len += nla_total_size(sizeof(u8)); /* _MM_PMAC_ENABLED */
|
|
len += nla_total_size(sizeof(u8)); /* _MM_TX_ENABLED */
|
|
len += nla_total_size(sizeof(u8)); /* _MM_TX_ACTIVE */
|
|
len += nla_total_size(sizeof(u8)); /* _MM_VERIFY_ENABLED */
|
|
len += nla_total_size(sizeof(u8)); /* _MM_VERIFY_STATUS */
|
|
len += nla_total_size(sizeof(u32)); /* _MM_VERIFY_TIME */
|
|
len += nla_total_size(sizeof(u32)); /* _MM_MAX_VERIFY_TIME */
|
|
len += nla_total_size(sizeof(u32)); /* _MM_TX_MIN_FRAG_SIZE */
|
|
len += nla_total_size(sizeof(u32)); /* _MM_RX_MIN_FRAG_SIZE */
|
|
|
|
if (req_base->flags & ETHTOOL_FLAG_STATS)
|
|
len += nla_total_size(0) + /* _MM_STATS */
|
|
nla_total_size_64bit(sizeof(u64)) * ETHTOOL_MM_STAT_CNT;
|
|
|
|
return len;
|
|
}
|
|
|
|
/*
 * Emit one 64-bit statistics attribute, unless the value was left at
 * ETHTOOL_STAT_NOT_SET, in which case nothing is written.
 *
 * Returns 0 on success (or when skipped), -EMSGSIZE if the put fails.
 */
static int mm_put_stat(struct sk_buff *skb, u64 val, u16 attrtype)
{
	int err;

	if (val == ETHTOOL_STAT_NOT_SET)
		return 0;

	err = nla_put_u64_64bit(skb, attrtype, val, ETHTOOL_A_MM_STAT_PAD);

	return err ? -EMSGSIZE : 0;
}
|
|
|
|
static int mm_put_stats(struct sk_buff *skb,
|
|
const struct ethtool_mm_stats *stats)
|
|
{
|
|
struct nlattr *nest;
|
|
|
|
nest = nla_nest_start(skb, ETHTOOL_A_MM_STATS);
|
|
if (!nest)
|
|
return -EMSGSIZE;
|
|
|
|
if (mm_put_stat(skb, stats->MACMergeFrameAssErrorCount,
|
|
ETHTOOL_A_MM_STAT_REASSEMBLY_ERRORS) ||
|
|
mm_put_stat(skb, stats->MACMergeFrameSmdErrorCount,
|
|
ETHTOOL_A_MM_STAT_SMD_ERRORS) ||
|
|
mm_put_stat(skb, stats->MACMergeFrameAssOkCount,
|
|
ETHTOOL_A_MM_STAT_REASSEMBLY_OK) ||
|
|
mm_put_stat(skb, stats->MACMergeFragCountRx,
|
|
ETHTOOL_A_MM_STAT_RX_FRAG_COUNT) ||
|
|
mm_put_stat(skb, stats->MACMergeFragCountTx,
|
|
ETHTOOL_A_MM_STAT_TX_FRAG_COUNT) ||
|
|
mm_put_stat(skb, stats->MACMergeHoldCount,
|
|
ETHTOOL_A_MM_STAT_HOLD_COUNT))
|
|
goto err_cancel;
|
|
|
|
nla_nest_end(skb, nest);
|
|
return 0;
|
|
|
|
err_cancel:
|
|
nla_nest_cancel(skb, nest);
|
|
return -EMSGSIZE;
|
|
}
|
|
|
|
static int mm_fill_reply(struct sk_buff *skb,
|
|
const struct ethnl_req_info *req_base,
|
|
const struct ethnl_reply_data *reply_base)
|
|
{
|
|
const struct mm_reply_data *data = MM_REPDATA(reply_base);
|
|
const struct ethtool_mm_state *state = &data->state;
|
|
|
|
if (nla_put_u8(skb, ETHTOOL_A_MM_TX_ENABLED, state->tx_enabled) ||
|
|
nla_put_u8(skb, ETHTOOL_A_MM_TX_ACTIVE, state->tx_active) ||
|
|
nla_put_u8(skb, ETHTOOL_A_MM_PMAC_ENABLED, state->pmac_enabled) ||
|
|
nla_put_u8(skb, ETHTOOL_A_MM_VERIFY_ENABLED, state->verify_enabled) ||
|
|
nla_put_u8(skb, ETHTOOL_A_MM_VERIFY_STATUS, state->verify_status) ||
|
|
nla_put_u32(skb, ETHTOOL_A_MM_VERIFY_TIME, state->verify_time) ||
|
|
nla_put_u32(skb, ETHTOOL_A_MM_MAX_VERIFY_TIME, state->max_verify_time) ||
|
|
nla_put_u32(skb, ETHTOOL_A_MM_TX_MIN_FRAG_SIZE, state->tx_min_frag_size) ||
|
|
nla_put_u32(skb, ETHTOOL_A_MM_RX_MIN_FRAG_SIZE, state->rx_min_frag_size))
|
|
return -EMSGSIZE;
|
|
|
|
if (req_base->flags & ETHTOOL_FLAG_STATS &&
|
|
mm_put_stats(skb, &data->stats))
|
|
return -EMSGSIZE;
|
|
|
|
return 0;
|
|
}
|
|
|
|
const struct ethnl_request_ops ethnl_mm_request_ops = {
|
|
.request_cmd = ETHTOOL_MSG_MM_GET,
|
|
.reply_cmd = ETHTOOL_MSG_MM_GET_REPLY,
|
|
.hdr_attr = ETHTOOL_A_MM_HEADER,
|
|
.req_info_size = sizeof(struct mm_req_info),
|
|
.reply_data_size = sizeof(struct mm_reply_data),
|
|
|
|
.prepare_data = mm_prepare_data,
|
|
.reply_size = mm_reply_size,
|
|
.fill_reply = mm_fill_reply,
|
|
};
|
|
|
|
const struct nla_policy ethnl_mm_set_policy[ETHTOOL_A_MM_MAX + 1] = {
|
|
[ETHTOOL_A_MM_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy),
|
|
[ETHTOOL_A_MM_VERIFY_ENABLED] = NLA_POLICY_MAX(NLA_U8, 1),
|
|
[ETHTOOL_A_MM_VERIFY_TIME] = NLA_POLICY_RANGE(NLA_U32, 1, 128),
|
|
[ETHTOOL_A_MM_TX_ENABLED] = NLA_POLICY_MAX(NLA_U8, 1),
|
|
[ETHTOOL_A_MM_PMAC_ENABLED] = NLA_POLICY_MAX(NLA_U8, 1),
|
|
[ETHTOOL_A_MM_TX_MIN_FRAG_SIZE] = NLA_POLICY_RANGE(NLA_U32, 60, 252),
|
|
};
|
|
|
|
static void mm_state_to_cfg(const struct ethtool_mm_state *state,
|
|
struct ethtool_mm_cfg *cfg)
|
|
{
|
|
/* We could also compare state->verify_status against
|
|
* ETHTOOL_MM_VERIFY_STATUS_DISABLED, but state->verify_enabled
|
|
* is more like an administrative state which should be seen in
|
|
* ETHTOOL_MSG_MM_GET replies. For example, a port with verification
|
|
* disabled might be in the ETHTOOL_MM_VERIFY_STATUS_INITIAL
|
|
* if it's down.
|
|
*/
|
|
cfg->verify_enabled = state->verify_enabled;
|
|
cfg->verify_time = state->verify_time;
|
|
cfg->tx_enabled = state->tx_enabled;
|
|
cfg->pmac_enabled = state->pmac_enabled;
|
|
cfg->tx_min_frag_size = state->tx_min_frag_size;
|
|
}
|
|
|
|
int ethnl_set_mm(struct sk_buff *skb, struct genl_info *info)
|
|
{
|
|
struct netlink_ext_ack *extack = info->extack;
|
|
struct ethnl_req_info req_info = {};
|
|
struct ethtool_mm_state state = {};
|
|
struct nlattr **tb = info->attrs;
|
|
struct ethtool_mm_cfg cfg = {};
|
|
const struct ethtool_ops *ops;
|
|
struct net_device *dev;
|
|
bool mod = false;
|
|
int ret;
|
|
|
|
ret = ethnl_parse_header_dev_get(&req_info, tb[ETHTOOL_A_MM_HEADER],
|
|
genl_info_net(info), extack, true);
|
|
if (ret)
|
|
return ret;
|
|
|
|
dev = req_info.dev;
|
|
ops = dev->ethtool_ops;
|
|
|
|
if (!ops->get_mm || !ops->set_mm) {
|
|
ret = -EOPNOTSUPP;
|
|
goto out_dev_put;
|
|
}
|
|
|
|
rtnl_lock();
|
|
ret = ethnl_ops_begin(dev);
|
|
if (ret < 0)
|
|
goto out_rtnl_unlock;
|
|
|
|
ret = ops->get_mm(dev, &state);
|
|
if (ret)
|
|
goto out_complete;
|
|
|
|
mm_state_to_cfg(&state, &cfg);
|
|
|
|
ethnl_update_bool(&cfg.verify_enabled, tb[ETHTOOL_A_MM_VERIFY_ENABLED],
|
|
&mod);
|
|
ethnl_update_u32(&cfg.verify_time, tb[ETHTOOL_A_MM_VERIFY_TIME], &mod);
|
|
ethnl_update_bool(&cfg.tx_enabled, tb[ETHTOOL_A_MM_TX_ENABLED], &mod);
|
|
ethnl_update_bool(&cfg.pmac_enabled, tb[ETHTOOL_A_MM_PMAC_ENABLED],
|
|
&mod);
|
|
ethnl_update_u32(&cfg.tx_min_frag_size,
|
|
tb[ETHTOOL_A_MM_TX_MIN_FRAG_SIZE], &mod);
|
|
|
|
if (!mod)
|
|
goto out_complete;
|
|
|
|
if (cfg.verify_time > state.max_verify_time) {
|
|
NL_SET_ERR_MSG_ATTR(extack, tb[ETHTOOL_A_MM_VERIFY_TIME],
|
|
"verifyTime exceeds device maximum");
|
|
ret = -ERANGE;
|
|
goto out_complete;
|
|
}
|
|
|
|
ret = ops->set_mm(dev, &cfg, extack);
|
|
if (ret)
|
|
goto out_complete;
|
|
|
|
ethtool_notify(dev, ETHTOOL_MSG_MM_NTF, NULL);
|
|
|
|
out_complete:
|
|
ethnl_ops_complete(dev);
|
|
out_rtnl_unlock:
|
|
rtnl_unlock();
|
|
out_dev_put:
|
|
ethnl_parse_header_dev_put(&req_info);
|
|
return ret;
|
|
}
|