2019-11-14 15:03:27 +00:00
|
|
|
/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */
|
|
|
|
/* Copyright (c) 2017 Microsemi Corporation
|
|
|
|
*/
|
|
|
|
|
|
|
|
#ifndef _SOC_MSCC_OCELOT_H
|
|
|
|
#define _SOC_MSCC_OCELOT_H
|
|
|
|
|
|
|
|
#include <linux/ptp_clock_kernel.h>
|
|
|
|
#include <linux/net_tstamp.h>
|
|
|
|
#include <linux/if_vlan.h>
|
|
|
|
#include <linux/regmap.h>
|
|
|
|
#include <net/dsa.h>
|
|
|
|
|
net: dsa: felix: Allow unknown unicast traffic towards the CPU port module
Compared to other DSA switches, in the Ocelot cores, the RX filtering is
a much more important concern.
Firstly, the primary use case for Ocelot is non-DSA, so there isn't any
secondary Ethernet MAC [the DSA master's one] to implicitly drop frames
having a DMAC we are not interested in. So the switch driver itself
needs to install FDB entries towards the CPU port module (PGID_CPU) for
the MAC address of each switch port, in each VLAN installed on the port.
Every address that is not whitelisted is implicitly dropped. This is in
order to achieve a behavior similar to N standalone net devices.
Secondly, even in the secondary use case of DSA, such as illustrated by
Felix with the NPI port mode, that secondary Ethernet MAC is present,
but its RX filter is bypassed. This is because the DSA tags themselves
are placed before Ethernet, so the DMAC that the switch ports see is
not seen by the DSA master too (since it's shifter to the right).
So RX filtering is pretty important. A good RX filter won't bother the
CPU in case the switch port receives a frame that it's not interested
in, and there exists no other line of defense.
Ocelot is pretty strict when it comes to RX filtering: non-IP multicast
and broadcast traffic is allowed to go to the CPU port module, but
unknown unicast isn't. This means that traffic reception for any other
MAC addresses than the ones configured on each switch port net device
won't work. This includes use cases such as macvlan or bridging with a
non-Ocelot (so-called "foreign") interface. But this seems to be fine
for the scenarios that the Linux system embedded inside an Ocelot switch
is intended for - it is simply not interested in unknown unicast
traffic, as explained in Allan Nielsen's presentation [0].
On the other hand, the Felix DSA switch is integrated in more
general-purpose Linux systems, so it can't afford to drop that sort of
traffic in hardware, even if it will end up doing so later, in software.
Actually, unknown unicast means more for Felix than it does for Ocelot.
Felix doesn't attempt to perform the whitelisting of switch port MAC
addresses towards PGID_CPU at all, mainly because it is too complicated
to be feasible: while the MAC addresses are unique in Ocelot, by default
in DSA all ports are equal and inherited from the DSA master. This adds
into account the question of reference counting MAC addresses (delayed
ocelot_mact_forget), not to mention reference counting for the VLAN IDs
that those MAC addresses are installed in. This reference counting
should be done in the DSA core, and the fact that it wasn't needed so
far is due to the fact that the other DSA switches don't have the DSA
tag placed before Ethernet, so the DSA master is able to whitelist the
MAC addresses in hardware.
So this means that even regular traffic termination on a Felix switch
port happens through flooding (because neither Felix nor Ocelot learn
source MAC addresses from CPU-injected frames).
So far we've explained that whitelisting towards PGID_CPU:
- helps to reduce the likelihood of spamming the CPU with frames it
won't process very far anyway
- is implemented in the ocelot driver
- is sufficient for the ocelot use cases
- is not feasible in DSA
- breaks use cases in DSA, in the current status (whitelisting enabled
but no MAC address whitelisted)
So the proposed patch allows unknown unicast frames to be sent to the
CPU port module. This is done for the Felix DSA driver only, as Ocelot
seems to be happy without it.
[0]: https://www.youtube.com/watch?v=B1HhxEcU7Jg
Suggested-by: Allan W. Nielsen <allan.nielsen@microchip.com>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Allan W. Nielsen <allan.nielsen@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-02-29 14:50:03 +00:00
|
|
|
/* Port Group IDs (PGID) are masks of destination ports.
|
|
|
|
*
|
|
|
|
* For L2 forwarding, the switch performs 3 lookups in the PGID table for each
|
|
|
|
* frame, and forwards the frame to the ports that are present in the logical
|
|
|
|
* AND of all 3 PGIDs.
|
|
|
|
*
|
|
|
|
* These PGID lookups are:
|
|
|
|
* - In one of PGID[0-63]: for the destination masks. There are 2 paths by
|
|
|
|
* which the switch selects a destination PGID:
|
|
|
|
* - The {DMAC, VID} is present in the MAC table. In that case, the
|
|
|
|
* destination PGID is given by the DEST_IDX field of the MAC table entry
|
|
|
|
* that matched.
|
|
|
|
* - The {DMAC, VID} is not present in the MAC table (it is unknown). The
|
|
|
|
* frame is disseminated as being either unicast, multicast or broadcast,
|
|
|
|
* and according to that, the destination PGID is chosen as being the
|
|
|
|
* value contained by ANA_FLOODING_FLD_UNICAST,
|
|
|
|
* ANA_FLOODING_FLD_MULTICAST or ANA_FLOODING_FLD_BROADCAST.
|
|
|
|
* The destination PGID can be an unicast set: the first PGIDs, 0 to
|
|
|
|
* ocelot->num_phys_ports - 1, or a multicast set: the PGIDs from
|
|
|
|
* ocelot->num_phys_ports to 63. By convention, a unicast PGID corresponds to
|
|
|
|
* a physical port and has a single bit set in the destination ports mask:
|
|
|
|
* that corresponding to the port number itself. In contrast, a multicast
|
|
|
|
* PGID will have potentially more than one single bit set in the destination
|
|
|
|
* ports mask.
|
|
|
|
* - In one of PGID[64-79]: for the aggregation mask. The switch classifier
|
|
|
|
* dissects each frame and generates a 4-bit Link Aggregation Code which is
|
|
|
|
* used for this second PGID table lookup. The goal of link aggregation is to
|
|
|
|
* hash multiple flows within the same LAG on to different destination ports.
|
|
|
|
* The first lookup will result in a PGID with all the LAG members present in
|
|
|
|
* the destination ports mask, and the second lookup, by Link Aggregation
|
|
|
|
* Code, will ensure that each flow gets forwarded only to a single port out
|
|
|
|
* of that mask (there are no duplicates).
|
|
|
|
* - In one of PGID[80-90]: for the source mask. The third time, the PGID table
|
|
|
|
* is indexed with the ingress port (plus 80). These PGIDs answer the
|
|
|
|
* question "is port i allowed to forward traffic to port j?" If yes, then
|
|
|
|
* BIT(j) of PGID 80+i will be found set. The third PGID lookup can be used
|
|
|
|
* to enforce the L2 forwarding matrix imposed by e.g. a Linux bridge.
|
|
|
|
*/
|
|
|
|
|
|
|
|
/* Reserve some destination PGIDs at the end of the range:
|
2021-03-16 20:10:17 +00:00
|
|
|
* PGID_BLACKHOLE: used for not forwarding the frames
|
net: dsa: felix: Allow unknown unicast traffic towards the CPU port module
Compared to other DSA switches, in the Ocelot cores, the RX filtering is
a much more important concern.
Firstly, the primary use case for Ocelot is non-DSA, so there isn't any
secondary Ethernet MAC [the DSA master's one] to implicitly drop frames
having a DMAC we are not interested in. So the switch driver itself
needs to install FDB entries towards the CPU port module (PGID_CPU) for
the MAC address of each switch port, in each VLAN installed on the port.
Every address that is not whitelisted is implicitly dropped. This is in
order to achieve a behavior similar to N standalone net devices.
Secondly, even in the secondary use case of DSA, such as illustrated by
Felix with the NPI port mode, that secondary Ethernet MAC is present,
but its RX filter is bypassed. This is because the DSA tags themselves
are placed before Ethernet, so the DMAC that the switch ports see is
not seen by the DSA master too (since it's shifter to the right).
So RX filtering is pretty important. A good RX filter won't bother the
CPU in case the switch port receives a frame that it's not interested
in, and there exists no other line of defense.
Ocelot is pretty strict when it comes to RX filtering: non-IP multicast
and broadcast traffic is allowed to go to the CPU port module, but
unknown unicast isn't. This means that traffic reception for any other
MAC addresses than the ones configured on each switch port net device
won't work. This includes use cases such as macvlan or bridging with a
non-Ocelot (so-called "foreign") interface. But this seems to be fine
for the scenarios that the Linux system embedded inside an Ocelot switch
is intended for - it is simply not interested in unknown unicast
traffic, as explained in Allan Nielsen's presentation [0].
On the other hand, the Felix DSA switch is integrated in more
general-purpose Linux systems, so it can't afford to drop that sort of
traffic in hardware, even if it will end up doing so later, in software.
Actually, unknown unicast means more for Felix than it does for Ocelot.
Felix doesn't attempt to perform the whitelisting of switch port MAC
addresses towards PGID_CPU at all, mainly because it is too complicated
to be feasible: while the MAC addresses are unique in Ocelot, by default
in DSA all ports are equal and inherited from the DSA master. This adds
into account the question of reference counting MAC addresses (delayed
ocelot_mact_forget), not to mention reference counting for the VLAN IDs
that those MAC addresses are installed in. This reference counting
should be done in the DSA core, and the fact that it wasn't needed so
far is due to the fact that the other DSA switches don't have the DSA
tag placed before Ethernet, so the DSA master is able to whitelist the
MAC addresses in hardware.
So this means that even regular traffic termination on a Felix switch
port happens through flooding (because neither Felix nor Ocelot learn
source MAC addresses from CPU-injected frames).
So far we've explained that whitelisting towards PGID_CPU:
- helps to reduce the likelihood of spamming the CPU with frames it
won't process very far anyway
- is implemented in the ocelot driver
- is sufficient for the ocelot use cases
- is not feasible in DSA
- breaks use cases in DSA, in the current status (whitelisting enabled
but no MAC address whitelisted)
So the proposed patch allows unknown unicast frames to be sent to the
CPU port module. This is done for the Felix DSA driver only, as Ocelot
seems to be happy without it.
[0]: https://www.youtube.com/watch?v=B1HhxEcU7Jg
Suggested-by: Allan W. Nielsen <allan.nielsen@microchip.com>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Allan W. Nielsen <allan.nielsen@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-02-29 14:50:03 +00:00
|
|
|
* PGID_CPU: used for whitelisting certain MAC addresses, such as the addresses
|
|
|
|
* of the switch port net devices, towards the CPU port module.
|
|
|
|
* PGID_UC: the flooding destinations for unknown unicast traffic.
|
2021-02-12 15:15:58 +00:00
|
|
|
* PGID_MC: the flooding destinations for non-IP multicast traffic.
|
net: dsa: felix: Allow unknown unicast traffic towards the CPU port module
Compared to other DSA switches, in the Ocelot cores, the RX filtering is
a much more important concern.
Firstly, the primary use case for Ocelot is non-DSA, so there isn't any
secondary Ethernet MAC [the DSA master's one] to implicitly drop frames
having a DMAC we are not interested in. So the switch driver itself
needs to install FDB entries towards the CPU port module (PGID_CPU) for
the MAC address of each switch port, in each VLAN installed on the port.
Every address that is not whitelisted is implicitly dropped. This is in
order to achieve a behavior similar to N standalone net devices.
Secondly, even in the secondary use case of DSA, such as illustrated by
Felix with the NPI port mode, that secondary Ethernet MAC is present,
but its RX filter is bypassed. This is because the DSA tags themselves
are placed before Ethernet, so the DMAC that the switch ports see is
not seen by the DSA master too (since it's shifter to the right).
So RX filtering is pretty important. A good RX filter won't bother the
CPU in case the switch port receives a frame that it's not interested
in, and there exists no other line of defense.
Ocelot is pretty strict when it comes to RX filtering: non-IP multicast
and broadcast traffic is allowed to go to the CPU port module, but
unknown unicast isn't. This means that traffic reception for any other
MAC addresses than the ones configured on each switch port net device
won't work. This includes use cases such as macvlan or bridging with a
non-Ocelot (so-called "foreign") interface. But this seems to be fine
for the scenarios that the Linux system embedded inside an Ocelot switch
is intended for - it is simply not interested in unknown unicast
traffic, as explained in Allan Nielsen's presentation [0].
On the other hand, the Felix DSA switch is integrated in more
general-purpose Linux systems, so it can't afford to drop that sort of
traffic in hardware, even if it will end up doing so later, in software.
Actually, unknown unicast means more for Felix than it does for Ocelot.
Felix doesn't attempt to perform the whitelisting of switch port MAC
addresses towards PGID_CPU at all, mainly because it is too complicated
to be feasible: while the MAC addresses are unique in Ocelot, by default
in DSA all ports are equal and inherited from the DSA master. This adds
into account the question of reference counting MAC addresses (delayed
ocelot_mact_forget), not to mention reference counting for the VLAN IDs
that those MAC addresses are installed in. This reference counting
should be done in the DSA core, and the fact that it wasn't needed so
far is due to the fact that the other DSA switches don't have the DSA
tag placed before Ethernet, so the DSA master is able to whitelist the
MAC addresses in hardware.
So this means that even regular traffic termination on a Felix switch
port happens through flooding (because neither Felix nor Ocelot learn
source MAC addresses from CPU-injected frames).
So far we've explained that whitelisting towards PGID_CPU:
- helps to reduce the likelihood of spamming the CPU with frames it
won't process very far anyway
- is implemented in the ocelot driver
- is sufficient for the ocelot use cases
- is not feasible in DSA
- breaks use cases in DSA, in the current status (whitelisting enabled
but no MAC address whitelisted)
So the proposed patch allows unknown unicast frames to be sent to the
CPU port module. This is done for the Felix DSA driver only, as Ocelot
seems to be happy without it.
[0]: https://www.youtube.com/watch?v=B1HhxEcU7Jg
Suggested-by: Allan W. Nielsen <allan.nielsen@microchip.com>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Allan W. Nielsen <allan.nielsen@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-02-29 14:50:03 +00:00
|
|
|
* PGID_MCIPV4: the flooding destinations for IPv4 multicast traffic.
|
|
|
|
* PGID_MCIPV6: the flooding destinations for IPv6 multicast traffic.
|
2021-02-12 15:15:58 +00:00
|
|
|
* PGID_BC: the flooding destinations for broadcast traffic.
|
net: dsa: felix: Allow unknown unicast traffic towards the CPU port module
Compared to other DSA switches, in the Ocelot cores, the RX filtering is
a much more important concern.
Firstly, the primary use case for Ocelot is non-DSA, so there isn't any
secondary Ethernet MAC [the DSA master's one] to implicitly drop frames
having a DMAC we are not interested in. So the switch driver itself
needs to install FDB entries towards the CPU port module (PGID_CPU) for
the MAC address of each switch port, in each VLAN installed on the port.
Every address that is not whitelisted is implicitly dropped. This is in
order to achieve a behavior similar to N standalone net devices.
Secondly, even in the secondary use case of DSA, such as illustrated by
Felix with the NPI port mode, that secondary Ethernet MAC is present,
but its RX filter is bypassed. This is because the DSA tags themselves
are placed before Ethernet, so the DMAC that the switch ports see is
not seen by the DSA master too (since it's shifter to the right).
So RX filtering is pretty important. A good RX filter won't bother the
CPU in case the switch port receives a frame that it's not interested
in, and there exists no other line of defense.
Ocelot is pretty strict when it comes to RX filtering: non-IP multicast
and broadcast traffic is allowed to go to the CPU port module, but
unknown unicast isn't. This means that traffic reception for any other
MAC addresses than the ones configured on each switch port net device
won't work. This includes use cases such as macvlan or bridging with a
non-Ocelot (so-called "foreign") interface. But this seems to be fine
for the scenarios that the Linux system embedded inside an Ocelot switch
is intended for - it is simply not interested in unknown unicast
traffic, as explained in Allan Nielsen's presentation [0].
On the other hand, the Felix DSA switch is integrated in more
general-purpose Linux systems, so it can't afford to drop that sort of
traffic in hardware, even if it will end up doing so later, in software.
Actually, unknown unicast means more for Felix than it does for Ocelot.
Felix doesn't attempt to perform the whitelisting of switch port MAC
addresses towards PGID_CPU at all, mainly because it is too complicated
to be feasible: while the MAC addresses are unique in Ocelot, by default
in DSA all ports are equal and inherited from the DSA master. This adds
into account the question of reference counting MAC addresses (delayed
ocelot_mact_forget), not to mention reference counting for the VLAN IDs
that those MAC addresses are installed in. This reference counting
should be done in the DSA core, and the fact that it wasn't needed so
far is due to the fact that the other DSA switches don't have the DSA
tag placed before Ethernet, so the DSA master is able to whitelist the
MAC addresses in hardware.
So this means that even regular traffic termination on a Felix switch
port happens through flooding (because neither Felix nor Ocelot learn
source MAC addresses from CPU-injected frames).
So far we've explained that whitelisting towards PGID_CPU:
- helps to reduce the likelihood of spamming the CPU with frames it
won't process very far anyway
- is implemented in the ocelot driver
- is sufficient for the ocelot use cases
- is not feasible in DSA
- breaks use cases in DSA, in the current status (whitelisting enabled
but no MAC address whitelisted)
So the proposed patch allows unknown unicast frames to be sent to the
CPU port module. This is done for the Felix DSA driver only, as Ocelot
seems to be happy without it.
[0]: https://www.youtube.com/watch?v=B1HhxEcU7Jg
Suggested-by: Allan W. Nielsen <allan.nielsen@microchip.com>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Allan W. Nielsen <allan.nielsen@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-02-29 14:50:03 +00:00
|
|
|
*/
|
2021-03-16 20:10:17 +00:00
|
|
|
#define PGID_BLACKHOLE 57
|
2021-02-12 15:15:58 +00:00
|
|
|
#define PGID_CPU 58
|
|
|
|
#define PGID_UC 59
|
|
|
|
#define PGID_MC 60
|
|
|
|
#define PGID_MCIPV4 61
|
|
|
|
#define PGID_MCIPV6 62
|
|
|
|
#define PGID_BC 63
|
net: dsa: felix: Allow unknown unicast traffic towards the CPU port module
Compared to other DSA switches, in the Ocelot cores, the RX filtering is
a much more important concern.
Firstly, the primary use case for Ocelot is non-DSA, so there isn't any
secondary Ethernet MAC [the DSA master's one] to implicitly drop frames
having a DMAC we are not interested in. So the switch driver itself
needs to install FDB entries towards the CPU port module (PGID_CPU) for
the MAC address of each switch port, in each VLAN installed on the port.
Every address that is not whitelisted is implicitly dropped. This is in
order to achieve a behavior similar to N standalone net devices.
Secondly, even in the secondary use case of DSA, such as illustrated by
Felix with the NPI port mode, that secondary Ethernet MAC is present,
but its RX filter is bypassed. This is because the DSA tags themselves
are placed before Ethernet, so the DMAC that the switch ports see is
not seen by the DSA master too (since it's shifter to the right).
So RX filtering is pretty important. A good RX filter won't bother the
CPU in case the switch port receives a frame that it's not interested
in, and there exists no other line of defense.
Ocelot is pretty strict when it comes to RX filtering: non-IP multicast
and broadcast traffic is allowed to go to the CPU port module, but
unknown unicast isn't. This means that traffic reception for any other
MAC addresses than the ones configured on each switch port net device
won't work. This includes use cases such as macvlan or bridging with a
non-Ocelot (so-called "foreign") interface. But this seems to be fine
for the scenarios that the Linux system embedded inside an Ocelot switch
is intended for - it is simply not interested in unknown unicast
traffic, as explained in Allan Nielsen's presentation [0].
On the other hand, the Felix DSA switch is integrated in more
general-purpose Linux systems, so it can't afford to drop that sort of
traffic in hardware, even if it will end up doing so later, in software.
Actually, unknown unicast means more for Felix than it does for Ocelot.
Felix doesn't attempt to perform the whitelisting of switch port MAC
addresses towards PGID_CPU at all, mainly because it is too complicated
to be feasible: while the MAC addresses are unique in Ocelot, by default
in DSA all ports are equal and inherited from the DSA master. This adds
into account the question of reference counting MAC addresses (delayed
ocelot_mact_forget), not to mention reference counting for the VLAN IDs
that those MAC addresses are installed in. This reference counting
should be done in the DSA core, and the fact that it wasn't needed so
far is due to the fact that the other DSA switches don't have the DSA
tag placed before Ethernet, so the DSA master is able to whitelist the
MAC addresses in hardware.
So this means that even regular traffic termination on a Felix switch
port happens through flooding (because neither Felix nor Ocelot learn
source MAC addresses from CPU-injected frames).
So far we've explained that whitelisting towards PGID_CPU:
- helps to reduce the likelihood of spamming the CPU with frames it
won't process very far anyway
- is implemented in the ocelot driver
- is sufficient for the ocelot use cases
- is not feasible in DSA
- breaks use cases in DSA, in the current status (whitelisting enabled
but no MAC address whitelisted)
So the proposed patch allows unknown unicast frames to be sent to the
CPU port module. This is done for the Felix DSA driver only, as Ocelot
seems to be happy without it.
[0]: https://www.youtube.com/watch?v=B1HhxEcU7Jg
Suggested-by: Allan W. Nielsen <allan.nielsen@microchip.com>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Allan W. Nielsen <allan.nielsen@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-02-29 14:50:03 +00:00
|
|
|
|
2020-06-21 11:46:02 +00:00
|
|
|
#define for_each_unicast_dest_pgid(ocelot, pgid) \
|
|
|
|
for ((pgid) = 0; \
|
|
|
|
(pgid) < (ocelot)->num_phys_ports; \
|
|
|
|
(pgid)++)
|
|
|
|
|
|
|
|
#define for_each_nonreserved_multicast_dest_pgid(ocelot, pgid) \
|
|
|
|
for ((pgid) = (ocelot)->num_phys_ports + 1; \
|
2021-03-16 20:10:17 +00:00
|
|
|
(pgid) < PGID_BLACKHOLE; \
|
2020-06-21 11:46:02 +00:00
|
|
|
(pgid)++)
|
|
|
|
|
|
|
|
#define for_each_aggr_pgid(ocelot, pgid) \
|
|
|
|
for ((pgid) = PGID_AGGR; \
|
|
|
|
(pgid) < PGID_SRC; \
|
|
|
|
(pgid)++)
|
|
|
|
|
net: dsa: felix: Allow unknown unicast traffic towards the CPU port module
Compared to other DSA switches, in the Ocelot cores, the RX filtering is
a much more important concern.
Firstly, the primary use case for Ocelot is non-DSA, so there isn't any
secondary Ethernet MAC [the DSA master's one] to implicitly drop frames
having a DMAC we are not interested in. So the switch driver itself
needs to install FDB entries towards the CPU port module (PGID_CPU) for
the MAC address of each switch port, in each VLAN installed on the port.
Every address that is not whitelisted is implicitly dropped. This is in
order to achieve a behavior similar to N standalone net devices.
Secondly, even in the secondary use case of DSA, such as illustrated by
Felix with the NPI port mode, that secondary Ethernet MAC is present,
but its RX filter is bypassed. This is because the DSA tags themselves
are placed before Ethernet, so the DMAC that the switch ports see is
not seen by the DSA master too (since it's shifter to the right).
So RX filtering is pretty important. A good RX filter won't bother the
CPU in case the switch port receives a frame that it's not interested
in, and there exists no other line of defense.
Ocelot is pretty strict when it comes to RX filtering: non-IP multicast
and broadcast traffic is allowed to go to the CPU port module, but
unknown unicast isn't. This means that traffic reception for any other
MAC addresses than the ones configured on each switch port net device
won't work. This includes use cases such as macvlan or bridging with a
non-Ocelot (so-called "foreign") interface. But this seems to be fine
for the scenarios that the Linux system embedded inside an Ocelot switch
is intended for - it is simply not interested in unknown unicast
traffic, as explained in Allan Nielsen's presentation [0].
On the other hand, the Felix DSA switch is integrated in more
general-purpose Linux systems, so it can't afford to drop that sort of
traffic in hardware, even if it will end up doing so later, in software.
Actually, unknown unicast means more for Felix than it does for Ocelot.
Felix doesn't attempt to perform the whitelisting of switch port MAC
addresses towards PGID_CPU at all, mainly because it is too complicated
to be feasible: while the MAC addresses are unique in Ocelot, by default
in DSA all ports are equal and inherited from the DSA master. This adds
into account the question of reference counting MAC addresses (delayed
ocelot_mact_forget), not to mention reference counting for the VLAN IDs
that those MAC addresses are installed in. This reference counting
should be done in the DSA core, and the fact that it wasn't needed so
far is due to the fact that the other DSA switches don't have the DSA
tag placed before Ethernet, so the DSA master is able to whitelist the
MAC addresses in hardware.
So this means that even regular traffic termination on a Felix switch
port happens through flooding (because neither Felix nor Ocelot learn
source MAC addresses from CPU-injected frames).
So far we've explained that whitelisting towards PGID_CPU:
- helps to reduce the likelihood of spamming the CPU with frames it
won't process very far anyway
- is implemented in the ocelot driver
- is sufficient for the ocelot use cases
- is not feasible in DSA
- breaks use cases in DSA, in the current status (whitelisting enabled
but no MAC address whitelisted)
So the proposed patch allows unknown unicast frames to be sent to the
CPU port module. This is done for the Felix DSA driver only, as Ocelot
seems to be happy without it.
[0]: https://www.youtube.com/watch?v=B1HhxEcU7Jg
Suggested-by: Allan W. Nielsen <allan.nielsen@microchip.com>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Allan W. Nielsen <allan.nielsen@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-02-29 14:50:03 +00:00
|
|
|
/* Aggregation PGIDs, one per Link Aggregation Code */
|
|
|
|
#define PGID_AGGR 64
|
|
|
|
|
|
|
|
/* Source PGIDs, one per physical port */
|
|
|
|
#define PGID_SRC 80
|
|
|
|
|
2021-01-15 02:11:16 +00:00
|
|
|
#define OCELOT_NUM_TC 8
|
2019-11-14 15:03:27 +00:00
|
|
|
|
|
|
|
#define OCELOT_SPEED_2500 0
|
|
|
|
#define OCELOT_SPEED_1000 1
|
|
|
|
#define OCELOT_SPEED_100 2
|
|
|
|
#define OCELOT_SPEED_10 3
|
|
|
|
|
2020-04-20 02:46:49 +00:00
|
|
|
#define OCELOT_PTP_PINS_NUM 4
|
|
|
|
|
2019-11-14 15:03:27 +00:00
|
|
|
#define TARGET_OFFSET 24
|
|
|
|
#define REG_MASK GENMASK(TARGET_OFFSET - 1, 0)
|
|
|
|
#define REG(reg, offset) [reg & REG_MASK] = offset
|
|
|
|
|
|
|
|
#define REG_RESERVED_ADDR 0xffffffff
|
|
|
|
#define REG_RESERVED(reg) REG(reg, REG_RESERVED_ADDR)
|
|
|
|
|
2022-04-29 21:30:36 +00:00
|
|
|
#define for_each_stat(ocelot, stat) \
|
2022-04-30 23:23:27 +00:00
|
|
|
for ((stat) = (ocelot)->stats_layout; \
|
2022-04-30 23:23:26 +00:00
|
|
|
((stat)->name[0] != '\0'); \
|
2022-04-29 21:30:36 +00:00
|
|
|
(stat)++)
|
|
|
|
|
2019-11-14 15:03:27 +00:00
|
|
|
enum ocelot_target {
|
|
|
|
ANA = 1,
|
|
|
|
QS,
|
|
|
|
QSYS,
|
|
|
|
REW,
|
|
|
|
SYS,
|
2020-09-29 22:27:25 +00:00
|
|
|
S0,
|
2020-09-29 22:27:24 +00:00
|
|
|
S1,
|
2019-11-14 15:03:27 +00:00
|
|
|
S2,
|
|
|
|
HSIO,
|
|
|
|
PTP,
|
2021-12-09 15:49:11 +00:00
|
|
|
FDMA,
|
2019-11-14 15:03:27 +00:00
|
|
|
GCB,
|
2020-07-13 16:57:01 +00:00
|
|
|
DEV_GMII,
|
2019-11-14 15:03:27 +00:00
|
|
|
TARGET_MAX,
|
|
|
|
};
|
|
|
|
|
|
|
|
enum ocelot_reg {
|
|
|
|
ANA_ADVLEARN = ANA << TARGET_OFFSET,
|
|
|
|
ANA_VLANMASK,
|
|
|
|
ANA_PORT_B_DOMAIN,
|
|
|
|
ANA_ANAGEFIL,
|
|
|
|
ANA_ANEVENTS,
|
|
|
|
ANA_STORMLIMIT_BURST,
|
|
|
|
ANA_STORMLIMIT_CFG,
|
|
|
|
ANA_ISOLATED_PORTS,
|
|
|
|
ANA_COMMUNITY_PORTS,
|
|
|
|
ANA_AUTOAGE,
|
|
|
|
ANA_MACTOPTIONS,
|
|
|
|
ANA_LEARNDISC,
|
|
|
|
ANA_AGENCTRL,
|
|
|
|
ANA_MIRRORPORTS,
|
|
|
|
ANA_EMIRRORPORTS,
|
|
|
|
ANA_FLOODING,
|
|
|
|
ANA_FLOODING_IPMC,
|
|
|
|
ANA_SFLOW_CFG,
|
|
|
|
ANA_PORT_MODE,
|
|
|
|
ANA_CUT_THRU_CFG,
|
|
|
|
ANA_PGID_PGID,
|
|
|
|
ANA_TABLES_ANMOVED,
|
|
|
|
ANA_TABLES_MACHDATA,
|
|
|
|
ANA_TABLES_MACLDATA,
|
|
|
|
ANA_TABLES_STREAMDATA,
|
|
|
|
ANA_TABLES_MACACCESS,
|
|
|
|
ANA_TABLES_MACTINDX,
|
|
|
|
ANA_TABLES_VLANACCESS,
|
|
|
|
ANA_TABLES_VLANTIDX,
|
|
|
|
ANA_TABLES_ISDXACCESS,
|
|
|
|
ANA_TABLES_ISDXTIDX,
|
|
|
|
ANA_TABLES_ENTRYLIM,
|
|
|
|
ANA_TABLES_PTP_ID_HIGH,
|
|
|
|
ANA_TABLES_PTP_ID_LOW,
|
|
|
|
ANA_TABLES_STREAMACCESS,
|
|
|
|
ANA_TABLES_STREAMTIDX,
|
|
|
|
ANA_TABLES_SEQ_HISTORY,
|
|
|
|
ANA_TABLES_SEQ_MASK,
|
|
|
|
ANA_TABLES_SFID_MASK,
|
|
|
|
ANA_TABLES_SFIDACCESS,
|
|
|
|
ANA_TABLES_SFIDTIDX,
|
|
|
|
ANA_MSTI_STATE,
|
|
|
|
ANA_OAM_UPM_LM_CNT,
|
|
|
|
ANA_SG_ACCESS_CTRL,
|
|
|
|
ANA_SG_CONFIG_REG_1,
|
|
|
|
ANA_SG_CONFIG_REG_2,
|
|
|
|
ANA_SG_CONFIG_REG_3,
|
|
|
|
ANA_SG_CONFIG_REG_4,
|
|
|
|
ANA_SG_CONFIG_REG_5,
|
|
|
|
ANA_SG_GCL_GS_CONFIG,
|
|
|
|
ANA_SG_GCL_TI_CONFIG,
|
|
|
|
ANA_SG_STATUS_REG_1,
|
|
|
|
ANA_SG_STATUS_REG_2,
|
|
|
|
ANA_SG_STATUS_REG_3,
|
|
|
|
ANA_PORT_VLAN_CFG,
|
|
|
|
ANA_PORT_DROP_CFG,
|
|
|
|
ANA_PORT_QOS_CFG,
|
|
|
|
ANA_PORT_VCAP_CFG,
|
|
|
|
ANA_PORT_VCAP_S1_KEY_CFG,
|
|
|
|
ANA_PORT_VCAP_S2_CFG,
|
|
|
|
ANA_PORT_PCP_DEI_MAP,
|
|
|
|
ANA_PORT_CPU_FWD_CFG,
|
|
|
|
ANA_PORT_CPU_FWD_BPDU_CFG,
|
|
|
|
ANA_PORT_CPU_FWD_GARP_CFG,
|
|
|
|
ANA_PORT_CPU_FWD_CCM_CFG,
|
|
|
|
ANA_PORT_PORT_CFG,
|
|
|
|
ANA_PORT_POL_CFG,
|
|
|
|
ANA_PORT_PTP_CFG,
|
|
|
|
ANA_PORT_PTP_DLY1_CFG,
|
|
|
|
ANA_PORT_PTP_DLY2_CFG,
|
|
|
|
ANA_PORT_SFID_CFG,
|
|
|
|
ANA_PFC_PFC_CFG,
|
|
|
|
ANA_PFC_PFC_TIMER,
|
|
|
|
ANA_IPT_OAM_MEP_CFG,
|
|
|
|
ANA_IPT_IPT,
|
|
|
|
ANA_PPT_PPT,
|
|
|
|
ANA_FID_MAP_FID_MAP,
|
|
|
|
ANA_AGGR_CFG,
|
|
|
|
ANA_CPUQ_CFG,
|
|
|
|
ANA_CPUQ_CFG2,
|
|
|
|
ANA_CPUQ_8021_CFG,
|
|
|
|
ANA_DSCP_CFG,
|
|
|
|
ANA_DSCP_REWR_CFG,
|
|
|
|
ANA_VCAP_RNG_TYPE_CFG,
|
|
|
|
ANA_VCAP_RNG_VAL_CFG,
|
|
|
|
ANA_VRAP_CFG,
|
|
|
|
ANA_VRAP_HDR_DATA,
|
|
|
|
ANA_VRAP_HDR_MASK,
|
|
|
|
ANA_DISCARD_CFG,
|
|
|
|
ANA_FID_CFG,
|
|
|
|
ANA_POL_PIR_CFG,
|
|
|
|
ANA_POL_CIR_CFG,
|
|
|
|
ANA_POL_MODE_CFG,
|
|
|
|
ANA_POL_PIR_STATE,
|
|
|
|
ANA_POL_CIR_STATE,
|
|
|
|
ANA_POL_STATE,
|
|
|
|
ANA_POL_FLOWC,
|
|
|
|
ANA_POL_HYST,
|
|
|
|
ANA_POL_MISC_CFG,
|
|
|
|
QS_XTR_GRP_CFG = QS << TARGET_OFFSET,
|
|
|
|
QS_XTR_RD,
|
|
|
|
QS_XTR_FRM_PRUNING,
|
|
|
|
QS_XTR_FLUSH,
|
|
|
|
QS_XTR_DATA_PRESENT,
|
|
|
|
QS_XTR_CFG,
|
|
|
|
QS_INJ_GRP_CFG,
|
|
|
|
QS_INJ_WR,
|
|
|
|
QS_INJ_CTRL,
|
|
|
|
QS_INJ_STATUS,
|
|
|
|
QS_INJ_ERR,
|
|
|
|
QS_INH_DBG,
|
|
|
|
QSYS_PORT_MODE = QSYS << TARGET_OFFSET,
|
|
|
|
QSYS_SWITCH_PORT_MODE,
|
|
|
|
QSYS_STAT_CNT_CFG,
|
|
|
|
QSYS_EEE_CFG,
|
|
|
|
QSYS_EEE_THRES,
|
|
|
|
QSYS_IGR_NO_SHARING,
|
|
|
|
QSYS_EGR_NO_SHARING,
|
|
|
|
QSYS_SW_STATUS,
|
|
|
|
QSYS_EXT_CPU_CFG,
|
|
|
|
QSYS_PAD_CFG,
|
|
|
|
QSYS_CPU_GROUP_MAP,
|
|
|
|
QSYS_QMAP,
|
|
|
|
QSYS_ISDX_SGRP,
|
|
|
|
QSYS_TIMED_FRAME_ENTRY,
|
|
|
|
QSYS_TFRM_MISC,
|
|
|
|
QSYS_TFRM_PORT_DLY,
|
|
|
|
QSYS_TFRM_TIMER_CFG_1,
|
|
|
|
QSYS_TFRM_TIMER_CFG_2,
|
|
|
|
QSYS_TFRM_TIMER_CFG_3,
|
|
|
|
QSYS_TFRM_TIMER_CFG_4,
|
|
|
|
QSYS_TFRM_TIMER_CFG_5,
|
|
|
|
QSYS_TFRM_TIMER_CFG_6,
|
|
|
|
QSYS_TFRM_TIMER_CFG_7,
|
|
|
|
QSYS_TFRM_TIMER_CFG_8,
|
|
|
|
QSYS_RED_PROFILE,
|
|
|
|
QSYS_RES_QOS_MODE,
|
|
|
|
QSYS_RES_CFG,
|
|
|
|
QSYS_RES_STAT,
|
|
|
|
QSYS_EGR_DROP_MODE,
|
|
|
|
QSYS_EQ_CTRL,
|
|
|
|
QSYS_EVENTS_CORE,
|
|
|
|
QSYS_QMAXSDU_CFG_0,
|
|
|
|
QSYS_QMAXSDU_CFG_1,
|
|
|
|
QSYS_QMAXSDU_CFG_2,
|
|
|
|
QSYS_QMAXSDU_CFG_3,
|
|
|
|
QSYS_QMAXSDU_CFG_4,
|
|
|
|
QSYS_QMAXSDU_CFG_5,
|
|
|
|
QSYS_QMAXSDU_CFG_6,
|
|
|
|
QSYS_QMAXSDU_CFG_7,
|
|
|
|
QSYS_PREEMPTION_CFG,
|
|
|
|
QSYS_CIR_CFG,
|
|
|
|
QSYS_EIR_CFG,
|
|
|
|
QSYS_SE_CFG,
|
|
|
|
QSYS_SE_DWRR_CFG,
|
|
|
|
QSYS_SE_CONNECT,
|
|
|
|
QSYS_SE_DLB_SENSE,
|
|
|
|
QSYS_CIR_STATE,
|
|
|
|
QSYS_EIR_STATE,
|
|
|
|
QSYS_SE_STATE,
|
|
|
|
QSYS_HSCH_MISC_CFG,
|
|
|
|
QSYS_TAG_CONFIG,
|
|
|
|
QSYS_TAS_PARAM_CFG_CTRL,
|
|
|
|
QSYS_PORT_MAX_SDU,
|
|
|
|
QSYS_PARAM_CFG_REG_1,
|
|
|
|
QSYS_PARAM_CFG_REG_2,
|
|
|
|
QSYS_PARAM_CFG_REG_3,
|
|
|
|
QSYS_PARAM_CFG_REG_4,
|
|
|
|
QSYS_PARAM_CFG_REG_5,
|
|
|
|
QSYS_GCL_CFG_REG_1,
|
|
|
|
QSYS_GCL_CFG_REG_2,
|
|
|
|
QSYS_PARAM_STATUS_REG_1,
|
|
|
|
QSYS_PARAM_STATUS_REG_2,
|
|
|
|
QSYS_PARAM_STATUS_REG_3,
|
|
|
|
QSYS_PARAM_STATUS_REG_4,
|
|
|
|
QSYS_PARAM_STATUS_REG_5,
|
|
|
|
QSYS_PARAM_STATUS_REG_6,
|
|
|
|
QSYS_PARAM_STATUS_REG_7,
|
|
|
|
QSYS_PARAM_STATUS_REG_8,
|
|
|
|
QSYS_PARAM_STATUS_REG_9,
|
|
|
|
QSYS_GCL_STATUS_REG_1,
|
|
|
|
QSYS_GCL_STATUS_REG_2,
|
|
|
|
REW_PORT_VLAN_CFG = REW << TARGET_OFFSET,
|
|
|
|
REW_TAG_CFG,
|
|
|
|
REW_PORT_CFG,
|
|
|
|
REW_DSCP_CFG,
|
|
|
|
REW_PCP_DEI_QOS_MAP_CFG,
|
|
|
|
REW_PTP_CFG,
|
|
|
|
REW_PTP_DLY1_CFG,
|
|
|
|
REW_RED_TAG_CFG,
|
|
|
|
REW_DSCP_REMAP_DP1_CFG,
|
|
|
|
REW_DSCP_REMAP_CFG,
|
|
|
|
REW_STAT_CFG,
|
|
|
|
REW_REW_STICKY,
|
|
|
|
REW_PPT,
|
|
|
|
SYS_COUNT_RX_OCTETS = SYS << TARGET_OFFSET,
|
|
|
|
SYS_COUNT_RX_UNICAST,
|
|
|
|
SYS_COUNT_RX_MULTICAST,
|
|
|
|
SYS_COUNT_RX_BROADCAST,
|
|
|
|
SYS_COUNT_RX_SHORTS,
|
|
|
|
SYS_COUNT_RX_FRAGMENTS,
|
|
|
|
SYS_COUNT_RX_JABBERS,
|
|
|
|
SYS_COUNT_RX_CRC_ALIGN_ERRS,
|
|
|
|
SYS_COUNT_RX_SYM_ERRS,
|
|
|
|
SYS_COUNT_RX_64,
|
|
|
|
SYS_COUNT_RX_65_127,
|
|
|
|
SYS_COUNT_RX_128_255,
|
|
|
|
SYS_COUNT_RX_256_1023,
|
|
|
|
SYS_COUNT_RX_1024_1526,
|
|
|
|
SYS_COUNT_RX_1527_MAX,
|
|
|
|
SYS_COUNT_RX_PAUSE,
|
|
|
|
SYS_COUNT_RX_CONTROL,
|
|
|
|
SYS_COUNT_RX_LONGS,
|
|
|
|
SYS_COUNT_RX_CLASSIFIED_DROPS,
|
|
|
|
SYS_COUNT_TX_OCTETS,
|
|
|
|
SYS_COUNT_TX_UNICAST,
|
|
|
|
SYS_COUNT_TX_MULTICAST,
|
|
|
|
SYS_COUNT_TX_BROADCAST,
|
|
|
|
SYS_COUNT_TX_COLLISION,
|
|
|
|
SYS_COUNT_TX_DROPS,
|
|
|
|
SYS_COUNT_TX_PAUSE,
|
|
|
|
SYS_COUNT_TX_64,
|
|
|
|
SYS_COUNT_TX_65_127,
|
|
|
|
SYS_COUNT_TX_128_511,
|
|
|
|
SYS_COUNT_TX_512_1023,
|
|
|
|
SYS_COUNT_TX_1024_1526,
|
|
|
|
SYS_COUNT_TX_1527_MAX,
|
|
|
|
SYS_COUNT_TX_AGING,
|
|
|
|
SYS_RESET_CFG,
|
|
|
|
SYS_SR_ETYPE_CFG,
|
|
|
|
SYS_VLAN_ETYPE_CFG,
|
|
|
|
SYS_PORT_MODE,
|
|
|
|
SYS_FRONT_PORT_MODE,
|
|
|
|
SYS_FRM_AGING,
|
|
|
|
SYS_STAT_CFG,
|
|
|
|
SYS_SW_STATUS,
|
|
|
|
SYS_MISC_CFG,
|
|
|
|
SYS_REW_MAC_HIGH_CFG,
|
|
|
|
SYS_REW_MAC_LOW_CFG,
|
|
|
|
SYS_TIMESTAMP_OFFSET,
|
|
|
|
SYS_CMID,
|
|
|
|
SYS_PAUSE_CFG,
|
|
|
|
SYS_PAUSE_TOT_CFG,
|
|
|
|
SYS_ATOP,
|
|
|
|
SYS_ATOP_TOT_CFG,
|
|
|
|
SYS_MAC_FC_CFG,
|
|
|
|
SYS_MMGT,
|
|
|
|
SYS_MMGT_FAST,
|
|
|
|
SYS_EVENTS_DIF,
|
|
|
|
SYS_EVENTS_CORE,
|
|
|
|
SYS_CNT,
|
|
|
|
SYS_PTP_STATUS,
|
|
|
|
SYS_PTP_TXSTAMP,
|
|
|
|
SYS_PTP_NXT,
|
|
|
|
SYS_PTP_CFG,
|
|
|
|
SYS_RAM_INIT,
|
|
|
|
SYS_CM_ADDR,
|
|
|
|
SYS_CM_DATA_WR,
|
|
|
|
SYS_CM_DATA_RD,
|
|
|
|
SYS_CM_OP,
|
|
|
|
SYS_CM_DATA,
|
|
|
|
PTP_PIN_CFG = PTP << TARGET_OFFSET,
|
|
|
|
PTP_PIN_TOD_SEC_MSB,
|
|
|
|
PTP_PIN_TOD_SEC_LSB,
|
|
|
|
PTP_PIN_TOD_NSEC,
|
2020-04-20 02:46:48 +00:00
|
|
|
PTP_PIN_WF_HIGH_PERIOD,
|
|
|
|
PTP_PIN_WF_LOW_PERIOD,
|
2019-11-14 15:03:27 +00:00
|
|
|
PTP_CFG_MISC,
|
|
|
|
PTP_CLK_CFG_ADJ_CFG,
|
|
|
|
PTP_CLK_CFG_ADJ_FREQ,
|
|
|
|
GCB_SOFT_RST = GCB << TARGET_OFFSET,
|
2020-07-13 16:57:02 +00:00
|
|
|
GCB_MIIM_MII_STATUS,
|
|
|
|
GCB_MIIM_MII_CMD,
|
|
|
|
GCB_MIIM_MII_DATA,
|
2020-07-13 16:57:01 +00:00
|
|
|
DEV_CLOCK_CFG = DEV_GMII << TARGET_OFFSET,
|
|
|
|
DEV_PORT_MISC,
|
|
|
|
DEV_EVENTS,
|
|
|
|
DEV_EEE_CFG,
|
|
|
|
DEV_RX_PATH_DELAY,
|
|
|
|
DEV_TX_PATH_DELAY,
|
|
|
|
DEV_PTP_PREDICT_CFG,
|
|
|
|
DEV_MAC_ENA_CFG,
|
|
|
|
DEV_MAC_MODE_CFG,
|
|
|
|
DEV_MAC_MAXLEN_CFG,
|
|
|
|
DEV_MAC_TAGS_CFG,
|
|
|
|
DEV_MAC_ADV_CHK_CFG,
|
|
|
|
DEV_MAC_IFG_CFG,
|
|
|
|
DEV_MAC_HDX_CFG,
|
|
|
|
DEV_MAC_DBG_CFG,
|
|
|
|
DEV_MAC_FC_MAC_LOW_CFG,
|
|
|
|
DEV_MAC_FC_MAC_HIGH_CFG,
|
|
|
|
DEV_MAC_STICKY,
|
|
|
|
PCS1G_CFG,
|
|
|
|
PCS1G_MODE_CFG,
|
|
|
|
PCS1G_SD_CFG,
|
|
|
|
PCS1G_ANEG_CFG,
|
|
|
|
PCS1G_ANEG_NP_CFG,
|
|
|
|
PCS1G_LB_CFG,
|
|
|
|
PCS1G_DBG_CFG,
|
|
|
|
PCS1G_CDET_CFG,
|
|
|
|
PCS1G_ANEG_STATUS,
|
|
|
|
PCS1G_ANEG_NP_STATUS,
|
|
|
|
PCS1G_LINK_STATUS,
|
|
|
|
PCS1G_LINK_DOWN_CNT,
|
|
|
|
PCS1G_STICKY,
|
|
|
|
PCS1G_DEBUG_STATUS,
|
|
|
|
PCS1G_LPI_CFG,
|
|
|
|
PCS1G_LPI_WAKE_ERROR_CNT,
|
|
|
|
PCS1G_LPI_STATUS,
|
|
|
|
PCS1G_TSTPAT_MODE_CFG,
|
|
|
|
PCS1G_TSTPAT_STATUS,
|
|
|
|
DEV_PCS_FX100_CFG,
|
|
|
|
DEV_PCS_FX100_STATUS,
|
2019-11-14 15:03:27 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
enum ocelot_regfield {
|
|
|
|
ANA_ADVLEARN_VLAN_CHK,
|
|
|
|
ANA_ADVLEARN_LEARN_MIRROR,
|
|
|
|
ANA_ANEVENTS_FLOOD_DISCARD,
|
|
|
|
ANA_ANEVENTS_MSTI_DROP,
|
|
|
|
ANA_ANEVENTS_ACLKILL,
|
|
|
|
ANA_ANEVENTS_ACLUSED,
|
|
|
|
ANA_ANEVENTS_AUTOAGE,
|
|
|
|
ANA_ANEVENTS_VS2TTL1,
|
|
|
|
ANA_ANEVENTS_STORM_DROP,
|
|
|
|
ANA_ANEVENTS_LEARN_DROP,
|
|
|
|
ANA_ANEVENTS_AGED_ENTRY,
|
|
|
|
ANA_ANEVENTS_CPU_LEARN_FAILED,
|
|
|
|
ANA_ANEVENTS_AUTO_LEARN_FAILED,
|
|
|
|
ANA_ANEVENTS_LEARN_REMOVE,
|
|
|
|
ANA_ANEVENTS_AUTO_LEARNED,
|
|
|
|
ANA_ANEVENTS_AUTO_MOVED,
|
|
|
|
ANA_ANEVENTS_DROPPED,
|
|
|
|
ANA_ANEVENTS_CLASSIFIED_DROP,
|
|
|
|
ANA_ANEVENTS_CLASSIFIED_COPY,
|
|
|
|
ANA_ANEVENTS_VLAN_DISCARD,
|
|
|
|
ANA_ANEVENTS_FWD_DISCARD,
|
|
|
|
ANA_ANEVENTS_MULTICAST_FLOOD,
|
|
|
|
ANA_ANEVENTS_UNICAST_FLOOD,
|
|
|
|
ANA_ANEVENTS_DEST_KNOWN,
|
|
|
|
ANA_ANEVENTS_BUCKET3_MATCH,
|
|
|
|
ANA_ANEVENTS_BUCKET2_MATCH,
|
|
|
|
ANA_ANEVENTS_BUCKET1_MATCH,
|
|
|
|
ANA_ANEVENTS_BUCKET0_MATCH,
|
|
|
|
ANA_ANEVENTS_CPU_OPERATION,
|
|
|
|
ANA_ANEVENTS_DMAC_LOOKUP,
|
|
|
|
ANA_ANEVENTS_SMAC_LOOKUP,
|
|
|
|
ANA_ANEVENTS_SEQ_GEN_ERR_0,
|
|
|
|
ANA_ANEVENTS_SEQ_GEN_ERR_1,
|
|
|
|
ANA_TABLES_MACACCESS_B_DOM,
|
|
|
|
ANA_TABLES_MACTINDX_BUCKET,
|
|
|
|
ANA_TABLES_MACTINDX_M_INDEX,
|
net: mscc: ocelot: convert QSYS_SWITCH_PORT_MODE and SYS_PORT_MODE to regfields
Currently Felix and Ocelot share the same bit layout in these per-port
registers, but Seville does not. So we need reg_fields for that.
Actually since these are per-port registers, we need to also specify the
number of ports, and register size per port, and use the regmap API for
multiple ports.
There's a more subtle point to be made about the other 2 register
fields:
- QSYS_SWITCH_PORT_MODE_SCH_NEXT_CFG
- QSYS_SWITCH_PORT_MODE_INGRESS_DROP_MODE
which we are not writing any longer, for 2 reasons:
- Using the previous API (ocelot_write_rix), we were only writing 1 for
Felix and Ocelot, which was their hardware-default value, and which
there wasn't any intention in changing.
- In the case of SCH_NEXT_CFG, in fact Seville does not have this
register field at all, and therefore, if we want to have common code
we would be required to not write to it.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-07-13 16:57:03 +00:00
|
|
|
QSYS_SWITCH_PORT_MODE_PORT_ENA,
|
|
|
|
QSYS_SWITCH_PORT_MODE_SCH_NEXT_CFG,
|
|
|
|
QSYS_SWITCH_PORT_MODE_YEL_RSRVD,
|
|
|
|
QSYS_SWITCH_PORT_MODE_INGRESS_DROP_MODE,
|
|
|
|
QSYS_SWITCH_PORT_MODE_TX_PFC_ENA,
|
|
|
|
QSYS_SWITCH_PORT_MODE_TX_PFC_MODE,
|
2019-11-14 15:03:27 +00:00
|
|
|
QSYS_TIMED_FRAME_ENTRY_TFRM_VLD,
|
|
|
|
QSYS_TIMED_FRAME_ENTRY_TFRM_FP,
|
|
|
|
QSYS_TIMED_FRAME_ENTRY_TFRM_PORTNO,
|
|
|
|
QSYS_TIMED_FRAME_ENTRY_TFRM_TM_SEL,
|
|
|
|
QSYS_TIMED_FRAME_ENTRY_TFRM_TM_T,
|
net: mscc: ocelot: convert QSYS_SWITCH_PORT_MODE and SYS_PORT_MODE to regfields
Currently Felix and Ocelot share the same bit layout in these per-port
registers, but Seville does not. So we need reg_fields for that.
Actually since these are per-port registers, we need to also specify the
number of ports, and register size per port, and use the regmap API for
multiple ports.
There's a more subtle point to be made about the other 2 register
fields:
- QSYS_SWITCH_PORT_MODE_SCH_NEXT_CFG
- QSYS_SWITCH_PORT_MODE_INGRESS_DROP_MODE
which we are not writing any longer, for 2 reasons:
- Using the previous API (ocelot_write_rix), we were only writing 1 for
Felix and Ocelot, which was their hardware-default value, and which
there wasn't any intention in changing.
- In the case of SCH_NEXT_CFG, in fact Seville does not have this
register field at all, and therefore, if we want to have common code
we would be required to not write to it.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-07-13 16:57:03 +00:00
|
|
|
SYS_PORT_MODE_DATA_WO_TS,
|
|
|
|
SYS_PORT_MODE_INCL_INJ_HDR,
|
|
|
|
SYS_PORT_MODE_INCL_XTR_HDR,
|
|
|
|
SYS_PORT_MODE_INCL_HDR_ERR,
|
2019-11-14 15:03:27 +00:00
|
|
|
SYS_RESET_CFG_CORE_ENA,
|
|
|
|
SYS_RESET_CFG_MEM_ENA,
|
|
|
|
SYS_RESET_CFG_MEM_INIT,
|
|
|
|
GCB_SOFT_RST_SWC_RST,
|
2020-07-13 16:57:02 +00:00
|
|
|
GCB_MIIM_MII_STATUS_PENDING,
|
|
|
|
GCB_MIIM_MII_STATUS_BUSY,
|
2020-07-13 16:57:07 +00:00
|
|
|
SYS_PAUSE_CFG_PAUSE_START,
|
|
|
|
SYS_PAUSE_CFG_PAUSE_STOP,
|
|
|
|
SYS_PAUSE_CFG_PAUSE_ENA,
|
2019-11-14 15:03:27 +00:00
|
|
|
REGFIELD_MAX
|
|
|
|
};
|
|
|
|
|
net: mscc: ocelot: generalize existing code for VCAP
In the Ocelot switches there are 3 TCAMs: VCAP ES0, IS1 and IS2, which
have the same configuration interface, but different sets of keys and
actions. The driver currently only supports VCAP IS2.
In preparation of VCAP IS1 and ES0 support, the existing code must be
generalized to work with any VCAP.
In that direction, we should move the structures that depend upon VCAP
instantiation, like vcap_is2_keys and vcap_is2_actions, out of struct
ocelot and into struct vcap_props .keys and .actions, a structure that
is replicated 3 times, once per VCAP. We'll pass that structure as an
argument to each function that does the key and action packing - only
the control logic needs to distinguish between ocelot->vcap[VCAP_IS2]
or IS1 or ES0.
Another change is to make use of the newly introduced ocelot_target_read
and ocelot_target_write API, since the 3 VCAPs have the same registers
but put at different addresses.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-09-29 22:27:23 +00:00
|
|
|
enum {
|
|
|
|
/* VCAP_CORE_CFG */
|
|
|
|
VCAP_CORE_UPDATE_CTRL,
|
|
|
|
VCAP_CORE_MV_CFG,
|
|
|
|
/* VCAP_CORE_CACHE */
|
|
|
|
VCAP_CACHE_ENTRY_DAT,
|
|
|
|
VCAP_CACHE_MASK_DAT,
|
|
|
|
VCAP_CACHE_ACTION_DAT,
|
|
|
|
VCAP_CACHE_CNT_DAT,
|
|
|
|
VCAP_CACHE_TG_DAT,
|
2020-09-29 22:27:26 +00:00
|
|
|
/* VCAP_CONST */
|
|
|
|
VCAP_CONST_VCAP_VER,
|
|
|
|
VCAP_CONST_ENTRY_WIDTH,
|
|
|
|
VCAP_CONST_ENTRY_CNT,
|
|
|
|
VCAP_CONST_ENTRY_SWCNT,
|
|
|
|
VCAP_CONST_ENTRY_TG_WIDTH,
|
|
|
|
VCAP_CONST_ACTION_DEF_CNT,
|
|
|
|
VCAP_CONST_ACTION_WIDTH,
|
|
|
|
VCAP_CONST_CNT_WIDTH,
|
|
|
|
VCAP_CONST_CORE_CNT,
|
|
|
|
VCAP_CONST_IF_CNT,
|
net: mscc: ocelot: generalize existing code for VCAP
In the Ocelot switches there are 3 TCAMs: VCAP ES0, IS1 and IS2, which
have the same configuration interface, but different sets of keys and
actions. The driver currently only supports VCAP IS2.
In preparation of VCAP IS1 and ES0 support, the existing code must be
generalized to work with any VCAP.
In that direction, we should move the structures that depend upon VCAP
instantiation, like vcap_is2_keys and vcap_is2_actions, out of struct
ocelot and into struct vcap_props .keys and .actions, a structure that
is replicated 3 times, once per VCAP. We'll pass that structure as an
argument to each function that does the key and action packing - only
the control logic needs to distinguish between ocelot->vcap[VCAP_IS2]
or IS1 or ES0.
Another change is to make use of the newly introduced ocelot_target_read
and ocelot_target_write API, since the 3 VCAPs have the same registers
but put at different addresses.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-09-29 22:27:23 +00:00
|
|
|
};
|
|
|
|
|
2020-04-20 02:46:47 +00:00
|
|
|
enum ocelot_ptp_pins {
|
|
|
|
PTP_PIN_0,
|
|
|
|
PTP_PIN_1,
|
|
|
|
PTP_PIN_2,
|
|
|
|
PTP_PIN_3,
|
2019-11-14 15:03:27 +00:00
|
|
|
TOD_ACC_PIN
|
|
|
|
};
|
|
|
|
|
|
|
|
struct ocelot_stat_layout {
|
|
|
|
u32 offset;
|
|
|
|
char name[ETH_GSTRING_LEN];
|
|
|
|
};
|
|
|
|
|
2022-04-30 23:23:26 +00:00
|
|
|
#define OCELOT_STAT_END { .name = "" }
|
2022-04-29 21:30:36 +00:00
|
|
|
|
2022-02-13 19:12:54 +00:00
|
|
|
struct ocelot_stats_region {
|
|
|
|
struct list_head node;
|
|
|
|
u32 offset;
|
|
|
|
int count;
|
|
|
|
u32 *buf;
|
|
|
|
};
|
|
|
|
|
2019-11-14 15:03:27 +00:00
|
|
|
enum ocelot_tag_prefix {
|
|
|
|
OCELOT_TAG_PREFIX_DISABLED = 0,
|
|
|
|
OCELOT_TAG_PREFIX_NONE,
|
|
|
|
OCELOT_TAG_PREFIX_SHORT,
|
|
|
|
OCELOT_TAG_PREFIX_LONG,
|
|
|
|
};
|
|
|
|
|
|
|
|
struct ocelot;
|
|
|
|
|
|
|
|
struct ocelot_ops {
|
2020-10-02 12:02:21 +00:00
|
|
|
struct net_device *(*port_to_netdev)(struct ocelot *ocelot, int port);
|
|
|
|
int (*netdev_to_port)(struct net_device *dev);
|
2019-11-14 15:03:27 +00:00
|
|
|
int (*reset)(struct ocelot *ocelot);
|
2020-07-13 16:57:08 +00:00
|
|
|
u16 (*wm_enc)(u16 value);
|
2021-01-15 02:11:12 +00:00
|
|
|
u16 (*wm_dec)(u16 value);
|
|
|
|
void (*wm_stat)(u32 val, u32 *inuse, u32 *maxuse);
|
2021-11-18 10:11:59 +00:00
|
|
|
void (*psfp_init)(struct ocelot *ocelot);
|
2021-11-18 10:12:04 +00:00
|
|
|
int (*psfp_filter_add)(struct ocelot *ocelot, int port,
|
|
|
|
struct flow_cls_offload *f);
|
2021-11-18 10:11:59 +00:00
|
|
|
int (*psfp_filter_del)(struct ocelot *ocelot, struct flow_cls_offload *f);
|
|
|
|
int (*psfp_stats_get)(struct ocelot *ocelot, struct flow_cls_offload *f,
|
|
|
|
struct flow_stats *stats);
|
net: dsa: felix: enable cut-through forwarding between ports by default
The VSC9959 switch embedded within NXP LS1028A (and that version of
Ocelot switches only) supports cut-through forwarding - meaning it can
start the process of looking up the destination ports for a packet, and
forward towards those ports, before the entire packet has been received
(as opposed to the store-and-forward mode).
The up side is having lower forwarding latency for large packets. The
down side is that frames with FCS errors are forwarded instead of being
dropped. However, erroneous frames do not result in incorrect updates of
the FDB or incorrect policer updates, since these processes are deferred
inside the switch to the end of frame. Since the switch starts the
cut-through forwarding process after all packet headers (including IP,
if any) have been processed, packets with large headers and small
payload do not see the benefit of lower forwarding latency.
There are two cases that need special attention.
The first is when a packet is multicast (or flooded) to multiple
destinations, one of which doesn't have cut-through forwarding enabled.
The switch deals with this automatically by disabling cut-through
forwarding for the frame towards all destination ports.
The second is when a packet is forwarded from a port of lower link speed
towards a port of higher link speed. This is not handled by the hardware
and needs software intervention.
Since we practically need to update the cut-through forwarding domain
from paths that aren't serialized by the rtnl_mutex (phylink
mac_link_down/mac_link_up ops), this means we need to serialize physical
link events with user space updates of bonding/bridging domains.
Enabling cut-through forwarding is done per {egress port, traffic class}.
I don't see any reason why this would be a configurable option as long
as it works without issues, and there doesn't appear to be any user
space configuration tool to toggle this on/off, so this patch enables
cut-through forwarding on all eligible ports and traffic classes.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Link: https://lore.kernel.org/r/20211125125808.2383984-2-vladimir.oltean@nxp.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-11-25 12:58:08 +00:00
|
|
|
void (*cut_through_fwd)(struct ocelot *ocelot);
|
2019-11-14 15:03:27 +00:00
|
|
|
};
|
|
|
|
|
2021-11-18 10:12:02 +00:00
|
|
|
struct ocelot_vcap_policer {
|
|
|
|
struct list_head pol_list;
|
|
|
|
u16 base;
|
|
|
|
u16 max;
|
|
|
|
u16 base2;
|
|
|
|
u16 max2;
|
|
|
|
};
|
|
|
|
|
2020-06-20 15:43:46 +00:00
|
|
|
struct ocelot_vcap_block {
|
net: mscc: ocelot: simplify tc-flower offload structures
The ocelot tc-flower offload binds a second flow block callback (apart
from the one for matchall) just because it uses a different block
private structure (ocelot_port_private for matchall, ocelot_port_block
for flower).
But ocelot_port_block just appears to be boilerplate, and doesn't help
with anything in particular at all, it's just useless glue between the
(global!) struct ocelot_acl_block *block pointer, and a per-netdevice
struct ocelot_port_private *priv.
So let's just simplify that, and make struct ocelot_port_private be the
private structure for the block offload. This makes us able to use the
same flow callback as in the case of matchall.
This also reveals that the struct ocelot_acl_block *block is used rather
strangely, as mentioned above: it is defined globally, allocated at
probe time, and freed at unbind time. So just move the structure to the
main ocelot structure, which gives further opportunity for
simplification.
Also get rid of backpointers from struct ocelot_acl_block and struct
ocelot_ace_rule back to struct ocelot, by reworking the function
prototypes, where necessary, to use a more DSA-friendly "struct ocelot
*ocelot, int port" format.
And finally, remove the debugging prints that were added during
development, since they provide no useful information at this point.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Tested-by: Horatiu Vultur <horatiu.vultur@microchip.com>
Reviewed-by: Allan W. Nielsen <allan.nielsen@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-02-29 14:31:06 +00:00
|
|
|
struct list_head rules;
|
|
|
|
int count;
|
|
|
|
};
|
|
|
|
|
net: mscc: ocelot: convert the VLAN masks to a list
First and foremost, the driver currently allocates a constant sized
4K * u32 (16KB memory) array for the VLAN masks. However, a typical
application might not need so many VLANs, so if we dynamically allocate
the memory as needed, we might actually save some space.
Secondly, we'll need to keep more advanced bookkeeping of the VLANs we
have, notably we'll have to check how many untagged and how many tagged
VLANs we have. This will have to stay in a structure, and allocating
another 16 KB array for that is again a bit too much.
So refactor the bridge VLANs in a linked list of structures.
The hook points inside the driver are ocelot_vlan_member_add() and
ocelot_vlan_member_del(), which previously used to operate on the
ocelot->vlan_mask[vid] array element.
ocelot_vlan_member_add() and ocelot_vlan_member_del() used to call
ocelot_vlan_member_set() to commit to the ocelot->vlan_mask.
Additionally, we had two calls to ocelot_vlan_member_set() from outside
those callers, and those were directly from ocelot_vlan_init().
Those calls do not set up bridging service VLANs, instead they:
- clear the VLAN table on reset
- set the port pvid to the value used by this driver for VLAN-unaware
standalone port operation (VID 0)
So now, when we have a structure which represents actual bridge VLANs,
VID 0 doesn't belong in that structure, since it is not part of the
bridging layer.
So delete the middle man, ocelot_vlan_member_set(), and let
ocelot_vlan_init() call directly ocelot_vlant_set_mask() which forgoes
any data structure and writes directly to hardware, which is all that we
need.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-10-20 17:58:49 +00:00
|
|
|
struct ocelot_bridge_vlan {
|
|
|
|
u16 vid;
|
|
|
|
unsigned long portmask;
|
net: mscc: ocelot: allow a config where all bridge VLANs are egress-untagged
At present, the ocelot driver accepts a single egress-untagged bridge
VLAN, meaning that this sequence of operations:
ip link add br0 type bridge vlan_filtering 1
ip link set swp0 master br0
bridge vlan add dev swp0 vid 2 pvid untagged
fails because the bridge automatically installs VID 1 as a pvid & untagged
VLAN, and vid 2 would be the second untagged VLAN on this port. It is
necessary to delete VID 1 before proceeding to add VID 2.
This limitation comes from the fact that we operate the port tag, when
it has an egress-untagged VID, in the OCELOT_PORT_TAG_NATIVE mode.
The ocelot switches do not have full flexibility and can either have one
single VID as egress-untagged, or all of them.
There are use cases for having all VLANs as egress-untagged as well, and
this patch adds support for that.
The change rewrites ocelot_port_set_native_vlan() into a more generic
ocelot_port_manage_port_tag() function. Because the software bridge's
state, transmitted to us via switchdev, can become very complex, we
don't attempt to track all possible state transitions, but instead take
a more declarative approach and just make ocelot_port_manage_port_tag()
figure out which more to operate in:
- port is VLAN-unaware: the classified VLAN (internal, unrelated to the
802.1Q header) is not inserted into packets on egress
- port is VLAN-aware:
- port has tagged VLANs:
-> port has no untagged VLAN: set up as pure trunk
-> port has one untagged VLAN: set up as trunk port + native VLAN
-> port has more than one untagged VLAN: this is an invalid config
which is rejected by ocelot_vlan_prepare
- port has no tagged VLANs
-> set up as pure egress-untagged port
We don't keep the number of tagged and untagged VLANs, we just count the
structures we keep.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-10-20 17:58:50 +00:00
|
|
|
unsigned long untagged;
|
net: mscc: ocelot: convert the VLAN masks to a list
First and foremost, the driver currently allocates a constant sized
4K * u32 (16KB memory) array for the VLAN masks. However, a typical
application might not need so many VLANs, so if we dynamically allocate
the memory as needed, we might actually save some space.
Secondly, we'll need to keep more advanced bookkeeping of the VLANs we
have, notably we'll have to check how many untagged and how many tagged
VLANs we have. This will have to stay in a structure, and allocating
another 16 KB array for that is again a bit too much.
So refactor the bridge VLANs in a linked list of structures.
The hook points inside the driver are ocelot_vlan_member_add() and
ocelot_vlan_member_del(), which previously used to operate on the
ocelot->vlan_mask[vid] array element.
ocelot_vlan_member_add() and ocelot_vlan_member_del() used to call
ocelot_vlan_member_set() to commit to the ocelot->vlan_mask.
Additionally, we had two calls to ocelot_vlan_member_set() from outside
those callers, and those were directly from ocelot_vlan_init().
Those calls do not set up bridging service VLANs, instead they:
- clear the VLAN table on reset
- set the port pvid to the value used by this driver for VLAN-unaware
standalone port operation (VID 0)
So now, when we have a structure which represents actual bridge VLANs,
VID 0 doesn't belong in that structure, since it is not part of the
bridging layer.
So delete the middle man, ocelot_vlan_member_set(), and let
ocelot_vlan_init() call directly ocelot_vlant_set_mask() which forgoes
any data structure and writes directly to hardware, which is all that we
need.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-10-20 17:58:49 +00:00
|
|
|
struct list_head list;
|
|
|
|
};
|
|
|
|
|
2021-10-20 17:58:48 +00:00
|
|
|
enum ocelot_port_tag_config {
|
|
|
|
/* all VLANs are egress-untagged */
|
|
|
|
OCELOT_PORT_TAG_DISABLED = 0,
|
|
|
|
/* all VLANs except the native VLAN and VID 0 are egress-tagged */
|
|
|
|
OCELOT_PORT_TAG_NATIVE = 1,
|
|
|
|
/* all VLANs except VID 0 are egress-tagged */
|
|
|
|
OCELOT_PORT_TAG_TRUNK_NO_VID0 = 2,
|
|
|
|
/* all VLANs are egress-tagged */
|
|
|
|
OCELOT_PORT_TAG_TRUNK = 3,
|
|
|
|
};
|
|
|
|
|
2021-11-18 10:12:00 +00:00
|
|
|
struct ocelot_psfp_list {
|
|
|
|
struct list_head stream_list;
|
|
|
|
struct list_head sfi_list;
|
|
|
|
struct list_head sgi_list;
|
|
|
|
};
|
|
|
|
|
net: mscc: ocelot: configure watermarks using devlink-sb
Using devlink-sb, we can configure 12/16 (the important 75%) of the
switch's controlling watermarks for congestion drops, and we can monitor
50% of the watermark occupancies (we can monitor the reservation
watermarks, but not the sharing watermarks, which are exposed as pool
sizes).
The following definitions can be made:
SB_BUF=0 # The devlink-sb for frame buffers
SB_REF=1 # The devlink-sb for frame references
POOL_ING=0 # The pool for ingress traffic. Both devlink-sb instances
# have one of these.
POOL_EGR=1 # The pool for egress traffic. Both devlink-sb instances
# have one of these.
Editing the hardware watermarks is done in the following way:
BUF_xxxx_I is accessed when sb=$SB_BUF and pool=$POOL_ING
REF_xxxx_I is accessed when sb=$SB_REF and pool=$POOL_ING
BUF_xxxx_E is accessed when sb=$SB_BUF and pool=$POOL_EGR
REF_xxxx_E is accessed when sb=$SB_REF and pool=$POOL_EGR
Configuring the sharing watermarks for COL_SHR(dp=0) is done implicitly
by modifying the corresponding pool size. By default, the pool size has
maximum size, so this can be skipped.
devlink sb pool set pci/0000:00:00.5 sb $SB_BUF pool $POOL_ING \
size 129840 thtype static
Since by default there is no buffer reservation, the above command has
maxed out BUF_COL_SHR_I(dp=0).
Configuring the per-port reservation watermark (P_RSRV) is done in the
following way:
devlink sb port pool set pci/0000:00:00.5/0 sb $SB_BUF \
pool $POOL_ING th 1000
The above command sets BUF_P_RSRV_I(port 0) to 1000 bytes. After this
command, the sharing watermarks are internally reconfigured with 1000
bytes less, i.e. from 129840 bytes to 128840 bytes.
Configuring the per-port-tc reservation watermarks (Q_RSRV) is done in
the following way:
for tc in {0..7}; do
devlink sb tc bind set pci/0000:00:00.5/0 sb 0 tc $tc \
type ingress pool $POOL_ING \
th 3000
done
The above command sets BUF_Q_RSRV_I(port 0, tc 0..7) to 3000 bytes.
The sharing watermarks are again reconfigured with 24000 bytes less.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-01-15 02:11:20 +00:00
|
|
|
enum ocelot_sb {
|
|
|
|
OCELOT_SB_BUF,
|
|
|
|
OCELOT_SB_REF,
|
|
|
|
OCELOT_SB_NUM,
|
|
|
|
};
|
|
|
|
|
|
|
|
enum ocelot_sb_pool {
|
|
|
|
OCELOT_SB_POOL_ING,
|
|
|
|
OCELOT_SB_POOL_EGR,
|
|
|
|
OCELOT_SB_POOL_NUM,
|
|
|
|
};
|
|
|
|
|
2021-11-18 10:11:57 +00:00
|
|
|
/* MAC table entry types.
|
|
|
|
* ENTRYTYPE_NORMAL is subject to aging.
|
|
|
|
* ENTRYTYPE_LOCKED is not subject to aging.
|
|
|
|
* ENTRYTYPE_MACv4 is not subject to aging. For IPv4 multicast.
|
|
|
|
* ENTRYTYPE_MACv6 is not subject to aging. For IPv6 multicast.
|
|
|
|
*/
|
|
|
|
enum macaccess_entry_type {
|
|
|
|
ENTRYTYPE_NORMAL = 0,
|
|
|
|
ENTRYTYPE_LOCKED,
|
|
|
|
ENTRYTYPE_MACv4,
|
|
|
|
ENTRYTYPE_MACv6,
|
|
|
|
};
|
|
|
|
|
net: mscc: ocelot: convert to phylink
The felix DSA driver, which is a wrapper over the same hardware class as
ocelot, is integrated with phylink, but ocelot is using the plain PHY
library. It makes sense to bring together the two implementations, which
is what this patch achieves.
This is a large patch and hard to break up, but it does the following:
The existing ocelot_adjust_link writes some registers, and
felix_phylink_mac_link_up writes some registers, some of them are
common, but both functions write to some registers to which the other
doesn't.
The main reasons for this are:
- Felix switches so far have used an NXP PCS so they had no need to
write the PCS1G registers that ocelot_adjust_link writes
- Felix switches have the MAC fixed at 1G, so some of the MAC speed
changes actually break the link and must be avoided.
The naming conventions for the functions introduced in this patch are:
- vsc7514_phylink_{mac_config,validate} are specific to the Ocelot
instantiations and placed in ocelot_net.c which is built only for the
ocelot switchdev driver.
- ocelot_phylink_mac_link_{up,down} are shared between the ocelot
switchdev driver and the felix DSA driver (they are put in the common
lib).
One by one, the registers written by ocelot_adjust_link are:
DEV_MAC_MODE_CFG - felix_phylink_mac_link_up had no need to write this
register since its out-of-reset value was fine and
did not need changing. The write is moved to the
common ocelot_phylink_mac_link_up and on felix it is
guarded by a quirk bit that makes the written value
identical with the out-of-reset one
DEV_PORT_MISC - runtime invariant, was moved to vsc7514_phylink_mac_config
PCS1G_MODE_CFG - same as above
PCS1G_SD_CFG - same as above
PCS1G_CFG - same as above
PCS1G_ANEG_CFG - same as above
PCS1G_LB_CFG - same as above
DEV_MAC_ENA_CFG - both ocelot_adjust_link and ocelot_port_disable
touched this. felix_phylink_mac_link_{up,down} also
do. We go with what felix does and put it in
ocelot_phylink_mac_link_up.
DEV_CLOCK_CFG - ocelot_adjust_link and felix_phylink_mac_link_up both
write this, but to different values. Move to the common
ocelot_phylink_mac_link_up and make sure via the quirk
that the old values are preserved for both.
ANA_PFC_PFC_CFG - ocelot_adjust_link wrote this, felix_phylink_mac_link_up
did not. Runtime invariant, speed does not matter since
PFC is disabled via the RX_PFC_ENA bits which are cleared.
Move to vsc7514_phylink_mac_config.
QSYS_SWITCH_PORT_MODE_PORT_ENA - both ocelot_adjust_link and
felix_phylink_mac_link_{up,down} wrote
this. Ocelot also wrote this register
from ocelot_port_disable. Keep what
felix did, move in ocelot_phylink_mac_link_{up,down}
and delete ocelot_port_disable.
ANA_POL_FLOWC - same as above
SYS_MAC_FC_CFG - same as above, except slight behavior change. Whereas
ocelot always enabled RX and TX flow control, felix
listened to phylink (for the most part, at least - see
the 2500base-X comment).
The registers which only felix_phylink_mac_link_up wrote are:
SYS_PAUSE_CFG_PAUSE_ENA - this is why I am not sure that flow control
worked on ocelot. Not it should, since the
code is shared with felix where it does.
ANA_PORT_PORT_CFG - this is a Frame Analyzer block register, phylink
should be the one touching them, deleted.
Other changes:
- The old phylib registration code was in mscc_ocelot_init_ports. It is
hard to work with 2 levels of indentation already in, and with hard to
follow teardown logic. The new phylink registration code was moved
inside ocelot_probe_port(), right between alloc_etherdev() and
register_netdev(). It could not be done before (=> outside of)
ocelot_probe_port() because ocelot_probe_port() allocates the struct
ocelot_port which we then use to assign ocelot_port->phy_mode to. It
is more preferable to me to have all PHY handling logic inside the
same function.
- On the same topic: struct ocelot_port_private :: serdes is only used
in ocelot_port_open to set the SERDES protocol to Ethernet. This is
logically a runtime invariant and can be done just once, when the port
registers with phylink. We therefore don't even need to keep the
serdes reference inside struct ocelot_port_private, or to use the devm
variant of of_phy_get().
- Phylink needs a valid phy-mode for phylink_create() to succeed, and
the existing device tree bindings in arch/mips/boot/dts/mscc/ocelot_pcb120.dts
don't define one for the internal PHY ports. So we patch
PHY_INTERFACE_MODE_NA into PHY_INTERFACE_MODE_INTERNAL.
- There was a strategically placed:
switch (priv->phy_mode) {
case PHY_INTERFACE_MODE_NA:
continue;
which made the code skip the serdes initialization for the internal
PHY ports. Frankly that is not all that obvious, so now we explicitly
initialize the serdes under an "if" condition and not rely on code
jumps, so everything is clearer.
- There was a write of OCELOT_SPEED_1000 to DEV_CLOCK_CFG for QSGMII
ports. Since that is in fact the default value for the register field
DEV_CLOCK_CFG_LINK_SPEED, I can only guess the intention was to clear
the adjacent fields, MAC_TX_RST and MAC_RX_RST, aka take the port out
of reset, which does match the comment. I don't even want to know why
this code is placed there, but if there is indeed an issue that all
ports that share a QSGMII lane must all be up, then this logic is
already buggy, since mscc_ocelot_init_ports iterates using
for_each_available_child_of_node, so nobody prevents the user from
putting a 'status = "disabled";' for some QSGMII ports which would
break the driver's assumption.
In any case, in the eventuality that I'm right, we would have yet
another issue if ocelot_phylink_mac_link_down would reset those ports
and that would be forbidden, so since the ocelot_adjust_link logic did
not do that (maybe for a reason), add another quirk to preserve the
old logic.
The ocelot driver teardown goes through all ports in one fell swoop.
When initialization of one port fails, the ocelot->ports[port] pointer
for that is reset to NULL, and teardown is done only for non-NULL ports,
so there is no reason to do partial teardowns, let the central
mscc_ocelot_release_ports() do its job.
Tested bind, unbind, rebind, link up, link down, speed change on mock-up
hardware (modified the driver to probe on Felix VSC9959). Also
regression tested the felix DSA driver. Could not test the Ocelot
specific bits (PCS1G, SERDES, device tree bindings).
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-08-15 01:47:48 +00:00
|
|
|
#define OCELOT_QUIRK_PCS_PERFORMS_RATE_ADAPTATION BIT(0)
|
|
|
|
#define OCELOT_QUIRK_QSGMII_PORTS_MUST_BE_UP BIT(1)
|
|
|
|
|
2022-02-23 14:00:54 +00:00
|
|
|
struct ocelot_lag_fdb {
|
|
|
|
unsigned char addr[ETH_ALEN];
|
|
|
|
u16 vid;
|
|
|
|
struct net_device *bond;
|
|
|
|
struct list_head list;
|
|
|
|
};
|
|
|
|
|
net: mscc: ocelot: add port mirroring support using tc-matchall
Ocelot switches perform port-based ingress mirroring if
ANA:PORT:PORT_CFG field SRC_MIRROR_ENA is set, and egress mirroring if
the port is in ANA:ANA:EMIRRORPORTS.
Both ingress-mirrored and egress-mirrored frames are copied to the port
mask from ANA:ANA:MIRRORPORTS.
So the choice of limiting to a single mirror port via ocelot_mirror_get()
and ocelot_mirror_put() may seem bizarre, but the hardware model doesn't
map very well to the user space model. If the user wants to mirror the
ingress of swp1 towards swp2 and the ingress of swp3 towards swp4, we'd
have to program ANA:ANA:MIRRORPORTS with BIT(2) | BIT(4), and that would
make swp1 be mirrored towards swp4 too, and swp3 towards swp2. But there
are no tc-matchall rules to describe those actions.
Now, we could offload a matchall rule with multiple mirred actions, one
per desired mirror port, and force the user to stick to the multi-action
rule format for subsequent matchall filters. But both DSA and ocelot
have the flow_offload_has_one_action() check for the matchall offload,
plus the fact that it will get cumbersome to cross-check matchall
mirrors with flower mirrors (which will be added in the next patch).
As a result, we limit the configuration to a single mirror port, with
the possibility of lifting the restriction in the future.
Frames injected from the CPU don't get egress-mirrored, since they are
sent with the BYPASS bit in the injection frame header, and this
bypasses the analyzer module (effectively also the mirroring logic).
I don't know what to do/say about this.
Functionality was tested with:
tc qdisc add dev swp3 clsact
tc filter add dev swp3 ingress \
matchall skip_sw \
action mirred egress mirror dev swp1
and pinging through swp3, while seeing that the ICMP replies are
mirrored towards swp1.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2022-03-16 20:41:40 +00:00
|
|
|
struct ocelot_mirror {
|
|
|
|
refcount_t refcount;
|
|
|
|
int to;
|
|
|
|
};
|
|
|
|
|
2019-11-14 15:03:27 +00:00
|
|
|
struct ocelot_port {
|
|
|
|
struct ocelot *ocelot;
|
|
|
|
|
2020-07-13 16:57:01 +00:00
|
|
|
struct regmap *target;
|
2019-11-14 15:03:27 +00:00
|
|
|
|
net: mscc: ocelot: minimize holes in struct ocelot_port
Reorder members of struct ocelot_port to eliminate holes and reduce
structure size. Pahole says:
Before:
struct ocelot_port {
struct ocelot * ocelot; /* 0 8 */
struct regmap * target; /* 8 8 */
bool vlan_aware; /* 16 1 */
/* XXX 7 bytes hole, try to pack */
const struct ocelot_bridge_vlan * pvid_vlan; /* 24 8 */
unsigned int ptp_skbs_in_flight; /* 32 4 */
u8 ptp_cmd; /* 36 1 */
/* XXX 3 bytes hole, try to pack */
struct sk_buff_head tx_skbs; /* 40 96 */
/* --- cacheline 2 boundary (128 bytes) was 8 bytes ago --- */
u8 ts_id; /* 136 1 */
/* XXX 3 bytes hole, try to pack */
phy_interface_t phy_mode; /* 140 4 */
bool is_dsa_8021q_cpu; /* 144 1 */
bool learn_ena; /* 145 1 */
/* XXX 6 bytes hole, try to pack */
struct net_device * bond; /* 152 8 */
bool lag_tx_active; /* 160 1 */
/* XXX 1 byte hole, try to pack */
u16 mrp_ring_id; /* 162 2 */
/* XXX 4 bytes hole, try to pack */
struct net_device * bridge; /* 168 8 */
int bridge_num; /* 176 4 */
u8 stp_state; /* 180 1 */
/* XXX 3 bytes hole, try to pack */
int speed; /* 184 4 */
/* size: 192, cachelines: 3, members: 18 */
/* sum members: 161, holes: 7, sum holes: 27 */
/* padding: 4 */
};
After:
struct ocelot_port {
struct ocelot * ocelot; /* 0 8 */
struct regmap * target; /* 8 8 */
struct net_device * bond; /* 16 8 */
struct net_device * bridge; /* 24 8 */
const struct ocelot_bridge_vlan * pvid_vlan; /* 32 8 */
phy_interface_t phy_mode; /* 40 4 */
unsigned int ptp_skbs_in_flight; /* 44 4 */
struct sk_buff_head tx_skbs; /* 48 96 */
/* --- cacheline 2 boundary (128 bytes) was 16 bytes ago --- */
u16 mrp_ring_id; /* 144 2 */
u8 ptp_cmd; /* 146 1 */
u8 ts_id; /* 147 1 */
u8 stp_state; /* 148 1 */
bool vlan_aware; /* 149 1 */
bool is_dsa_8021q_cpu; /* 150 1 */
bool learn_ena; /* 151 1 */
bool lag_tx_active; /* 152 1 */
/* XXX 3 bytes hole, try to pack */
int bridge_num; /* 156 4 */
int speed; /* 160 4 */
/* size: 168, cachelines: 3, members: 18 */
/* sum members: 161, holes: 1, sum holes: 3 */
/* padding: 4 */
/* last cacheline: 40 bytes */
};
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2022-05-11 10:06:36 +00:00
|
|
|
struct net_device *bond;
|
|
|
|
struct net_device *bridge;
|
|
|
|
|
2020-10-31 10:29:12 +00:00
|
|
|
/* VLAN that untagged frames are classified to, on ingress */
|
2021-10-20 17:58:52 +00:00
|
|
|
const struct ocelot_bridge_vlan *pvid_vlan;
|
2019-11-14 15:03:27 +00:00
|
|
|
|
net: mscc: ocelot: minimize holes in struct ocelot_port
Reorder members of struct ocelot_port to eliminate holes and reduce
structure size. Pahole says:
Before:
struct ocelot_port {
struct ocelot * ocelot; /* 0 8 */
struct regmap * target; /* 8 8 */
bool vlan_aware; /* 16 1 */
/* XXX 7 bytes hole, try to pack */
const struct ocelot_bridge_vlan * pvid_vlan; /* 24 8 */
unsigned int ptp_skbs_in_flight; /* 32 4 */
u8 ptp_cmd; /* 36 1 */
/* XXX 3 bytes hole, try to pack */
struct sk_buff_head tx_skbs; /* 40 96 */
/* --- cacheline 2 boundary (128 bytes) was 8 bytes ago --- */
u8 ts_id; /* 136 1 */
/* XXX 3 bytes hole, try to pack */
phy_interface_t phy_mode; /* 140 4 */
bool is_dsa_8021q_cpu; /* 144 1 */
bool learn_ena; /* 145 1 */
/* XXX 6 bytes hole, try to pack */
struct net_device * bond; /* 152 8 */
bool lag_tx_active; /* 160 1 */
/* XXX 1 byte hole, try to pack */
u16 mrp_ring_id; /* 162 2 */
/* XXX 4 bytes hole, try to pack */
struct net_device * bridge; /* 168 8 */
int bridge_num; /* 176 4 */
u8 stp_state; /* 180 1 */
/* XXX 3 bytes hole, try to pack */
int speed; /* 184 4 */
/* size: 192, cachelines: 3, members: 18 */
/* sum members: 161, holes: 7, sum holes: 27 */
/* padding: 4 */
};
After:
struct ocelot_port {
struct ocelot * ocelot; /* 0 8 */
struct regmap * target; /* 8 8 */
struct net_device * bond; /* 16 8 */
struct net_device * bridge; /* 24 8 */
const struct ocelot_bridge_vlan * pvid_vlan; /* 32 8 */
phy_interface_t phy_mode; /* 40 4 */
unsigned int ptp_skbs_in_flight; /* 44 4 */
struct sk_buff_head tx_skbs; /* 48 96 */
/* --- cacheline 2 boundary (128 bytes) was 16 bytes ago --- */
u16 mrp_ring_id; /* 144 2 */
u8 ptp_cmd; /* 146 1 */
u8 ts_id; /* 147 1 */
u8 stp_state; /* 148 1 */
bool vlan_aware; /* 149 1 */
bool is_dsa_8021q_cpu; /* 150 1 */
bool learn_ena; /* 151 1 */
bool lag_tx_active; /* 152 1 */
/* XXX 3 bytes hole, try to pack */
int bridge_num; /* 156 4 */
int speed; /* 160 4 */
/* size: 168, cachelines: 3, members: 18 */
/* sum members: 161, holes: 1, sum holes: 3 */
/* padding: 4 */
/* last cacheline: 40 bytes */
};
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2022-05-11 10:06:36 +00:00
|
|
|
phy_interface_t phy_mode;
|
|
|
|
|
2021-10-12 11:40:36 +00:00
|
|
|
unsigned int ptp_skbs_in_flight;
|
2019-11-27 07:27:57 +00:00
|
|
|
struct sk_buff_head tx_skbs;
|
2020-01-06 01:34:15 +00:00
|
|
|
|
net: mscc: ocelot: minimize holes in struct ocelot_port
Reorder members of struct ocelot_port to eliminate holes and reduce
structure size. Pahole says:
Before:
struct ocelot_port {
struct ocelot * ocelot; /* 0 8 */
struct regmap * target; /* 8 8 */
bool vlan_aware; /* 16 1 */
/* XXX 7 bytes hole, try to pack */
const struct ocelot_bridge_vlan * pvid_vlan; /* 24 8 */
unsigned int ptp_skbs_in_flight; /* 32 4 */
u8 ptp_cmd; /* 36 1 */
/* XXX 3 bytes hole, try to pack */
struct sk_buff_head tx_skbs; /* 40 96 */
/* --- cacheline 2 boundary (128 bytes) was 8 bytes ago --- */
u8 ts_id; /* 136 1 */
/* XXX 3 bytes hole, try to pack */
phy_interface_t phy_mode; /* 140 4 */
bool is_dsa_8021q_cpu; /* 144 1 */
bool learn_ena; /* 145 1 */
/* XXX 6 bytes hole, try to pack */
struct net_device * bond; /* 152 8 */
bool lag_tx_active; /* 160 1 */
/* XXX 1 byte hole, try to pack */
u16 mrp_ring_id; /* 162 2 */
/* XXX 4 bytes hole, try to pack */
struct net_device * bridge; /* 168 8 */
int bridge_num; /* 176 4 */
u8 stp_state; /* 180 1 */
/* XXX 3 bytes hole, try to pack */
int speed; /* 184 4 */
/* size: 192, cachelines: 3, members: 18 */
/* sum members: 161, holes: 7, sum holes: 27 */
/* padding: 4 */
};
After:
struct ocelot_port {
struct ocelot * ocelot; /* 0 8 */
struct regmap * target; /* 8 8 */
struct net_device * bond; /* 16 8 */
struct net_device * bridge; /* 24 8 */
const struct ocelot_bridge_vlan * pvid_vlan; /* 32 8 */
phy_interface_t phy_mode; /* 40 4 */
unsigned int ptp_skbs_in_flight; /* 44 4 */
struct sk_buff_head tx_skbs; /* 48 96 */
/* --- cacheline 2 boundary (128 bytes) was 16 bytes ago --- */
u16 mrp_ring_id; /* 144 2 */
u8 ptp_cmd; /* 146 1 */
u8 ts_id; /* 147 1 */
u8 stp_state; /* 148 1 */
bool vlan_aware; /* 149 1 */
bool is_dsa_8021q_cpu; /* 150 1 */
bool learn_ena; /* 151 1 */
bool lag_tx_active; /* 152 1 */
/* XXX 3 bytes hole, try to pack */
int bridge_num; /* 156 4 */
int speed; /* 160 4 */
/* size: 168, cachelines: 3, members: 18 */
/* sum members: 161, holes: 1, sum holes: 3 */
/* padding: 4 */
/* last cacheline: 40 bytes */
};
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2022-05-11 10:06:36 +00:00
|
|
|
u16 mrp_ring_id;
|
net: dsa: felix: create a template for the DSA tags on xmit
With this patch we try to kill 2 birds with 1 stone.
First of all, some switches that use tag_ocelot.c don't have the exact
same bitfield layout for the DSA tags. The destination ports field is
different for Seville VSC9953 for example. So the choices are to either
duplicate tag_ocelot.c into a new tag_seville.c (sub-optimal) or somehow
take into account a supposed ocelot->dest_ports_offset when packing this
field into the DSA injection header (again not ideal).
Secondly, tag_ocelot.c already needs to memset a 128-bit area to zero
and call some packing() functions of dubious performance in the
fastpath. And most of the values it needs to pack are pretty much
constant (BYPASS=1, SRC_PORT=CPU, DEST=port index). So it would be good
if we could improve that.
The proposed solution is to allocate a memory area per port at probe
time, initialize that with the statically defined bits as per chip
hardware revision, and just perform a simpler memcpy in the fastpath.
Other alternatives have been analyzed, such as:
- Create a separate tag_seville.c: too much code duplication for just 1
bit field difference.
- Create a separate DSA_TAG_PROTO_SEVILLE under tag_ocelot.c, just like
tag_brcm.c, which would have a separate .xmit function. Again, too
much code duplication for just 1 bit field difference.
- Allocate the template from the init function of the tag_ocelot.c
module, instead of from the driver: couldn't figure out a method of
accessing the correct port template corresponding to the correct
tagger in the .xmit function.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-07-13 16:57:04 +00:00
|
|
|
|
net: mscc: ocelot: minimize holes in struct ocelot_port
Reorder members of struct ocelot_port to eliminate holes and reduce
structure size. Pahole says:
Before:
struct ocelot_port {
struct ocelot * ocelot; /* 0 8 */
struct regmap * target; /* 8 8 */
bool vlan_aware; /* 16 1 */
/* XXX 7 bytes hole, try to pack */
const struct ocelot_bridge_vlan * pvid_vlan; /* 24 8 */
unsigned int ptp_skbs_in_flight; /* 32 4 */
u8 ptp_cmd; /* 36 1 */
/* XXX 3 bytes hole, try to pack */
struct sk_buff_head tx_skbs; /* 40 96 */
/* --- cacheline 2 boundary (128 bytes) was 8 bytes ago --- */
u8 ts_id; /* 136 1 */
/* XXX 3 bytes hole, try to pack */
phy_interface_t phy_mode; /* 140 4 */
bool is_dsa_8021q_cpu; /* 144 1 */
bool learn_ena; /* 145 1 */
/* XXX 6 bytes hole, try to pack */
struct net_device * bond; /* 152 8 */
bool lag_tx_active; /* 160 1 */
/* XXX 1 byte hole, try to pack */
u16 mrp_ring_id; /* 162 2 */
/* XXX 4 bytes hole, try to pack */
struct net_device * bridge; /* 168 8 */
int bridge_num; /* 176 4 */
u8 stp_state; /* 180 1 */
/* XXX 3 bytes hole, try to pack */
int speed; /* 184 4 */
/* size: 192, cachelines: 3, members: 18 */
/* sum members: 161, holes: 7, sum holes: 27 */
/* padding: 4 */
};
After:
struct ocelot_port {
struct ocelot * ocelot; /* 0 8 */
struct regmap * target; /* 8 8 */
struct net_device * bond; /* 16 8 */
struct net_device * bridge; /* 24 8 */
const struct ocelot_bridge_vlan * pvid_vlan; /* 32 8 */
phy_interface_t phy_mode; /* 40 4 */
unsigned int ptp_skbs_in_flight; /* 44 4 */
struct sk_buff_head tx_skbs; /* 48 96 */
/* --- cacheline 2 boundary (128 bytes) was 16 bytes ago --- */
u16 mrp_ring_id; /* 144 2 */
u8 ptp_cmd; /* 146 1 */
u8 ts_id; /* 147 1 */
u8 stp_state; /* 148 1 */
bool vlan_aware; /* 149 1 */
bool is_dsa_8021q_cpu; /* 150 1 */
bool learn_ena; /* 151 1 */
bool lag_tx_active; /* 152 1 */
/* XXX 3 bytes hole, try to pack */
int bridge_num; /* 156 4 */
int speed; /* 160 4 */
/* size: 168, cachelines: 3, members: 18 */
/* sum members: 161, holes: 1, sum holes: 3 */
/* padding: 4 */
/* last cacheline: 40 bytes */
};
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2022-05-11 10:06:36 +00:00
|
|
|
u8 ptp_cmd;
|
|
|
|
u8 ts_id;
|
|
|
|
|
|
|
|
u8 stp_state;
|
|
|
|
bool vlan_aware;
|
net: dsa: felix: perform switch setup for tag_8021q
Unlike sja1105, the only other user of the software-defined tag_8021q.c
tagger format, the implementation we choose for the Felix DSA switch
driver preserves full functionality under a vlan_filtering bridge
(i.e. IP termination works through the DSA user ports under all
circumstances).
The tag_8021q protocol just wants:
- Identifying the ingress switch port based on the RX VLAN ID, as seen
by the CPU. We achieve this by using the TCAM engines (which are also
used for tc-flower offload) to push the RX VLAN as a second, outer
tag, on egress towards the CPU port.
- Steering traffic injected into the switch from the network stack
towards the correct front port based on the TX VLAN, and consuming
(popping) that header on the switch's egress.
A tc-flower pseudocode of the static configuration done by the driver
would look like this:
$ tc qdisc add dev <cpu-port> clsact
$ for eth in swp0 swp1 swp2 swp3; do \
tc filter add dev <cpu-port> egress flower indev ${eth} \
action vlan push id <rxvlan> protocol 802.1ad; \
tc filter add dev <cpu-port> ingress protocol 802.1Q flower
vlan_id <txvlan> action vlan pop \
action mirred egress redirect dev ${eth}; \
done
but of course since DSA does not register network interfaces for the CPU
port, this configuration would be impossible for the user to do. Also,
due to the same reason, it is impossible for the user to inadvertently
delete these rules using tc. These rules do not collide in any way with
tc-flower, they just consume some TCAM space, which is something we can
live with.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-01-29 01:00:09 +00:00
|
|
|
bool is_dsa_8021q_cpu;
|
2021-02-12 15:15:59 +00:00
|
|
|
bool learn_ena;
|
2021-02-05 22:02:14 +00:00
|
|
|
|
2021-02-05 22:02:19 +00:00
|
|
|
bool lag_tx_active;
|
2021-03-16 20:10:18 +00:00
|
|
|
|
net: mscc: ocelot: enforce FDB isolation when VLAN-unaware
Currently ocelot uses a pvid of 0 for standalone ports and ports under a
VLAN-unaware bridge, and the pvid of the bridge for ports under a
VLAN-aware bridge. Standalone ports do not perform learning, but packets
received on them are still subject to FDB lookups. So if the MAC DA that
a standalone port receives has been also learned on a VLAN-unaware
bridge port, ocelot will attempt to forward to that port, even though it
can't, so it will drop packets.
So there is a desire to avoid that, and isolate the FDBs of different
bridges from one another, and from standalone ports.
The ocelot switch library has two distinct entry points: the felix DSA
driver and the ocelot switchdev driver.
We need to code up a minimal bridge_num allocation in the ocelot
switchdev driver too, this is copied from DSA with the exception that
ocelot does not care about DSA trees, cross-chip bridging etc. So it
only looks at its own ports that are already in the same bridge.
The ocelot switchdev driver uses the bridge_num it has allocated itself,
while the felix driver uses the bridge_num allocated by DSA. They are
both stored inside ocelot_port->bridge_num by the common function
ocelot_port_bridge_join() which receives the bridge_num passed by value.
Once we have a bridge_num, we can only use it to enforce isolation
between VLAN-unaware bridges. As far as I can see, ocelot does not have
anything like a FID that further makes VLAN 100 from a port be different
to VLAN 100 from another port with regard to FDB lookup. So we simply
deny multiple VLAN-aware bridges.
For VLAN-unaware bridges, we crop the 4000-4095 VLAN region and we
allocate a VLAN for each bridge_num. This will be used as the pvid of
each port that is under that VLAN-unaware bridge, for as long as that
bridge is VLAN-unaware.
VID 0 remains only for standalone ports. It is okay if all standalone
ports use the same VID 0, since they perform no address learning, the
FDB will contain no entry in VLAN 0, so the packets will always be
flooded to the only possible destination, the CPU port.
The CPU port module doesn't need to be member of the VLANs to receive
packets, but if we use the DSA tag_8021q protocol, those packets are
part of the data plane as far as ocelot is concerned, so there it needs
to. Just ensure that the DSA tag_8021q CPU port is a member of all
reserved VLANs when it is created, and is removed when it is deleted.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2022-02-25 09:22:25 +00:00
|
|
|
int bridge_num;
|
net: dsa: felix: enable cut-through forwarding between ports by default
The VSC9959 switch embedded within NXP LS1028A (and that version of
Ocelot switches only) supports cut-through forwarding - meaning it can
start the process of looking up the destination ports for a packet, and
forward towards those ports, before the entire packet has been received
(as opposed to the store-and-forward mode).
The up side is having lower forwarding latency for large packets. The
down side is that frames with FCS errors are forwarded instead of being
dropped. However, erroneous frames do not result in incorrect updates of
the FDB or incorrect policer updates, since these processes are deferred
inside the switch to the end of frame. Since the switch starts the
cut-through forwarding process after all packet headers (including IP,
if any) have been processed, packets with large headers and small
payload do not see the benefit of lower forwarding latency.
There are two cases that need special attention.
The first is when a packet is multicast (or flooded) to multiple
destinations, one of which doesn't have cut-through forwarding enabled.
The switch deals with this automatically by disabling cut-through
forwarding for the frame towards all destination ports.
The second is when a packet is forwarded from a port of lower link speed
towards a port of higher link speed. This is not handled by the hardware
and needs software intervention.
Since we practically need to update the cut-through forwarding domain
from paths that aren't serialized by the rtnl_mutex (phylink
mac_link_down/mac_link_up ops), this means we need to serialize physical
link events with user space updates of bonding/bridging domains.
Enabling cut-through forwarding is done per {egress port, traffic class}.
I don't see any reason why this would be a configurable option as long
as it works without issues, and there doesn't appear to be any user
space configuration tool to toggle this on/off, so this patch enables
cut-through forwarding on all eligible ports and traffic classes.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Link: https://lore.kernel.org/r/20211125125808.2383984-2-vladimir.oltean@nxp.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-11-25 12:58:08 +00:00
|
|
|
|
|
|
|
int speed;
|
2019-11-14 15:03:27 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
struct ocelot {
|
|
|
|
struct device *dev;
|
net: mscc: ocelot: register devlink ports
Add devlink integration into the mscc_ocelot switchdev driver. All
physical ports (i.e. the unused ones as well) except the CPU port module
at ocelot->num_phys_ports are registered with devlink, and that requires
keeping the devlink_port structure outside struct ocelot_port_private,
since the latter has a 1:1 mapping with a struct net_device (which does
not exist for unused ports).
Since we use devlink_port_type_eth_set to link the devlink port to the
net_device, we can as well remove the .ndo_get_phys_port_name and
.ndo_get_port_parent_id implementations, since devlink takes care of
retrieving the port name and number automatically, once
.ndo_get_devlink_port is implemented.
Note that the felix DSA driver is already integrated with devlink by
default, since that is a thing that the DSA core takes care of. This is
the reason why these devlink stubs were put in ocelot_net.c and not in
the common library. It is also the reason why ocelot::devlink is a
pointer and not a full structure embedded inside struct ocelot: because
the mscc_ocelot driver allocates that by itself (as the container of
struct ocelot, in fact), but in the case of felix, it is DSA who
allocates the devlink, and felix just propagates the pointer towards
struct ocelot.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-01-15 02:11:18 +00:00
|
|
|
struct devlink *devlink;
|
|
|
|
struct devlink_port *devlink_ports;
|
2019-11-14 15:03:27 +00:00
|
|
|
|
|
|
|
const struct ocelot_ops *ops;
|
|
|
|
struct regmap *targets[TARGET_MAX];
|
|
|
|
struct regmap_field *regfields[REGFIELD_MAX];
|
|
|
|
const u32 *const *map;
|
|
|
|
const struct ocelot_stat_layout *stats_layout;
|
2022-02-13 19:12:54 +00:00
|
|
|
struct list_head stats_regions;
|
2019-11-14 15:03:27 +00:00
|
|
|
unsigned int num_stats;
|
|
|
|
|
net: mscc: ocelot: configure watermarks using devlink-sb
Using devlink-sb, we can configure 12/16 (the important 75%) of the
switch's controlling watermarks for congestion drops, and we can monitor
50% of the watermark occupancies (we can monitor the reservation
watermarks, but not the sharing watermarks, which are exposed as pool
sizes).
The following definitions can be made:
SB_BUF=0 # The devlink-sb for frame buffers
SB_REF=1 # The devlink-sb for frame references
POOL_ING=0 # The pool for ingress traffic. Both devlink-sb instances
# have one of these.
POOL_EGR=1 # The pool for egress traffic. Both devlink-sb instances
# have one of these.
Editing the hardware watermarks is done in the following way:
BUF_xxxx_I is accessed when sb=$SB_BUF and pool=$POOL_ING
REF_xxxx_I is accessed when sb=$SB_REF and pool=$POOL_ING
BUF_xxxx_E is accessed when sb=$SB_BUF and pool=$POOL_EGR
REF_xxxx_E is accessed when sb=$SB_REF and pool=$POOL_EGR
Configuring the sharing watermarks for COL_SHR(dp=0) is done implicitly
by modifying the corresponding pool size. By default, the pool size has
maximum size, so this can be skipped.
devlink sb pool set pci/0000:00:00.5 sb $SB_BUF pool $POOL_ING \
size 129840 thtype static
Since by default there is no buffer reservation, the above command has
maxed out BUF_COL_SHR_I(dp=0).
Configuring the per-port reservation watermark (P_RSRV) is done in the
following way:
devlink sb port pool set pci/0000:00:00.5/0 sb $SB_BUF \
pool $POOL_ING th 1000
The above command sets BUF_P_RSRV_I(port 0) to 1000 bytes. After this
command, the sharing watermarks are internally reconfigured with 1000
bytes less, i.e. from 129840 bytes to 128840 bytes.
Configuring the per-port-tc reservation watermarks (Q_RSRV) is done in
the following way:
for tc in {0..7}; do
devlink sb tc bind set pci/0000:00:00.5/0 sb 0 tc $tc \
type ingress pool $POOL_ING \
th 3000
done
The above command sets BUF_Q_RSRV_I(port 0, tc 0..7) to 3000 bytes.
The sharing watermarks are again reconfigured with 24000 bytes less.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-01-15 02:11:20 +00:00
|
|
|
u32 pool_size[OCELOT_SB_NUM][OCELOT_SB_POOL_NUM];
|
2021-01-15 02:11:11 +00:00
|
|
|
int packet_buffer_size;
|
|
|
|
int num_frame_refs;
|
2020-05-03 22:20:26 +00:00
|
|
|
int num_mact_rows;
|
2019-11-14 15:03:27 +00:00
|
|
|
|
|
|
|
struct ocelot_port **ports;
|
|
|
|
|
|
|
|
u8 base_mac[ETH_ALEN];
|
|
|
|
|
net: mscc: ocelot: convert the VLAN masks to a list
First and foremost, the driver currently allocates a constant sized
4K * u32 (16KB memory) array for the VLAN masks. However, a typical
application might not need so many VLANs, so if we dynamically allocate
the memory as needed, we might actually save some space.
Secondly, we'll need to keep more advanced bookkeeping of the VLANs we
have, notably we'll have to check how many untagged and how many tagged
VLANs we have. This will have to stay in a structure, and allocating
another 16 KB array for that is again a bit too much.
So refactor the bridge VLANs in a linked list of structures.
The hook points inside the driver are ocelot_vlan_member_add() and
ocelot_vlan_member_del(), which previously used to operate on the
ocelot->vlan_mask[vid] array element.
ocelot_vlan_member_add() and ocelot_vlan_member_del() used to call
ocelot_vlan_member_set() to commit to the ocelot->vlan_mask.
Additionally, we had two calls to ocelot_vlan_member_set() from outside
those callers, and those were directly from ocelot_vlan_init().
Those calls do not set up bridging service VLANs, instead they:
- clear the VLAN table on reset
- set the port pvid to the value used by this driver for VLAN-unaware
standalone port operation (VID 0)
So now, when we have a structure which represents actual bridge VLANs,
VID 0 doesn't belong in that structure, since it is not part of the
bridging layer.
So delete the middle man, ocelot_vlan_member_set(), and let
ocelot_vlan_init() call directly ocelot_vlant_set_mask() which forgoes
any data structure and writes directly to hardware, which is all that we
need.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-10-20 17:58:49 +00:00
|
|
|
struct list_head vlans;
|
2022-02-16 14:30:10 +00:00
|
|
|
struct list_head traps;
|
2022-02-23 14:00:54 +00:00
|
|
|
struct list_head lag_fdbs;
|
2019-11-14 15:03:27 +00:00
|
|
|
|
net: mscc: ocelot: fix dropping of unknown IPv4 multicast on Seville
The current assumption is that the felix DSA driver has flooding knobs
per traffic class, while ocelot switchdev has a single flooding knob.
This was correct for felix VSC9959 and ocelot VSC7514, but with the
introduction of seville VSC9953, we see a switch driven by felix.c which
has a single flooding knob.
So it is clear that we must do what should have been done from the
beginning, which is not to overwrite the configuration done by ocelot.c
in felix, but instead to teach the common ocelot library about the
differences in our switches, and set up the flooding PGIDs centrally.
The effect that the bogus iteration through FELIX_NUM_TC has upon
seville is quite dramatic. ANA_FLOODING is located at 0x00b548, and
ANA_FLOODING_IPMC is located at 0x00b54c. So the bogus iteration will
actually overwrite ANA_FLOODING_IPMC when attempting to write
ANA_FLOODING[1]. There is no ANA_FLOODING[1] in sevile, just ANA_FLOODING.
And when ANA_FLOODING_IPMC is overwritten with a bogus value, the effect
is that ANA_FLOODING_IPMC gets the value of 0x0003CF7D:
MC6_DATA = 61,
MC6_CTRL = 61,
MC4_DATA = 60,
MC4_CTRL = 0.
Because MC4_CTRL is zero, this means that IPv4 multicast control packets
are not flooded, but dropped. An invalid configuration, and this is how
the issue was actually spotted.
Reported-by: Eldar Gasanov <eldargasanov2@gmail.com>
Reported-by: Maxim Kochetkov <fido_max@inbox.ru>
Tested-by: Eldar Gasanov <eldargasanov2@gmail.com>
Fixes: 84705fc16552 ("net: dsa: felix: introduce support for Seville VSC9953 switch")
Fixes: 3c7b51bd39b2 ("net: dsa: felix: allow flooding for all traffic classes")
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Link: https://lore.kernel.org/r/20201204175416.1445937-1-vladimir.oltean@nxp.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2020-12-04 17:54:16 +00:00
|
|
|
/* Switches like VSC9959 have flooding per traffic class */
|
|
|
|
int num_flooding_pgids;
|
|
|
|
|
net: mscc: ocelot: eliminate confusion between CPU and NPI port
Ocelot has the concept of a CPU port. The CPU port is represented in the
forwarding and the queueing system, but it is not a physical device. The
CPU port can either be accessed via register-based injection/extraction
(which is the case of Ocelot), via Frame-DMA (similar to the first one),
or "connected" to a physical Ethernet port (called NPI in the datasheet)
which is the case of the Felix DSA switch.
In Ocelot the CPU port is at index 11.
In Felix the CPU port is at index 6.
The CPU bit is treated special in the forwarding, as it is never cleared
from the forwarding port mask (once added to it). Other than that, it is
treated the same as a normal front port.
Both Felix and Ocelot should use the CPU port in the same way. This
means that Felix should not use the NPI port directly when forwarding to
the CPU, but instead use the CPU port.
This patch is fixing this such that Felix will use port 6 as its CPU
port, and just use the NPI port to carry the traffic.
Therefore, eliminate the "ocelot->cpu" variable which was holding the
index of the NPI port for Felix, and the index of the CPU port module
for Ocelot, so the variable was actually configuring different things
for different drivers and causing at least part of the confusion.
Also remove the "ocelot->num_cpu_ports" variable, which is the result of
another confusion. The 2 CPU ports mentioned in the datasheet are
because there are two frame extraction channels (register based or DMA
based). This is of no relevance to the driver at the moment, and
invisible to the analyzer module.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Suggested-by: Allan W. Nielsen <allan.nielsen@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-02-29 14:50:02 +00:00
|
|
|
/* In tables like ANA:PORT and the ANA:PGID:PGID mask,
|
|
|
|
* the CPU is located after the physical ports (at the
|
|
|
|
* num_phys_ports index).
|
|
|
|
*/
|
2019-11-14 15:03:27 +00:00
|
|
|
u8 num_phys_ports;
|
|
|
|
|
2020-03-27 19:55:47 +00:00
|
|
|
int npi;
|
|
|
|
|
2021-01-29 01:00:03 +00:00
|
|
|
enum ocelot_tag_prefix npi_inj_prefix;
|
|
|
|
enum ocelot_tag_prefix npi_xtr_prefix;
|
2020-03-27 19:55:47 +00:00
|
|
|
|
net: mscc: ocelot: enforce FDB isolation when VLAN-unaware
Currently ocelot uses a pvid of 0 for standalone ports and ports under a
VLAN-unaware bridge, and the pvid of the bridge for ports under a
VLAN-aware bridge. Standalone ports do not perform learning, but packets
received on them are still subject to FDB lookups. So if the MAC DA that
a standalone port receives has been also learned on a VLAN-unaware
bridge port, ocelot will attempt to forward to that port, even though it
can't, so it will drop packets.
So there is a desire to avoid that, and isolate the FDBs of different
bridges from one another, and from standalone ports.
The ocelot switch library has two distinct entry points: the felix DSA
driver and the ocelot switchdev driver.
We need to code up a minimal bridge_num allocation in the ocelot
switchdev driver too, this is copied from DSA with the exception that
ocelot does not care about DSA trees, cross-chip bridging etc. So it
only looks at its own ports that are already in the same bridge.
The ocelot switchdev driver uses the bridge_num it has allocated itself,
while the felix driver uses the bridge_num allocated by DSA. They are
both stored inside ocelot_port->bridge_num by the common function
ocelot_port_bridge_join() which receives the bridge_num passed by value.
Once we have a bridge_num, we can only use it to enforce isolation
between VLAN-unaware bridges. As far as I can see, ocelot does not have
anything like a FID that further makes VLAN 100 from a port be different
to VLAN 100 from another port with regard to FDB lookup. So we simply
deny multiple VLAN-aware bridges.
For VLAN-unaware bridges, we crop the 4000-4095 VLAN region and we
allocate a VLAN for each bridge_num. This will be used as the pvid of
each port that is under that VLAN-unaware bridge, for as long as that
bridge is VLAN-unaware.
VID 0 remains only for standalone ports. It is okay if all standalone
ports use the same VID 0, since they perform no address learning, the
FDB will contain no entry in VLAN 0, so the packets will always be
flooded to the only possible destination, the CPU port.
The CPU port module doesn't need to be member of the VLANs to receive
packets, but if we use the DSA tag_8021q protocol, those packets are
part of the data plane as far as ocelot is concerned, so there it needs
to. Just ensure that the DSA tag_8021q CPU port is a member of all
reserved VLANs when it is created, and is removed when it is deleted.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2022-02-25 09:22:25 +00:00
|
|
|
unsigned long bridges;
|
|
|
|
|
2019-11-14 15:03:27 +00:00
|
|
|
struct list_head multicast;
|
net: mscc: ocelot: support L2 multicast entries
There is one main difference in mscc_ocelot between IP multicast and L2
multicast. With IP multicast, destination ports are encoded into the
upper bytes of the multicast MAC address. Example: to deliver the
address 01:00:5E:11:22:33 to ports 3, 8, and 9, one would need to
program the address of 00:03:08:11:22:33 into hardware. Whereas for L2
multicast, the MAC table entry points to a Port Group ID (PGID), and
that PGID contains the port mask that the packet will be forwarded to.
As to why it is this way, no clue. My guess is that not all port
combinations can be supported simultaneously with the limited number of
PGIDs, and this was somehow an issue for IP multicast but not for L2
multicast. Anyway.
Prior to this change, the raw L2 multicast code was bogus, due to the
fact that there wasn't really any way to test it using the bridge code.
There were 2 issues:
- A multicast PGID was allocated for each MDB entry, but it wasn't in
fact programmed to hardware. It was dummy.
- In fact we don't want to reserve a multicast PGID for every single MDB
entry. That would be odd because we can only have ~60 PGIDs, but
thousands of MDB entries. So instead, we want to reserve a multicast
PGID for every single port combination for multicast traffic. And
since we can have 2 (or more) MDB entries delivered to the same port
group (and therefore PGID), we need to reference-count the PGIDs.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2020-10-29 02:27:38 +00:00
|
|
|
struct list_head pgids;
|
2019-11-14 15:03:27 +00:00
|
|
|
|
net: mscc: ocelot: create TCAM skeleton from tc filter chains
For Ocelot switches, there are 2 ingress pipelines for flow offload
rules: VCAP IS1 (Ingress Classification) and IS2 (Security Enforcement).
IS1 and IS2 support different sets of actions. The pipeline order for a
packet on ingress is:
Basic classification -> VCAP IS1 -> VCAP IS2
Furthermore, IS1 is looked up 3 times, and IS2 is looked up twice (each
TCAM entry can be configured to match only on the first lookup, or only
on the second, or on both etc).
Because the TCAMs are completely independent in hardware, and because of
the fixed pipeline, we actually have very limited options when it comes
to offloading complex rules to them while still maintaining the same
semantics with the software data path.
This patch maps flow offload rules to ingress TCAMs according to a
predefined chain index number. There is going to be a script in
selftests that clarifies the usage model.
There is also an egress TCAM (VCAP ES0, the Egress Rewriter), which is
modeled on top of the default chain 0 of the egress qdisc, because it
doesn't have multiple lookups.
Suggested-by: Allan W. Nielsen <allan.nielsen@microchip.com>
Co-developed-by: Xiaoliang Yang <xiaoliang.yang_1@nxp.com>
Signed-off-by: Xiaoliang Yang <xiaoliang.yang_1@nxp.com>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-10-02 12:02:22 +00:00
|
|
|
struct list_head dummy_rules;
|
|
|
|
struct ocelot_vcap_block block[3];
|
2021-11-18 10:12:02 +00:00
|
|
|
struct ocelot_vcap_policer vcap_pol;
|
2020-09-29 22:27:26 +00:00
|
|
|
struct vcap_props *vcap;
|
net: mscc: ocelot: add port mirroring support using tc-matchall
Ocelot switches perform port-based ingress mirroring if
ANA:PORT:PORT_CFG field SRC_MIRROR_ENA is set, and egress mirroring if
the port is in ANA:ANA:EMIRRORPORTS.
Both ingress-mirrored and egress-mirrored frames are copied to the port
mask from ANA:ANA:MIRRORPORTS.
So the choice of limiting to a single mirror port via ocelot_mirror_get()
and ocelot_mirror_put() may seem bizarre, but the hardware model doesn't
map very well to the user space model. If the user wants to mirror the
ingress of swp1 towards swp2 and the ingress of swp3 towards swp4, we'd
have to program ANA:ANA:MIRRORPORTS with BIT(2) | BIT(4), and that would
make swp1 be mirrored towards swp4 too, and swp3 towards swp2. But there
are no tc-matchall rules to describe those actions.
Now, we could offload a matchall rule with multiple mirred actions, one
per desired mirror port, and force the user to stick to the multi-action
rule format for subsequent matchall filters. But both DSA and ocelot
have the flow_offload_has_one_action() check for the matchall offload,
plus the fact that it will get cumbersome to cross-check matchall
mirrors with flower mirrors (which will be added in the next patch).
As a result, we limit the configuration to a single mirror port, with
the possibility of lifting the restriction in the future.
Frames injected from the CPU don't get egress-mirrored, since they are
sent with the BYPASS bit in the injection frame header, and this
bypasses the analyzer module (effectively also the mirroring logic).
I don't know what to do/say about this.
Functionality was tested with:
tc qdisc add dev swp3 clsact
tc filter add dev swp3 ingress \
matchall skip_sw \
action mirred egress mirror dev swp1
and pinging through swp3, while seeing that the ICMP replies are
mirrored towards swp1.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2022-03-16 20:41:40 +00:00
|
|
|
struct ocelot_mirror *mirror;
|
2020-02-29 14:31:10 +00:00
|
|
|
|
2021-11-18 10:12:00 +00:00
|
|
|
struct ocelot_psfp_list psfp;
|
|
|
|
|
2019-11-14 15:03:27 +00:00
|
|
|
/* Workqueue to check statistics for overflow with its lock */
|
|
|
|
struct mutex stats_lock;
|
|
|
|
u64 *stats;
|
|
|
|
struct delayed_work stats_work;
|
|
|
|
struct workqueue_struct *stats_queue;
|
|
|
|
|
2021-10-24 17:17:51 +00:00
|
|
|
/* Lock for serializing access to the MAC table */
|
|
|
|
struct mutex mact_lock;
|
net: dsa: felix: enable cut-through forwarding between ports by default
The VSC9959 switch embedded within NXP LS1028A (and that version of
Ocelot switches only) supports cut-through forwarding - meaning it can
start the process of looking up the destination ports for a packet, and
forward towards those ports, before the entire packet has been received
(as opposed to the store-and-forward mode).
The up side is having lower forwarding latency for large packets. The
down side is that frames with FCS errors are forwarded instead of being
dropped. However, erroneous frames do not result in incorrect updates of
the FDB or incorrect policer updates, since these processes are deferred
inside the switch to the end of frame. Since the switch starts the
cut-through forwarding process after all packet headers (including IP,
if any) have been processed, packets with large headers and small
payload do not see the benefit of lower forwarding latency.
There are two cases that need special attention.
The first is when a packet is multicast (or flooded) to multiple
destinations, one of which doesn't have cut-through forwarding enabled.
The switch deals with this automatically by disabling cut-through
forwarding for the frame towards all destination ports.
The second is when a packet is forwarded from a port of lower link speed
towards a port of higher link speed. This is not handled by the hardware
and needs software intervention.
Since we practically need to update the cut-through forwarding domain
from paths that aren't serialized by the rtnl_mutex (phylink
mac_link_down/mac_link_up ops), this means we need to serialize physical
link events with user space updates of bonding/bridging domains.
Enabling cut-through forwarding is done per {egress port, traffic class}.
I don't see any reason why this would be a configurable option as long
as it works without issues, and there doesn't appear to be any user
space configuration tool to toggle this on/off, so this patch enables
cut-through forwarding on all eligible ports and traffic classes.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Link: https://lore.kernel.org/r/20211125125808.2383984-2-vladimir.oltean@nxp.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-11-25 12:58:08 +00:00
|
|
|
/* Lock for serializing forwarding domain changes */
|
|
|
|
struct mutex fwd_domain_lock;
|
2021-10-24 17:17:51 +00:00
|
|
|
|
2020-12-12 19:16:12 +00:00
|
|
|
struct workqueue_struct *owq;
|
|
|
|
|
2019-11-14 15:03:27 +00:00
|
|
|
u8 ptp:1;
|
|
|
|
struct ptp_clock *ptp_clock;
|
|
|
|
struct ptp_clock_info ptp_info;
|
|
|
|
struct hwtstamp_config hwtstamp_config;
|
2021-10-12 11:40:36 +00:00
|
|
|
unsigned int ptp_skbs_in_flight;
|
|
|
|
/* Protects the 2-step TX timestamp ID logic */
|
|
|
|
spinlock_t ts_id_lock;
|
2019-11-14 15:03:27 +00:00
|
|
|
/* Protects the PTP interface state */
|
|
|
|
struct mutex ptp_lock;
|
|
|
|
/* Protects the PTP clock */
|
|
|
|
spinlock_t ptp_clock_lock;
|
2020-04-20 02:46:49 +00:00
|
|
|
struct ptp_pin_desc ptp_pins[OCELOT_PTP_PINS_NUM];
|
2021-12-09 15:49:11 +00:00
|
|
|
|
|
|
|
struct ocelot_fdma *fdma;
|
2019-11-14 15:03:27 +00:00
|
|
|
};
|
|
|
|
|
2020-03-29 11:52:00 +00:00
|
|
|
struct ocelot_policer {
|
|
|
|
u32 rate; /* kilobit per second */
|
|
|
|
u32 burst; /* bytes */
|
|
|
|
};
|
|
|
|
|
2022-02-13 19:12:53 +00:00
|
|
|
#define ocelot_bulk_read_rix(ocelot, reg, ri, buf, count) \
|
|
|
|
__ocelot_bulk_read_ix(ocelot, reg, reg##_RSZ * (ri), buf, count)
|
|
|
|
|
2022-02-13 19:12:52 +00:00
|
|
|
#define ocelot_read_ix(ocelot, reg, gi, ri) \
|
|
|
|
__ocelot_read_ix(ocelot, reg, reg##_GSZ * (gi) + reg##_RSZ * (ri))
|
|
|
|
#define ocelot_read_gix(ocelot, reg, gi) \
|
|
|
|
__ocelot_read_ix(ocelot, reg, reg##_GSZ * (gi))
|
|
|
|
#define ocelot_read_rix(ocelot, reg, ri) \
|
|
|
|
__ocelot_read_ix(ocelot, reg, reg##_RSZ * (ri))
|
|
|
|
#define ocelot_read(ocelot, reg) \
|
|
|
|
__ocelot_read_ix(ocelot, reg, 0)
|
|
|
|
|
|
|
|
#define ocelot_write_ix(ocelot, val, reg, gi, ri) \
|
|
|
|
__ocelot_write_ix(ocelot, val, reg, reg##_GSZ * (gi) + reg##_RSZ * (ri))
|
|
|
|
#define ocelot_write_gix(ocelot, val, reg, gi) \
|
|
|
|
__ocelot_write_ix(ocelot, val, reg, reg##_GSZ * (gi))
|
|
|
|
#define ocelot_write_rix(ocelot, val, reg, ri) \
|
|
|
|
__ocelot_write_ix(ocelot, val, reg, reg##_RSZ * (ri))
|
2019-11-14 15:03:27 +00:00
|
|
|
#define ocelot_write(ocelot, val, reg) __ocelot_write_ix(ocelot, val, reg, 0)
|
|
|
|
|
2022-02-13 19:12:52 +00:00
|
|
|
#define ocelot_rmw_ix(ocelot, val, m, reg, gi, ri) \
|
|
|
|
__ocelot_rmw_ix(ocelot, val, m, reg, reg##_GSZ * (gi) + reg##_RSZ * (ri))
|
|
|
|
#define ocelot_rmw_gix(ocelot, val, m, reg, gi) \
|
|
|
|
__ocelot_rmw_ix(ocelot, val, m, reg, reg##_GSZ * (gi))
|
|
|
|
#define ocelot_rmw_rix(ocelot, val, m, reg, ri) \
|
|
|
|
__ocelot_rmw_ix(ocelot, val, m, reg, reg##_RSZ * (ri))
|
2019-11-14 15:03:27 +00:00
|
|
|
#define ocelot_rmw(ocelot, val, m, reg) __ocelot_rmw_ix(ocelot, val, m, reg, 0)
|
|
|
|
|
2022-02-13 19:12:52 +00:00
|
|
|
#define ocelot_field_write(ocelot, reg, val) \
|
|
|
|
regmap_field_write((ocelot)->regfields[(reg)], (val))
|
|
|
|
#define ocelot_field_read(ocelot, reg, val) \
|
|
|
|
regmap_field_read((ocelot)->regfields[(reg)], (val))
|
|
|
|
#define ocelot_fields_write(ocelot, id, reg, val) \
|
|
|
|
regmap_fields_write((ocelot)->regfields[(reg)], (id), (val))
|
|
|
|
#define ocelot_fields_read(ocelot, id, reg, val) \
|
|
|
|
regmap_fields_read((ocelot)->regfields[(reg)], (id), (val))
|
net: mscc: ocelot: convert QSYS_SWITCH_PORT_MODE and SYS_PORT_MODE to regfields
Currently Felix and Ocelot share the same bit layout in these per-port
registers, but Seville does not. So we need reg_fields for that.
Actually since these are per-port registers, we need to also specify the
number of ports, and register size per port, and use the regmap API for
multiple ports.
There's a more subtle point to be made about the other 2 register
fields:
- QSYS_SWITCH_PORT_MODE_SCH_NEXT_CFG
- QSYS_SWITCH_PORT_MODE_INGRESS_DROP_MODE
which we are not writing any longer, for 2 reasons:
- Using the previous API (ocelot_write_rix), we were only writing 1 for
Felix and Ocelot, which was their hardware-default value, and which
there wasn't any intention in changing.
- In the case of SCH_NEXT_CFG, in fact Seville does not have this
register field at all, and therefore, if we want to have common code
we would be required to not write to it.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-07-13 16:57:03 +00:00
|
|
|
|
net: mscc: ocelot: introduce a new ocelot_target_{read,write} API
There are some targets (register blocks) in the Ocelot switch that are
instantiated more than once. For example, the VCAP IS1, IS2 and ES0
blocks all share the same register layout for interacting with the cache
for the TCAM and the action RAM.
For the VCAPs, the procedure for servicing them is actually common. We
just need an API specifying which VCAP we are talking to, and we do that
via these raw ocelot_target_read and ocelot_target_write accessors.
In plain ocelot_read, the target is encoded into the register enum
itself:
u16 target = reg >> TARGET_OFFSET;
For the VCAPs, the registers are currently defined like this:
enum ocelot_reg {
[...]
S2_CORE_UPDATE_CTRL = S2 << TARGET_OFFSET,
S2_CORE_MV_CFG,
S2_CACHE_ENTRY_DAT,
S2_CACHE_MASK_DAT,
S2_CACHE_ACTION_DAT,
S2_CACHE_CNT_DAT,
S2_CACHE_TG_DAT,
[...]
};
which is precisely what we want to avoid, because we'd have to duplicate
the same register map for S1 and for S0, and then figure out how to pass
VCAP instance-specific registers to the ocelot_read calls (basically
another lookup table that undoes the effect of shifting with
TARGET_OFFSET).
So for some targets, propose a more raw API, similar to what is
currently done with ocelot_port_readl and ocelot_port_writel. Those
targets can only be accessed with ocelot_target_{read,write} and not
with ocelot_{read,write} after the conversion, which is fine.
The VCAP registers are not actually modified to use this new API as of
this patch. They will be modified in the next one.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Acked-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-09-29 22:27:21 +00:00
|
|
|
#define ocelot_target_read_ix(ocelot, target, reg, gi, ri) \
|
|
|
|
__ocelot_target_read_ix(ocelot, target, reg, reg##_GSZ * (gi) + reg##_RSZ * (ri))
|
|
|
|
#define ocelot_target_read_gix(ocelot, target, reg, gi) \
|
|
|
|
__ocelot_target_read_ix(ocelot, target, reg, reg##_GSZ * (gi))
|
|
|
|
#define ocelot_target_read_rix(ocelot, target, reg, ri) \
|
|
|
|
__ocelot_target_read_ix(ocelot, target, reg, reg##_RSZ * (ri))
|
|
|
|
#define ocelot_target_read(ocelot, target, reg) \
|
|
|
|
__ocelot_target_read_ix(ocelot, target, reg, 0)
|
|
|
|
|
|
|
|
#define ocelot_target_write_ix(ocelot, target, val, reg, gi, ri) \
|
|
|
|
__ocelot_target_write_ix(ocelot, target, val, reg, reg##_GSZ * (gi) + reg##_RSZ * (ri))
|
|
|
|
#define ocelot_target_write_gix(ocelot, target, val, reg, gi) \
|
|
|
|
__ocelot_target_write_ix(ocelot, target, val, reg, reg##_GSZ * (gi))
|
|
|
|
#define ocelot_target_write_rix(ocelot, target, val, reg, ri) \
|
|
|
|
__ocelot_target_write_ix(ocelot, target, val, reg, reg##_RSZ * (ri))
|
|
|
|
#define ocelot_target_write(ocelot, target, val, reg) \
|
|
|
|
__ocelot_target_write_ix(ocelot, target, val, reg, 0)
|
|
|
|
|
2019-11-14 15:03:27 +00:00
|
|
|
/* I/O */
|
|
|
|
u32 ocelot_port_readl(struct ocelot_port *port, u32 reg);
|
|
|
|
void ocelot_port_writel(struct ocelot_port *port, u32 val, u32 reg);
|
net: dsa: felix: implement port flushing on .phylink_mac_link_down
There are several issues which may be seen when the link goes down while
forwarding traffic, all of which can be attributed to the fact that the
port flushing procedure from the reference manual was not closely
followed.
With flow control enabled on both the ingress port and the egress port,
it may happen when a link goes down that Ethernet packets are in flight.
In flow control mode, frames are held back and not dropped. When there
is enough traffic in flight (example: iperf3 TCP), then the ingress port
might enter congestion and never exit that state. This is a problem,
because it is the egress port's link that went down, and that has caused
the inability of the ingress port to send packets to any other port.
This is solved by flushing the egress port's queues when it goes down.
There is also a problem when performing stream splitting for
IEEE 802.1CB traffic (not yet upstream, but a sort of multicast,
basically). There, if one port from the destination ports mask goes
down, splitting the stream towards the other destinations will no longer
be performed. This can be traced down to this line:
ocelot_port_writel(ocelot_port, 0, DEV_MAC_ENA_CFG);
which should have been instead, as per the reference manual:
ocelot_port_rmwl(ocelot_port, 0, DEV_MAC_ENA_CFG_RX_ENA,
DEV_MAC_ENA_CFG);
Basically only DEV_MAC_ENA_CFG_RX_ENA should be disabled, but not
DEV_MAC_ENA_CFG_TX_ENA - I don't have further insight into why that is
the case, but apparently multicasting to several ports will cause issues
if at least one of them doesn't have DEV_MAC_ENA_CFG_TX_ENA set.
I am not sure what the state of the Ocelot VSC7514 driver is, but
probably not as bad as Felix/Seville, since VSC7514 uses phylib and has
the following in ocelot_adjust_link:
if (!phydev->link)
return;
therefore the port is not really put down when the link is lost, unlike
the DSA drivers which use .phylink_mac_link_down for that.
Nonetheless, I put ocelot_port_flush() in the common ocelot.c because it
needs to access some registers from drivers/net/ethernet/mscc/ocelot_rew.h
which are not exported in include/soc/mscc/ and a bugfix patch should
probably not move headers around.
Fixes: bdeced75b13f ("net: dsa: felix: Add PCS operations for PHYLINK")
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-02-08 17:36:27 +00:00
|
|
|
void ocelot_port_rmwl(struct ocelot_port *port, u32 val, u32 mask, u32 reg);
|
2022-02-13 19:12:53 +00:00
|
|
|
int __ocelot_bulk_read_ix(struct ocelot *ocelot, u32 reg, u32 offset, void *buf,
|
|
|
|
int count);
|
2019-11-14 15:03:27 +00:00
|
|
|
u32 __ocelot_read_ix(struct ocelot *ocelot, u32 reg, u32 offset);
|
|
|
|
void __ocelot_write_ix(struct ocelot *ocelot, u32 val, u32 reg, u32 offset);
|
|
|
|
void __ocelot_rmw_ix(struct ocelot *ocelot, u32 val, u32 mask, u32 reg,
|
|
|
|
u32 offset);
|
net: mscc: ocelot: introduce a new ocelot_target_{read,write} API
There are some targets (register blocks) in the Ocelot switch that are
instantiated more than once. For example, the VCAP IS1, IS2 and ES0
blocks all share the same register layout for interacting with the cache
for the TCAM and the action RAM.
For the VCAPs, the procedure for servicing them is actually common. We
just need an API specifying which VCAP we are talking to, and we do that
via these raw ocelot_target_read and ocelot_target_write accessors.
In plain ocelot_read, the target is encoded into the register enum
itself:
u16 target = reg >> TARGET_OFFSET;
For the VCAPs, the registers are currently defined like this:
enum ocelot_reg {
[...]
S2_CORE_UPDATE_CTRL = S2 << TARGET_OFFSET,
S2_CORE_MV_CFG,
S2_CACHE_ENTRY_DAT,
S2_CACHE_MASK_DAT,
S2_CACHE_ACTION_DAT,
S2_CACHE_CNT_DAT,
S2_CACHE_TG_DAT,
[...]
};
which is precisely what we want to avoid, because we'd have to duplicate
the same register map for S1 and for S0, and then figure out how to pass
VCAP instance-specific registers to the ocelot_read calls (basically
another lookup table that undoes the effect of shifting with
TARGET_OFFSET).
So for some targets, propose a more raw API, similar to what is
currently done with ocelot_port_readl and ocelot_port_writel. Those
targets can only be accessed with ocelot_target_{read,write} and not
with ocelot_{read,write} after the conversion, which is fine.
The VCAP registers are not actually modified to use this new API as of
this patch. They will be modified in the next one.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Acked-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-09-29 22:27:21 +00:00
|
|
|
u32 __ocelot_target_read_ix(struct ocelot *ocelot, enum ocelot_target target,
|
|
|
|
u32 reg, u32 offset);
|
|
|
|
void __ocelot_target_write_ix(struct ocelot *ocelot, enum ocelot_target target,
|
|
|
|
u32 val, u32 reg, u32 offset);
|
2019-11-14 15:03:27 +00:00
|
|
|
|
2021-04-27 04:22:03 +00:00
|
|
|
/* Packet I/O */
|
2021-02-13 22:37:54 +00:00
|
|
|
bool ocelot_can_inject(struct ocelot *ocelot, int grp);
|
|
|
|
void ocelot_port_inject_frame(struct ocelot *ocelot, int port, int grp,
|
|
|
|
u32 rew_op, struct sk_buff *skb);
|
2021-12-09 15:49:08 +00:00
|
|
|
void ocelot_ifh_port_set(void *ifh, int port, u32 rew_op, u32 vlan_tag);
|
2021-02-13 22:37:59 +00:00
|
|
|
int ocelot_xtr_poll_frame(struct ocelot *ocelot, int grp, struct sk_buff **skb);
|
net: dsa: tag_ocelot_8021q: add support for PTP timestamping
For TX timestamping, we use the felix_txtstamp method which is common
with the regular (non-8021q) ocelot tagger. This method says that skb
deferral is needed, prepares a timestamp request ID, and puts a clone of
the skb in a queue waiting for the timestamp IRQ.
felix_txtstamp is called by dsa_skb_tx_timestamp() just before the
tagger's xmit method. In the tagger xmit, we divert the packets
classified by dsa_skb_tx_timestamp() as PTP towards the MMIO-based
injection registers, and we declare them as dead towards dsa_slave_xmit.
If not PTP, we proceed with normal tag_8021q stuff.
Then the timestamp IRQ fires, the clone queued up from felix_txtstamp is
matched to the TX timestamp retrieved from the switch's FIFO based on
the timestamp request ID, and the clone is delivered to the stack.
On RX, thanks to the VCAP IS2 rule that redirects the frames with an
EtherType for 1588 towards two destinations:
- the CPU port module (for MMIO based extraction) and
- if the "no XTR IRQ" workaround is in place, the dsa_8021q CPU port
the relevant data path processing starts in the ptp_classify_raw BPF
classifier installed by DSA in the RX data path (post tagger, which is
completely unaware that it saw a PTP packet).
This time we can't reuse the same implementation of .port_rxtstamp that
also works with the default ocelot tagger. That is because felix_rxtstamp
is given an skb with a freshly stripped DSA header, and it says "I don't
need deferral for its RX timestamp, it's right in it, let me show you";
and it just points to the header right behind skb->data, from where it
unpacks the timestamp and annotates the skb with it.
The same thing cannot happen with tag_ocelot_8021q, because for one
thing, the skb did not have an extraction frame header in the first
place, but a VLAN tag with no timestamp information. So the code paths
in felix_rxtstamp for the regular and 8021q tagger are completely
independent. With tag_8021q, the timestamp must come from the packet's
duplicate delivered to the CPU port module, but there is potentially
complex logic to be handled [ and prone to reordering ] if we were to
just start reading packets from the CPU port module, and try to match
them to the one we received over Ethernet and which needs an RX
timestamp. So we do something simple: we tell DSA "give me some time to
think" (we request skb deferral by returning false from .port_rxtstamp)
and we just drop the frame we got over Ethernet with no attempt to match
it to anything - we just treat it as a notification that there's data to
be processed from the CPU port module's queues. Then we proceed to read
the packets from those, one by one, which we deliver up the stack,
timestamped, using netif_rx - the same function that any driver would
use anyway if it needed RX timestamp deferral. So the assumption is that
we'll come across the PTP packet that triggered the CPU extraction
notification eventually, but we don't know when exactly. Thanks to the
VCAP IS2 trap/redirect rule and the exclusion of the CPU port module
from the flooding replicators, only PTP frames should be present in the
CPU port module's RX queues anyway.
There is just one conflict between the VCAP IS2 trapping rule and the
semantics of the BPF classifier. Namely, ptp_classify_raw() deems
general messages as non-timestampable, but still, those are trapped to
the CPU port module since they have an EtherType of ETH_P_1588. So, if
the "no XTR IRQ" workaround is in place, we need to run another BPF
classifier on the frames extracted over MMIO, to avoid duplicates being
sent to the stack (once over Ethernet, once over MMIO). It doesn't look
like it's possible to install VCAP IS2 rules based on keys extracted
from the 1588 frame headers.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-02-13 22:38:01 +00:00
|
|
|
void ocelot_drain_cpu_queue(struct ocelot *ocelot, int grp);
|
2021-12-09 15:49:09 +00:00
|
|
|
void ocelot_ptp_rx_timestamp(struct ocelot *ocelot, struct sk_buff *skb,
|
|
|
|
u64 timestamp);
|
2021-02-13 22:37:54 +00:00
|
|
|
|
2019-11-14 15:03:27 +00:00
|
|
|
/* Hardware initialization */
|
|
|
|
int ocelot_regfields_init(struct ocelot *ocelot,
|
|
|
|
const struct reg_field *const regfields);
|
|
|
|
struct regmap *ocelot_regmap_init(struct ocelot *ocelot, struct resource *res);
|
|
|
|
int ocelot_init(struct ocelot *ocelot);
|
|
|
|
void ocelot_deinit(struct ocelot *ocelot);
|
|
|
|
void ocelot_init_port(struct ocelot *ocelot, int port);
|
2020-09-18 01:07:30 +00:00
|
|
|
void ocelot_deinit_port(struct ocelot *ocelot, int port);
|
2019-11-14 15:03:27 +00:00
|
|
|
|
net: mscc: ocelot: enforce FDB isolation when VLAN-unaware
Currently ocelot uses a pvid of 0 for standalone ports and ports under a
VLAN-unaware bridge, and the pvid of the bridge for ports under a
VLAN-aware bridge. Standalone ports do not perform learning, but packets
received on them are still subject to FDB lookups. So if the MAC DA that
a standalone port receives has been also learned on a VLAN-unaware
bridge port, ocelot will attempt to forward to that port, even though it
can't, so it will drop packets.
So there is a desire to avoid that, and isolate the FDBs of different
bridges from one another, and from standalone ports.
The ocelot switch library has two distinct entry points: the felix DSA
driver and the ocelot switchdev driver.
We need to code up a minimal bridge_num allocation in the ocelot
switchdev driver too, this is copied from DSA with the exception that
ocelot does not care about DSA trees, cross-chip bridging etc. So it
only looks at its own ports that are already in the same bridge.
The ocelot switchdev driver uses the bridge_num it has allocated itself,
while the felix driver uses the bridge_num allocated by DSA. They are
both stored inside ocelot_port->bridge_num by the common function
ocelot_port_bridge_join() which receives the bridge_num passed by value.
Once we have a bridge_num, we can only use it to enforce isolation
between VLAN-unaware bridges. As far as I can see, ocelot does not have
anything like a FID that further makes VLAN 100 from a port be different
to VLAN 100 from another port with regard to FDB lookup. So we simply
deny multiple VLAN-aware bridges.
For VLAN-unaware bridges, we crop the 4000-4095 VLAN region and we
allocate a VLAN for each bridge_num. This will be used as the pvid of
each port that is under that VLAN-unaware bridge, for as long as that
bridge is VLAN-unaware.
VID 0 remains only for standalone ports. It is okay if all standalone
ports use the same VID 0, since they perform no address learning, the
FDB will contain no entry in VLAN 0, so the packets will always be
flooded to the only possible destination, the CPU port.
The CPU port module doesn't need to be member of the VLANs to receive
packets, but if we use the DSA tag_8021q protocol, those packets are
part of the data plane as far as ocelot is concerned, so there it needs
to. Just ensure that the DSA tag_8021q CPU port is a member of all
reserved VLANs when it is created, and is removed when it is deleted.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2022-02-25 09:22:25 +00:00
|
|
|
void ocelot_port_set_dsa_8021q_cpu(struct ocelot *ocelot, int port);
|
|
|
|
void ocelot_port_unset_dsa_8021q_cpu(struct ocelot *ocelot, int port);
|
|
|
|
|
2019-11-14 15:03:27 +00:00
|
|
|
/* DSA callbacks */
|
|
|
|
void ocelot_get_strings(struct ocelot *ocelot, int port, u32 sset, u8 *data);
|
|
|
|
void ocelot_get_ethtool_stats(struct ocelot *ocelot, int port, u64 *data);
|
|
|
|
int ocelot_get_sset_count(struct ocelot *ocelot, int port, int sset);
|
|
|
|
int ocelot_get_ts_info(struct ocelot *ocelot, int port,
|
|
|
|
struct ethtool_ts_info *info);
|
|
|
|
void ocelot_set_ageing_time(struct ocelot *ocelot, unsigned int msecs);
|
2021-08-19 17:40:07 +00:00
|
|
|
int ocelot_port_vlan_filtering(struct ocelot *ocelot, int port, bool enabled,
|
|
|
|
struct netlink_ext_ack *extack);
|
2019-11-14 15:03:27 +00:00
|
|
|
void ocelot_bridge_stp_state_set(struct ocelot *ocelot, int port, u8 state);
|
net: dsa: felix: enable cut-through forwarding between ports by default
The VSC9959 switch embedded within NXP LS1028A (and that version of
Ocelot switches only) supports cut-through forwarding - meaning it can
start the process of looking up the destination ports for a packet, and
forward towards those ports, before the entire packet has been received
(as opposed to the store-and-forward mode).
The up side is having lower forwarding latency for large packets. The
down side is that frames with FCS errors are forwarded instead of being
dropped. However, erroneous frames do not result in incorrect updates of
the FDB or incorrect policer updates, since these processes are deferred
inside the switch to the end of frame. Since the switch starts the
cut-through forwarding process after all packet headers (including IP,
if any) have been processed, packets with large headers and small
payload do not see the benefit of lower forwarding latency.
There are two cases that need special attention.
The first is when a packet is multicast (or flooded) to multiple
destinations, one of which doesn't have cut-through forwarding enabled.
The switch deals with this automatically by disabling cut-through
forwarding for the frame towards all destination ports.
The second is when a packet is forwarded from a port of lower link speed
towards a port of higher link speed. This is not handled by the hardware
and needs software intervention.
Since we practically need to update the cut-through forwarding domain
from paths that aren't serialized by the rtnl_mutex (phylink
mac_link_down/mac_link_up ops), this means we need to serialize physical
link events with user space updates of bonding/bridging domains.
Enabling cut-through forwarding is done per {egress port, traffic class}.
I don't see any reason why this would be a configurable option as long
as it works without issues, and there doesn't appear to be any user
space configuration tool to toggle this on/off, so this patch enables
cut-through forwarding on all eligible ports and traffic classes.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Link: https://lore.kernel.org/r/20211125125808.2383984-2-vladimir.oltean@nxp.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-11-25 12:58:08 +00:00
|
|
|
u32 ocelot_get_dsa_8021q_cpu_mask(struct ocelot *ocelot);
|
|
|
|
u32 ocelot_get_bridge_fwd_mask(struct ocelot *ocelot, int src_port);
|
|
|
|
void ocelot_apply_bridge_fwd_mask(struct ocelot *ocelot, bool joining);
|
2021-02-12 15:15:59 +00:00
|
|
|
int ocelot_port_pre_bridge_flags(struct ocelot *ocelot, int port,
|
|
|
|
struct switchdev_brport_flags val);
|
|
|
|
void ocelot_port_bridge_flags(struct ocelot *ocelot, int port,
|
|
|
|
struct switchdev_brport_flags val);
|
net: dsa: felix: configure default-prio and dscp priorities
Follow the established programming model for this driver and provide
shims in the felix DSA driver which call the implementations from the
ocelot switch lib. The ocelot switchdev driver wasn't integrated with
dcbnl due to lack of hardware availability.
The switch doesn't have any fancy QoS classification enabled by default.
The provided getters will create a default-prio app table entry of 0,
and no dscp entry. However, the getters have been made to actually
retrieve the hardware configuration rather than static values, to be
future proof in case DSA will need this information from more call paths.
For default-prio, there is a single field per port, in ANA_PORT_QOS_CFG,
called QOS_DEFAULT_VAL.
DSCP classification is enabled per-port, again via ANA_PORT_QOS_CFG
(field QOS_DSCP_ENA), and individual DSCP values are configured as
trusted or not through register ANA_DSCP_CFG (replicated 64 times).
An untrusted DSCP value falls back to other QoS classification methods.
If trusted, the selected ANA_DSCP_CFG register also holds the QoS class
in the QOS_DSCP_VAL field.
The hardware also supports DSCP remapping (DSCP value X is translated to
DSCP value Y before the QoS class is determined based on the app table
entry for Y) and DSCP packet rewriting. The dcbnl framework, for being
so flexible in other useless areas, doesn't appear to support this.
So this functionality has been left out.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2022-03-11 21:15:20 +00:00
|
|
|
int ocelot_port_get_default_prio(struct ocelot *ocelot, int port);
|
|
|
|
int ocelot_port_set_default_prio(struct ocelot *ocelot, int port, u8 prio);
|
|
|
|
int ocelot_port_get_dscp_prio(struct ocelot *ocelot, int port, u8 dscp);
|
|
|
|
int ocelot_port_add_dscp_prio(struct ocelot *ocelot, int port, u8 dscp, u8 prio);
|
|
|
|
int ocelot_port_del_dscp_prio(struct ocelot *ocelot, int port, u8 dscp, u8 prio);
|
net: mscc: ocelot: enforce FDB isolation when VLAN-unaware
Currently ocelot uses a pvid of 0 for standalone ports and ports under a
VLAN-unaware bridge, and the pvid of the bridge for ports under a
VLAN-aware bridge. Standalone ports do not perform learning, but packets
received on them are still subject to FDB lookups. So if the MAC DA that
a standalone port receives has been also learned on a VLAN-unaware
bridge port, ocelot will attempt to forward to that port, even though it
can't, so it will drop packets.
So there is a desire to avoid that, and isolate the FDBs of different
bridges from one another, and from standalone ports.
The ocelot switch library has two distinct entry points: the felix DSA
driver and the ocelot switchdev driver.
We need to code up a minimal bridge_num allocation in the ocelot
switchdev driver too, this is copied from DSA with the exception that
ocelot does not care about DSA trees, cross-chip bridging etc. So it
only looks at its own ports that are already in the same bridge.
The ocelot switchdev driver uses the bridge_num it has allocated itself,
while the felix driver uses the bridge_num allocated by DSA. They are
both stored inside ocelot_port->bridge_num by the common function
ocelot_port_bridge_join() which receives the bridge_num passed by value.
Once we have a bridge_num, we can only use it to enforce isolation
between VLAN-unaware bridges. As far as I can see, ocelot does not have
anything like a FID that further makes VLAN 100 from a port be different
to VLAN 100 from another port with regard to FDB lookup. So we simply
deny multiple VLAN-aware bridges.
For VLAN-unaware bridges, we crop the 4000-4095 VLAN region and we
allocate a VLAN for each bridge_num. This will be used as the pvid of
each port that is under that VLAN-unaware bridge, for as long as that
bridge is VLAN-unaware.
VID 0 remains only for standalone ports. It is okay if all standalone
ports use the same VID 0, since they perform no address learning, the
FDB will contain no entry in VLAN 0, so the packets will always be
flooded to the only possible destination, the CPU port.
The CPU port module doesn't need to be member of the VLANs to receive
packets, but if we use the DSA tag_8021q protocol, those packets are
part of the data plane as far as ocelot is concerned, so there it needs
to. Just ensure that the DSA tag_8021q CPU port is a member of all
reserved VLANs when it is created, and is removed when it is deleted.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2022-02-25 09:22:25 +00:00
|
|
|
int ocelot_port_bridge_join(struct ocelot *ocelot, int port,
|
|
|
|
struct net_device *bridge, int bridge_num,
|
|
|
|
struct netlink_ext_ack *extack);
|
2021-03-22 23:51:52 +00:00
|
|
|
void ocelot_port_bridge_leave(struct ocelot *ocelot, int port,
|
|
|
|
struct net_device *bridge);
|
2022-01-07 14:42:29 +00:00
|
|
|
int ocelot_mact_flush(struct ocelot *ocelot, int port);
|
2019-11-14 15:03:27 +00:00
|
|
|
int ocelot_fdb_dump(struct ocelot *ocelot, int port,
|
|
|
|
dsa_fdb_dump_cb_t *cb, void *data);
|
net: mscc: ocelot: enforce FDB isolation when VLAN-unaware
Currently ocelot uses a pvid of 0 for standalone ports and ports under a
VLAN-unaware bridge, and the pvid of the bridge for ports under a
VLAN-aware bridge. Standalone ports do not perform learning, but packets
received on them are still subject to FDB lookups. So if the MAC DA that
a standalone port receives has been also learned on a VLAN-unaware
bridge port, ocelot will attempt to forward to that port, even though it
can't, so it will drop packets.
So there is a desire to avoid that, and isolate the FDBs of different
bridges from one another, and from standalone ports.
The ocelot switch library has two distinct entry points: the felix DSA
driver and the ocelot switchdev driver.
We need to code up a minimal bridge_num allocation in the ocelot
switchdev driver too, this is copied from DSA with the exception that
ocelot does not care about DSA trees, cross-chip bridging etc. So it
only looks at its own ports that are already in the same bridge.
The ocelot switchdev driver uses the bridge_num it has allocated itself,
while the felix driver uses the bridge_num allocated by DSA. They are
both stored inside ocelot_port->bridge_num by the common function
ocelot_port_bridge_join() which receives the bridge_num passed by value.
Once we have a bridge_num, we can only use it to enforce isolation
between VLAN-unaware bridges. As far as I can see, ocelot does not have
anything like a FID that further makes VLAN 100 from a port be different
to VLAN 100 from another port with regard to FDB lookup. So we simply
deny multiple VLAN-aware bridges.
For VLAN-unaware bridges, we crop the 4000-4095 VLAN region and we
allocate a VLAN for each bridge_num. This will be used as the pvid of
each port that is under that VLAN-unaware bridge, for as long as that
bridge is VLAN-unaware.
VID 0 remains only for standalone ports. It is okay if all standalone
ports use the same VID 0, since they perform no address learning, the
FDB will contain no entry in VLAN 0, so the packets will always be
flooded to the only possible destination, the CPU port.
The CPU port module doesn't need to be member of the VLANs to receive
packets, but if we use the DSA tag_8021q protocol, those packets are
part of the data plane as far as ocelot is concerned, so there it needs
to. Just ensure that the DSA tag_8021q CPU port is a member of all
reserved VLANs when it is created, and is removed when it is deleted.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2022-02-25 09:22:25 +00:00
|
|
|
int ocelot_fdb_add(struct ocelot *ocelot, int port, const unsigned char *addr,
|
|
|
|
u16 vid, const struct net_device *bridge);
|
|
|
|
int ocelot_fdb_del(struct ocelot *ocelot, int port, const unsigned char *addr,
|
|
|
|
u16 vid, const struct net_device *bridge);
|
2022-02-23 14:00:54 +00:00
|
|
|
int ocelot_lag_fdb_add(struct ocelot *ocelot, struct net_device *bond,
|
net: mscc: ocelot: enforce FDB isolation when VLAN-unaware
Currently ocelot uses a pvid of 0 for standalone ports and ports under a
VLAN-unaware bridge, and the pvid of the bridge for ports under a
VLAN-aware bridge. Standalone ports do not perform learning, but packets
received on them are still subject to FDB lookups. So if the MAC DA that
a standalone port receives has been also learned on a VLAN-unaware
bridge port, ocelot will attempt to forward to that port, even though it
can't, so it will drop packets.
So there is a desire to avoid that, and isolate the FDBs of different
bridges from one another, and from standalone ports.
The ocelot switch library has two distinct entry points: the felix DSA
driver and the ocelot switchdev driver.
We need to code up a minimal bridge_num allocation in the ocelot
switchdev driver too, this is copied from DSA with the exception that
ocelot does not care about DSA trees, cross-chip bridging etc. So it
only looks at its own ports that are already in the same bridge.
The ocelot switchdev driver uses the bridge_num it has allocated itself,
while the felix driver uses the bridge_num allocated by DSA. They are
both stored inside ocelot_port->bridge_num by the common function
ocelot_port_bridge_join() which receives the bridge_num passed by value.
Once we have a bridge_num, we can only use it to enforce isolation
between VLAN-unaware bridges. As far as I can see, ocelot does not have
anything like a FID that further makes VLAN 100 from a port be different
to VLAN 100 from another port with regard to FDB lookup. So we simply
deny multiple VLAN-aware bridges.
For VLAN-unaware bridges, we crop the 4000-4095 VLAN region and we
allocate a VLAN for each bridge_num. This will be used as the pvid of
each port that is under that VLAN-unaware bridge, for as long as that
bridge is VLAN-unaware.
VID 0 remains only for standalone ports. It is okay if all standalone
ports use the same VID 0, since they perform no address learning, the
FDB will contain no entry in VLAN 0, so the packets will always be
flooded to the only possible destination, the CPU port.
The CPU port module doesn't need to be member of the VLANs to receive
packets, but if we use the DSA tag_8021q protocol, those packets are
part of the data plane as far as ocelot is concerned, so there it needs
to. Just ensure that the DSA tag_8021q CPU port is a member of all
reserved VLANs when it is created, and is removed when it is deleted.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2022-02-25 09:22:25 +00:00
|
|
|
const unsigned char *addr, u16 vid,
|
|
|
|
const struct net_device *bridge);
|
2022-02-23 14:00:54 +00:00
|
|
|
int ocelot_lag_fdb_del(struct ocelot *ocelot, struct net_device *bond,
|
net: mscc: ocelot: enforce FDB isolation when VLAN-unaware
Currently ocelot uses a pvid of 0 for standalone ports and ports under a
VLAN-unaware bridge, and the pvid of the bridge for ports under a
VLAN-aware bridge. Standalone ports do not perform learning, but packets
received on them are still subject to FDB lookups. So if the MAC DA that
a standalone port receives has been also learned on a VLAN-unaware
bridge port, ocelot will attempt to forward to that port, even though it
can't, so it will drop packets.
So there is a desire to avoid that, and isolate the FDBs of different
bridges from one another, and from standalone ports.
The ocelot switch library has two distinct entry points: the felix DSA
driver and the ocelot switchdev driver.
We need to code up a minimal bridge_num allocation in the ocelot
switchdev driver too, this is copied from DSA with the exception that
ocelot does not care about DSA trees, cross-chip bridging etc. So it
only looks at its own ports that are already in the same bridge.
The ocelot switchdev driver uses the bridge_num it has allocated itself,
while the felix driver uses the bridge_num allocated by DSA. They are
both stored inside ocelot_port->bridge_num by the common function
ocelot_port_bridge_join() which receives the bridge_num passed by value.
Once we have a bridge_num, we can only use it to enforce isolation
between VLAN-unaware bridges. As far as I can see, ocelot does not have
anything like a FID that further makes VLAN 100 from a port be different
to VLAN 100 from another port with regard to FDB lookup. So we simply
deny multiple VLAN-aware bridges.
For VLAN-unaware bridges, we crop the 4000-4095 VLAN region and we
allocate a VLAN for each bridge_num. This will be used as the pvid of
each port that is under that VLAN-unaware bridge, for as long as that
bridge is VLAN-unaware.
VID 0 remains only for standalone ports. It is okay if all standalone
ports use the same VID 0, since they perform no address learning, the
FDB will contain no entry in VLAN 0, so the packets will always be
flooded to the only possible destination, the CPU port.
The CPU port module doesn't need to be member of the VLANs to receive
packets, but if we use the DSA tag_8021q protocol, those packets are
part of the data plane as far as ocelot is concerned, so there it needs
to. Just ensure that the DSA tag_8021q CPU port is a member of all
reserved VLANs when it is created, and is removed when it is deleted.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2022-02-25 09:22:25 +00:00
|
|
|
const unsigned char *addr, u16 vid,
|
|
|
|
const struct net_device *bridge);
|
2020-10-31 10:29:15 +00:00
|
|
|
int ocelot_vlan_prepare(struct ocelot *ocelot, int port, u16 vid, bool pvid,
|
2021-08-19 17:40:06 +00:00
|
|
|
bool untagged, struct netlink_ext_ack *extack);
|
2019-11-14 15:03:27 +00:00
|
|
|
int ocelot_vlan_add(struct ocelot *ocelot, int port, u16 vid, bool pvid,
|
|
|
|
bool untagged);
|
|
|
|
int ocelot_vlan_del(struct ocelot *ocelot, int port, u16 vid);
|
2019-11-20 08:23:14 +00:00
|
|
|
int ocelot_hwstamp_get(struct ocelot *ocelot, int port, struct ifreq *ifr);
|
|
|
|
int ocelot_hwstamp_set(struct ocelot *ocelot, int port, struct ifreq *ifr);
|
2021-04-27 04:22:02 +00:00
|
|
|
int ocelot_port_txtstamp_request(struct ocelot *ocelot, int port,
|
|
|
|
struct sk_buff *skb,
|
|
|
|
struct sk_buff **clone);
|
2019-11-20 08:23:15 +00:00
|
|
|
void ocelot_get_txtstamp(struct ocelot *ocelot);
|
2020-03-27 19:55:47 +00:00
|
|
|
void ocelot_port_set_maxlen(struct ocelot *ocelot, int port, size_t sdu);
|
|
|
|
int ocelot_get_max_mtu(struct ocelot *ocelot, int port);
|
2020-03-29 11:52:00 +00:00
|
|
|
int ocelot_port_policer_add(struct ocelot *ocelot, int port,
|
|
|
|
struct ocelot_policer *pol);
|
|
|
|
int ocelot_port_policer_del(struct ocelot *ocelot, int port);
|
net: mscc: ocelot: add port mirroring support using tc-matchall
Ocelot switches perform port-based ingress mirroring if
ANA:PORT:PORT_CFG field SRC_MIRROR_ENA is set, and egress mirroring if
the port is in ANA:ANA:EMIRRORPORTS.
Both ingress-mirrored and egress-mirrored frames are copied to the port
mask from ANA:ANA:MIRRORPORTS.
So the choice of limiting to a single mirror port via ocelot_mirror_get()
and ocelot_mirror_put() may seem bizarre, but the hardware model doesn't
map very well to the user space model. If the user wants to mirror the
ingress of swp1 towards swp2 and the ingress of swp3 towards swp4, we'd
have to program ANA:ANA:MIRRORPORTS with BIT(2) | BIT(4), and that would
make swp1 be mirrored towards swp4 too, and swp3 towards swp2. But there
are no tc-matchall rules to describe those actions.
Now, we could offload a matchall rule with multiple mirred actions, one
per desired mirror port, and force the user to stick to the multi-action
rule format for subsequent matchall filters. But both DSA and ocelot
have the flow_offload_has_one_action() check for the matchall offload,
plus the fact that it will get cumbersome to cross-check matchall
mirrors with flower mirrors (which will be added in the next patch).
As a result, we limit the configuration to a single mirror port, with
the possibility of lifting the restriction in the future.
Frames injected from the CPU don't get egress-mirrored, since they are
sent with the BYPASS bit in the injection frame header, and this
bypasses the analyzer module (effectively also the mirroring logic).
I don't know what to do/say about this.
Functionality was tested with:
tc qdisc add dev swp3 clsact
tc filter add dev swp3 ingress \
matchall skip_sw \
action mirred egress mirror dev swp1
and pinging through swp3, while seeing that the ICMP replies are
mirrored towards swp1.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2022-03-16 20:41:40 +00:00
|
|
|
int ocelot_port_mirror_add(struct ocelot *ocelot, int from, int to,
|
|
|
|
bool ingress, struct netlink_ext_ack *extack);
|
|
|
|
void ocelot_port_mirror_del(struct ocelot *ocelot, int from, bool ingress);
|
2020-02-29 14:31:14 +00:00
|
|
|
int ocelot_cls_flower_replace(struct ocelot *ocelot, int port,
|
|
|
|
struct flow_cls_offload *f, bool ingress);
|
|
|
|
int ocelot_cls_flower_destroy(struct ocelot *ocelot, int port,
|
|
|
|
struct flow_cls_offload *f, bool ingress);
|
|
|
|
int ocelot_cls_flower_stats(struct ocelot *ocelot, int port,
|
|
|
|
struct flow_cls_offload *f, bool ingress);
|
2020-06-21 11:46:01 +00:00
|
|
|
int ocelot_port_mdb_add(struct ocelot *ocelot, int port,
|
net: mscc: ocelot: enforce FDB isolation when VLAN-unaware
Currently ocelot uses a pvid of 0 for standalone ports and ports under a
VLAN-unaware bridge, and the pvid of the bridge for ports under a
VLAN-aware bridge. Standalone ports do not perform learning, but packets
received on them are still subject to FDB lookups. So if the MAC DA that
a standalone port receives has been also learned on a VLAN-unaware
bridge port, ocelot will attempt to forward to that port, even though it
can't, so it will drop packets.
So there is a desire to avoid that, and isolate the FDBs of different
bridges from one another, and from standalone ports.
The ocelot switch library has two distinct entry points: the felix DSA
driver and the ocelot switchdev driver.
We need to code up a minimal bridge_num allocation in the ocelot
switchdev driver too, this is copied from DSA with the exception that
ocelot does not care about DSA trees, cross-chip bridging etc. So it
only looks at its own ports that are already in the same bridge.
The ocelot switchdev driver uses the bridge_num it has allocated itself,
while the felix driver uses the bridge_num allocated by DSA. They are
both stored inside ocelot_port->bridge_num by the common function
ocelot_port_bridge_join() which receives the bridge_num passed by value.
Once we have a bridge_num, we can only use it to enforce isolation
between VLAN-unaware bridges. As far as I can see, ocelot does not have
anything like a FID that further makes VLAN 100 from a port be different
to VLAN 100 from another port with regard to FDB lookup. So we simply
deny multiple VLAN-aware bridges.
For VLAN-unaware bridges, we crop the 4000-4095 VLAN region and we
allocate a VLAN for each bridge_num. This will be used as the pvid of
each port that is under that VLAN-unaware bridge, for as long as that
bridge is VLAN-unaware.
VID 0 remains only for standalone ports. It is okay if all standalone
ports use the same VID 0, since they perform no address learning, the
FDB will contain no entry in VLAN 0, so the packets will always be
flooded to the only possible destination, the CPU port.
The CPU port module doesn't need to be member of the VLANs to receive
packets, but if we use the DSA tag_8021q protocol, those packets are
part of the data plane as far as ocelot is concerned, so there it needs
to. Just ensure that the DSA tag_8021q CPU port is a member of all
reserved VLANs when it is created, and is removed when it is deleted.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2022-02-25 09:22:25 +00:00
|
|
|
const struct switchdev_obj_port_mdb *mdb,
|
|
|
|
const struct net_device *bridge);
|
2020-06-21 11:46:01 +00:00
|
|
|
int ocelot_port_mdb_del(struct ocelot *ocelot, int port,
|
net: mscc: ocelot: enforce FDB isolation when VLAN-unaware
Currently ocelot uses a pvid of 0 for standalone ports and ports under a
VLAN-unaware bridge, and the pvid of the bridge for ports under a
VLAN-aware bridge. Standalone ports do not perform learning, but packets
received on them are still subject to FDB lookups. So if the MAC DA that
a standalone port receives has been also learned on a VLAN-unaware
bridge port, ocelot will attempt to forward to that port, even though it
can't, so it will drop packets.
So there is a desire to avoid that, and isolate the FDBs of different
bridges from one another, and from standalone ports.
The ocelot switch library has two distinct entry points: the felix DSA
driver and the ocelot switchdev driver.
We need to code up a minimal bridge_num allocation in the ocelot
switchdev driver too, this is copied from DSA with the exception that
ocelot does not care about DSA trees, cross-chip bridging etc. So it
only looks at its own ports that are already in the same bridge.
The ocelot switchdev driver uses the bridge_num it has allocated itself,
while the felix driver uses the bridge_num allocated by DSA. They are
both stored inside ocelot_port->bridge_num by the common function
ocelot_port_bridge_join() which receives the bridge_num passed by value.
Once we have a bridge_num, we can only use it to enforce isolation
between VLAN-unaware bridges. As far as I can see, ocelot does not have
anything like a FID that further makes VLAN 100 from a port be different
to VLAN 100 from another port with regard to FDB lookup. So we simply
deny multiple VLAN-aware bridges.
For VLAN-unaware bridges, we crop the 4000-4095 VLAN region and we
allocate a VLAN for each bridge_num. This will be used as the pvid of
each port that is under that VLAN-unaware bridge, for as long as that
bridge is VLAN-unaware.
VID 0 remains only for standalone ports. It is okay if all standalone
ports use the same VID 0, since they perform no address learning, the
FDB will contain no entry in VLAN 0, so the packets will always be
flooded to the only possible destination, the CPU port.
The CPU port module doesn't need to be member of the VLANs to receive
packets, but if we use the DSA tag_8021q protocol, those packets are
part of the data plane as far as ocelot is concerned, so there it needs
to. Just ensure that the DSA tag_8021q CPU port is a member of all
reserved VLANs when it is created, and is removed when it is deleted.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2022-02-25 09:22:25 +00:00
|
|
|
const struct switchdev_obj_port_mdb *mdb,
|
|
|
|
const struct net_device *bridge);
|
2021-02-05 22:02:21 +00:00
|
|
|
int ocelot_port_lag_join(struct ocelot *ocelot, int port,
|
|
|
|
struct net_device *bond,
|
|
|
|
struct netdev_lag_upper_info *info);
|
|
|
|
void ocelot_port_lag_leave(struct ocelot *ocelot, int port,
|
|
|
|
struct net_device *bond);
|
|
|
|
void ocelot_port_lag_change(struct ocelot *ocelot, int port, bool lag_tx_active);
|
2019-11-14 15:03:27 +00:00
|
|
|
|
net: mscc: ocelot: configure watermarks using devlink-sb
Using devlink-sb, we can configure 12/16 (the important 75%) of the
switch's controlling watermarks for congestion drops, and we can monitor
50% of the watermark occupancies (we can monitor the reservation
watermarks, but not the sharing watermarks, which are exposed as pool
sizes).
The following definitions can be made:
SB_BUF=0 # The devlink-sb for frame buffers
SB_REF=1 # The devlink-sb for frame references
POOL_ING=0 # The pool for ingress traffic. Both devlink-sb instances
# have one of these.
POOL_EGR=1 # The pool for egress traffic. Both devlink-sb instances
# have one of these.
Editing the hardware watermarks is done in the following way:
BUF_xxxx_I is accessed when sb=$SB_BUF and pool=$POOL_ING
REF_xxxx_I is accessed when sb=$SB_REF and pool=$POOL_ING
BUF_xxxx_E is accessed when sb=$SB_BUF and pool=$POOL_EGR
REF_xxxx_E is accessed when sb=$SB_REF and pool=$POOL_EGR
Configuring the sharing watermarks for COL_SHR(dp=0) is done implicitly
by modifying the corresponding pool size. By default, the pool size has
maximum size, so this can be skipped.
devlink sb pool set pci/0000:00:00.5 sb $SB_BUF pool $POOL_ING \
size 129840 thtype static
Since by default there is no buffer reservation, the above command has
maxed out BUF_COL_SHR_I(dp=0).
Configuring the per-port reservation watermark (P_RSRV) is done in the
following way:
devlink sb port pool set pci/0000:00:00.5/0 sb $SB_BUF \
pool $POOL_ING th 1000
The above command sets BUF_P_RSRV_I(port 0) to 1000 bytes. After this
command, the sharing watermarks are internally reconfigured with 1000
bytes less, i.e. from 129840 bytes to 128840 bytes.
Configuring the per-port-tc reservation watermarks (Q_RSRV) is done in
the following way:
for tc in {0..7}; do
devlink sb tc bind set pci/0000:00:00.5/0 sb 0 tc $tc \
type ingress pool $POOL_ING \
th 3000
done
The above command sets BUF_Q_RSRV_I(port 0, tc 0..7) to 3000 bytes.
The sharing watermarks are again reconfigured with 24000 bytes less.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-01-15 02:11:20 +00:00
|
|
|
int ocelot_devlink_sb_register(struct ocelot *ocelot);
|
|
|
|
void ocelot_devlink_sb_unregister(struct ocelot *ocelot);
|
|
|
|
int ocelot_sb_pool_get(struct ocelot *ocelot, unsigned int sb_index,
|
|
|
|
u16 pool_index,
|
|
|
|
struct devlink_sb_pool_info *pool_info);
|
|
|
|
int ocelot_sb_pool_set(struct ocelot *ocelot, unsigned int sb_index,
|
|
|
|
u16 pool_index, u32 size,
|
|
|
|
enum devlink_sb_threshold_type threshold_type,
|
|
|
|
struct netlink_ext_ack *extack);
|
|
|
|
int ocelot_sb_port_pool_get(struct ocelot *ocelot, int port,
|
|
|
|
unsigned int sb_index, u16 pool_index,
|
|
|
|
u32 *p_threshold);
|
|
|
|
int ocelot_sb_port_pool_set(struct ocelot *ocelot, int port,
|
|
|
|
unsigned int sb_index, u16 pool_index,
|
|
|
|
u32 threshold, struct netlink_ext_ack *extack);
|
|
|
|
int ocelot_sb_tc_pool_bind_get(struct ocelot *ocelot, int port,
|
|
|
|
unsigned int sb_index, u16 tc_index,
|
|
|
|
enum devlink_sb_pool_type pool_type,
|
|
|
|
u16 *p_pool_index, u32 *p_threshold);
|
|
|
|
int ocelot_sb_tc_pool_bind_set(struct ocelot *ocelot, int port,
|
|
|
|
unsigned int sb_index, u16 tc_index,
|
|
|
|
enum devlink_sb_pool_type pool_type,
|
|
|
|
u16 pool_index, u32 threshold,
|
|
|
|
struct netlink_ext_ack *extack);
|
|
|
|
int ocelot_sb_occ_snapshot(struct ocelot *ocelot, unsigned int sb_index);
|
|
|
|
int ocelot_sb_occ_max_clear(struct ocelot *ocelot, unsigned int sb_index);
|
|
|
|
int ocelot_sb_occ_port_pool_get(struct ocelot *ocelot, int port,
|
|
|
|
unsigned int sb_index, u16 pool_index,
|
|
|
|
u32 *p_cur, u32 *p_max);
|
|
|
|
int ocelot_sb_occ_tc_port_bind_get(struct ocelot *ocelot, int port,
|
|
|
|
unsigned int sb_index, u16 tc_index,
|
|
|
|
enum devlink_sb_pool_type pool_type,
|
|
|
|
u32 *p_cur, u32 *p_max);
|
|
|
|
|
net: mscc: ocelot: convert to phylink
The felix DSA driver, which is a wrapper over the same hardware class as
ocelot, is integrated with phylink, but ocelot is using the plain PHY
library. It makes sense to bring together the two implementations, which
is what this patch achieves.
This is a large patch and hard to break up, but it does the following:
The existing ocelot_adjust_link writes some registers, and
felix_phylink_mac_link_up writes some registers, some of them are
common, but both functions write to some registers to which the other
doesn't.
The main reasons for this are:
- Felix switches so far have used an NXP PCS so they had no need to
write the PCS1G registers that ocelot_adjust_link writes
- Felix switches have the MAC fixed at 1G, so some of the MAC speed
changes actually break the link and must be avoided.
The naming conventions for the functions introduced in this patch are:
- vsc7514_phylink_{mac_config,validate} are specific to the Ocelot
instantiations and placed in ocelot_net.c which is built only for the
ocelot switchdev driver.
- ocelot_phylink_mac_link_{up,down} are shared between the ocelot
switchdev driver and the felix DSA driver (they are put in the common
lib).
One by one, the registers written by ocelot_adjust_link are:
DEV_MAC_MODE_CFG - felix_phylink_mac_link_up had no need to write this
register since its out-of-reset value was fine and
did not need changing. The write is moved to the
common ocelot_phylink_mac_link_up and on felix it is
guarded by a quirk bit that makes the written value
identical with the out-of-reset one
DEV_PORT_MISC - runtime invariant, was moved to vsc7514_phylink_mac_config
PCS1G_MODE_CFG - same as above
PCS1G_SD_CFG - same as above
PCS1G_CFG - same as above
PCS1G_ANEG_CFG - same as above
PCS1G_LB_CFG - same as above
DEV_MAC_ENA_CFG - both ocelot_adjust_link and ocelot_port_disable
touched this. felix_phylink_mac_link_{up,down} also
do. We go with what felix does and put it in
ocelot_phylink_mac_link_up.
DEV_CLOCK_CFG - ocelot_adjust_link and felix_phylink_mac_link_up both
write this, but to different values. Move to the common
ocelot_phylink_mac_link_up and make sure via the quirk
that the old values are preserved for both.
ANA_PFC_PFC_CFG - ocelot_adjust_link wrote this, felix_phylink_mac_link_up
did not. Runtime invariant, speed does not matter since
PFC is disabled via the RX_PFC_ENA bits which are cleared.
Move to vsc7514_phylink_mac_config.
QSYS_SWITCH_PORT_MODE_PORT_ENA - both ocelot_adjust_link and
felix_phylink_mac_link_{up,down} wrote
this. Ocelot also wrote this register
from ocelot_port_disable. Keep what
felix did, move in ocelot_phylink_mac_link_{up,down}
and delete ocelot_port_disable.
ANA_POL_FLOWC - same as above
SYS_MAC_FC_CFG - same as above, except slight behavior change. Whereas
ocelot always enabled RX and TX flow control, felix
listened to phylink (for the most part, at least - see
the 2500base-X comment).
The registers which only felix_phylink_mac_link_up wrote are:
SYS_PAUSE_CFG_PAUSE_ENA - this is why I am not sure that flow control
worked on ocelot. Not it should, since the
code is shared with felix where it does.
ANA_PORT_PORT_CFG - this is a Frame Analyzer block register, phylink
should be the one touching them, deleted.
Other changes:
- The old phylib registration code was in mscc_ocelot_init_ports. It is
hard to work with 2 levels of indentation already in, and with hard to
follow teardown logic. The new phylink registration code was moved
inside ocelot_probe_port(), right between alloc_etherdev() and
register_netdev(). It could not be done before (=> outside of)
ocelot_probe_port() because ocelot_probe_port() allocates the struct
ocelot_port which we then use to assign ocelot_port->phy_mode to. It
is more preferable to me to have all PHY handling logic inside the
same function.
- On the same topic: struct ocelot_port_private :: serdes is only used
in ocelot_port_open to set the SERDES protocol to Ethernet. This is
logically a runtime invariant and can be done just once, when the port
registers with phylink. We therefore don't even need to keep the
serdes reference inside struct ocelot_port_private, or to use the devm
variant of of_phy_get().
- Phylink needs a valid phy-mode for phylink_create() to succeed, and
the existing device tree bindings in arch/mips/boot/dts/mscc/ocelot_pcb120.dts
don't define one for the internal PHY ports. So we patch
PHY_INTERFACE_MODE_NA into PHY_INTERFACE_MODE_INTERNAL.
- There was a strategically placed:
switch (priv->phy_mode) {
case PHY_INTERFACE_MODE_NA:
continue;
which made the code skip the serdes initialization for the internal
PHY ports. Frankly that is not all that obvious, so now we explicitly
initialize the serdes under an "if" condition and not rely on code
jumps, so everything is clearer.
- There was a write of OCELOT_SPEED_1000 to DEV_CLOCK_CFG for QSGMII
ports. Since that is in fact the default value for the register field
DEV_CLOCK_CFG_LINK_SPEED, I can only guess the intention was to clear
the adjacent fields, MAC_TX_RST and MAC_RX_RST, aka take the port out
of reset, which does match the comment. I don't even want to know why
this code is placed there, but if there is indeed an issue that all
ports that share a QSGMII lane must all be up, then this logic is
already buggy, since mscc_ocelot_init_ports iterates using
for_each_available_child_of_node, so nobody prevents the user from
putting a 'status = "disabled";' for some QSGMII ports which would
break the driver's assumption.
In any case, in the eventuality that I'm right, we would have yet
another issue if ocelot_phylink_mac_link_down would reset those ports
and that would be forbidden, so since the ocelot_adjust_link logic did
not do that (maybe for a reason), add another quirk to preserve the
old logic.
The ocelot driver teardown goes through all ports in one fell swoop.
When initialization of one port fails, the ocelot->ports[port] pointer
for that is reset to NULL, and teardown is done only for non-NULL ports,
so there is no reason to do partial teardowns, let the central
mscc_ocelot_release_ports() do its job.
Tested bind, unbind, rebind, link up, link down, speed change on mock-up
hardware (modified the driver to probe on Felix VSC9959). Also
regression tested the felix DSA driver. Could not test the Ocelot
specific bits (PCS1G, SERDES, device tree bindings).
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-08-15 01:47:48 +00:00
|
|
|
void ocelot_phylink_mac_link_down(struct ocelot *ocelot, int port,
|
|
|
|
unsigned int link_an_mode,
|
|
|
|
phy_interface_t interface,
|
|
|
|
unsigned long quirks);
|
|
|
|
void ocelot_phylink_mac_link_up(struct ocelot *ocelot, int port,
|
|
|
|
struct phy_device *phydev,
|
|
|
|
unsigned int link_an_mode,
|
|
|
|
phy_interface_t interface,
|
|
|
|
int speed, int duplex,
|
|
|
|
bool tx_pause, bool rx_pause,
|
|
|
|
unsigned long quirks);
|
|
|
|
|
2021-11-18 10:11:57 +00:00
|
|
|
int ocelot_mact_lookup(struct ocelot *ocelot, int *dst_idx,
|
|
|
|
const unsigned char mac[ETH_ALEN],
|
|
|
|
unsigned int vid, enum macaccess_entry_type *type);
|
|
|
|
int ocelot_mact_learn_streamdata(struct ocelot *ocelot, int dst_idx,
|
|
|
|
const unsigned char mac[ETH_ALEN],
|
|
|
|
unsigned int vid,
|
|
|
|
enum macaccess_entry_type type,
|
|
|
|
int sfid, int ssid);
|
|
|
|
|
2022-05-05 16:22:12 +00:00
|
|
|
int ocelot_migrate_mdbs(struct ocelot *ocelot, unsigned long from_mask,
|
|
|
|
unsigned long to_mask);
|
|
|
|
|
2021-11-18 10:12:02 +00:00
|
|
|
int ocelot_vcap_policer_add(struct ocelot *ocelot, u32 pol_ix,
|
|
|
|
struct ocelot_policer *pol);
|
|
|
|
int ocelot_vcap_policer_del(struct ocelot *ocelot, u32 pol_ix);
|
|
|
|
|
2021-02-16 21:42:03 +00:00
|
|
|
#if IS_ENABLED(CONFIG_BRIDGE_MRP)
|
|
|
|
int ocelot_mrp_add(struct ocelot *ocelot, int port,
|
|
|
|
const struct switchdev_obj_mrp *mrp);
|
|
|
|
int ocelot_mrp_del(struct ocelot *ocelot, int port,
|
|
|
|
const struct switchdev_obj_mrp *mrp);
|
|
|
|
int ocelot_mrp_add_ring_role(struct ocelot *ocelot, int port,
|
|
|
|
const struct switchdev_obj_ring_role_mrp *mrp);
|
|
|
|
int ocelot_mrp_del_ring_role(struct ocelot *ocelot, int port,
|
|
|
|
const struct switchdev_obj_ring_role_mrp *mrp);
|
|
|
|
#else
|
|
|
|
static inline int ocelot_mrp_add(struct ocelot *ocelot, int port,
|
|
|
|
const struct switchdev_obj_mrp *mrp)
|
|
|
|
{
|
|
|
|
return -EOPNOTSUPP;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline int ocelot_mrp_del(struct ocelot *ocelot, int port,
|
|
|
|
const struct switchdev_obj_mrp *mrp)
|
|
|
|
{
|
|
|
|
return -EOPNOTSUPP;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline int
|
|
|
|
ocelot_mrp_add_ring_role(struct ocelot *ocelot, int port,
|
|
|
|
const struct switchdev_obj_ring_role_mrp *mrp)
|
|
|
|
{
|
|
|
|
return -EOPNOTSUPP;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline int
|
|
|
|
ocelot_mrp_del_ring_role(struct ocelot *ocelot, int port,
|
|
|
|
const struct switchdev_obj_ring_role_mrp *mrp)
|
|
|
|
{
|
|
|
|
return -EOPNOTSUPP;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2019-11-14 15:03:27 +00:00
|
|
|
#endif
|