Skip to content

Commit

Permalink
Merge branch 'mlxsw-fw-mark-offload'
Browse files Browse the repository at this point in the history
Jiri Pirko says:

====================
mlxsw: Introduce support for offload forward mark

Ido says:
This patchset enables the forwarding of certain control packets by the
device instead of relying on the CPU to do the forwarding.

The first two patches simplify the current switchdev offload forward
infrastructure and make it usable for stacked devices. This is done by
moving the packet and port marking to the bridge driver instead of the
switch driver.

Patches 3-5 add the mlxsw specific bits to support the forward mark.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
David S. Miller committed Aug 26, 2016
2 parents 2a313cd + 1c6c6d2 commit ed35ca9
Show file tree
Hide file tree
Showing 16 changed files with 158 additions and 250 deletions.
13 changes: 4 additions & 9 deletions Documentation/networking/switchdev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -283,15 +283,10 @@ be sent to the port netdev for processing by the bridge driver. The
bridge should not reflood the packet to the same ports the device flooded,
otherwise there will be duplicate packets on the wire.

To avoid duplicate packets, the device/driver should mark a packet as already
forwarded using skb->offload_fwd_mark. The same mark is set on the device
ports in the domain using dev->offload_fwd_mark. If the skb->offload_fwd_mark
is non-zero and matches the forwarding egress port's dev->offload_fwd_mark, the
kernel will drop the skb right before transmit on the egress port, with the
understanding that the device already forwarded the packet on the same egress
port. The driver can use switchdev_port_fwd_mark_set() to set a globally unique
mark for the port's dev->offload_fwd_mark, based on the port's parent ID
(switch ID) and a group ifindex.
To avoid duplicate packets, the switch driver should mark a packet as already
forwarded by setting the skb->offload_fwd_mark bit. The bridge driver will mark
the skb using the ingress bridge port's mark and prevent it from being forwarded
through any bridge port with the same mark.

It is possible for the switch device to not handle flooding and push the
packets up to the bridge driver for flooding. This is not ideal as the number
Expand Down
1 change: 1 addition & 0 deletions drivers/net/ethernet/mellanox/mlxsw/core.h
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ struct mlxsw_rx_listener {
void (*func)(struct sk_buff *skb, u8 local_port, void *priv);
u8 local_port;
u16 trap_id;
enum mlxsw_reg_hpkt_action action;
};

struct mlxsw_event_listener {
Expand Down
156 changes: 40 additions & 116 deletions drivers/net/ethernet/mellanox/mlxsw/spectrum.c
Original file line number Diff line number Diff line change
Expand Up @@ -2570,123 +2570,47 @@ static void mlxsw_sp_rx_listener_func(struct sk_buff *skb, u8 local_port,
netif_receive_skb(skb);
}

/*
 * RX listener variant that flags the packet as already forwarded.
 *
 * Sets skb->offload_fwd_mark and then hands the packet to
 * mlxsw_sp_rx_listener_func() with the same arguments.
 *
 * @skb:        received packet; its offload_fwd_mark bit is set to 1
 * @local_port: passed through unchanged
 * @priv:       passed through unchanged
 */
static void mlxsw_sp_rx_listener_mark_func(struct sk_buff *skb, u8 local_port,
					   void *priv)
{
	skb->offload_fwd_mark = 1;
	/* Plain call: a void function must not "return" an expression. */
	mlxsw_sp_rx_listener_func(skb, local_port, priv);
}

/*
 * Expand to a struct mlxsw_rx_listener initializer.
 *
 * _handler:     callback stored in .func
 * _trap:        suffix pasted onto MLXSW_TRAP_ID_ to form .trap_id
 * _hpkt_action: suffix pasted onto MLXSW_REG_HPKT_ACTION_ to form .action
 *
 * .local_port is always MLXSW_PORT_DONT_CARE.
 */
#define MLXSW_SP_RXL(_handler, _trap, _hpkt_action)			\
	{								\
		.trap_id = MLXSW_TRAP_ID_##_trap,			\
		.action = MLXSW_REG_HPKT_ACTION_##_hpkt_action,		\
		.local_port = MLXSW_PORT_DONT_CARE,			\
		.func = _handler,					\
	}

static const struct mlxsw_rx_listener mlxsw_sp_rx_listener[] = {
{
.func = mlxsw_sp_rx_listener_func,
.local_port = MLXSW_PORT_DONT_CARE,
.trap_id = MLXSW_TRAP_ID_FDB_MC,
},
MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, FDB_MC, TRAP_TO_CPU),
/* Traps for specific L2 packet types, not trapped as FDB MC */
{
.func = mlxsw_sp_rx_listener_func,
.local_port = MLXSW_PORT_DONT_CARE,
.trap_id = MLXSW_TRAP_ID_STP,
},
{
.func = mlxsw_sp_rx_listener_func,
.local_port = MLXSW_PORT_DONT_CARE,
.trap_id = MLXSW_TRAP_ID_LACP,
},
{
.func = mlxsw_sp_rx_listener_func,
.local_port = MLXSW_PORT_DONT_CARE,
.trap_id = MLXSW_TRAP_ID_EAPOL,
},
{
.func = mlxsw_sp_rx_listener_func,
.local_port = MLXSW_PORT_DONT_CARE,
.trap_id = MLXSW_TRAP_ID_LLDP,
},
{
.func = mlxsw_sp_rx_listener_func,
.local_port = MLXSW_PORT_DONT_CARE,
.trap_id = MLXSW_TRAP_ID_MMRP,
},
{
.func = mlxsw_sp_rx_listener_func,
.local_port = MLXSW_PORT_DONT_CARE,
.trap_id = MLXSW_TRAP_ID_MVRP,
},
{
.func = mlxsw_sp_rx_listener_func,
.local_port = MLXSW_PORT_DONT_CARE,
.trap_id = MLXSW_TRAP_ID_RPVST,
},
{
.func = mlxsw_sp_rx_listener_func,
.local_port = MLXSW_PORT_DONT_CARE,
.trap_id = MLXSW_TRAP_ID_DHCP,
},
{
.func = mlxsw_sp_rx_listener_func,
.local_port = MLXSW_PORT_DONT_CARE,
.trap_id = MLXSW_TRAP_ID_IGMP_QUERY,
},
{
.func = mlxsw_sp_rx_listener_func,
.local_port = MLXSW_PORT_DONT_CARE,
.trap_id = MLXSW_TRAP_ID_IGMP_V1_REPORT,
},
{
.func = mlxsw_sp_rx_listener_func,
.local_port = MLXSW_PORT_DONT_CARE,
.trap_id = MLXSW_TRAP_ID_IGMP_V2_REPORT,
},
{
.func = mlxsw_sp_rx_listener_func,
.local_port = MLXSW_PORT_DONT_CARE,
.trap_id = MLXSW_TRAP_ID_IGMP_V2_LEAVE,
},
{
.func = mlxsw_sp_rx_listener_func,
.local_port = MLXSW_PORT_DONT_CARE,
.trap_id = MLXSW_TRAP_ID_IGMP_V3_REPORT,
},
{
.func = mlxsw_sp_rx_listener_func,
.local_port = MLXSW_PORT_DONT_CARE,
.trap_id = MLXSW_TRAP_ID_ARPBC,
},
{
.func = mlxsw_sp_rx_listener_func,
.local_port = MLXSW_PORT_DONT_CARE,
.trap_id = MLXSW_TRAP_ID_ARPUC,
},
{
.func = mlxsw_sp_rx_listener_func,
.local_port = MLXSW_PORT_DONT_CARE,
.trap_id = MLXSW_TRAP_ID_MTUERROR,
},
{
.func = mlxsw_sp_rx_listener_func,
.local_port = MLXSW_PORT_DONT_CARE,
.trap_id = MLXSW_TRAP_ID_TTLERROR,
},
{
.func = mlxsw_sp_rx_listener_func,
.local_port = MLXSW_PORT_DONT_CARE,
.trap_id = MLXSW_TRAP_ID_LBERROR,
},
{
.func = mlxsw_sp_rx_listener_func,
.local_port = MLXSW_PORT_DONT_CARE,
.trap_id = MLXSW_TRAP_ID_OSPF,
},
{
.func = mlxsw_sp_rx_listener_func,
.local_port = MLXSW_PORT_DONT_CARE,
.trap_id = MLXSW_TRAP_ID_IP2ME,
},
{
.func = mlxsw_sp_rx_listener_func,
.local_port = MLXSW_PORT_DONT_CARE,
.trap_id = MLXSW_TRAP_ID_RTR_INGRESS0,
},
{
.func = mlxsw_sp_rx_listener_func,
.local_port = MLXSW_PORT_DONT_CARE,
.trap_id = MLXSW_TRAP_ID_HOST_MISS_IPV4,
},
MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, STP, TRAP_TO_CPU),
MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, LACP, TRAP_TO_CPU),
MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, EAPOL, TRAP_TO_CPU),
MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, LLDP, TRAP_TO_CPU),
MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, MMRP, TRAP_TO_CPU),
MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, MVRP, TRAP_TO_CPU),
MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, RPVST, TRAP_TO_CPU),
MLXSW_SP_RXL(mlxsw_sp_rx_listener_mark_func, DHCP, MIRROR_TO_CPU),
MLXSW_SP_RXL(mlxsw_sp_rx_listener_mark_func, IGMP_QUERY, MIRROR_TO_CPU),
MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, IGMP_V1_REPORT, TRAP_TO_CPU),
MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, IGMP_V2_REPORT, TRAP_TO_CPU),
MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, IGMP_V2_LEAVE, TRAP_TO_CPU),
MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, IGMP_V3_REPORT, TRAP_TO_CPU),
MLXSW_SP_RXL(mlxsw_sp_rx_listener_mark_func, ARPBC, MIRROR_TO_CPU),
MLXSW_SP_RXL(mlxsw_sp_rx_listener_mark_func, ARPUC, MIRROR_TO_CPU),
/* L3 traps */
MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, MTUERROR, TRAP_TO_CPU),
MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, TTLERROR, TRAP_TO_CPU),
MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, LBERROR, TRAP_TO_CPU),
MLXSW_SP_RXL(mlxsw_sp_rx_listener_mark_func, OSPF, TRAP_TO_CPU),
MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, IP2ME, TRAP_TO_CPU),
MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, RTR_INGRESS0, TRAP_TO_CPU),
MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, HOST_MISS_IPV4, TRAP_TO_CPU),
};

static int mlxsw_sp_traps_init(struct mlxsw_sp *mlxsw_sp)
Expand All @@ -2713,7 +2637,7 @@ static int mlxsw_sp_traps_init(struct mlxsw_sp *mlxsw_sp)
if (err)
goto err_rx_listener_register;

mlxsw_reg_hpkt_pack(hpkt_pl, MLXSW_REG_HPKT_ACTION_TRAP_TO_CPU,
mlxsw_reg_hpkt_pack(hpkt_pl, mlxsw_sp_rx_listener[i].action,
mlxsw_sp_rx_listener[i].trap_id);
err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(hpkt), hpkt_pl);
if (err)
Expand Down
2 changes: 1 addition & 1 deletion drivers/net/ethernet/rocker/rocker_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -2412,7 +2412,7 @@ static int rocker_port_rx_proc(const struct rocker *rocker,
skb->protocol = eth_type_trans(skb, rocker_port->dev);

if (rx_flags & ROCKER_RX_FLAGS_FWD_OFFLOAD)
skb->offload_fwd_mark = rocker_port->dev->offload_fwd_mark;
skb->offload_fwd_mark = 1;

rocker_port->dev->stats.rx_packets++;
rocker_port->dev->stats.rx_bytes += skb->len;
Expand Down
4 changes: 0 additions & 4 deletions drivers/net/ethernet/rocker/rocker_ofdpa.c
Original file line number Diff line number Diff line change
Expand Up @@ -2558,7 +2558,6 @@ static int ofdpa_port_init(struct rocker_port *rocker_port)
struct ofdpa_port *ofdpa_port = rocker_port->wpriv;
int err;

switchdev_port_fwd_mark_set(ofdpa_port->dev, NULL, false);
rocker_port_set_learning(rocker_port,
!!(ofdpa_port->brport_flags & BR_LEARNING));

Expand Down Expand Up @@ -2817,7 +2816,6 @@ static int ofdpa_port_bridge_join(struct ofdpa_port *ofdpa_port,
ofdpa_port_internal_vlan_id_get(ofdpa_port, bridge->ifindex);

ofdpa_port->bridge_dev = bridge;
switchdev_port_fwd_mark_set(ofdpa_port->dev, bridge, true);

return ofdpa_port_vlan_add(ofdpa_port, NULL, OFDPA_UNTAGGED_VID, 0);
}
Expand All @@ -2836,8 +2834,6 @@ static int ofdpa_port_bridge_leave(struct ofdpa_port *ofdpa_port)
ofdpa_port_internal_vlan_id_get(ofdpa_port,
ofdpa_port->dev->ifindex);

switchdev_port_fwd_mark_set(ofdpa_port->dev, ofdpa_port->bridge_dev,
false);
ofdpa_port->bridge_dev = NULL;

err = ofdpa_port_vlan_add(ofdpa_port, NULL, OFDPA_UNTAGGED_VID, 0);
Expand Down
5 changes: 0 additions & 5 deletions include/linux/netdevice.h
Original file line number Diff line number Diff line change
Expand Up @@ -1562,8 +1562,6 @@ enum netdev_priv_flags {
*
* @xps_maps: XXX: need comments on this one
*
* @offload_fwd_mark: Offload device fwding mark
*
* @watchdog_timeo: Represents the timeout that is used by
* the watchdog (see dev_watchdog())
* @watchdog_timer: List of timers
Expand Down Expand Up @@ -1814,9 +1812,6 @@ struct net_device {
#ifdef CONFIG_NET_CLS_ACT
struct tcf_proto __rcu *egress_cl_list;
#endif
#ifdef CONFIG_NET_SWITCHDEV
u32 offload_fwd_mark;
#endif

/* These may be needed for future network-power-down code. */
struct timer_list watchdog_timer;
Expand Down
13 changes: 5 additions & 8 deletions include/linux/skbuff.h
Original file line number Diff line number Diff line change
Expand Up @@ -612,7 +612,6 @@ static inline bool skb_mstamp_after(const struct skb_mstamp *t1,
* @no_fcs: Request NIC to treat last 4 bytes as Ethernet FCS
* @napi_id: id of the NAPI struct this skb came from
* @secmark: security marking
* @offload_fwd_mark: fwding offload mark
* @mark: Generic packet mark
* @vlan_proto: vlan encapsulation protocol
* @vlan_tci: vlan tag control information
Expand Down Expand Up @@ -730,7 +729,10 @@ struct sk_buff {
__u8 ipvs_property:1;
__u8 inner_protocol_type:1;
__u8 remcsum_offload:1;
/* 3 or 5 bit hole */
#ifdef CONFIG_NET_SWITCHDEV
__u8 offload_fwd_mark:1;
#endif
/* 2, 4 or 5 bit hole */

#ifdef CONFIG_NET_SCHED
__u16 tc_index; /* traffic control index */
Expand All @@ -757,14 +759,9 @@ struct sk_buff {
unsigned int sender_cpu;
};
#endif
union {
#ifdef CONFIG_NETWORK_SECMARK
__u32 secmark;
__u32 secmark;
#endif
#ifdef CONFIG_NET_SWITCHDEV
__u32 offload_fwd_mark;
#endif
};

union {
__u32 mark;
Expand Down
6 changes: 0 additions & 6 deletions include/net/switchdev.h
Original file line number Diff line number Diff line change
Expand Up @@ -347,12 +347,6 @@ static inline int switchdev_port_fdb_dump(struct sk_buff *skb,
return idx;
}

/*
 * No-op stub: the body is empty, so dev, group_dev and joining are ignored.
 * NOTE(review): presumably the fallback used when CONFIG_NET_SWITCHDEV is
 * disabled -- the enclosing #ifdef is not visible here, confirm.
 */
static inline void switchdev_port_fwd_mark_set(struct net_device *dev,
struct net_device *group_dev,
bool joining)
{
}

static inline bool switchdev_port_same_parent_id(struct net_device *a,
struct net_device *b)
{
Expand Down
2 changes: 2 additions & 0 deletions net/bridge/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,6 @@ bridge-$(CONFIG_BRIDGE_IGMP_SNOOPING) += br_multicast.o br_mdb.o

bridge-$(CONFIG_BRIDGE_VLAN_FILTERING) += br_vlan.o

bridge-$(CONFIG_NET_SWITCHDEV) += br_switchdev.o

obj-$(CONFIG_NETFILTER) += netfilter/
3 changes: 2 additions & 1 deletion net/bridge/br_forward.c
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@ static inline int should_deliver(const struct net_bridge_port *p,

vg = nbp_vlan_group_rcu(p);
return ((p->flags & BR_HAIRPIN_MODE) || skb->dev != p->dev) &&
br_allowed_egress(vg, skb) && p->state == BR_STATE_FORWARDING;
br_allowed_egress(vg, skb) && p->state == BR_STATE_FORWARDING &&
nbp_switchdev_allowed_egress(p, skb);
}

int br_dev_queue_push_xmit(struct net *net, struct sock *sk, struct sk_buff *skb)
Expand Down
10 changes: 7 additions & 3 deletions net/bridge/br_if.c
Original file line number Diff line number Diff line change
Expand Up @@ -545,6 +545,10 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
if (err)
goto err5;

err = nbp_switchdev_mark_set(p);
if (err)
goto err6;

dev_disable_lro(dev);

list_add_rcu(&p->list, &br->port_list);
Expand All @@ -566,7 +570,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
err = nbp_vlan_init(p);
if (err) {
netdev_err(dev, "failed to initialize vlan filtering on this port\n");
goto err6;
goto err7;
}

spin_lock_bh(&br->lock);
Expand All @@ -589,12 +593,12 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)

return 0;

err6:
err7:
list_del_rcu(&p->list);
br_fdb_delete_by_port(br, p, 0, 1);
nbp_update_port_count(br);
err6:
netdev_upper_dev_unlink(dev, br->dev);

err5:
dev->priv_flags &= ~IFF_BRIDGE_PORT;
netdev_rx_handler_unregister(dev);
Expand Down
2 changes: 2 additions & 0 deletions net/bridge/br_input.c
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,8 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
if (!br_allowed_ingress(p->br, nbp_vlan_group_rcu(p), skb, &vid))
goto out;

nbp_switchdev_frame_mark(p, skb);

/* insert into forwarding database after filtering to avoid spoofing */
br = p->br;
if (p->flags & BR_LEARNING)
Expand Down
Loading

0 comments on commit ed35ca9

Please sign in to comment.