diff --git a/Documentation/filesystems/debugfs.rst b/Documentation/filesystems/debugfs.rst index 1da7a4b7383d6..728ab57a611a4 100644 --- a/Documentation/filesystems/debugfs.rst +++ b/Documentation/filesystems/debugfs.rst @@ -185,13 +185,17 @@ byte offsets over a base for the register block. If you want to dump an u32 array in debugfs, you can create file with:: + struct debugfs_u32_array { + u32 *array; + u32 n_elements; + }; + void debugfs_create_u32_array(const char *name, umode_t mode, struct dentry *parent, - u32 *array, u32 elements); + struct debugfs_u32_array *array); -The "array" argument provides data, and the "elements" argument is -the number of elements in the array. Note: Once array is created its -size can not be changed. +The "array" argument wraps a pointer to the array's data and the number +of its elements. Note: Once array is created its size can not be changed. There is a helper function to create device related seq_file:: diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst index 459a0d11cfdeb..7d75f1e32152d 100644 --- a/Documentation/networking/ethtool-netlink.rst +++ b/Documentation/networking/ethtool-netlink.rst @@ -1230,6 +1230,39 @@ used to report the amplitude of the reflection for a given pair. | | | ``ETHTOOL_A_CABLE_AMPLITUDE_mV`` | s16 | Reflection amplitude | +-+-+-----------------------------------------+--------+----------------------+ +TUNNEL_INFO +=========== + +Gets information about the tunnel state NIC is aware of. + +Request contents: + + ===================================== ====== ========================== + ``ETHTOOL_A_TUNNEL_INFO_HEADER`` nested request header + ===================================== ====== ========================== + +Kernel response contents: + + +---------------------------------------------+--------+---------------------+ + | ``ETHTOOL_A_TUNNEL_INFO_HEADER`` | nested | reply header | + +---------------------------------------------+--------+---------------------+ + | ``ETHTOOL_A_TUNNEL_INFO_UDP_PORTS`` | nested | all UDP port tables | + +-+-------------------------------------------+--------+---------------------+ + | | ``ETHTOOL_A_TUNNEL_UDP_TABLE`` | nested | one UDP port table | + +-+-+-----------------------------------------+--------+---------------------+ + | | | ``ETHTOOL_A_TUNNEL_UDP_TABLE_SIZE`` | u32 | max size of the | + | | | | | table | + +-+-+-----------------------------------------+--------+---------------------+ + | | | ``ETHTOOL_A_TUNNEL_UDP_TABLE_TYPES`` | bitset | tunnel types which | + | | | | | table can hold | + +-+-+-----------------------------------------+--------+---------------------+ + | | | ``ETHTOOL_A_TUNNEL_UDP_TABLE_ENTRY`` | nested | offloaded UDP port | + +-+-+-+---------------------------------------+--------+---------------------+ + | | | | ``ETHTOOL_A_TUNNEL_UDP_ENTRY_PORT`` | be16 | UDP port | + +-+-+-+---------------------------------------+--------+---------------------+ + | | | | ``ETHTOOL_A_TUNNEL_UDP_ENTRY_TYPE`` | u32 | tunnel type | + +-+-+-+---------------------------------------+--------+---------------------+ + Request translation =================== diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 43956232b0a44..0911eb3b80079 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -4509,10 +4509,12 @@ static int bnxt_hwrm_tunnel_dst_port_free(struct bnxt *bp, u8 tunnel_type) switch (tunnel_type) { case TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_VXLAN: - req.tunnel_dst_port_id = bp->vxlan_fw_dst_port_id; + req.tunnel_dst_port_id = cpu_to_le16(bp->vxlan_fw_dst_port_id); + bp->vxlan_fw_dst_port_id = INVALID_HW_RING_ID; break; case TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_GENEVE: - req.tunnel_dst_port_id = bp->nge_fw_dst_port_id; + req.tunnel_dst_port_id = cpu_to_le16(bp->nge_fw_dst_port_id); + bp->nge_fw_dst_port_id = INVALID_HW_RING_ID; break; default: break; @@ -4547,10 +4549,11 @@ static int bnxt_hwrm_tunnel_dst_port_alloc(struct bnxt *bp, __be16 port, switch (tunnel_type) { case TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_VXLAN: - bp->vxlan_fw_dst_port_id = resp->tunnel_dst_port_id; + bp->vxlan_fw_dst_port_id = + le16_to_cpu(resp->tunnel_dst_port_id); break; case TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_GENEVE: - bp->nge_fw_dst_port_id = resp->tunnel_dst_port_id; + bp->nge_fw_dst_port_id = le16_to_cpu(resp->tunnel_dst_port_id); break; default: break; @@ -7578,16 +7581,12 @@ static int bnxt_hwrm_pcie_qstats(struct bnxt *bp) static void bnxt_hwrm_free_tunnel_ports(struct bnxt *bp) { - if (bp->vxlan_port_cnt) { + if (bp->vxlan_fw_dst_port_id != INVALID_HW_RING_ID) bnxt_hwrm_tunnel_dst_port_free( bp, TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_VXLAN); - } - bp->vxlan_port_cnt = 0; - if (bp->nge_port_cnt) { + if (bp->nge_fw_dst_port_id != INVALID_HW_RING_ID) bnxt_hwrm_tunnel_dst_port_free( bp, TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_GENEVE); - } - bp->nge_port_cnt = 0; } static int bnxt_set_tpa(struct bnxt *bp, bool set_tpa) @@ -9305,7 +9304,7 @@ static int __bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init) } if (irq_re_init) - udp_tunnel_get_rx_info(bp->dev); + udp_tunnel_nic_reset_ntf(bp->dev); set_bit(BNXT_STATE_OPEN, &bp->state); bnxt_enable_int(bp); @@ -10456,24 +10455,6 @@ static void bnxt_sp_task(struct work_struct *work) bnxt_cfg_ntp_filters(bp); if (test_and_clear_bit(BNXT_HWRM_EXEC_FWD_REQ_SP_EVENT, &bp->sp_event)) bnxt_hwrm_exec_fwd_req(bp); - if (test_and_clear_bit(BNXT_VXLAN_ADD_PORT_SP_EVENT, &bp->sp_event)) { - bnxt_hwrm_tunnel_dst_port_alloc( - bp, bp->vxlan_port, - TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_VXLAN); - } - if (test_and_clear_bit(BNXT_VXLAN_DEL_PORT_SP_EVENT, &bp->sp_event)) { - bnxt_hwrm_tunnel_dst_port_free( - bp, TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_VXLAN); - } - if (test_and_clear_bit(BNXT_GENEVE_ADD_PORT_SP_EVENT, &bp->sp_event)) { - bnxt_hwrm_tunnel_dst_port_alloc( - bp, bp->nge_port, - TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_GENEVE); - } - if (test_and_clear_bit(BNXT_GENEVE_DEL_PORT_SP_EVENT, &bp->sp_event)) { - bnxt_hwrm_tunnel_dst_port_free( - bp, TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_GENEVE); - } if (test_and_clear_bit(BNXT_PERIODIC_STATS_SP_EVENT, &bp->sp_event)) { bnxt_hwrm_port_qstats(bp); bnxt_hwrm_port_qstats_ext(bp); @@ -11070,6 +11051,9 @@ static int bnxt_init_board(struct pci_dev *pdev, struct net_device *dev) timer_setup(&bp->timer, bnxt_timer, 0); bp->current_interval = BNXT_TIMER_INTERVAL; + bp->vxlan_fw_dst_port_id = INVALID_HW_RING_ID; + bp->nge_fw_dst_port_id = INVALID_HW_RING_ID; + clear_bit(BNXT_STATE_OPEN, &bp->state); return 0; @@ -11397,84 +11381,33 @@ static void bnxt_cfg_ntp_filters(struct bnxt *bp) #endif /* CONFIG_RFS_ACCEL */ -static void bnxt_udp_tunnel_add(struct net_device *dev, - struct udp_tunnel_info *ti) +static int bnxt_udp_tunnel_sync(struct net_device *netdev, unsigned int table) { - struct bnxt *bp = netdev_priv(dev); - - if (ti->sa_family != AF_INET6 && ti->sa_family != AF_INET) - return; - - if (!netif_running(dev)) - return; + struct bnxt *bp = netdev_priv(netdev); + struct udp_tunnel_info ti; + unsigned int cmd; - switch (ti->type) { - case UDP_TUNNEL_TYPE_VXLAN: - if (bp->vxlan_port_cnt && bp->vxlan_port != ti->port) - return; + udp_tunnel_nic_get_port(netdev, table, 0, &ti); + if (ti.type == UDP_TUNNEL_TYPE_VXLAN) + cmd = TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_VXLAN; + else + cmd = TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_GENEVE; - bp->vxlan_port_cnt++; - if (bp->vxlan_port_cnt == 1) { - bp->vxlan_port = ti->port; - set_bit(BNXT_VXLAN_ADD_PORT_SP_EVENT, &bp->sp_event); - bnxt_queue_sp_work(bp); - } - break; - case UDP_TUNNEL_TYPE_GENEVE: - if (bp->nge_port_cnt && bp->nge_port != ti->port) - return; + if (ti.port) + return bnxt_hwrm_tunnel_dst_port_alloc(bp, ti.port, cmd); - bp->nge_port_cnt++; - if (bp->nge_port_cnt == 1) { - bp->nge_port = ti->port; - set_bit(BNXT_GENEVE_ADD_PORT_SP_EVENT, &bp->sp_event); - } - break; - default: - return; - } - - bnxt_queue_sp_work(bp); + return bnxt_hwrm_tunnel_dst_port_free(bp, cmd); } -static void bnxt_udp_tunnel_del(struct net_device *dev, - struct udp_tunnel_info *ti) -{ - struct bnxt *bp = netdev_priv(dev); - - if (ti->sa_family != AF_INET6 && ti->sa_family != AF_INET) - return; - - if (!netif_running(dev)) - return; - - switch (ti->type) { - case UDP_TUNNEL_TYPE_VXLAN: - if (!bp->vxlan_port_cnt || bp->vxlan_port != ti->port) - return; - bp->vxlan_port_cnt--; - - if (bp->vxlan_port_cnt != 0) - return; - - set_bit(BNXT_VXLAN_DEL_PORT_SP_EVENT, &bp->sp_event); - break; - case UDP_TUNNEL_TYPE_GENEVE: - if (!bp->nge_port_cnt || bp->nge_port != ti->port) - return; - bp->nge_port_cnt--; - - if (bp->nge_port_cnt != 0) - return; - - set_bit(BNXT_GENEVE_DEL_PORT_SP_EVENT, &bp->sp_event); - break; - default: - return; - } - - bnxt_queue_sp_work(bp); -} +static const struct udp_tunnel_nic_info bnxt_udp_tunnels = { + .sync_table = bnxt_udp_tunnel_sync, + .flags = UDP_TUNNEL_NIC_INFO_MAY_SLEEP | + UDP_TUNNEL_NIC_INFO_OPEN_ONLY, + .tables = { + { .n_entries = 1, .tunnel_types = UDP_TUNNEL_TYPE_VXLAN, }, + { .n_entries = 1, .tunnel_types = UDP_TUNNEL_TYPE_GENEVE, }, + }, +}; static int bnxt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, struct net_device *dev, u32 filter_mask, @@ -11572,8 +11505,8 @@ static const struct net_device_ops bnxt_netdev_ops = { #ifdef CONFIG_RFS_ACCEL .ndo_rx_flow_steer = bnxt_rx_flow_steer, #endif - .ndo_udp_tunnel_add = bnxt_udp_tunnel_add, - .ndo_udp_tunnel_del = bnxt_udp_tunnel_del, + .ndo_udp_tunnel_add = udp_tunnel_nic_add_port, + .ndo_udp_tunnel_del = udp_tunnel_nic_del_port, .ndo_bpf = bnxt_xdp, .ndo_xdp_xmit = bnxt_xdp_xmit, .ndo_bridge_getlink = bnxt_bridge_getlink, @@ -12063,6 +11996,8 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) NETIF_F_GSO_UDP_TUNNEL | NETIF_F_GSO_GRE | NETIF_F_GSO_UDP_TUNNEL_CSUM | NETIF_F_GSO_GRE_CSUM | NETIF_F_GSO_IPXIP4 | NETIF_F_GSO_PARTIAL; + dev->udp_tunnel_nic_info = &bnxt_udp_tunnels; + dev->gso_partial_features = NETIF_F_GSO_UDP_TUNNEL_CSUM | NETIF_F_GSO_GRE_CSUM; dev->vlan_features = dev->hw_features | NETIF_F_HIGHDMA; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index d556e5660a028..2acd7f9582460 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -1765,12 +1765,8 @@ struct bnxt { ((u64)(maj) << 48 | (u64)(min) << 32 | (u64)(bld) << 16 | (rsv)) #define BNXT_FW_MAJ(bp) ((bp)->fw_ver_code >> 48) - __be16 vxlan_port; - u8 vxlan_port_cnt; - __le16 vxlan_fw_dst_port_id; - __be16 nge_port; - u8 nge_port_cnt; - __le16 nge_fw_dst_port_id; + u16 vxlan_fw_dst_port_id; + u16 nge_fw_dst_port_id; u8 port_partition_type; u8 port_count; u16 br_mode; @@ -1790,16 +1786,12 @@ struct bnxt { #define BNXT_RX_NTP_FLTR_SP_EVENT 1 #define BNXT_LINK_CHNG_SP_EVENT 2 #define BNXT_HWRM_EXEC_FWD_REQ_SP_EVENT 3 -#define BNXT_VXLAN_ADD_PORT_SP_EVENT 4 -#define BNXT_VXLAN_DEL_PORT_SP_EVENT 5 #define BNXT_RESET_TASK_SP_EVENT 6 #define BNXT_RST_RING_SP_EVENT 7 #define BNXT_HWRM_PF_UNLOAD_SP_EVENT 8 #define BNXT_PERIODIC_STATS_SP_EVENT 9 #define BNXT_HWRM_PORT_MODULE_SP_EVENT 10 #define BNXT_RESET_TASK_SILENT_SP_EVENT 11 -#define BNXT_GENEVE_ADD_PORT_SP_EVENT 12 -#define BNXT_GENEVE_DEL_PORT_SP_EVENT 13 #define BNXT_LINK_SPEED_CHNG_SP_EVENT 14 #define BNXT_FLOW_STATS_SP_EVENT 15 #define BNXT_UPDATE_PHY_SP_EVENT 16 diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index debbcf2161345..1e8a809233a00 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -588,11 +588,9 @@ struct ixgbe_adapter { #define IXGBE_FLAG_FCOE_ENABLED BIT(21) #define IXGBE_FLAG_SRIOV_CAPABLE BIT(22) #define IXGBE_FLAG_SRIOV_ENABLED BIT(23) -#define IXGBE_FLAG_VXLAN_OFFLOAD_CAPABLE BIT(24) #define IXGBE_FLAG_RX_HWTSTAMP_ENABLED BIT(25) #define IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER BIT(26) #define IXGBE_FLAG_DCB_CAPABLE BIT(27) -#define IXGBE_FLAG_GENEVE_OFFLOAD_CAPABLE BIT(28) u32 flags2; #define IXGBE_FLAG2_RSC_CAPABLE BIT(0) @@ -606,7 +604,6 @@ struct ixgbe_adapter { #define IXGBE_FLAG2_RSS_FIELD_IPV6_UDP BIT(9) #define IXGBE_FLAG2_PTP_PPS_ENABLED BIT(10) #define IXGBE_FLAG2_PHY_INTERRUPT BIT(11) -#define IXGBE_FLAG2_UDP_TUN_REREG_NEEDED BIT(12) #define IXGBE_FLAG2_VLAN_PROMISC BIT(13) #define IXGBE_FLAG2_EEE_CAPABLE BIT(14) #define IXGBE_FLAG2_EEE_ENABLED BIT(15) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index f5d3d62307867..4d898ff21a466 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -4994,24 +4994,41 @@ static void ixgbe_napi_disable_all(struct ixgbe_adapter *adapter) napi_disable(&adapter->q_vector[q_idx]->napi); } -static void ixgbe_clear_udp_tunnel_port(struct ixgbe_adapter *adapter, u32 mask) +static int ixgbe_udp_tunnel_sync(struct net_device *dev, unsigned int table) { + struct ixgbe_adapter *adapter = netdev_priv(dev); struct ixgbe_hw *hw = &adapter->hw; - u32 vxlanctrl; + struct udp_tunnel_info ti; - if (!(adapter->flags & (IXGBE_FLAG_VXLAN_OFFLOAD_CAPABLE | - IXGBE_FLAG_GENEVE_OFFLOAD_CAPABLE))) - return; + udp_tunnel_nic_get_port(dev, table, 0, &ti); + if (ti.type == UDP_TUNNEL_TYPE_VXLAN) + adapter->vxlan_port = ti.port; + else + adapter->geneve_port = ti.port; - vxlanctrl = IXGBE_READ_REG(hw, IXGBE_VXLANCTRL) & ~mask; - IXGBE_WRITE_REG(hw, IXGBE_VXLANCTRL, vxlanctrl); + IXGBE_WRITE_REG(hw, IXGBE_VXLANCTRL, + ntohs(adapter->vxlan_port) | + ntohs(adapter->geneve_port) << + IXGBE_VXLANCTRL_GENEVE_UDPPORT_SHIFT); + return 0; +} - if (mask & IXGBE_VXLANCTRL_VXLAN_UDPPORT_MASK) - adapter->vxlan_port = 0; +static const struct udp_tunnel_nic_info ixgbe_udp_tunnels_x550 = { + .sync_table = ixgbe_udp_tunnel_sync, + .flags = UDP_TUNNEL_NIC_INFO_IPV4_ONLY, + .tables = { + { .n_entries = 1, .tunnel_types = UDP_TUNNEL_TYPE_VXLAN, }, + }, +}; - if (mask & IXGBE_VXLANCTRL_GENEVE_UDPPORT_MASK) - adapter->geneve_port = 0; -} +static const struct udp_tunnel_nic_info ixgbe_udp_tunnels_x550em_a = { + .sync_table = ixgbe_udp_tunnel_sync, + .flags = UDP_TUNNEL_NIC_INFO_IPV4_ONLY, + .tables = { + { .n_entries = 1, .tunnel_types = UDP_TUNNEL_TYPE_VXLAN, }, + { .n_entries = 1, .tunnel_types = UDP_TUNNEL_TYPE_GENEVE, }, + }, +}; #ifdef CONFIG_IXGBE_DCB /** @@ -6332,7 +6349,6 @@ static int ixgbe_sw_init(struct ixgbe_adapter *adapter, adapter->flags2 |= IXGBE_FLAG2_TEMP_SENSOR_CAPABLE; break; case ixgbe_mac_x550em_a: - adapter->flags |= IXGBE_FLAG_GENEVE_OFFLOAD_CAPABLE; switch (hw->device_id) { case IXGBE_DEV_ID_X550EM_A_1G_T: case IXGBE_DEV_ID_X550EM_A_1G_T_L: @@ -6359,7 +6375,6 @@ static int ixgbe_sw_init(struct ixgbe_adapter *adapter, #ifdef CONFIG_IXGBE_DCA adapter->flags &= ~IXGBE_FLAG_DCA_CAPABLE; #endif - adapter->flags |= IXGBE_FLAG_VXLAN_OFFLOAD_CAPABLE; break; default: break; @@ -6798,8 +6813,7 @@ int ixgbe_open(struct net_device *netdev) ixgbe_up_complete(adapter); - ixgbe_clear_udp_tunnel_port(adapter, IXGBE_VXLANCTRL_ALL_UDPPORT_MASK); - udp_tunnel_get_rx_info(netdev); + udp_tunnel_nic_reset_ntf(netdev); return 0; @@ -7921,12 +7935,6 @@ static void ixgbe_service_task(struct work_struct *work) ixgbe_service_event_complete(adapter); return; } - if (adapter->flags2 & IXGBE_FLAG2_UDP_TUN_REREG_NEEDED) { - rtnl_lock(); - adapter->flags2 &= ~IXGBE_FLAG2_UDP_TUN_REREG_NEEDED; - udp_tunnel_get_rx_info(adapter->netdev); - rtnl_unlock(); - } ixgbe_reset_subtask(adapter); ixgbe_phy_interrupt_subtask(adapter); ixgbe_sfp_detection_subtask(adapter); @@ -9784,26 +9792,6 @@ static int ixgbe_set_features(struct net_device *netdev, netdev->features = features; - if ((adapter->flags & IXGBE_FLAG_VXLAN_OFFLOAD_CAPABLE)) { - if (features & NETIF_F_RXCSUM) { - adapter->flags2 |= IXGBE_FLAG2_UDP_TUN_REREG_NEEDED; - } else { - u32 port_mask = IXGBE_VXLANCTRL_VXLAN_UDPPORT_MASK; - - ixgbe_clear_udp_tunnel_port(adapter, port_mask); - } - } - - if ((adapter->flags & IXGBE_FLAG_GENEVE_OFFLOAD_CAPABLE)) { - if (features & NETIF_F_RXCSUM) { - adapter->flags2 |= IXGBE_FLAG2_UDP_TUN_REREG_NEEDED; - } else { - u32 port_mask = IXGBE_VXLANCTRL_GENEVE_UDPPORT_MASK; - - ixgbe_clear_udp_tunnel_port(adapter, port_mask); - } - } - if ((changed & NETIF_F_HW_L2FW_DOFFLOAD) && adapter->num_rx_pools > 1) ixgbe_reset_l2fw_offload(adapter); else if (need_reset) @@ -9815,118 +9803,6 @@ static int ixgbe_set_features(struct net_device *netdev, return 1; } -/** - * ixgbe_add_udp_tunnel_port - Get notifications about adding UDP tunnel ports - * @dev: The port's netdev - * @ti: Tunnel endpoint information - **/ -static void ixgbe_add_udp_tunnel_port(struct net_device *dev, - struct udp_tunnel_info *ti) -{ - struct ixgbe_adapter *adapter = netdev_priv(dev); - struct ixgbe_hw *hw = &adapter->hw; - __be16 port = ti->port; - u32 port_shift = 0; - u32 reg; - - if (ti->sa_family != AF_INET) - return; - - switch (ti->type) { - case UDP_TUNNEL_TYPE_VXLAN: - if (!(adapter->flags & IXGBE_FLAG_VXLAN_OFFLOAD_CAPABLE)) - return; - - if (adapter->vxlan_port == port) - return; - - if (adapter->vxlan_port) { - netdev_info(dev, - "VXLAN port %d set, not adding port %d\n", - ntohs(adapter->vxlan_port), - ntohs(port)); - return; - } - - adapter->vxlan_port = port; - break; - case UDP_TUNNEL_TYPE_GENEVE: - if (!(adapter->flags & IXGBE_FLAG_GENEVE_OFFLOAD_CAPABLE)) - return; - - if (adapter->geneve_port == port) - return; - - if (adapter->geneve_port) { - netdev_info(dev, - "GENEVE port %d set, not adding port %d\n", - ntohs(adapter->geneve_port), - ntohs(port)); - return; - } - - port_shift = IXGBE_VXLANCTRL_GENEVE_UDPPORT_SHIFT; - adapter->geneve_port = port; - break; - default: - return; - } - - reg = IXGBE_READ_REG(hw, IXGBE_VXLANCTRL) | ntohs(port) << port_shift; - IXGBE_WRITE_REG(hw, IXGBE_VXLANCTRL, reg); -} - -/** - * ixgbe_del_udp_tunnel_port - Get notifications about removing UDP tunnel ports - * @dev: The port's netdev - * @ti: Tunnel endpoint information - **/ -static void ixgbe_del_udp_tunnel_port(struct net_device *dev, - struct udp_tunnel_info *ti) -{ - struct ixgbe_adapter *adapter = netdev_priv(dev); - u32 port_mask; - - if (ti->type != UDP_TUNNEL_TYPE_VXLAN && - ti->type != UDP_TUNNEL_TYPE_GENEVE) - return; - - if (ti->sa_family != AF_INET) - return; - - switch (ti->type) { - case UDP_TUNNEL_TYPE_VXLAN: - if (!(adapter->flags & IXGBE_FLAG_VXLAN_OFFLOAD_CAPABLE)) - return; - - if (adapter->vxlan_port != ti->port) { - netdev_info(dev, "VXLAN port %d not found\n", - ntohs(ti->port)); - return; - } - - port_mask = IXGBE_VXLANCTRL_VXLAN_UDPPORT_MASK; - break; - case UDP_TUNNEL_TYPE_GENEVE: - if (!(adapter->flags & IXGBE_FLAG_GENEVE_OFFLOAD_CAPABLE)) - return; - - if (adapter->geneve_port != ti->port) { - netdev_info(dev, "GENEVE port %d not found\n", - ntohs(ti->port)); - return; - } - - port_mask = IXGBE_VXLANCTRL_GENEVE_UDPPORT_MASK; - break; - default: - return; - } - - ixgbe_clear_udp_tunnel_port(adapter, port_mask); - adapter->flags2 |= IXGBE_FLAG2_UDP_TUN_REREG_NEEDED; -} - static int ixgbe_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid, @@ -10416,8 +10292,8 @@ static const struct net_device_ops ixgbe_netdev_ops = { .ndo_bridge_getlink = ixgbe_ndo_bridge_getlink, .ndo_dfwd_add_station = ixgbe_fwd_add, .ndo_dfwd_del_station = ixgbe_fwd_del, - .ndo_udp_tunnel_add = ixgbe_add_udp_tunnel_port, - .ndo_udp_tunnel_del = ixgbe_del_udp_tunnel_port, + .ndo_udp_tunnel_add = udp_tunnel_nic_add_port, + .ndo_udp_tunnel_del = udp_tunnel_nic_del_port, .ndo_features_check = ixgbe_features_check, .ndo_bpf = ixgbe_xdp, .ndo_xdp_xmit = ixgbe_xdp_xmit, @@ -10862,6 +10738,18 @@ static int ixgbe_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (err) goto err_sw_init; + switch (adapter->hw.mac.type) { + case ixgbe_mac_X550: + case ixgbe_mac_X550EM_x: + netdev->udp_tunnel_nic_info = &ixgbe_udp_tunnels_x550; + break; + case ixgbe_mac_x550em_a: + netdev->udp_tunnel_nic_info = &ixgbe_udp_tunnels_x550em_a; + break; + default: + break; + } + /* Make sure the SWFW semaphore is in a valid state */ if (hw->mac.ops.init_swfw_sync) hw->mac.ops.init_swfw_sync(hw); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 5bd3cd37d50f4..2b8608f8f0a9f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -1816,7 +1816,7 @@ int mlx4_en_start_port(struct net_device *dev) queue_work(mdev->workqueue, &priv->rx_mode_task); if (priv->mdev->dev->caps.tunnel_offload_mode == MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) - udp_tunnel_get_rx_info(dev); + udp_tunnel_nic_reset_ntf(dev); priv->port_up = true; @@ -2628,89 +2628,32 @@ static int mlx4_en_get_phys_port_id(struct net_device *dev, return 0; } -static void mlx4_en_add_vxlan_offloads(struct work_struct *work) +static int mlx4_udp_tunnel_sync(struct net_device *dev, unsigned int table) { + struct mlx4_en_priv *priv = netdev_priv(dev); + struct udp_tunnel_info ti; int ret; - struct mlx4_en_priv *priv = container_of(work, struct mlx4_en_priv, - vxlan_add_task); - ret = mlx4_config_vxlan_port(priv->mdev->dev, priv->vxlan_port); - if (ret) - goto out; + udp_tunnel_nic_get_port(dev, table, 0, &ti); + priv->vxlan_port = ti.port; - ret = mlx4_SET_PORT_VXLAN(priv->mdev->dev, priv->port, - VXLAN_STEER_BY_OUTER_MAC, 1); -out: - if (ret) { - en_err(priv, "failed setting L2 tunnel configuration ret %d\n", ret); - return; - } -} - -static void mlx4_en_del_vxlan_offloads(struct work_struct *work) -{ - int ret; - struct mlx4_en_priv *priv = container_of(work, struct mlx4_en_priv, - vxlan_del_task); - ret = mlx4_SET_PORT_VXLAN(priv->mdev->dev, priv->port, - VXLAN_STEER_BY_OUTER_MAC, 0); + ret = mlx4_config_vxlan_port(priv->mdev->dev, priv->vxlan_port); if (ret) - en_err(priv, "failed setting L2 tunnel configuration ret %d\n", ret); + return ret; - priv->vxlan_port = 0; + return mlx4_SET_PORT_VXLAN(priv->mdev->dev, priv->port, + VXLAN_STEER_BY_OUTER_MAC, + !!priv->vxlan_port); } -static void mlx4_en_add_vxlan_port(struct net_device *dev, - struct udp_tunnel_info *ti) -{ - struct mlx4_en_priv *priv = netdev_priv(dev); - __be16 port = ti->port; - __be16 current_port; - - if (ti->type != UDP_TUNNEL_TYPE_VXLAN) - return; - - if (ti->sa_family != AF_INET) - return; - - if (priv->mdev->dev->caps.tunnel_offload_mode != MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) - return; - - current_port = priv->vxlan_port; - if (current_port && current_port != port) { - en_warn(priv, "vxlan port %d configured, can't add port %d\n", - ntohs(current_port), ntohs(port)); - return; - } - - priv->vxlan_port = port; - queue_work(priv->mdev->workqueue, &priv->vxlan_add_task); -} - -static void mlx4_en_del_vxlan_port(struct net_device *dev, - struct udp_tunnel_info *ti) -{ - struct mlx4_en_priv *priv = netdev_priv(dev); - __be16 port = ti->port; - __be16 current_port; - - if (ti->type != UDP_TUNNEL_TYPE_VXLAN) - return; - - if (ti->sa_family != AF_INET) - return; - - if (priv->mdev->dev->caps.tunnel_offload_mode != MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) - return; - - current_port = priv->vxlan_port; - if (current_port != port) { - en_dbg(DRV, priv, "vxlan port %d isn't configured, ignoring\n", ntohs(port)); - return; - } - - queue_work(priv->mdev->workqueue, &priv->vxlan_del_task); -} +static const struct udp_tunnel_nic_info mlx4_udp_tunnels = { + .sync_table = mlx4_udp_tunnel_sync, + .flags = UDP_TUNNEL_NIC_INFO_MAY_SLEEP | + UDP_TUNNEL_NIC_INFO_IPV4_ONLY, + .tables = { + { .n_entries = 1, .tunnel_types = UDP_TUNNEL_TYPE_VXLAN, }, + }, +}; static netdev_features_t mlx4_en_features_check(struct sk_buff *skb, struct net_device *dev, @@ -2914,8 +2857,8 @@ static const struct net_device_ops mlx4_netdev_ops = { .ndo_rx_flow_steer = mlx4_en_filter_rfs, #endif .ndo_get_phys_port_id = mlx4_en_get_phys_port_id, - .ndo_udp_tunnel_add = mlx4_en_add_vxlan_port, - .ndo_udp_tunnel_del = mlx4_en_del_vxlan_port, + .ndo_udp_tunnel_add = udp_tunnel_nic_add_port, + .ndo_udp_tunnel_del = udp_tunnel_nic_del_port, .ndo_features_check = mlx4_en_features_check, .ndo_set_tx_maxrate = mlx4_en_set_tx_maxrate, .ndo_bpf = mlx4_xdp, @@ -2948,8 +2891,8 @@ static const struct net_device_ops mlx4_netdev_ops_master = { .ndo_rx_flow_steer = mlx4_en_filter_rfs, #endif .ndo_get_phys_port_id = mlx4_en_get_phys_port_id, - .ndo_udp_tunnel_add = mlx4_en_add_vxlan_port, - .ndo_udp_tunnel_del = mlx4_en_del_vxlan_port, + .ndo_udp_tunnel_add = udp_tunnel_nic_add_port, + .ndo_udp_tunnel_del = udp_tunnel_nic_del_port, .ndo_features_check = mlx4_en_features_check, .ndo_set_tx_maxrate = mlx4_en_set_tx_maxrate, .ndo_bpf = mlx4_xdp, @@ -3250,8 +3193,6 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, INIT_WORK(&priv->linkstate_task, mlx4_en_linkstate); INIT_DELAYED_WORK(&priv->stats_task, mlx4_en_do_get_stats); INIT_DELAYED_WORK(&priv->service_task, mlx4_en_service_task); - INIT_WORK(&priv->vxlan_add_task, mlx4_en_add_vxlan_offloads); - INIT_WORK(&priv->vxlan_del_task, mlx4_en_del_vxlan_offloads); #ifdef CONFIG_RFS_ACCEL INIT_LIST_HEAD(&priv->filters); spin_lock_init(&priv->filters_lock); @@ -3406,6 +3347,8 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, NETIF_F_GSO_UDP_TUNNEL | NETIF_F_GSO_UDP_TUNNEL_CSUM | NETIF_F_GSO_PARTIAL; + + dev->udp_tunnel_nic_info = &mlx4_udp_tunnels; } dev->vlan_features = dev->hw_features; diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index 9f56036129603..a46efe37cfa90 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -599,8 +599,6 @@ struct mlx4_en_priv { struct work_struct linkstate_task; struct delayed_work stats_task; struct delayed_work service_task; - struct work_struct vxlan_add_task; - struct work_struct vxlan_del_task; struct mlx4_en_perf_stats pstats; struct mlx4_en_pkt_stats pkstats; struct mlx4_en_counter_stats pf_stats; diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index e3d074008da26..49b00def2eef5 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -1796,9 +1796,11 @@ static int geneve_netdevice_event(struct notifier_block *unused, event == NETDEV_UDP_TUNNEL_DROP_INFO) { geneve_offload_rx_ports(dev, event == NETDEV_UDP_TUNNEL_PUSH_INFO); } else if (event == NETDEV_UNREGISTER) { - geneve_offload_rx_ports(dev, false); + if (!dev->udp_tunnel_nic_info) + geneve_offload_rx_ports(dev, false); } else if (event == NETDEV_REGISTER) { - geneve_offload_rx_ports(dev, true); + if (!dev->udp_tunnel_nic_info) + geneve_offload_rx_ports(dev, true); } return NOTIFY_DONE; diff --git a/drivers/net/netdevsim/Makefile b/drivers/net/netdevsim/Makefile index f4d8f62f28c22..4dfb389dbfd84 100644 --- a/drivers/net/netdevsim/Makefile +++ b/drivers/net/netdevsim/Makefile @@ -3,7 +3,7 @@ obj-$(CONFIG_NETDEVSIM) += netdevsim.o netdevsim-objs := \ - netdev.o dev.o fib.o bus.o health.o + netdev.o dev.o fib.o bus.o health.o udp_tunnels.o ifeq ($(CONFIG_BPF_SYSCALL),y) netdevsim-objs += \ diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c index 0dc2c66a5d56e..ce719c830a77a 100644 --- a/drivers/net/netdevsim/dev.c +++ b/drivers/net/netdevsim/dev.c @@ -225,6 +225,7 @@ static int nsim_dev_debugfs_init(struct nsim_dev *nsim_dev) debugfs_create_bool("fail_trap_policer_counter_get", 0600, nsim_dev->ddir, &nsim_dev->fail_trap_policer_counter_get); + nsim_udp_tunnels_debugfs_create(nsim_dev); return 0; } diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c index 2908e0a0d6e19..9d0d18026434c 100644 --- a/drivers/net/netdevsim/netdev.c +++ b/drivers/net/netdevsim/netdev.c @@ -22,6 +22,7 @@ #include #include #include +#include #include "netdevsim.h" @@ -257,6 +258,8 @@ static const struct net_device_ops nsim_netdev_ops = { .ndo_setup_tc = nsim_setup_tc, .ndo_set_features = nsim_set_features, .ndo_bpf = nsim_bpf, + .ndo_udp_tunnel_add = udp_tunnel_nic_add_port, + .ndo_udp_tunnel_del = udp_tunnel_nic_del_port, .ndo_get_devlink_port = nsim_get_devlink_port, }; @@ -299,10 +302,14 @@ nsim_create(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port) SET_NETDEV_DEV(dev, &ns->nsim_bus_dev->dev); dev->netdev_ops = &nsim_netdev_ops; + err = nsim_udp_tunnels_info_create(nsim_dev, dev); + if (err) + goto err_free_netdev; + rtnl_lock(); err = nsim_bpf_init(ns); if (err) - goto err_free_netdev; + goto err_utn_destroy; nsim_ipsec_init(ns); @@ -317,6 +324,8 @@ nsim_create(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port) nsim_ipsec_teardown(ns); nsim_bpf_uninit(ns); rtnl_unlock(); +err_utn_destroy: + nsim_udp_tunnels_info_destroy(dev); err_free_netdev: free_netdev(dev); return ERR_PTR(err); @@ -331,6 +340,7 @@ void nsim_destroy(struct netdevsim *ns) nsim_ipsec_teardown(ns); nsim_bpf_uninit(ns); rtnl_unlock(); + nsim_udp_tunnels_info_destroy(dev); free_netdev(dev); } diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h index 4ded54a21e1e8..d164052e0393d 100644 --- a/drivers/net/netdevsim/netdevsim.h +++ b/drivers/net/netdevsim/netdevsim.h @@ -13,6 +13,7 @@ * THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. */ +#include #include #include #include @@ -29,6 +30,7 @@ #define NSIM_IPSEC_MAX_SA_COUNT 33 #define NSIM_IPSEC_VALID BIT(31) +#define NSIM_UDP_TUNNEL_N_PORTS 4 struct nsim_sa { struct xfrm_state *xs; @@ -72,12 +74,23 @@ struct netdevsim { bool bpf_map_accept; struct nsim_ipsec ipsec; + struct { + u32 inject_error; + u32 sleep; + u32 ports[2][NSIM_UDP_TUNNEL_N_PORTS]; + struct debugfs_u32_array dfs_ports[2]; + } udp_ports; }; struct netdevsim * nsim_create(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port); void nsim_destroy(struct netdevsim *ns); +void nsim_udp_tunnels_debugfs_create(struct nsim_dev *nsim_dev); +int nsim_udp_tunnels_info_create(struct nsim_dev *nsim_dev, + struct net_device *dev); +void nsim_udp_tunnels_info_destroy(struct net_device *dev); + #ifdef CONFIG_BPF_SYSCALL int nsim_bpf_dev_init(struct nsim_dev *nsim_dev); void nsim_bpf_dev_exit(struct nsim_dev *nsim_dev); @@ -183,6 +196,12 @@ struct nsim_dev { bool fail_trap_group_set; bool fail_trap_policer_set; bool fail_trap_policer_counter_get; + struct { + bool sync_all; + bool open_only; + bool ipv4_only; + u32 sleep; + } udp_ports; }; static inline struct net *nsim_dev_net(struct nsim_dev *nsim_dev) diff --git a/drivers/net/netdevsim/udp_tunnels.c b/drivers/net/netdevsim/udp_tunnels.c new file mode 100644 index 0000000000000..22c06a76033c0 --- /dev/null +++ b/drivers/net/netdevsim/udp_tunnels.c @@ -0,0 +1,192 @@ +// SPDX-License-Identifier: GPL-2.0-only +// Copyright (c) 2020 Facebook Inc. + +#include +#include +#include +#include + +#include "netdevsim.h" + +static int +nsim_udp_tunnel_set_port(struct net_device *dev, unsigned int table, + unsigned int entry, struct udp_tunnel_info *ti) +{ + struct netdevsim *ns = netdev_priv(dev); + int ret; + + ret = -ns->udp_ports.inject_error; + ns->udp_ports.inject_error = 0; + + if (ns->udp_ports.sleep) + msleep(ns->udp_ports.sleep); + + if (!ret) { + if (ns->udp_ports.ports[table][entry]) + ret = -EBUSY; + else + ns->udp_ports.ports[table][entry] = + be16_to_cpu(ti->port) << 16 | ti->type; + } + + netdev_info(dev, "set [%d, %d] type %d family %d port %d - %d\n", + table, entry, ti->type, ti->sa_family, ntohs(ti->port), + ret); + return ret; +} + +static int +nsim_udp_tunnel_unset_port(struct net_device *dev, unsigned int table, + unsigned int entry, struct udp_tunnel_info *ti) +{ + struct netdevsim *ns = netdev_priv(dev); + int ret; + + ret = -ns->udp_ports.inject_error; + ns->udp_ports.inject_error = 0; + + if (ns->udp_ports.sleep) + msleep(ns->udp_ports.sleep); + if (!ret) { + u32 val = be16_to_cpu(ti->port) << 16 | ti->type; + + if (val == ns->udp_ports.ports[table][entry]) + ns->udp_ports.ports[table][entry] = 0; + else + ret = -ENOENT; + } + + netdev_info(dev, "unset [%d, %d] type %d family %d port %d - %d\n", + table, entry, ti->type, ti->sa_family, ntohs(ti->port), + ret); + return ret; +} + +static int +nsim_udp_tunnel_sync_table(struct net_device *dev, unsigned int table) +{ + struct netdevsim *ns = netdev_priv(dev); + struct udp_tunnel_info ti; + unsigned int i; + int ret; + + ret = -ns->udp_ports.inject_error; + ns->udp_ports.inject_error = 0; + + for (i = 0; i < NSIM_UDP_TUNNEL_N_PORTS; i++) { + udp_tunnel_nic_get_port(dev, table, i, &ti); + ns->udp_ports.ports[table][i] = + be16_to_cpu(ti.port) << 16 | ti.type; + } + + return ret; +} + +static const struct udp_tunnel_nic_info nsim_udp_tunnel_info = { + .set_port = nsim_udp_tunnel_set_port, + .unset_port = nsim_udp_tunnel_unset_port, + .sync_table = nsim_udp_tunnel_sync_table, + + .tables = { + { + .n_entries = NSIM_UDP_TUNNEL_N_PORTS, + .tunnel_types = UDP_TUNNEL_TYPE_VXLAN, + }, + { + .n_entries = NSIM_UDP_TUNNEL_N_PORTS, + .tunnel_types = UDP_TUNNEL_TYPE_GENEVE | + UDP_TUNNEL_TYPE_VXLAN_GPE, + }, + }, +}; + +static ssize_t +nsim_udp_tunnels_info_reset_write(struct file *file, const char __user *data, + size_t count, loff_t *ppos) +{ + struct net_device *dev = file->private_data; + struct netdevsim *ns = netdev_priv(dev); + + memset(&ns->udp_ports.ports, 0, sizeof(ns->udp_ports.ports)); + rtnl_lock(); + udp_tunnel_nic_reset_ntf(dev); + rtnl_unlock(); + + return count; +} + +static const struct file_operations nsim_udp_tunnels_info_reset_fops = { + .open = simple_open, + .write = nsim_udp_tunnels_info_reset_write, + .llseek = generic_file_llseek, +}; + +int nsim_udp_tunnels_info_create(struct nsim_dev *nsim_dev, + struct net_device *dev) +{ + struct netdevsim *ns = netdev_priv(dev); + struct udp_tunnel_nic_info *info; + + debugfs_create_u32("udp_ports_inject_error", 0600, + ns->nsim_dev_port->ddir, + &ns->udp_ports.inject_error); + + ns->udp_ports.dfs_ports[0].array = ns->udp_ports.ports[0]; + ns->udp_ports.dfs_ports[0].n_elements = NSIM_UDP_TUNNEL_N_PORTS; + debugfs_create_u32_array("udp_ports_table0", 0400, + ns->nsim_dev_port->ddir, + &ns->udp_ports.dfs_ports[0]); + + ns->udp_ports.dfs_ports[1].array = ns->udp_ports.ports[1]; + ns->udp_ports.dfs_ports[1].n_elements = NSIM_UDP_TUNNEL_N_PORTS; + debugfs_create_u32_array("udp_ports_table1", 0400, + ns->nsim_dev_port->ddir, + &ns->udp_ports.dfs_ports[1]); + + debugfs_create_file("udp_ports_reset", 0200, ns->nsim_dev_port->ddir, + dev, &nsim_udp_tunnels_info_reset_fops); + + /* Note: it's not normal to allocate the info struct like this! + * Drivers are expected to use a static const one, here we're testing. + */ + info = kmemdup(&nsim_udp_tunnel_info, sizeof(nsim_udp_tunnel_info), + GFP_KERNEL); + if (!info) + return -ENOMEM; + ns->udp_ports.sleep = nsim_dev->udp_ports.sleep; + + if (nsim_dev->udp_ports.sync_all) { + info->set_port = NULL; + info->unset_port = NULL; + } else { + info->sync_table = NULL; + } + + if (ns->udp_ports.sleep) + info->flags |= UDP_TUNNEL_NIC_INFO_MAY_SLEEP; + if (nsim_dev->udp_ports.open_only) + info->flags |= UDP_TUNNEL_NIC_INFO_OPEN_ONLY; + if (nsim_dev->udp_ports.ipv4_only) + info->flags |= UDP_TUNNEL_NIC_INFO_IPV4_ONLY; + + dev->udp_tunnel_nic_info = info; + return 0; +} + +void nsim_udp_tunnels_info_destroy(struct net_device *dev) +{ + kfree(dev->udp_tunnel_nic_info); + dev->udp_tunnel_nic_info = NULL; +} + +void nsim_udp_tunnels_debugfs_create(struct nsim_dev *nsim_dev) +{ + debugfs_create_bool("udp_ports_sync_all", 0600, nsim_dev->ddir, + &nsim_dev->udp_ports.sync_all); + debugfs_create_bool("udp_ports_open_only", 0600, nsim_dev->ddir, + &nsim_dev->udp_ports.open_only); + debugfs_create_bool("udp_ports_ipv4_only", 0600, nsim_dev->ddir, + &nsim_dev->udp_ports.ipv4_only); + debugfs_create_u32("udp_ports_sleep", 0600, nsim_dev->ddir, + &nsim_dev->udp_ports.sleep); +} diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 89d85dcb200e8..a43c97b139245 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -4477,10 +4477,12 @@ static int vxlan_netdevice_event(struct notifier_block *unused, struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id); if (event == NETDEV_UNREGISTER) { - vxlan_offload_rx_ports(dev, false); + if (!dev->udp_tunnel_nic_info) + vxlan_offload_rx_ports(dev, false); vxlan_handle_lowerdev_unregister(vn, dev); } else if (event == NETDEV_REGISTER) { - vxlan_offload_rx_ports(dev, true); + if (!dev->udp_tunnel_nic_info) + vxlan_offload_rx_ports(dev, true); } else if (event == NETDEV_UDP_TUNNEL_PUSH_INFO || event == NETDEV_UDP_TUNNEL_DROP_INFO) { vxlan_offload_rx_ports(dev, event == NETDEV_UDP_TUNNEL_PUSH_INFO); diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c index ae49a55bda001..d0ed71f37511b 100644 --- a/fs/debugfs/file.c +++ b/fs/debugfs/file.c @@ -918,11 +918,6 @@ struct dentry *debugfs_create_blob(const char *name, umode_t mode, } EXPORT_SYMBOL_GPL(debugfs_create_blob); -struct array_data { - void *array; - u32 elements; -}; - static size_t u32_format_array(char *buf, size_t bufsize, u32 *array, int array_size) { @@ -943,8 +938,8 @@ static size_t u32_format_array(char *buf, size_t bufsize, static int u32_array_open(struct inode *inode, struct file *file) { - struct array_data *data = inode->i_private; - int size, elements = data->elements; + struct debugfs_u32_array *data = inode->i_private; + int size, elements = data->n_elements; char *buf; /* @@ -959,7 +954,7 @@ static int u32_array_open(struct inode *inode, struct file *file) buf[size] = 0; file->private_data = buf; - u32_format_array(buf, size, data->array, data->elements); + u32_format_array(buf, size, data->array, data->n_elements); return nonseekable_open(inode, file); } @@ -996,8 +991,7 @@ static const struct file_operations u32_array_fops = { * @parent: a pointer to the parent dentry for this file. This should be a * directory dentry if set. If this parameter is %NULL, then the * file will be created in the root of the debugfs filesystem. - * @array: u32 array that provides data. - * @elements: total number of elements in the array. + * @array: wrapper struct containing data pointer and size of the array. * * This function creates a file in debugfs with the given name that exports * @array as data. If the @mode variable is so set it can be read from. @@ -1005,17 +999,10 @@ static const struct file_operations u32_array_fops = { * Once array is created its size can not be changed. */ void debugfs_create_u32_array(const char *name, umode_t mode, - struct dentry *parent, u32 *array, u32 elements) + struct dentry *parent, + struct debugfs_u32_array *array) { - struct array_data *data = kmalloc(sizeof(*data), GFP_KERNEL); - - if (data == NULL) - return; - - data->array = array; - data->elements = elements; - - debugfs_create_file_unsafe(name, mode, parent, data, &u32_array_fops); + debugfs_create_file_unsafe(name, mode, parent, array, &u32_array_fops); } EXPORT_SYMBOL_GPL(debugfs_create_u32_array); diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index 63cb3606dea7f..851dd1f9a8a5f 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -38,6 +38,11 @@ struct debugfs_regset32 { struct device *dev; /* Optional device for Runtime PM */ }; +struct debugfs_u32_array { + u32 *array; + u32 n_elements; +}; + extern struct dentry *arch_debugfs_dir; #define DEFINE_DEBUGFS_ATTRIBUTE(__fops, __get, __set, __fmt) \ @@ -136,7 +141,8 @@ void debugfs_print_regs32(struct seq_file *s, const struct debugfs_reg32 *regs, int nregs, void __iomem *base, char *prefix); void debugfs_create_u32_array(const char *name, umode_t mode, - struct dentry *parent, u32 *array, u32 elements); + struct dentry *parent, + struct debugfs_u32_array *array); struct dentry *debugfs_create_devm_seqfile(struct device *dev, const char *name, struct dentry *parent, @@ -316,8 +322,8 @@ static inline bool debugfs_initialized(void) } static inline void debugfs_create_u32_array(const char *name, umode_t mode, - struct dentry *parent, u32 *array, - u32 elements) + struct dentry *parent, + struct debugfs_u32_array *array) { } diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 39e28e11863c7..ac2cd3f49abaa 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -65,6 +65,8 @@ struct wpan_dev; struct mpls_dev; /* UDP Tunnel offloads */ struct udp_tunnel_info; +struct udp_tunnel_nic_info; +struct udp_tunnel_nic; struct bpf_prog; struct xdp_buff; @@ -1836,6 +1838,10 @@ enum netdev_priv_flags { * * @macsec_ops: MACsec offloading ops * + * @udp_tunnel_nic_info: static structure describing the UDP tunnel + * offload capabilities of the device + * @udp_tunnel_nic: UDP tunnel offload state + * * FIXME: cleanup struct net_device such that network protocol info * moves out. */ @@ -2134,6 +2140,8 @@ struct net_device { /* MACsec management functions */ const struct macsec_ops *macsec_ops; #endif + const struct udp_tunnel_nic_info *udp_tunnel_nic_info; + struct udp_tunnel_nic *udp_tunnel_nic; }; #define to_net_dev(d) container_of(d, struct net_device, dev) diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h index e7312ceb2794a..dd20ce99740c8 100644 --- a/include/net/udp_tunnel.h +++ b/include/net/udp_tunnel.h @@ -106,15 +106,16 @@ void setup_udp_tunnel_sock(struct net *net, struct socket *sock, * call this function to perform Tx offloads on outgoing traffic. */ enum udp_parsable_tunnel_type { - UDP_TUNNEL_TYPE_VXLAN, /* RFC 7348 */ - UDP_TUNNEL_TYPE_GENEVE, /* draft-ietf-nvo3-geneve */ - UDP_TUNNEL_TYPE_VXLAN_GPE, /* draft-ietf-nvo3-vxlan-gpe */ + UDP_TUNNEL_TYPE_VXLAN = BIT(0), /* RFC 7348 */ + UDP_TUNNEL_TYPE_GENEVE = BIT(1), /* draft-ietf-nvo3-geneve */ + UDP_TUNNEL_TYPE_VXLAN_GPE = BIT(2), /* draft-ietf-nvo3-vxlan-gpe */ }; struct udp_tunnel_info { unsigned short type; sa_family_t sa_family; __be16 port; + u8 hw_priv; }; /* Notify network devices of offloadable types */ @@ -181,4 +182,161 @@ static inline void udp_tunnel_encap_enable(struct socket *sock) udp_encap_enable(); } +#define UDP_TUNNEL_NIC_MAX_TABLES 4 + +enum udp_tunnel_nic_info_flags { + /* Device callbacks may sleep */ + UDP_TUNNEL_NIC_INFO_MAY_SLEEP = BIT(0), + /* Device only supports offloads when it's open, all ports + * will be removed before close and re-added after open. + */ + UDP_TUNNEL_NIC_INFO_OPEN_ONLY = BIT(1), + /* Device supports only IPv4 tunnels */ + UDP_TUNNEL_NIC_INFO_IPV4_ONLY = BIT(2), +}; + +/** + * struct udp_tunnel_nic_info - driver UDP tunnel offload information + * @set_port: callback for adding a new port + * @unset_port: callback for removing a port + * @sync_table: callback for syncing the entire port table at once + * @flags: device flags from enum udp_tunnel_nic_info_flags + * @tables: UDP port tables this device has + * @tables.n_entries: number of entries in this table + * @tables.tunnel_types: types of tunnels this table accepts + * + * Drivers are expected to provide either @set_port and @unset_port callbacks + * or the @sync_table callback. Callbacks are invoked with rtnl lock held. + * + * Known limitations: + * - UDP tunnel port notifications are fundamentally best-effort - + * it is likely the driver will both see skbs which use a UDP tunnel port, + * while not being a tunneled skb, and tunnel skbs from other ports - + * drivers should only use these ports for non-critical RX-side offloads, + * e.g. the checksum offload; + * - none of the devices care about the socket family at present, so we don't + * track it. Please extend this code if you care. + */ +struct udp_tunnel_nic_info { + /* one-by-one */ + int (*set_port)(struct net_device *dev, + unsigned int table, unsigned int entry, + struct udp_tunnel_info *ti); + int (*unset_port)(struct net_device *dev, + unsigned int table, unsigned int entry, + struct udp_tunnel_info *ti); + + /* all at once */ + int (*sync_table)(struct net_device *dev, unsigned int table); + + unsigned int flags; + + struct udp_tunnel_nic_table_info { + unsigned int n_entries; + unsigned int tunnel_types; + } tables[UDP_TUNNEL_NIC_MAX_TABLES]; +}; + +/* UDP tunnel module dependencies + * + * Tunnel drivers are expected to have a hard dependency on the udp_tunnel + * module. NIC drivers are not, they just attach their + * struct udp_tunnel_nic_info to the netdev and wait for callbacks to come. + * Loading a tunnel driver will cause the udp_tunnel module to be loaded + * and only then will all the required state structures be allocated. + * Since we want a weak dependency from the drivers and the core to udp_tunnel + * we call things through the following stubs. + */ +struct udp_tunnel_nic_ops { + void (*get_port)(struct net_device *dev, unsigned int table, + unsigned int idx, struct udp_tunnel_info *ti); + void (*set_port_priv)(struct net_device *dev, unsigned int table, + unsigned int idx, u8 priv); + void (*add_port)(struct net_device *dev, struct udp_tunnel_info *ti); + void (*del_port)(struct net_device *dev, struct udp_tunnel_info *ti); + void (*reset_ntf)(struct net_device *dev); + + size_t (*dump_size)(struct net_device *dev, unsigned int table); + int (*dump_write)(struct net_device *dev, unsigned int table, + struct sk_buff *skb); +}; + +#ifdef CONFIG_INET +extern const struct udp_tunnel_nic_ops *udp_tunnel_nic_ops; +#else +#define udp_tunnel_nic_ops ((struct udp_tunnel_nic_ops *)NULL) +#endif + +static inline void +udp_tunnel_nic_get_port(struct net_device *dev, unsigned int table, + unsigned int idx, struct udp_tunnel_info *ti) +{ + /* This helper is used from .sync_table, we indicate empty entries + * by zero'ed @ti. Drivers which need to know the details of a port + * when it gets deleted should use the .set_port / .unset_port + * callbacks. + * Zero out here, otherwise !CONFIG_INET causes uninitilized warnings. + */ + memset(ti, 0, sizeof(*ti)); + + if (udp_tunnel_nic_ops) + udp_tunnel_nic_ops->get_port(dev, table, idx, ti); +} + +static inline void +udp_tunnel_nic_set_port_priv(struct net_device *dev, unsigned int table, + unsigned int idx, u8 priv) +{ + if (udp_tunnel_nic_ops) + udp_tunnel_nic_ops->set_port_priv(dev, table, idx, priv); +} + +static inline void +udp_tunnel_nic_add_port(struct net_device *dev, struct udp_tunnel_info *ti) +{ + if (udp_tunnel_nic_ops) + udp_tunnel_nic_ops->add_port(dev, ti); +} + +static inline void +udp_tunnel_nic_del_port(struct net_device *dev, struct udp_tunnel_info *ti) +{ + if (udp_tunnel_nic_ops) + udp_tunnel_nic_ops->del_port(dev, ti); +} + +/** + * udp_tunnel_nic_reset_ntf() - device-originating reset notification + * @dev: network interface device structure + * + * Called by the driver to inform the core that the entire UDP tunnel port + * state has been lost, usually due to device reset. Core will assume device + * forgot all the ports and issue .set_port and .sync_table callbacks as + * necessary. + * + * This function must be called with rtnl lock held, and will issue all + * the callbacks before returning. + */ +static inline void udp_tunnel_nic_reset_ntf(struct net_device *dev) +{ + if (udp_tunnel_nic_ops) + udp_tunnel_nic_ops->reset_ntf(dev); +} + +static inline size_t +udp_tunnel_nic_dump_size(struct net_device *dev, unsigned int table) +{ + if (!udp_tunnel_nic_ops) + return 0; + return udp_tunnel_nic_ops->dump_size(dev, table); +} + +static inline int +udp_tunnel_nic_dump_write(struct net_device *dev, unsigned int table, + struct sk_buff *skb) +{ + if (!udp_tunnel_nic_ops) + return 0; + return udp_tunnel_nic_ops->dump_write(dev, table, skb); +} #endif diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 60856e0f9618b..b4f2d134e7131 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -669,6 +669,7 @@ enum ethtool_link_ext_substate_cable_issue { * @ETH_SS_SOF_TIMESTAMPING: SOF_TIMESTAMPING_* flags * @ETH_SS_TS_TX_TYPES: timestamping Tx types * @ETH_SS_TS_RX_FILTERS: timestamping Rx filters + * @ETH_SS_UDP_TUNNEL_TYPES: UDP tunnel types */ enum ethtool_stringset { ETH_SS_TEST = 0, @@ -686,6 +687,7 @@ enum ethtool_stringset { ETH_SS_SOF_TIMESTAMPING, ETH_SS_TS_TX_TYPES, ETH_SS_TS_RX_FILTERS, + ETH_SS_UDP_TUNNEL_TYPES, /* add new constants above here */ ETH_SS_COUNT diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index c12ce4df4b6b7..5dcd24cb33ea2 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -41,6 +41,7 @@ enum { ETHTOOL_MSG_TSINFO_GET, ETHTOOL_MSG_CABLE_TEST_ACT, ETHTOOL_MSG_CABLE_TEST_TDR_ACT, + ETHTOOL_MSG_TUNNEL_INFO_GET, /* add new constants above here */ __ETHTOOL_MSG_USER_CNT, @@ -556,6 +557,60 @@ enum { ETHTOOL_A_CABLE_TEST_TDR_NTF_MAX = __ETHTOOL_A_CABLE_TEST_TDR_NTF_CNT - 1 }; +/* TUNNEL INFO */ + +enum { + ETHTOOL_UDP_TUNNEL_TYPE_VXLAN, + ETHTOOL_UDP_TUNNEL_TYPE_GENEVE, + ETHTOOL_UDP_TUNNEL_TYPE_VXLAN_GPE, + + __ETHTOOL_UDP_TUNNEL_TYPE_CNT +}; + +enum { + ETHTOOL_A_TUNNEL_UDP_ENTRY_UNSPEC, + + ETHTOOL_A_TUNNEL_UDP_ENTRY_PORT, /* be16 */ + ETHTOOL_A_TUNNEL_UDP_ENTRY_TYPE, /* u32 */ + + /* add new constants above here */ + __ETHTOOL_A_TUNNEL_UDP_ENTRY_CNT, + ETHTOOL_A_TUNNEL_UDP_ENTRY_MAX = (__ETHTOOL_A_TUNNEL_UDP_ENTRY_CNT - 1) +}; + +enum { + ETHTOOL_A_TUNNEL_UDP_TABLE_UNSPEC, + + ETHTOOL_A_TUNNEL_UDP_TABLE_SIZE, /* u32 */ + ETHTOOL_A_TUNNEL_UDP_TABLE_TYPES, /* bitset */ + ETHTOOL_A_TUNNEL_UDP_TABLE_ENTRY, /* nest - _UDP_ENTRY_* */ + + /* add new constants above here */ + __ETHTOOL_A_TUNNEL_UDP_TABLE_CNT, + ETHTOOL_A_TUNNEL_UDP_TABLE_MAX = (__ETHTOOL_A_TUNNEL_UDP_TABLE_CNT - 1) +}; + +enum { + ETHTOOL_A_TUNNEL_UDP_UNSPEC, + + ETHTOOL_A_TUNNEL_UDP_TABLE, /* nest - _UDP_TABLE_* */ + + /* add new constants above here */ + __ETHTOOL_A_TUNNEL_UDP_CNT, + ETHTOOL_A_TUNNEL_UDP_MAX = (__ETHTOOL_A_TUNNEL_UDP_CNT - 1) +}; + +enum { + ETHTOOL_A_TUNNEL_INFO_UNSPEC, + ETHTOOL_A_TUNNEL_INFO_HEADER, /* nest - _A_HEADER_* */ + + ETHTOOL_A_TUNNEL_INFO_UDP_PORTS, /* nest - _UDP_TABLE */ + + /* add new constants above here */ + __ETHTOOL_A_TUNNEL_INFO_CNT, + ETHTOOL_A_TUNNEL_INFO_MAX = (__ETHTOOL_A_TUNNEL_INFO_CNT - 1) +}; + /* generic netlink info */ #define ETHTOOL_GENL_NAME "ethtool" #define ETHTOOL_GENL_VERSION 1 diff --git a/mm/cma.h b/mm/cma.h index 33c0b517733c7..6698fa63279b1 100644 --- a/mm/cma.h +++ b/mm/cma.h @@ -2,6 +2,8 @@ #ifndef __MM_CMA_H__ #define __MM_CMA_H__ +#include + struct cma { unsigned long base_pfn; unsigned long count; @@ -11,6 +13,7 @@ struct cma { #ifdef CONFIG_CMA_DEBUGFS struct hlist_head mem_head; spinlock_t mem_head_lock; + struct debugfs_u32_array dfs_bitmap; #endif const char *name; }; diff --git a/mm/cma_debug.c b/mm/cma_debug.c index 4e6cbe2f586e8..d5bf8aa34fdca 100644 --- a/mm/cma_debug.c +++ b/mm/cma_debug.c @@ -164,7 +164,6 @@ static void cma_debugfs_add_one(struct cma *cma, struct dentry *root_dentry) { struct dentry *tmp; char name[16]; - int u32s; scnprintf(name, sizeof(name), "cma-%s", cma->name); @@ -180,8 +179,10 @@ static void cma_debugfs_add_one(struct cma *cma, struct dentry *root_dentry) debugfs_create_file("used", 0444, tmp, cma, &cma_used_fops); debugfs_create_file("maxchunk", 0444, tmp, cma, &cma_maxchunk_fops); - u32s = DIV_ROUND_UP(cma_bitmap_maxno(cma), BITS_PER_BYTE * sizeof(u32)); - debugfs_create_u32_array("bitmap", 0444, tmp, (u32 *)cma->bitmap, u32s); + cma->dfs_bitmap.array = (u32 *)cma->bitmap; + cma->dfs_bitmap.n_elements = DIV_ROUND_UP(cma_bitmap_maxno(cma), + BITS_PER_BYTE * sizeof(u32)); + debugfs_create_u32_array("bitmap", 0444, tmp, &cma->dfs_bitmap); } static int __init cma_debugfs_init(void) diff --git a/net/ethtool/Makefile b/net/ethtool/Makefile index 0c2b94f204991..7a849ff22dada 100644 --- a/net/ethtool/Makefile +++ b/net/ethtool/Makefile @@ -6,4 +6,5 @@ obj-$(CONFIG_ETHTOOL_NETLINK) += ethtool_nl.o ethtool_nl-y := netlink.o bitset.o strset.o linkinfo.o linkmodes.o \ linkstate.o debug.o wol.o features.o privflags.o rings.o \ - channels.o coalesce.o pause.o eee.o tsinfo.o cabletest.o + channels.o coalesce.o pause.o eee.o tsinfo.o cabletest.o \ + tunnels.o diff --git a/net/ethtool/common.c b/net/ethtool/common.c index c541667137970..ed19573fccd7c 100644 --- a/net/ethtool/common.c +++ b/net/ethtool/common.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only +#include #include #include #include @@ -272,6 +273,14 @@ const char ts_rx_filter_names[][ETH_GSTRING_LEN] = { }; static_assert(ARRAY_SIZE(ts_rx_filter_names) == __HWTSTAMP_FILTER_CNT); +const char udp_tunnel_type_names[][ETH_GSTRING_LEN] = { + [ETHTOOL_UDP_TUNNEL_TYPE_VXLAN] = "vxlan", + [ETHTOOL_UDP_TUNNEL_TYPE_GENEVE] = "geneve", + [ETHTOOL_UDP_TUNNEL_TYPE_VXLAN_GPE] = "vxlan-gpe", +}; +static_assert(ARRAY_SIZE(udp_tunnel_type_names) == + __ETHTOOL_UDP_TUNNEL_TYPE_CNT); + /* return false if legacy contained non-0 deprecated fields * maxtxpkt/maxrxpkt. rest of ksettings always updated */ diff --git a/net/ethtool/common.h b/net/ethtool/common.h index b83bef38368c6..3d9251c95a8bc 100644 --- a/net/ethtool/common.h +++ b/net/ethtool/common.h @@ -28,6 +28,7 @@ extern const char wol_mode_names[][ETH_GSTRING_LEN]; extern const char sof_timestamping_names[][ETH_GSTRING_LEN]; extern const char ts_tx_type_names[][ETH_GSTRING_LEN]; extern const char ts_rx_filter_names[][ETH_GSTRING_LEN]; +extern const char udp_tunnel_type_names[][ETH_GSTRING_LEN]; int __ethtool_get_link(struct net_device *dev); diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index 88fd07f47040c..fb9d096faaa47 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -181,6 +181,12 @@ struct sk_buff *ethnl_reply_init(size_t payload, struct net_device *dev, u8 cmd, return NULL; } +void *ethnl_dump_put(struct sk_buff *skb, struct netlink_callback *cb, u8 cmd) +{ + return genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, + ðtool_genl_family, 0, cmd); +} + void *ethnl_bcastmsg_put(struct sk_buff *skb, u8 cmd) { return genlmsg_put(skb, 0, ++ethnl_bcast_seq, ðtool_genl_family, 0, @@ -849,6 +855,12 @@ static const struct genl_ops ethtool_genl_ops[] = { .flags = GENL_UNS_ADMIN_PERM, .doit = ethnl_act_cable_test_tdr, }, + { + .cmd = ETHTOOL_MSG_TUNNEL_INFO_GET, + .doit = ethnl_tunnel_info_doit, + .start = ethnl_tunnel_info_start, + .dumpit = ethnl_tunnel_info_dumpit, + }, }; static const struct genl_multicast_group ethtool_nl_mcgrps[] = { diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h index 9a96b6e90dc22..e2085005caacd 100644 --- a/net/ethtool/netlink.h +++ b/net/ethtool/netlink.h @@ -19,6 +19,7 @@ int ethnl_fill_reply_header(struct sk_buff *skb, struct net_device *dev, struct sk_buff *ethnl_reply_init(size_t payload, struct net_device *dev, u8 cmd, u16 hdr_attrtype, struct genl_info *info, void **ehdrp); +void *ethnl_dump_put(struct sk_buff *skb, struct netlink_callback *cb, u8 cmd); void *ethnl_bcastmsg_put(struct sk_buff *skb, u8 cmd); int ethnl_multicast(struct sk_buff *skb, struct net_device *dev); @@ -361,5 +362,8 @@ int ethnl_set_pause(struct sk_buff *skb, struct genl_info *info); int ethnl_set_eee(struct sk_buff *skb, struct genl_info *info); int ethnl_act_cable_test(struct sk_buff *skb, struct genl_info *info); int ethnl_act_cable_test_tdr(struct sk_buff *skb, struct genl_info *info); +int ethnl_tunnel_info_doit(struct sk_buff *skb, struct genl_info *info); +int ethnl_tunnel_info_start(struct netlink_callback *cb); +int ethnl_tunnel_info_dumpit(struct sk_buff *skb, struct netlink_callback *cb); #endif /* _NET_ETHTOOL_NETLINK_H */ diff --git a/net/ethtool/strset.c b/net/ethtool/strset.c index 773634b6b0483..82707b662fe47 100644 --- a/net/ethtool/strset.c +++ b/net/ethtool/strset.c @@ -75,6 +75,11 @@ static const struct strset_info info_template[] = { .count = __HWTSTAMP_FILTER_CNT, .strings = ts_rx_filter_names, }, + [ETH_SS_UDP_TUNNEL_TYPES] = { + .per_dev = false, + .count = __ETHTOOL_UDP_TUNNEL_TYPE_CNT, + .strings = udp_tunnel_type_names, + }, }; struct strset_req_info { diff --git a/net/ethtool/tunnels.c b/net/ethtool/tunnels.c new file mode 100644 index 0000000000000..6b89255f12319 --- /dev/null +++ b/net/ethtool/tunnels.c @@ -0,0 +1,259 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include +#include + +#include "bitset.h" +#include "common.h" +#include "netlink.h" + +static const struct nla_policy +ethtool_tunnel_info_policy[ETHTOOL_A_TUNNEL_INFO_MAX + 1] = { + [ETHTOOL_A_TUNNEL_INFO_UNSPEC] = { .type = NLA_REJECT }, + [ETHTOOL_A_TUNNEL_INFO_HEADER] = { .type = NLA_NESTED }, +}; + +static_assert(ETHTOOL_UDP_TUNNEL_TYPE_VXLAN == ilog2(UDP_TUNNEL_TYPE_VXLAN)); +static_assert(ETHTOOL_UDP_TUNNEL_TYPE_GENEVE == ilog2(UDP_TUNNEL_TYPE_GENEVE)); +static_assert(ETHTOOL_UDP_TUNNEL_TYPE_VXLAN_GPE == + ilog2(UDP_TUNNEL_TYPE_VXLAN_GPE)); + +static ssize_t +ethnl_tunnel_info_reply_size(const struct ethnl_req_info *req_base, + struct netlink_ext_ack *extack) +{ + bool compact = req_base->flags & ETHTOOL_FLAG_COMPACT_BITSETS; + const struct udp_tunnel_nic_info *info; + unsigned int i; + size_t size; + int ret; + + info = req_base->dev->udp_tunnel_nic_info; + if (!info) { + NL_SET_ERR_MSG(extack, + "device does not report tunnel offload info"); + return -EOPNOTSUPP; + } + + size = nla_total_size(0); /* _INFO_UDP_PORTS */ + + for (i = 0; i < UDP_TUNNEL_NIC_MAX_TABLES; i++) { + if (!info->tables[i].n_entries) + return size; + + size += nla_total_size(0); /* _UDP_TABLE */ + size += nla_total_size(sizeof(u32)); /* _UDP_TABLE_SIZE */ + ret = ethnl_bitset32_size(&info->tables[i].tunnel_types, NULL, + __ETHTOOL_UDP_TUNNEL_TYPE_CNT, + udp_tunnel_type_names, compact); + if (ret < 0) + return ret; + size += ret; + + size += udp_tunnel_nic_dump_size(req_base->dev, i); + } + + return size; +} + +static int +ethnl_tunnel_info_fill_reply(const struct ethnl_req_info *req_base, + struct sk_buff *skb) +{ + bool compact = req_base->flags & ETHTOOL_FLAG_COMPACT_BITSETS; + const struct udp_tunnel_nic_info *info; + struct nlattr *ports, *table; + unsigned int i; + + info = req_base->dev->udp_tunnel_nic_info; + if (!info) + return -EOPNOTSUPP; + + ports = nla_nest_start(skb, ETHTOOL_A_TUNNEL_INFO_UDP_PORTS); + if (!ports) + return -EMSGSIZE; + + for (i = 0; i < UDP_TUNNEL_NIC_MAX_TABLES; i++) { + if (!info->tables[i].n_entries) + break; + + table = nla_nest_start(skb, ETHTOOL_A_TUNNEL_UDP_TABLE); + if (!table) + goto err_cancel_ports; + + if (nla_put_u32(skb, ETHTOOL_A_TUNNEL_UDP_TABLE_SIZE, + info->tables[i].n_entries)) + goto err_cancel_table; + + if (ethnl_put_bitset32(skb, ETHTOOL_A_TUNNEL_UDP_TABLE_TYPES, + &info->tables[i].tunnel_types, NULL, + __ETHTOOL_UDP_TUNNEL_TYPE_CNT, + udp_tunnel_type_names, compact)) + goto err_cancel_table; + + if (udp_tunnel_nic_dump_write(req_base->dev, i, skb)) + goto err_cancel_table; + + nla_nest_end(skb, table); + } + + nla_nest_end(skb, ports); + + return 0; + +err_cancel_table: + nla_nest_cancel(skb, table); +err_cancel_ports: + nla_nest_cancel(skb, ports); + return -EMSGSIZE; +} + +static int +ethnl_tunnel_info_req_parse(struct ethnl_req_info *req_info, + const struct nlmsghdr *nlhdr, struct net *net, + struct netlink_ext_ack *extack, bool require_dev) +{ + struct nlattr *tb[ETHTOOL_A_TUNNEL_INFO_MAX + 1]; + int ret; + + ret = nlmsg_parse(nlhdr, GENL_HDRLEN, tb, ETHTOOL_A_TUNNEL_INFO_MAX, + ethtool_tunnel_info_policy, extack); + if (ret < 0) + return ret; + + return ethnl_parse_header_dev_get(req_info, + tb[ETHTOOL_A_TUNNEL_INFO_HEADER], + net, extack, require_dev); +} + +int ethnl_tunnel_info_doit(struct sk_buff *skb, struct genl_info *info) +{ + struct ethnl_req_info req_info = {}; + struct sk_buff *rskb; + void *reply_payload; + int reply_len; + int ret; + + ret = ethnl_tunnel_info_req_parse(&req_info, info->nlhdr, + genl_info_net(info), info->extack, + true); + if (ret < 0) + return ret; + + rtnl_lock(); + ret = ethnl_tunnel_info_reply_size(&req_info, info->extack); + if (ret < 0) + goto err_unlock_rtnl; + reply_len = ret + ethnl_reply_header_size(); + + rskb = ethnl_reply_init(reply_len, req_info.dev, + ETHTOOL_MSG_TUNNEL_INFO_GET, + ETHTOOL_A_TUNNEL_INFO_HEADER, + info, &reply_payload); + if (!rskb) { + ret = -ENOMEM; + goto err_unlock_rtnl; + } + + ret = ethnl_tunnel_info_fill_reply(&req_info, rskb); + if (ret) + goto err_free_msg; + rtnl_unlock(); + dev_put(req_info.dev); + genlmsg_end(rskb, reply_payload); + + return genlmsg_reply(rskb, info); + +err_free_msg: + nlmsg_free(rskb); +err_unlock_rtnl: + rtnl_unlock(); + dev_put(req_info.dev); + return ret; +} + +struct ethnl_tunnel_info_dump_ctx { + struct ethnl_req_info req_info; + int pos_hash; + int pos_idx; +}; + +int ethnl_tunnel_info_start(struct netlink_callback *cb) +{ + struct ethnl_tunnel_info_dump_ctx *ctx = (void *)cb->ctx; + int ret; + + BUILD_BUG_ON(sizeof(*ctx) > sizeof(cb->ctx)); + + memset(ctx, 0, sizeof(*ctx)); + + ret = ethnl_tunnel_info_req_parse(&ctx->req_info, cb->nlh, + sock_net(cb->skb->sk), cb->extack, + false); + if (ctx->req_info.dev) { + dev_put(ctx->req_info.dev); + ctx->req_info.dev = NULL; + } + + return ret; +} + +int ethnl_tunnel_info_dumpit(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct ethnl_tunnel_info_dump_ctx *ctx = (void *)cb->ctx; + struct net *net = sock_net(skb->sk); + int s_idx = ctx->pos_idx; + int h, idx = 0; + int ret = 0; + void *ehdr; + + rtnl_lock(); + cb->seq = net->dev_base_seq; + for (h = ctx->pos_hash; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { + struct hlist_head *head; + struct net_device *dev; + + head = &net->dev_index_head[h]; + idx = 0; + hlist_for_each_entry(dev, head, index_hlist) { + if (idx < s_idx) + goto cont; + + ehdr = ethnl_dump_put(skb, cb, + ETHTOOL_MSG_TUNNEL_INFO_GET); + if (!ehdr) { + ret = -EMSGSIZE; + goto out; + } + + ret = ethnl_fill_reply_header(skb, dev, ETHTOOL_A_TUNNEL_INFO_HEADER); + if (ret < 0) { + genlmsg_cancel(skb, ehdr); + goto out; + } + + ctx->req_info.dev = dev; + ret = ethnl_tunnel_info_fill_reply(&ctx->req_info, skb); + ctx->req_info.dev = NULL; + if (ret < 0) { + genlmsg_cancel(skb, ehdr); + if (ret == -EOPNOTSUPP) + goto cont; + goto out; + } + genlmsg_end(skb, ehdr); +cont: + idx++; + } + } +out: + rtnl_unlock(); + + ctx->pos_hash = h; + ctx->pos_idx = idx; + nl_dump_check_consistent(cb, nlmsg_hdr(skb)); + + if (ret == -EMSGSIZE && skb->len) + return skb->len; + return ret; +} diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 9e1a186a3671e..5b77a46885b95 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -14,7 +14,7 @@ obj-y := route.o inetpeer.o protocol.o \ udp_offload.o arp.o icmp.o devinet.o af_inet.o igmp.o \ fib_frontend.o fib_semantics.o fib_trie.o fib_notifier.o \ inet_fragment.o ping.o ip_tunnel_core.o gre_offload.o \ - metrics.o netlink.o nexthop.o + metrics.o netlink.o nexthop.o udp_tunnel_stub.o obj-$(CONFIG_BPFILTER) += bpfilter/ @@ -29,6 +29,7 @@ gre-y := gre_demux.o obj-$(CONFIG_NET_FOU) += fou.o obj-$(CONFIG_NET_IPGRE_DEMUX) += gre.o obj-$(CONFIG_NET_IPGRE) += ip_gre.o +udp_tunnel-y := udp_tunnel_core.o udp_tunnel_nic.o obj-$(CONFIG_NET_UDP_TUNNEL) += udp_tunnel.o obj-$(CONFIG_NET_IPVTI) += ip_vti.o obj-$(CONFIG_SYN_COOKIES) += syncookies.o diff --git a/net/ipv4/udp_tunnel.c b/net/ipv4/udp_tunnel_core.c similarity index 100% rename from net/ipv4/udp_tunnel.c rename to net/ipv4/udp_tunnel_core.c diff --git a/net/ipv4/udp_tunnel_nic.c b/net/ipv4/udp_tunnel_nic.c new file mode 100644 index 0000000000000..f0dbd9905a531 --- /dev/null +++ b/net/ipv4/udp_tunnel_nic.c @@ -0,0 +1,890 @@ +// SPDX-License-Identifier: GPL-2.0-only +// Copyright (c) 2020 Facebook Inc. + +#include +#include +#include +#include +#include +#include + +enum udp_tunnel_nic_table_entry_flags { + UDP_TUNNEL_NIC_ENTRY_ADD = BIT(0), + UDP_TUNNEL_NIC_ENTRY_DEL = BIT(1), + UDP_TUNNEL_NIC_ENTRY_OP_FAIL = BIT(2), + UDP_TUNNEL_NIC_ENTRY_FROZEN = BIT(3), +}; + +struct udp_tunnel_nic_table_entry { + __be16 port; + u8 type; + u8 use_cnt; + u8 flags; + u8 hw_priv; +}; + +/** + * struct udp_tunnel_nic - UDP tunnel port offload state + * @work: async work for talking to hardware from process context + * @dev: netdev pointer + * @need_sync: at least one port start changed + * @need_replay: space was freed, we need a replay of all ports + * @work_pending: @work is currently scheduled + * @n_tables: number of tables under @entries + * @missed: bitmap of tables which overflown + * @entries: table of tables of ports currently offloaded + */ +struct udp_tunnel_nic { + struct work_struct work; + + struct net_device *dev; + + u8 need_sync:1; + u8 need_replay:1; + u8 work_pending:1; + + unsigned int n_tables; + unsigned long missed; + struct udp_tunnel_nic_table_entry **entries; +}; + +/* We ensure all work structs are done using driver state, but not the code. + * We need a workqueue we can flush before module gets removed. + */ +static struct workqueue_struct *udp_tunnel_nic_workqueue; + +static const char *udp_tunnel_nic_tunnel_type_name(unsigned int type) +{ + switch (type) { + case UDP_TUNNEL_TYPE_VXLAN: + return "vxlan"; + case UDP_TUNNEL_TYPE_GENEVE: + return "geneve"; + case UDP_TUNNEL_TYPE_VXLAN_GPE: + return "vxlan-gpe"; + default: + return "unknown"; + } +} + +static bool +udp_tunnel_nic_entry_is_free(struct udp_tunnel_nic_table_entry *entry) +{ + return entry->use_cnt == 0 && !entry->flags; +} + +static bool +udp_tunnel_nic_entry_is_present(struct udp_tunnel_nic_table_entry *entry) +{ + return entry->use_cnt && !(entry->flags & ~UDP_TUNNEL_NIC_ENTRY_FROZEN); +} + +static bool +udp_tunnel_nic_entry_is_frozen(struct udp_tunnel_nic_table_entry *entry) +{ + return entry->flags & UDP_TUNNEL_NIC_ENTRY_FROZEN; +} + +static void +udp_tunnel_nic_entry_freeze_used(struct udp_tunnel_nic_table_entry *entry) +{ + if (!udp_tunnel_nic_entry_is_free(entry)) + entry->flags |= UDP_TUNNEL_NIC_ENTRY_FROZEN; +} + +static void +udp_tunnel_nic_entry_unfreeze(struct udp_tunnel_nic_table_entry *entry) +{ + entry->flags &= ~UDP_TUNNEL_NIC_ENTRY_FROZEN; +} + +static bool +udp_tunnel_nic_entry_is_queued(struct udp_tunnel_nic_table_entry *entry) +{ + return entry->flags & (UDP_TUNNEL_NIC_ENTRY_ADD | + UDP_TUNNEL_NIC_ENTRY_DEL); +} + +static void +udp_tunnel_nic_entry_queue(struct udp_tunnel_nic *utn, + struct udp_tunnel_nic_table_entry *entry, + unsigned int flag) +{ + entry->flags |= flag; + utn->need_sync = 1; +} + +static void +udp_tunnel_nic_ti_from_entry(struct udp_tunnel_nic_table_entry *entry, + struct udp_tunnel_info *ti) +{ + memset(ti, 0, sizeof(*ti)); + ti->port = entry->port; + ti->type = entry->type; + ti->hw_priv = entry->hw_priv; +} + +static bool +udp_tunnel_nic_is_empty(struct net_device *dev, struct udp_tunnel_nic *utn) +{ + const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info; + unsigned int i, j; + + for (i = 0; i < utn->n_tables; i++) + for (j = 0; j < info->tables[i].n_entries; j++) + if (!udp_tunnel_nic_entry_is_free(&utn->entries[i][j])) + return false; + return true; +} + +static bool +udp_tunnel_nic_should_replay(struct net_device *dev, struct udp_tunnel_nic *utn) +{ + const struct udp_tunnel_nic_table_info *table; + unsigned int i, j; + + if (!utn->missed) + return false; + + for (i = 0; i < utn->n_tables; i++) { + table = &dev->udp_tunnel_nic_info->tables[i]; + if (!test_bit(i, &utn->missed)) + continue; + + for (j = 0; j < table->n_entries; j++) + if (udp_tunnel_nic_entry_is_free(&utn->entries[i][j])) + return true; + } + + return false; +} + +static void +__udp_tunnel_nic_get_port(struct net_device *dev, unsigned int table, + unsigned int idx, struct udp_tunnel_info *ti) +{ + struct udp_tunnel_nic_table_entry *entry; + struct udp_tunnel_nic *utn; + + utn = dev->udp_tunnel_nic; + entry = &utn->entries[table][idx]; + + if (entry->use_cnt) + udp_tunnel_nic_ti_from_entry(entry, ti); +} + +static void +__udp_tunnel_nic_set_port_priv(struct net_device *dev, unsigned int table, + unsigned int idx, u8 priv) +{ + dev->udp_tunnel_nic->entries[table][idx].hw_priv = priv; +} + +static void +udp_tunnel_nic_entry_update_done(struct udp_tunnel_nic_table_entry *entry, + int err) +{ + bool dodgy = entry->flags & UDP_TUNNEL_NIC_ENTRY_OP_FAIL; + + WARN_ON_ONCE(entry->flags & UDP_TUNNEL_NIC_ENTRY_ADD && + entry->flags & UDP_TUNNEL_NIC_ENTRY_DEL); + + if (entry->flags & UDP_TUNNEL_NIC_ENTRY_ADD && + (!err || (err == -EEXIST && dodgy))) + entry->flags &= ~UDP_TUNNEL_NIC_ENTRY_ADD; + + if (entry->flags & UDP_TUNNEL_NIC_ENTRY_DEL && + (!err || (err == -ENOENT && dodgy))) + entry->flags &= ~UDP_TUNNEL_NIC_ENTRY_DEL; + + if (!err) + entry->flags &= ~UDP_TUNNEL_NIC_ENTRY_OP_FAIL; + else + entry->flags |= UDP_TUNNEL_NIC_ENTRY_OP_FAIL; +} + +static void +udp_tunnel_nic_device_sync_one(struct net_device *dev, + struct udp_tunnel_nic *utn, + unsigned int table, unsigned int idx) +{ + struct udp_tunnel_nic_table_entry *entry; + struct udp_tunnel_info ti; + int err; + + entry = &utn->entries[table][idx]; + if (!udp_tunnel_nic_entry_is_queued(entry)) + return; + + udp_tunnel_nic_ti_from_entry(entry, &ti); + if (entry->flags & UDP_TUNNEL_NIC_ENTRY_ADD) + err = dev->udp_tunnel_nic_info->set_port(dev, table, idx, &ti); + else + err = dev->udp_tunnel_nic_info->unset_port(dev, table, idx, + &ti); + udp_tunnel_nic_entry_update_done(entry, err); + + if (err) + netdev_warn(dev, + "UDP tunnel port sync failed port %d type %s: %d\n", + be16_to_cpu(entry->port), + udp_tunnel_nic_tunnel_type_name(entry->type), + err); +} + +static void +udp_tunnel_nic_device_sync_by_port(struct net_device *dev, + struct udp_tunnel_nic *utn) +{ + const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info; + unsigned int i, j; + + for (i = 0; i < utn->n_tables; i++) + for (j = 0; j < info->tables[i].n_entries; j++) + udp_tunnel_nic_device_sync_one(dev, utn, i, j); +} + +static void +udp_tunnel_nic_device_sync_by_table(struct net_device *dev, + struct udp_tunnel_nic *utn) +{ + const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info; + unsigned int i, j; + int err; + + for (i = 0; i < utn->n_tables; i++) { + /* Find something that needs sync in this table */ + for (j = 0; j < info->tables[i].n_entries; j++) + if (udp_tunnel_nic_entry_is_queued(&utn->entries[i][j])) + break; + if (j == info->tables[i].n_entries) + continue; + + err = info->sync_table(dev, i); + if (err) + netdev_warn(dev, "UDP tunnel port sync failed for table %d: %d\n", + i, err); + + for (j = 0; j < info->tables[i].n_entries; j++) { + struct udp_tunnel_nic_table_entry *entry; + + entry = &utn->entries[i][j]; + if (udp_tunnel_nic_entry_is_queued(entry)) + udp_tunnel_nic_entry_update_done(entry, err); + } + } +} + +static void +__udp_tunnel_nic_device_sync(struct net_device *dev, struct udp_tunnel_nic *utn) +{ + if (!utn->need_sync) + return; + + if (dev->udp_tunnel_nic_info->sync_table) + udp_tunnel_nic_device_sync_by_table(dev, utn); + else + udp_tunnel_nic_device_sync_by_port(dev, utn); + + utn->need_sync = 0; + /* Can't replay directly here, in case we come from the tunnel driver's + * notification - trying to replay may deadlock inside tunnel driver. + */ + utn->need_replay = udp_tunnel_nic_should_replay(dev, utn); +} + +static void +udp_tunnel_nic_device_sync(struct net_device *dev, struct udp_tunnel_nic *utn) +{ + const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info; + bool may_sleep; + + if (!utn->need_sync) + return; + + /* Drivers which sleep in the callback need to update from + * the workqueue, if we come from the tunnel driver's notification. + */ + may_sleep = info->flags & UDP_TUNNEL_NIC_INFO_MAY_SLEEP; + if (!may_sleep) + __udp_tunnel_nic_device_sync(dev, utn); + if (may_sleep || utn->need_replay) { + queue_work(udp_tunnel_nic_workqueue, &utn->work); + utn->work_pending = 1; + } +} + +static bool +udp_tunnel_nic_table_is_capable(const struct udp_tunnel_nic_table_info *table, + struct udp_tunnel_info *ti) +{ + return table->tunnel_types & ti->type; +} + +static bool +udp_tunnel_nic_is_capable(struct net_device *dev, struct udp_tunnel_nic *utn, + struct udp_tunnel_info *ti) +{ + const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info; + unsigned int i; + + /* Special case IPv4-only NICs */ + if (info->flags & UDP_TUNNEL_NIC_INFO_IPV4_ONLY && + ti->sa_family != AF_INET) + return false; + + for (i = 0; i < utn->n_tables; i++) + if (udp_tunnel_nic_table_is_capable(&info->tables[i], ti)) + return true; + return false; +} + +static int +udp_tunnel_nic_has_collision(struct net_device *dev, struct udp_tunnel_nic *utn, + struct udp_tunnel_info *ti) +{ + const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info; + struct udp_tunnel_nic_table_entry *entry; + unsigned int i, j; + + for (i = 0; i < utn->n_tables; i++) + for (j = 0; j < info->tables[i].n_entries; j++) { + entry = &utn->entries[i][j]; + + if (!udp_tunnel_nic_entry_is_free(entry) && + entry->port == ti->port && + entry->type != ti->type) { + __set_bit(i, &utn->missed); + return true; + } + } + return false; +} + +static void +udp_tunnel_nic_entry_adj(struct udp_tunnel_nic *utn, + unsigned int table, unsigned int idx, int use_cnt_adj) +{ + struct udp_tunnel_nic_table_entry *entry = &utn->entries[table][idx]; + bool dodgy = entry->flags & UDP_TUNNEL_NIC_ENTRY_OP_FAIL; + unsigned int from, to; + + /* If not going from used to unused or vice versa - all done. + * For dodgy entries make sure we try to sync again (queue the entry). + */ + entry->use_cnt += use_cnt_adj; + if (!dodgy && !entry->use_cnt == !(entry->use_cnt - use_cnt_adj)) + return; + + /* Cancel the op before it was sent to the device, if possible, + * otherwise we'd need to take special care to issue commands + * in the same order the ports arrived. + */ + if (use_cnt_adj < 0) { + from = UDP_TUNNEL_NIC_ENTRY_ADD; + to = UDP_TUNNEL_NIC_ENTRY_DEL; + } else { + from = UDP_TUNNEL_NIC_ENTRY_DEL; + to = UDP_TUNNEL_NIC_ENTRY_ADD; + } + + if (entry->flags & from) { + entry->flags &= ~from; + if (!dodgy) + return; + } + + udp_tunnel_nic_entry_queue(utn, entry, to); +} + +static bool +udp_tunnel_nic_entry_try_adj(struct udp_tunnel_nic *utn, + unsigned int table, unsigned int idx, + struct udp_tunnel_info *ti, int use_cnt_adj) +{ + struct udp_tunnel_nic_table_entry *entry = &utn->entries[table][idx]; + + if (udp_tunnel_nic_entry_is_free(entry) || + entry->port != ti->port || + entry->type != ti->type) + return false; + + if (udp_tunnel_nic_entry_is_frozen(entry)) + return true; + + udp_tunnel_nic_entry_adj(utn, table, idx, use_cnt_adj); + return true; +} + +/* Try to find existing matching entry and adjust its use count, instead of + * adding a new one. Returns true if entry was found. In case of delete the + * entry may have gotten removed in the process, in which case it will be + * queued for removal. + */ +static bool +udp_tunnel_nic_try_existing(struct net_device *dev, struct udp_tunnel_nic *utn, + struct udp_tunnel_info *ti, int use_cnt_adj) +{ + const struct udp_tunnel_nic_table_info *table; + unsigned int i, j; + + for (i = 0; i < utn->n_tables; i++) { + table = &dev->udp_tunnel_nic_info->tables[i]; + if (!udp_tunnel_nic_table_is_capable(table, ti)) + continue; + + for (j = 0; j < table->n_entries; j++) + if (udp_tunnel_nic_entry_try_adj(utn, i, j, ti, + use_cnt_adj)) + return true; + } + + return false; +} + +static bool +udp_tunnel_nic_add_existing(struct net_device *dev, struct udp_tunnel_nic *utn, + struct udp_tunnel_info *ti) +{ + return udp_tunnel_nic_try_existing(dev, utn, ti, +1); +} + +static bool +udp_tunnel_nic_del_existing(struct net_device *dev, struct udp_tunnel_nic *utn, + struct udp_tunnel_info *ti) +{ + return udp_tunnel_nic_try_existing(dev, utn, ti, -1); +} + +static bool +udp_tunnel_nic_add_new(struct net_device *dev, struct udp_tunnel_nic *utn, + struct udp_tunnel_info *ti) +{ + const struct udp_tunnel_nic_table_info *table; + unsigned int i, j; + + for (i = 0; i < utn->n_tables; i++) { + table = &dev->udp_tunnel_nic_info->tables[i]; + if (!udp_tunnel_nic_table_is_capable(table, ti)) + continue; + + for (j = 0; j < table->n_entries; j++) { + struct udp_tunnel_nic_table_entry *entry; + + entry = &utn->entries[i][j]; + if (!udp_tunnel_nic_entry_is_free(entry)) + continue; + + entry->port = ti->port; + entry->type = ti->type; + entry->use_cnt = 1; + udp_tunnel_nic_entry_queue(utn, entry, + UDP_TUNNEL_NIC_ENTRY_ADD); + return true; + } + + /* The different table may still fit this port in, but there + * are no devices currently which have multiple tables accepting + * the same tunnel type, and false positives are okay. + */ + __set_bit(i, &utn->missed); + } + + return false; +} + +static void +__udp_tunnel_nic_add_port(struct net_device *dev, struct udp_tunnel_info *ti) +{ + const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info; + struct udp_tunnel_nic *utn; + + utn = dev->udp_tunnel_nic; + if (!utn) + return; + if (!netif_running(dev) && info->flags & UDP_TUNNEL_NIC_INFO_OPEN_ONLY) + return; + + if (!udp_tunnel_nic_is_capable(dev, utn, ti)) + return; + + /* It may happen that a tunnel of one type is removed and different + * tunnel type tries to reuse its port before the device was informed. + * Rely on utn->missed to re-add this port later. + */ + if (udp_tunnel_nic_has_collision(dev, utn, ti)) + return; + + if (!udp_tunnel_nic_add_existing(dev, utn, ti)) + udp_tunnel_nic_add_new(dev, utn, ti); + + udp_tunnel_nic_device_sync(dev, utn); +} + +static void +__udp_tunnel_nic_del_port(struct net_device *dev, struct udp_tunnel_info *ti) +{ + struct udp_tunnel_nic *utn; + + utn = dev->udp_tunnel_nic; + if (!utn) + return; + + if (!udp_tunnel_nic_is_capable(dev, utn, ti)) + return; + + udp_tunnel_nic_del_existing(dev, utn, ti); + + udp_tunnel_nic_device_sync(dev, utn); +} + +static void __udp_tunnel_nic_reset_ntf(struct net_device *dev) +{ + const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info; + struct udp_tunnel_nic *utn; + unsigned int i, j; + + ASSERT_RTNL(); + + utn = dev->udp_tunnel_nic; + if (!utn) + return; + + utn->need_sync = false; + for (i = 0; i < utn->n_tables; i++) + for (j = 0; j < info->tables[i].n_entries; j++) { + struct udp_tunnel_nic_table_entry *entry; + + entry = &utn->entries[i][j]; + + entry->flags &= ~(UDP_TUNNEL_NIC_ENTRY_DEL | + UDP_TUNNEL_NIC_ENTRY_OP_FAIL); + /* We don't release rtnl across ops */ + WARN_ON(entry->flags & UDP_TUNNEL_NIC_ENTRY_FROZEN); + if (!entry->use_cnt) + continue; + + udp_tunnel_nic_entry_queue(utn, entry, + UDP_TUNNEL_NIC_ENTRY_ADD); + } + + __udp_tunnel_nic_device_sync(dev, utn); +} + +static size_t +__udp_tunnel_nic_dump_size(struct net_device *dev, unsigned int table) +{ + const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info; + struct udp_tunnel_nic *utn; + unsigned int j; + size_t size; + + utn = dev->udp_tunnel_nic; + if (!utn) + return 0; + + size = 0; + for (j = 0; j < info->tables[table].n_entries; j++) { + if (!udp_tunnel_nic_entry_is_present(&utn->entries[table][j])) + continue; + + size += nla_total_size(0) + /* _TABLE_ENTRY */ + nla_total_size(sizeof(__be16)) + /* _ENTRY_PORT */ + nla_total_size(sizeof(u32)); /* _ENTRY_TYPE */ + } + + return size; +} + +static int +__udp_tunnel_nic_dump_write(struct net_device *dev, unsigned int table, + struct sk_buff *skb) +{ + const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info; + struct udp_tunnel_nic *utn; + struct nlattr *nest; + unsigned int j; + + utn = dev->udp_tunnel_nic; + if (!utn) + return 0; + + for (j = 0; j < info->tables[table].n_entries; j++) { + if (!udp_tunnel_nic_entry_is_present(&utn->entries[table][j])) + continue; + + nest = nla_nest_start(skb, ETHTOOL_A_TUNNEL_UDP_TABLE_ENTRY); + + if (nla_put_be16(skb, ETHTOOL_A_TUNNEL_UDP_ENTRY_PORT, + utn->entries[table][j].port) || + nla_put_u32(skb, ETHTOOL_A_TUNNEL_UDP_ENTRY_TYPE, + ilog2(utn->entries[table][j].type))) + goto err_cancel; + + nla_nest_end(skb, nest); + } + + return 0; + +err_cancel: + nla_nest_cancel(skb, nest); + return -EMSGSIZE; +} + +static const struct udp_tunnel_nic_ops __udp_tunnel_nic_ops = { + .get_port = __udp_tunnel_nic_get_port, + .set_port_priv = __udp_tunnel_nic_set_port_priv, + .add_port = __udp_tunnel_nic_add_port, + .del_port = __udp_tunnel_nic_del_port, + .reset_ntf = __udp_tunnel_nic_reset_ntf, + .dump_size = __udp_tunnel_nic_dump_size, + .dump_write = __udp_tunnel_nic_dump_write, +}; + +static void +udp_tunnel_nic_flush(struct net_device *dev, struct udp_tunnel_nic *utn) +{ + const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info; + unsigned int i, j; + + for (i = 0; i < utn->n_tables; i++) + for (j = 0; j < info->tables[i].n_entries; j++) { + int adj_cnt = -utn->entries[i][j].use_cnt; + + if (adj_cnt) + udp_tunnel_nic_entry_adj(utn, i, j, adj_cnt); + } + + __udp_tunnel_nic_device_sync(dev, utn); + + for (i = 0; i < utn->n_tables; i++) + memset(utn->entries[i], 0, array_size(info->tables[i].n_entries, + sizeof(**utn->entries))); + WARN_ON(utn->need_sync); + utn->need_replay = 0; +} + +static void +udp_tunnel_nic_replay(struct net_device *dev, struct udp_tunnel_nic *utn) +{ + const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info; + unsigned int i, j; + + /* Freeze all the ports we are already tracking so that the replay + * does not double up the refcount. + */ + for (i = 0; i < utn->n_tables; i++) + for (j = 0; j < info->tables[i].n_entries; j++) + udp_tunnel_nic_entry_freeze_used(&utn->entries[i][j]); + utn->missed = 0; + utn->need_replay = 0; + + udp_tunnel_get_rx_info(dev); + + for (i = 0; i < utn->n_tables; i++) + for (j = 0; j < info->tables[i].n_entries; j++) + udp_tunnel_nic_entry_unfreeze(&utn->entries[i][j]); +} + +static void udp_tunnel_nic_device_sync_work(struct work_struct *work) +{ + struct udp_tunnel_nic *utn = + container_of(work, struct udp_tunnel_nic, work); + + rtnl_lock(); + utn->work_pending = 0; + __udp_tunnel_nic_device_sync(utn->dev, utn); + + if (utn->need_replay) + udp_tunnel_nic_replay(utn->dev, utn); + rtnl_unlock(); +} + +static struct udp_tunnel_nic * +udp_tunnel_nic_alloc(const struct udp_tunnel_nic_info *info, + unsigned int n_tables) +{ + struct udp_tunnel_nic *utn; + unsigned int i; + + utn = kzalloc(sizeof(*utn), GFP_KERNEL); + if (!utn) + return NULL; + utn->n_tables = n_tables; + INIT_WORK(&utn->work, udp_tunnel_nic_device_sync_work); + + utn->entries = kmalloc_array(n_tables, sizeof(void *), GFP_KERNEL); + if (!utn->entries) + goto err_free_utn; + + for (i = 0; i < n_tables; i++) { + utn->entries[i] = kcalloc(info->tables[i].n_entries, + sizeof(*utn->entries[i]), GFP_KERNEL); + if (!utn->entries[i]) + goto err_free_prev_entries; + } + + return utn; + +err_free_prev_entries: + while (i--) + kfree(utn->entries[i]); + kfree(utn->entries); +err_free_utn: + kfree(utn); + return NULL; +} + +static int udp_tunnel_nic_register(struct net_device *dev) +{ + const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info; + struct udp_tunnel_nic *utn; + unsigned int n_tables, i; + + BUILD_BUG_ON(sizeof(utn->missed) * BITS_PER_BYTE < + UDP_TUNNEL_NIC_MAX_TABLES); + + if (WARN_ON(!info->set_port != !info->unset_port) || + WARN_ON(!info->set_port == !info->sync_table) || + WARN_ON(!info->tables[0].n_entries)) + return -EINVAL; + + n_tables = 1; + for (i = 1; i < UDP_TUNNEL_NIC_MAX_TABLES; i++) { + if (!info->tables[i].n_entries) + continue; + + n_tables++; + if (WARN_ON(!info->tables[i - 1].n_entries)) + return -EINVAL; + } + + utn = udp_tunnel_nic_alloc(info, n_tables); + if (!utn) + return -ENOMEM; + + utn->dev = dev; + dev_hold(dev); + dev->udp_tunnel_nic = utn; + + if (!(info->flags & UDP_TUNNEL_NIC_INFO_OPEN_ONLY)) + udp_tunnel_get_rx_info(dev); + + return 0; +} + +static void +udp_tunnel_nic_unregister(struct net_device *dev, struct udp_tunnel_nic *utn) +{ + unsigned int i; + + /* Flush before we check work, so we don't waste time adding entries + * from the work which we will boot immediately. + */ + udp_tunnel_nic_flush(dev, utn); + + /* Wait for the work to be done using the state, netdev core will + * retry unregister until we give up our reference on this device. + */ + if (utn->work_pending) + return; + + for (i = 0; i < utn->n_tables; i++) + kfree(utn->entries[i]); + kfree(utn->entries); + kfree(utn); + dev->udp_tunnel_nic = NULL; + dev_put(dev); +} + +static int +udp_tunnel_nic_netdevice_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + const struct udp_tunnel_nic_info *info; + struct udp_tunnel_nic *utn; + + info = dev->udp_tunnel_nic_info; + if (!info) + return NOTIFY_DONE; + + if (event == NETDEV_REGISTER) { + int err; + + err = udp_tunnel_nic_register(dev); + if (err) + netdev_WARN(dev, "failed to register for UDP tunnel offloads: %d", err); + return notifier_from_errno(err); + } + /* All other events will need the udp_tunnel_nic state */ + utn = dev->udp_tunnel_nic; + if (!utn) + return NOTIFY_DONE; + + if (event == NETDEV_UNREGISTER) { + udp_tunnel_nic_unregister(dev, utn); + return NOTIFY_OK; + } + + /* All other events only matter if NIC has to be programmed open */ + if (!(info->flags & UDP_TUNNEL_NIC_INFO_OPEN_ONLY)) + return NOTIFY_DONE; + + if (event == NETDEV_UP) { + WARN_ON(!udp_tunnel_nic_is_empty(dev, utn)); + udp_tunnel_get_rx_info(dev); + return NOTIFY_OK; + } + if (event == NETDEV_GOING_DOWN) { + udp_tunnel_nic_flush(dev, utn); + return NOTIFY_OK; + } + + return NOTIFY_DONE; +} + +static struct notifier_block udp_tunnel_nic_notifier_block __read_mostly = { + .notifier_call = udp_tunnel_nic_netdevice_event, +}; + +static int __init udp_tunnel_nic_init_module(void) +{ + int err; + + udp_tunnel_nic_workqueue = alloc_workqueue("udp_tunnel_nic", 0, 0); + if (!udp_tunnel_nic_workqueue) + return -ENOMEM; + + rtnl_lock(); + udp_tunnel_nic_ops = &__udp_tunnel_nic_ops; + rtnl_unlock(); + + err = register_netdevice_notifier(&udp_tunnel_nic_notifier_block); + if (err) + goto err_unset_ops; + + return 0; + +err_unset_ops: + rtnl_lock(); + udp_tunnel_nic_ops = NULL; + rtnl_unlock(); + destroy_workqueue(udp_tunnel_nic_workqueue); + return err; +} +late_initcall(udp_tunnel_nic_init_module); + +static void __exit udp_tunnel_nic_cleanup_module(void) +{ + unregister_netdevice_notifier(&udp_tunnel_nic_notifier_block); + + rtnl_lock(); + udp_tunnel_nic_ops = NULL; + rtnl_unlock(); + + destroy_workqueue(udp_tunnel_nic_workqueue); +} +module_exit(udp_tunnel_nic_cleanup_module); + +MODULE_LICENSE("GPL"); diff --git a/net/ipv4/udp_tunnel_stub.c b/net/ipv4/udp_tunnel_stub.c new file mode 100644 index 0000000000000..c4b2888f5fef1 --- /dev/null +++ b/net/ipv4/udp_tunnel_stub.c @@ -0,0 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only +// Copyright (c) 2020 Facebook Inc. + +#include + +const struct udp_tunnel_nic_ops *udp_tunnel_nic_ops; +EXPORT_SYMBOL_GPL(udp_tunnel_nic_ops); diff --git a/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh b/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh new file mode 100644 index 0000000000000..ba1d53b9f815b --- /dev/null +++ b/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh @@ -0,0 +1,786 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0-only + +VNI_GEN=$RANDOM +NSIM_ID=$((RANDOM % 1024)) +NSIM_DEV_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_ID +NSIM_DEV_DFS=/sys/kernel/debug/netdevsim/netdevsim$NSIM_ID +NSIM_NETDEV= +HAS_ETHTOOL= +EXIT_STATUS=0 +num_cases=0 +num_errors=0 + +clean_up_devs=( ) + +function err_cnt { + echo "ERROR:" $@ + EXIT_STATUS=1 + ((num_errors++)) + ((num_cases++)) +} + +function pass_cnt { + ((num_cases++)) +} + +function cleanup_tuns { + for dev in "${clean_up_devs[@]}"; do + [ -e /sys/class/net/$dev ] && ip link del dev $dev + done + clean_up_devs=( ) +} + +function cleanup_nsim { + if [ -e $NSIM_DEV_SYS ]; then + echo $NSIM_ID > /sys/bus/netdevsim/del_device + fi +} + +function cleanup { + cleanup_tuns + cleanup_nsim +} + +trap cleanup EXIT + +function new_vxlan { + local dev=$1 + local dstport=$2 + local lower=$3 + local ipver=$4 + local flags=$5 + + local group ipfl + + [ "$ipver" != '6' ] && group=239.1.1.1 || group=fff1::1 + [ "$ipver" != '6' ] || ipfl="-6" + + [[ ! "$flags" =~ "external" ]] && flags="$flags id $((VNI_GEN++))" + + ip $ipfl link add $dev type vxlan \ + group $group \ + dev $lower \ + dstport $dstport \ + $flags + + ip link set dev $dev up + + clean_up_devs=("${clean_up_devs[@]}" $dev) + + check_tables +} + +function new_geneve { + local dev=$1 + local dstport=$2 + local ipver=$3 + local flags=$4 + + local group ipfl + + [ "$ipver" != '6' ] && remote=1.1.1.2 || group=::2 + [ "$ipver" != '6' ] || ipfl="-6" + + [[ ! "$flags" =~ "external" ]] && flags="$flags vni $((VNI_GEN++))" + + ip $ipfl link add $dev type geneve \ + remote $remote \ + dstport $dstport \ + $flags + + ip link set dev $dev up + + clean_up_devs=("${clean_up_devs[@]}" $dev) + + check_tables +} + +function del_dev { + local dev=$1 + + ip link del dev $dev + check_tables +} + +# Helpers for netdevsim port/type encoding +function mke { + local port=$1 + local type=$2 + + echo $((port << 16 | type)) +} + +function pre { + local val=$1 + + echo -e "port: $((val >> 16))\ttype: $((val & 0xffff))" +} + +function pre_ethtool { + local val=$1 + local port=$((val >> 16)) + local type=$((val & 0xffff)) + + case $type in + 1) + type_name="vxlan" + ;; + 2) + type_name="geneve" + ;; + 4) + type_name="vxlan-gpe" + ;; + *) + type_name="bit X" + ;; + esac + + echo "port $port, $type_name" +} + +function check_table { + local path=$NSIM_DEV_DFS/ports/$port/udp_ports_table$1 + local -n expected=$2 + local last=$3 + + read -a have < $path + + if [ ${#expected[@]} -ne ${#have[@]} ]; then + echo "check_table: BAD NUMBER OF ITEMS" + return 0 + fi + + for i in "${!expected[@]}"; do + if [ -n "$HAS_ETHTOOL" -a ${expected[i]} -ne 0 ]; then + pp_expected=`pre_ethtool ${expected[i]}` + ethtool --show-tunnels $NSIM_NETDEV | grep "$pp_expected" >/dev/null + if [ $? -ne 0 -a $last -ne 0 ]; then + err_cnt "ethtool table $1 on port $port: $pfx - $msg" + echo " check_table: ethtool does not contain '$pp_expected'" + ethtool --show-tunnels $NSIM_NETDEV + return 0 + + fi + fi + + if [ ${expected[i]} != ${have[i]} ]; then + if [ $last -ne 0 ]; then + err_cnt "table $1 on port $port: $pfx - $msg" + echo " check_table: wrong entry $i" + echo " expected: `pre ${expected[i]}`" + echo " have: `pre ${have[i]}`" + return 0 + fi + return 1 + fi + done + + pass_cnt + return 0 +} + +function check_tables { + # Need retries in case we have workqueue making the changes + local retries=10 + + while ! check_table 0 exp0 $((retries == 0)); do + sleep 0.02 + ((retries--)) + done + while ! check_table 1 exp1 $((retries == 0)); do + sleep 0.02 + ((retries--)) + done +} + +function print_table { + local path=$NSIM_DEV_DFS/ports/$port/udp_ports_table$1 + read -a have < $path + + tree $NSIM_DEV_DFS/ + + echo "Port $port table $1:" + + for i in "${!have[@]}"; do + echo " `pre ${have[i]}`" + done + +} + +function print_tables { + print_table 0 + print_table 1 +} + +function get_netdev_name { + local -n old=$1 + + new=$(ls /sys/class/net) + + for netdev in $new; do + for check in $old; do + [ $netdev == $check ] && break + done + + if [ $netdev != $check ]; then + echo $netdev + break + fi + done +} + +### +### Code start +### + +# Probe ethtool support +ethtool -h | grep show-tunnels 2>&1 >/dev/null && HAS_ETHTOOL=y + +modprobe netdevsim + +# Basic test +pfx="basic" + +for port in 0 1; do + old_netdevs=$(ls /sys/class/net) + if [ $port -eq 0 ]; then + echo $NSIM_ID > /sys/bus/netdevsim/new_device + else + echo 1 > $NSIM_DEV_DFS/udp_ports_open_only + echo 1 > $NSIM_DEV_DFS/udp_ports_sleep + echo 1 > $NSIM_DEV_SYS/new_port + fi + NSIM_NETDEV=`get_netdev_name old_netdevs` + + msg="new NIC device created" + exp0=( 0 0 0 0 ) + exp1=( 0 0 0 0 ) + check_tables + + msg="VxLAN v4 devices" + exp0=( `mke 4789 1` 0 0 0 ) + new_vxlan vxlan0 4789 $NSIM_NETDEV + new_vxlan vxlan1 4789 $NSIM_NETDEV + + msg="VxLAN v4 devices go down" + exp0=( 0 0 0 0 ) + ifconfig vxlan1 down + ifconfig vxlan0 down + check_tables + + msg="VxLAN v6 devices" + exp0=( `mke 4789 1` 0 0 0 ) + new_vxlan vxlanA 4789 $NSIM_NETDEV 6 + + for ifc in vxlan0 vxlan1; do + ifconfig $ifc up + done + + new_vxlan vxlanB 4789 $NSIM_NETDEV 6 + + msg="another VxLAN v6 devices" + exp0=( `mke 4789 1` `mke 4790 1` 0 0 ) + new_vxlan vxlanC 4790 $NSIM_NETDEV 6 + + msg="Geneve device" + exp1=( `mke 6081 2` 0 0 0 ) + new_geneve gnv0 6081 + + msg="NIC device goes down" + ifconfig $NSIM_NETDEV down + if [ $port -eq 1 ]; then + exp0=( 0 0 0 0 ) + exp1=( 0 0 0 0 ) + fi + check_tables + msg="NIC device goes up again" + ifconfig $NSIM_NETDEV up + exp0=( `mke 4789 1` `mke 4790 1` 0 0 ) + exp1=( `mke 6081 2` 0 0 0 ) + check_tables + + cleanup_tuns + + msg="tunnels destroyed" + exp0=( 0 0 0 0 ) + exp1=( 0 0 0 0 ) + check_tables + + modprobe -r geneve + modprobe -r vxlan + modprobe -r udp_tunnel + + check_tables +done + +modprobe -r netdevsim + +# Module tests +pfx="module tests" + +if modinfo netdevsim | grep udp_tunnel >/dev/null; then + err_cnt "netdevsim depends on udp_tunnel" +else + pass_cnt +fi + +modprobe netdevsim + +old_netdevs=$(ls /sys/class/net) +port=0 +echo $NSIM_ID > /sys/bus/netdevsim/new_device +echo 0 > $NSIM_DEV_SYS/del_port +echo 1000 > $NSIM_DEV_DFS/udp_ports_sleep +echo 0 > $NSIM_DEV_SYS/new_port +NSIM_NETDEV=`get_netdev_name old_netdevs` + +msg="create VxLANs" +exp0=( 0 0 0 0 ) # sleep is longer than out wait +new_vxlan vxlan0 10000 $NSIM_NETDEV + +modprobe -r vxlan +modprobe -r udp_tunnel + +msg="remove tunnels" +exp0=( 0 0 0 0 ) +check_tables + +msg="create VxLANs" +exp0=( 0 0 0 0 ) # sleep is longer than out wait +new_vxlan vxlan0 10000 $NSIM_NETDEV + +exp0=( 0 0 0 0 ) + +modprobe -r netdevsim +modprobe netdevsim + +# Overflow the table + +function overflow_table0 { + local pfx=$1 + + msg="create VxLANs 1/5" + exp0=( `mke 10000 1` 0 0 0 ) + new_vxlan vxlan0 10000 $NSIM_NETDEV + + msg="create VxLANs 2/5" + exp0=( `mke 10000 1` `mke 10001 1` 0 0 ) + new_vxlan vxlan1 10001 $NSIM_NETDEV + + msg="create VxLANs 3/5" + exp0=( `mke 10000 1` `mke 10001 1` `mke 10002 1` 0 ) + new_vxlan vxlan2 10002 $NSIM_NETDEV + + msg="create VxLANs 4/5" + exp0=( `mke 10000 1` `mke 10001 1` `mke 10002 1` `mke 10003 1` ) + new_vxlan vxlan3 10003 $NSIM_NETDEV + + msg="create VxLANs 5/5" + new_vxlan vxlan4 10004 $NSIM_NETDEV +} + +function overflow_table1 { + local pfx=$1 + + msg="create GENEVE 1/5" + exp1=( `mke 20000 2` 0 0 0 ) + new_geneve gnv0 20000 + + msg="create GENEVE 2/5" + exp1=( `mke 20000 2` `mke 20001 2` 0 0 ) + new_geneve gnv1 20001 + + msg="create GENEVE 3/5" + exp1=( `mke 20000 2` `mke 20001 2` `mke 20002 2` 0 ) + new_geneve gnv2 20002 + + msg="create GENEVE 4/5" + exp1=( `mke 20000 2` `mke 20001 2` `mke 20002 2` `mke 20003 2` ) + new_geneve gnv3 20003 + + msg="create GENEVE 5/5" + new_geneve gnv4 20004 +} + +echo $NSIM_ID > /sys/bus/netdevsim/new_device +echo 0 > $NSIM_DEV_SYS/del_port + +for port in 0 1; do + if [ $port -ne 0 ]; then + echo 1 > $NSIM_DEV_DFS/udp_ports_open_only + echo 1 > $NSIM_DEV_DFS/udp_ports_sleep + fi + + echo $port > $NSIM_DEV_SYS/new_port + ifconfig $NSIM_NETDEV up + + overflow_table0 "overflow NIC table" + overflow_table1 "overflow NIC table" + + msg="replace VxLAN in overflow table" + exp0=( `mke 10000 1` `mke 10004 1` `mke 10002 1` `mke 10003 1` ) + del_dev vxlan1 + + msg="vacate VxLAN in overflow table" + exp0=( `mke 10000 1` `mke 10004 1` 0 `mke 10003 1` ) + del_dev vxlan2 + + msg="replace GENEVE in overflow table" + exp1=( `mke 20000 2` `mke 20004 2` `mke 20002 2` `mke 20003 2` ) + del_dev gnv1 + + msg="vacate GENEVE in overflow table" + exp1=( `mke 20000 2` `mke 20004 2` 0 `mke 20003 2` ) + del_dev gnv2 + + msg="table sharing - share" + exp1=( `mke 20000 2` `mke 20004 2` `mke 30001 4` `mke 20003 2` ) + new_vxlan vxlanG0 30001 $NSIM_NETDEV 4 "gpe external" + + msg="table sharing - overflow" + new_vxlan vxlanG1 30002 $NSIM_NETDEV 4 "gpe external" + msg="table sharing - overflow v6" + new_vxlan vxlanG2 30002 $NSIM_NETDEV 6 "gpe external" + + exp1=( `mke 20000 2` `mke 30002 4` `mke 30001 4` `mke 20003 2` ) + del_dev gnv4 + + msg="destroy NIC" + echo $port > $NSIM_DEV_SYS/del_port + + cleanup_tuns + exp0=( 0 0 0 0 ) + exp1=( 0 0 0 0 ) +done + +cleanup_nsim + +# Sync all +pfx="sync all" + +echo $NSIM_ID > /sys/bus/netdevsim/new_device +echo 0 > $NSIM_DEV_SYS/del_port +echo 1 > $NSIM_DEV_DFS/udp_ports_sync_all + +for port in 0 1; do + if [ $port -ne 0 ]; then + echo 1 > $NSIM_DEV_DFS/udp_ports_open_only + echo 1 > $NSIM_DEV_DFS/udp_ports_sleep + fi + + echo $port > $NSIM_DEV_SYS/new_port + ifconfig $NSIM_NETDEV up + + overflow_table0 "overflow NIC table" + overflow_table1 "overflow NIC table" + + msg="replace VxLAN in overflow table" + exp0=( `mke 10000 1` `mke 10004 1` `mke 10002 1` `mke 10003 1` ) + del_dev vxlan1 + + msg="vacate VxLAN in overflow table" + exp0=( `mke 10000 1` `mke 10004 1` 0 `mke 10003 1` ) + del_dev vxlan2 + + msg="replace GENEVE in overflow table" + exp1=( `mke 20000 2` `mke 20004 2` `mke 20002 2` `mke 20003 2` ) + del_dev gnv1 + + msg="vacate GENEVE in overflow table" + exp1=( `mke 20000 2` `mke 20004 2` 0 `mke 20003 2` ) + del_dev gnv2 + + msg="table sharing - share" + exp1=( `mke 20000 2` `mke 20004 2` `mke 30001 4` `mke 20003 2` ) + new_vxlan vxlanG0 30001 $NSIM_NETDEV 4 "gpe external" + + msg="table sharing - overflow" + new_vxlan vxlanG1 30002 $NSIM_NETDEV 4 "gpe external" + msg="table sharing - overflow v6" + new_vxlan vxlanG2 30002 $NSIM_NETDEV 6 "gpe external" + + exp1=( `mke 20000 2` `mke 30002 4` `mke 30001 4` `mke 20003 2` ) + del_dev gnv4 + + msg="destroy NIC" + echo $port > $NSIM_DEV_SYS/del_port + + cleanup_tuns + exp0=( 0 0 0 0 ) + exp1=( 0 0 0 0 ) +done + +cleanup_nsim + +# Destroy full NIC +pfx="destroy full" + +echo $NSIM_ID > /sys/bus/netdevsim/new_device +echo 0 > $NSIM_DEV_SYS/del_port + +for port in 0 1; do + if [ $port -ne 0 ]; then + echo 1 > $NSIM_DEV_DFS/udp_ports_open_only + echo 1 > $NSIM_DEV_DFS/udp_ports_sleep + fi + + echo $port > $NSIM_DEV_SYS/new_port + ifconfig $NSIM_NETDEV up + + overflow_table0 "destroy NIC" + overflow_table1 "destroy NIC" + + msg="destroy NIC" + echo $port > $NSIM_DEV_SYS/del_port + + cleanup_tuns + exp0=( 0 0 0 0 ) + exp1=( 0 0 0 0 ) +done + +cleanup_nsim + +# IPv4 only +pfx="IPv4 only" + +echo $NSIM_ID > /sys/bus/netdevsim/new_device +echo 0 > $NSIM_DEV_SYS/del_port +echo 1 > $NSIM_DEV_DFS/udp_ports_ipv4_only + +for port in 0 1; do + if [ $port -ne 0 ]; then + echo 1 > $NSIM_DEV_DFS/udp_ports_open_only + echo 1 > $NSIM_DEV_DFS/udp_ports_sleep + fi + + echo $port > $NSIM_DEV_SYS/new_port + ifconfig $NSIM_NETDEV up + + msg="create VxLANs v6" + new_vxlan vxlanA0 10000 $NSIM_NETDEV 6 + + msg="create VxLANs v6" + new_vxlan vxlanA1 10000 $NSIM_NETDEV 6 + + ip link set dev vxlanA0 down + ip link set dev vxlanA0 up + check_tables + + msg="create VxLANs v4" + exp0=( `mke 10000 1` 0 0 0 ) + new_vxlan vxlan0 10000 $NSIM_NETDEV + + msg="down VxLANs v4" + exp0=( 0 0 0 0 ) + ip link set dev vxlan0 down + check_tables + + msg="up VxLANs v4" + exp0=( `mke 10000 1` 0 0 0 ) + ip link set dev vxlan0 up + check_tables + + msg="destroy VxLANs v4" + exp0=( 0 0 0 0 ) + del_dev vxlan0 + + msg="recreate VxLANs v4" + exp0=( `mke 10000 1` 0 0 0 ) + new_vxlan vxlan0 10000 $NSIM_NETDEV + + del_dev vxlanA0 + del_dev vxlanA1 + + msg="destroy NIC" + echo $port > $NSIM_DEV_SYS/del_port + + cleanup_tuns + exp0=( 0 0 0 0 ) + exp1=( 0 0 0 0 ) +done + +cleanup_nsim + +# Failures +pfx="error injection" + +echo $NSIM_ID > /sys/bus/netdevsim/new_device +echo 0 > $NSIM_DEV_SYS/del_port + +for port in 0 1; do + if [ $port -ne 0 ]; then + echo 1 > $NSIM_DEV_DFS/udp_ports_open_only + echo 1 > $NSIM_DEV_DFS/udp_ports_sleep + fi + + echo $port > $NSIM_DEV_SYS/new_port + ifconfig $NSIM_NETDEV up + + echo 110 > $NSIM_DEV_DFS/ports/$port/udp_ports_inject_error + + msg="1 - create VxLANs v6" + exp0=( 0 0 0 0 ) + new_vxlan vxlanA0 10000 $NSIM_NETDEV 6 + + msg="1 - create VxLANs v4" + exp0=( `mke 10000 1` 0 0 0 ) + new_vxlan vxlan0 10000 $NSIM_NETDEV + + msg="1 - remove VxLANs v4" + del_dev vxlan0 + + msg="1 - remove VxLANs v6" + exp0=( 0 0 0 0 ) + del_dev vxlanA0 + + msg="2 - create GENEVE" + exp1=( `mke 20000 2` 0 0 0 ) + new_geneve gnv0 20000 + + msg="2 - destroy GENEVE" + echo 2 > $NSIM_DEV_DFS/ports/$port/udp_ports_inject_error + exp1=( `mke 20000 2` 0 0 0 ) + del_dev gnv0 + + msg="2 - create second GENEVE" + exp1=( 0 `mke 20001 2` 0 0 ) + new_geneve gnv0 20001 + + msg="destroy NIC" + echo $port > $NSIM_DEV_SYS/del_port + + cleanup_tuns + exp0=( 0 0 0 0 ) + exp1=( 0 0 0 0 ) +done + +cleanup_nsim + +# netdev flags +pfx="netdev flags" + +echo $NSIM_ID > /sys/bus/netdevsim/new_device +echo 0 > $NSIM_DEV_SYS/del_port + +for port in 0 1; do + if [ $port -ne 0 ]; then + echo 1 > $NSIM_DEV_DFS/udp_ports_open_only + echo 1 > $NSIM_DEV_DFS/udp_ports_sleep + fi + + echo $port > $NSIM_DEV_SYS/new_port + ifconfig $NSIM_NETDEV up + + msg="create VxLANs v6" + exp0=( `mke 10000 1` 0 0 0 ) + new_vxlan vxlanA0 10000 $NSIM_NETDEV 6 + + msg="create VxLANs v4" + new_vxlan vxlan0 10000 $NSIM_NETDEV + + msg="turn off" + exp0=( 0 0 0 0 ) + ethtool -K $NSIM_NETDEV rx-udp_tunnel-port-offload off + check_tables + + msg="turn on" + exp0=( `mke 10000 1` 0 0 0 ) + ethtool -K $NSIM_NETDEV rx-udp_tunnel-port-offload on + check_tables + + msg="remove both" + del_dev vxlanA0 + exp0=( 0 0 0 0 ) + del_dev vxlan0 + check_tables + + ethtool -K $NSIM_NETDEV rx-udp_tunnel-port-offload off + + msg="create VxLANs v4 - off" + exp0=( 0 0 0 0 ) + new_vxlan vxlan0 10000 $NSIM_NETDEV + + msg="created off - turn on" + exp0=( `mke 10000 1` 0 0 0 ) + ethtool -K $NSIM_NETDEV rx-udp_tunnel-port-offload on + check_tables + + msg="destroy NIC" + echo $port > $NSIM_DEV_SYS/del_port + + cleanup_tuns + exp0=( 0 0 0 0 ) + exp1=( 0 0 0 0 ) +done + +cleanup_nsim + +# device initiated reset +pfx="reset notification" + +echo $NSIM_ID > /sys/bus/netdevsim/new_device +echo 0 > $NSIM_DEV_SYS/del_port + +for port in 0 1; do + if [ $port -ne 0 ]; then + echo 1 > $NSIM_DEV_DFS/udp_ports_open_only + echo 1 > $NSIM_DEV_DFS/udp_ports_sleep + fi + + echo $port > $NSIM_DEV_SYS/new_port + ifconfig $NSIM_NETDEV up + + msg="create VxLANs v6" + exp0=( `mke 10000 1` 0 0 0 ) + new_vxlan vxlanA0 10000 $NSIM_NETDEV 6 + + msg="create VxLANs v4" + new_vxlan vxlan0 10000 $NSIM_NETDEV + + echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports_reset + check_tables + + msg="NIC device goes down" + ifconfig $NSIM_NETDEV down + if [ $port -eq 1 ]; then + exp0=( 0 0 0 0 ) + exp1=( 0 0 0 0 ) + fi + check_tables + + echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports_reset + check_tables + + msg="NIC device goes up again" + ifconfig $NSIM_NETDEV up + exp0=( `mke 10000 1` 0 0 0 ) + check_tables + + msg="remove both" + del_dev vxlanA0 + exp0=( 0 0 0 0 ) + del_dev vxlan0 + check_tables + + echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports_reset + check_tables + + msg="destroy NIC" + echo $port > $NSIM_DEV_SYS/del_port + + cleanup_tuns + exp0=( 0 0 0 0 ) + exp1=( 0 0 0 0 ) +done + +modprobe -r netdevsim + +if [ $num_errors -eq 0 ]; then + echo "PASSED all $num_cases checks" +else + echo "FAILED $num_errors/$num_cases checks" +fi + +exit $EXIT_STATUS