From f055a9dfee8508173a35169372bdedcfac49d0f6 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski <jakub.kicinski@netronome.com> Date: Fri, 29 Jun 2018 17:04:34 -0700 Subject: [PATCH 1/9] nfp: expose ring stats of inactive rings via ethtool After user changes the ring count statistics for deactivated rings disappear from ethtool -S output. This causes loss of information to the user and means that ethtool stats may not add up to interface stats. Always expose counters from all the rings. Note that we allocate at most num_possible_cpus() rings so number of rings should be reasonable. The alternative of only listing stats for rings which were ever in use could be confusing. Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com> Reviewed-by: Dirk van der Merwe <dirk.vandermerwe@netronome.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- .../ethernet/netronome/nfp/nfp_net_ethtool.c | 50 +++++++------------ 1 file changed, 19 insertions(+), 31 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c index 26d1cc4e29061..2aeb4622f1ea3 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c @@ -452,7 +452,7 @@ static unsigned int nfp_vnic_get_sw_stats_count(struct net_device *netdev) { struct nfp_net *nn = netdev_priv(netdev); - return NN_RVEC_GATHER_STATS + nn->dp.num_r_vecs * NN_RVEC_PER_Q_STATS; + return NN_RVEC_GATHER_STATS + nn->max_r_vecs * NN_RVEC_PER_Q_STATS; } static u8 *nfp_vnic_get_sw_stats_strings(struct net_device *netdev, u8 *data) @@ -460,7 +460,7 @@ static u8 *nfp_vnic_get_sw_stats_strings(struct net_device *netdev, u8 *data) struct nfp_net *nn = netdev_priv(netdev); int i; - for (i = 0; i < nn->dp.num_r_vecs; i++) { + for (i = 0; i < nn->max_r_vecs; i++) { data = nfp_pr_et(data, "rvec_%u_rx_pkts", i); data = nfp_pr_et(data, "rvec_%u_tx_pkts", i); data = nfp_pr_et(data, "rvec_%u_tx_busy", i); @@ 
-486,7 +486,7 @@ static u64 *nfp_vnic_get_sw_stats(struct net_device *netdev, u64 *data) u64 tmp[NN_RVEC_GATHER_STATS]; unsigned int i, j; - for (i = 0; i < nn->dp.num_r_vecs; i++) { + for (i = 0; i < nn->max_r_vecs; i++) { unsigned int start; do { @@ -521,15 +521,13 @@ static u64 *nfp_vnic_get_sw_stats(struct net_device *netdev, u64 *data) return data; } -static unsigned int -nfp_vnic_get_hw_stats_count(unsigned int rx_rings, unsigned int tx_rings) +static unsigned int nfp_vnic_get_hw_stats_count(unsigned int num_vecs) { - return NN_ET_GLOBAL_STATS_LEN + (rx_rings + tx_rings) * 2; + return NN_ET_GLOBAL_STATS_LEN + num_vecs * 4; } static u8 * -nfp_vnic_get_hw_stats_strings(u8 *data, unsigned int rx_rings, - unsigned int tx_rings, bool repr) +nfp_vnic_get_hw_stats_strings(u8 *data, unsigned int num_vecs, bool repr) { int swap_off, i; @@ -549,36 +547,29 @@ nfp_vnic_get_hw_stats_strings(u8 *data, unsigned int rx_rings, for (i = NN_ET_SWITCH_STATS_LEN * 2; i < NN_ET_GLOBAL_STATS_LEN; i++) data = nfp_pr_et(data, nfp_net_et_stats[i].name); - for (i = 0; i < tx_rings; i++) { - data = nfp_pr_et(data, "txq_%u_pkts", i); - data = nfp_pr_et(data, "txq_%u_bytes", i); - } - - for (i = 0; i < rx_rings; i++) { + for (i = 0; i < num_vecs; i++) { data = nfp_pr_et(data, "rxq_%u_pkts", i); data = nfp_pr_et(data, "rxq_%u_bytes", i); + data = nfp_pr_et(data, "txq_%u_pkts", i); + data = nfp_pr_et(data, "txq_%u_bytes", i); } return data; } static u64 * -nfp_vnic_get_hw_stats(u64 *data, u8 __iomem *mem, - unsigned int rx_rings, unsigned int tx_rings) +nfp_vnic_get_hw_stats(u64 *data, u8 __iomem *mem, unsigned int num_vecs) { unsigned int i; for (i = 0; i < NN_ET_GLOBAL_STATS_LEN; i++) *data++ = readq(mem + nfp_net_et_stats[i].off); - for (i = 0; i < tx_rings; i++) { - *data++ = readq(mem + NFP_NET_CFG_TXR_STATS(i)); - *data++ = readq(mem + NFP_NET_CFG_TXR_STATS(i) + 8); - } - - for (i = 0; i < rx_rings; i++) { + for (i = 0; i < num_vecs; i++) { *data++ = readq(mem + 
NFP_NET_CFG_RXR_STATS(i)); *data++ = readq(mem + NFP_NET_CFG_RXR_STATS(i) + 8); + *data++ = readq(mem + NFP_NET_CFG_TXR_STATS(i)); + *data++ = readq(mem + NFP_NET_CFG_TXR_STATS(i) + 8); } return data; @@ -633,8 +624,7 @@ static void nfp_net_get_strings(struct net_device *netdev, switch (stringset) { case ETH_SS_STATS: data = nfp_vnic_get_sw_stats_strings(netdev, data); - data = nfp_vnic_get_hw_stats_strings(data, nn->dp.num_rx_rings, - nn->dp.num_tx_rings, + data = nfp_vnic_get_hw_stats_strings(data, nn->max_r_vecs, false); data = nfp_mac_get_stats_strings(netdev, data); data = nfp_app_port_get_stats_strings(nn->port, data); @@ -649,8 +639,7 @@ nfp_net_get_stats(struct net_device *netdev, struct ethtool_stats *stats, struct nfp_net *nn = netdev_priv(netdev); data = nfp_vnic_get_sw_stats(netdev, data); - data = nfp_vnic_get_hw_stats(data, nn->dp.ctrl_bar, - nn->dp.num_rx_rings, nn->dp.num_tx_rings); + data = nfp_vnic_get_hw_stats(data, nn->dp.ctrl_bar, nn->max_r_vecs); data = nfp_mac_get_stats(netdev, data); data = nfp_app_port_get_stats(nn->port, data); } @@ -662,8 +651,7 @@ static int nfp_net_get_sset_count(struct net_device *netdev, int sset) switch (sset) { case ETH_SS_STATS: return nfp_vnic_get_sw_stats_count(netdev) + - nfp_vnic_get_hw_stats_count(nn->dp.num_rx_rings, - nn->dp.num_tx_rings) + + nfp_vnic_get_hw_stats_count(nn->max_r_vecs) + nfp_mac_get_stats_count(netdev) + nfp_app_port_get_stats_count(nn->port); default: @@ -679,7 +667,7 @@ static void nfp_port_get_strings(struct net_device *netdev, switch (stringset) { case ETH_SS_STATS: if (nfp_port_is_vnic(port)) - data = nfp_vnic_get_hw_stats_strings(data, 0, 0, true); + data = nfp_vnic_get_hw_stats_strings(data, 0, true); else data = nfp_mac_get_stats_strings(netdev, data); data = nfp_app_port_get_stats_strings(port, data); @@ -694,7 +682,7 @@ nfp_port_get_stats(struct net_device *netdev, struct ethtool_stats *stats, struct nfp_port *port = nfp_port_from_netdev(netdev); if (nfp_port_is_vnic(port)) - data 
= nfp_vnic_get_hw_stats(data, port->vnic, 0, 0); + data = nfp_vnic_get_hw_stats(data, port->vnic, 0); else data = nfp_mac_get_stats(netdev, data); data = nfp_app_port_get_stats(port, data); @@ -708,7 +696,7 @@ static int nfp_port_get_sset_count(struct net_device *netdev, int sset) switch (sset) { case ETH_SS_STATS: if (nfp_port_is_vnic(port)) - count = nfp_vnic_get_hw_stats_count(0, 0); + count = nfp_vnic_get_hw_stats_count(0); else count = nfp_mac_get_stats_count(netdev); count += nfp_app_port_get_stats_count(port); From 18aa5b180f00a10c2f63944b4f0ab116bf8ea19b Mon Sep 17 00:00:00 2001 From: Jakub Kicinski <jakub.kicinski@netronome.com> Date: Fri, 29 Jun 2018 17:04:35 -0700 Subject: [PATCH 2/9] nfp: fail probe if serial or interface id is missing On some platforms with broken ACPI tables we may not have access to the Serial Number PCIe capability. This capability is crucial for us for switchdev operation as we use serial number as switch ID, and for communication with management FW where interface ID is used. If we can't determine the Serial Number we have to fail device probe. Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com> Signed-off-by: David S. 
Miller <davem@davemloft.net> --- .../netronome/nfp/nfpcore/nfp6000_pcie.c | 16 +++++++++----- .../ethernet/netronome/nfp/nfpcore/nfp_cpp.h | 4 ++-- .../netronome/nfp/nfpcore/nfp_cppcore.c | 22 ++++++++++++++----- 3 files changed, 28 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c index 749655c329b24..c8d0b1016a646 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c @@ -1248,7 +1248,7 @@ static void nfp6000_free(struct nfp_cpp *cpp) kfree(nfp); } -static void nfp6000_read_serial(struct device *dev, u8 *serial) +static int nfp6000_read_serial(struct device *dev, u8 *serial) { struct pci_dev *pdev = to_pci_dev(dev); int pos; @@ -1256,25 +1256,29 @@ static void nfp6000_read_serial(struct device *dev, u8 *serial) pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_DSN); if (!pos) { - memset(serial, 0, NFP_SERIAL_LEN); - return; + dev_err(dev, "can't find PCIe Serial Number Capability\n"); + return -EINVAL; } pci_read_config_dword(pdev, pos + 4, &reg); put_unaligned_be16(reg >> 16, serial + 4); pci_read_config_dword(pdev, pos + 8, &reg); put_unaligned_be32(reg, serial); + + return 0; } -static u16 nfp6000_get_interface(struct device *dev) +static int nfp6000_get_interface(struct device *dev) { struct pci_dev *pdev = to_pci_dev(dev); int pos; u32 reg; pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_DSN); - if (!pos) - return NFP_CPP_INTERFACE(NFP_CPP_INTERFACE_TYPE_PCI, 0, 0xff); + if (!pos) { + dev_err(dev, "can't find PCIe Serial Number Capability\n"); + return -EINVAL; + } pci_read_config_dword(pdev, pos + 4, &reg); diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpp.h b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpp.h index b0da3d4368505..c338d539fa967 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpp.h +++ 
b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpp.h @@ -364,8 +364,8 @@ struct nfp_cpp_operations { int (*init)(struct nfp_cpp *cpp); void (*free)(struct nfp_cpp *cpp); - void (*read_serial)(struct device *dev, u8 *serial); - u16 (*get_interface)(struct device *dev); + int (*read_serial)(struct device *dev, u8 *serial); + int (*get_interface)(struct device *dev); int (*area_init)(struct nfp_cpp_area *area, u32 dest, unsigned long long address, diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c index ef30597aa3196..73de57a09800d 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c @@ -1163,10 +1163,10 @@ nfp_cpp_from_operations(const struct nfp_cpp_operations *ops, { const u32 arm = NFP_CPP_ID(NFP_CPP_TARGET_ARM, NFP_CPP_ACTION_RW, 0); struct nfp_cpp *cpp; + int ifc, err; u32 mask[2]; u32 xpbaddr; size_t tgt; - int err; cpp = kzalloc(sizeof(*cpp), GFP_KERNEL); if (!cpp) { @@ -1176,9 +1176,19 @@ nfp_cpp_from_operations(const struct nfp_cpp_operations *ops, cpp->op = ops; cpp->priv = priv; - cpp->interface = ops->get_interface(parent); - if (ops->read_serial) - ops->read_serial(parent, cpp->serial); + + ifc = ops->get_interface(parent); + if (ifc < 0) { + err = ifc; + goto err_free_cpp; + } + cpp->interface = ifc; + if (ops->read_serial) { + err = ops->read_serial(parent, cpp->serial); + if (err) + goto err_free_cpp; + } + rwlock_init(&cpp->resource_lock); init_waitqueue_head(&cpp->waitq); lockdep_set_class(&cpp->resource_lock, &nfp_cpp_resource_lock_key); @@ -1191,7 +1201,7 @@ nfp_cpp_from_operations(const struct nfp_cpp_operations *ops, err = device_register(&cpp->dev); if (err < 0) { put_device(&cpp->dev); - goto err_dev; + goto err_free_cpp; } dev_set_drvdata(&cpp->dev, cpp); @@ -1238,7 +1248,7 @@ nfp_cpp_from_operations(const struct nfp_cpp_operations *ops, err_out: device_unregister(&cpp->dev); -err_dev: 
+err_free_cpp: kfree(cpp); err_malloc: return ERR_PTR(err); From 670b5274ff976a7eed57fd303460a8a9de267c0e Mon Sep 17 00:00:00 2001 From: Jakub Kicinski <jakub.kicinski@netronome.com> Date: Fri, 29 Jun 2018 17:04:36 -0700 Subject: [PATCH 3/9] nfp: implement netpoll ndo (thus enabling netconsole) NFP NAPI handling will only complete the TXed packets when called with budget of 0, implement ndo_poll_controller by scheduling NAPI on all TX queues. Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- .../ethernet/netronome/nfp/nfp_net_common.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index d4c27f849f9bb..edc6ef682f6de 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -3115,6 +3115,21 @@ nfp_net_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid) return nfp_net_reconfig_mbox(nn, NFP_NET_CFG_MBOX_CMD_CTAG_FILTER_KILL); } +#ifdef CONFIG_NET_POLL_CONTROLLER +static void nfp_net_netpoll(struct net_device *netdev) +{ + struct nfp_net *nn = netdev_priv(netdev); + int i; + + /* nfp_net's NAPIs are statically allocated so even if there is a race + * with reconfig path this will simply try to schedule some disabled + * NAPI instances. 
+ */ + for (i = 0; i < nn->dp.num_stack_tx_rings; i++) + napi_schedule_irqoff(&nn->r_vecs[i].napi); +} +#endif + static void nfp_net_stat64(struct net_device *netdev, struct rtnl_link_stats64 *stats) { @@ -3482,6 +3497,9 @@ const struct net_device_ops nfp_net_netdev_ops = { .ndo_get_stats64 = nfp_net_stat64, .ndo_vlan_rx_add_vid = nfp_net_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = nfp_net_vlan_rx_kill_vid, +#ifdef CONFIG_NET_POLL_CONTROLLER + .ndo_poll_controller = nfp_net_netpoll, +#endif .ndo_set_vf_mac = nfp_app_set_vf_mac, .ndo_set_vf_vlan = nfp_app_set_vf_vlan, .ndo_set_vf_spoofchk = nfp_app_set_vf_spoofchk, From d387b8a19a3921e291ebeb26b55495479cd36b21 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski <jakub.kicinski@netronome.com> Date: Fri, 29 Jun 2018 17:04:37 -0700 Subject: [PATCH 4/9] nfp: make use of napi_consume_skb() Use napi_consume_skb() in nfp_net_tx_complete() to get bulk free. Pass 0 as budget for ctrl queue completion since it runs out of a tasklet. Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com> Reviewed-by: Dirk van der Merwe <dirk.vandermerwe@netronome.com> Signed-off-by: David S. 
Miller <davem@davemloft.net> --- drivers/net/ethernet/netronome/nfp/nfp_net_common.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index edc6ef682f6de..7df5ca37bfb8b 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -945,11 +945,12 @@ static int nfp_net_tx(struct sk_buff *skb, struct net_device *netdev) /** * nfp_net_tx_complete() - Handled completed TX packets - * @tx_ring: TX ring structure + * @tx_ring: TX ring structure + * @budget: NAPI budget (only used as bool to determine if in NAPI context) * * Return: Number of completed TX descriptors */ -static void nfp_net_tx_complete(struct nfp_net_tx_ring *tx_ring) +static void nfp_net_tx_complete(struct nfp_net_tx_ring *tx_ring, int budget) { struct nfp_net_r_vector *r_vec = tx_ring->r_vec; struct nfp_net_dp *dp = &r_vec->nfp_net->dp; @@ -999,7 +1000,7 @@ static void nfp_net_tx_complete(struct nfp_net_tx_ring *tx_ring) /* check for last gather fragment */ if (fidx == nr_frags - 1) - dev_consume_skb_any(skb); + napi_consume_skb(skb, budget); tx_ring->txbufs[idx].dma_addr = 0; tx_ring->txbufs[idx].skb = NULL; @@ -1828,7 +1829,7 @@ static int nfp_net_poll(struct napi_struct *napi, int budget) unsigned int pkts_polled = 0; if (r_vec->tx_ring) - nfp_net_tx_complete(r_vec->tx_ring); + nfp_net_tx_complete(r_vec->tx_ring, budget); if (r_vec->rx_ring) pkts_polled = nfp_net_rx(r_vec->rx_ring, budget); @@ -2062,7 +2063,7 @@ static void nfp_ctrl_poll(unsigned long arg) struct nfp_net_r_vector *r_vec = (void *)arg; spin_lock_bh(&r_vec->lock); - nfp_net_tx_complete(r_vec->tx_ring); + nfp_net_tx_complete(r_vec->tx_ring, 0); __nfp_ctrl_tx_queued(r_vec); spin_unlock_bh(&r_vec->lock); From 5d4b0b4068f1dc637d8a9b53c2960bbad8ce654d Mon Sep 17 00:00:00 2001 From: Jakub Kicinski <jakub.kicinski@netronome.com> Date: Fri, 29 Jun 
2018 17:04:38 -0700 Subject: [PATCH 5/9] nfp: populate bus-info on representors We used to leave bus-info in ethtool driver info empty for representors in case multi-PCIe-to-single-host cards make the association between PCIe device and NFP many to one. It seems these attempts are futile, we need to link the representors to one PCIe device in sysfs to get consistent naming, plus devlink uses one PCIe as a handle, anyway. The multi-PCIe-to-single-system support won't be clean, if it ever comes. Turns out some user space (RHEL tests) likes to read bus-info so just populate it. While at it remove unnecessary app NULL-check, representors are spawned by an app, so it must exist. Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com> Reviewed-by: Dirk van der Merwe <dirk.vandermerwe@netronome.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c index 2aeb4622f1ea3..6a79c8e4a7a40 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c @@ -233,12 +233,10 @@ nfp_net_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo) static void nfp_app_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo) { - struct nfp_app *app; - - app = nfp_app_from_netdev(netdev); - if (!app) - return; + struct nfp_app *app = nfp_app_from_netdev(netdev); + strlcpy(drvinfo->bus_info, pci_name(app->pdev), + sizeof(drvinfo->bus_info)); nfp_get_drvinfo(app, app->pdev, "*", drvinfo); } From ed8f2b52b622bbc550dadb69d3f10d81528f9fbc Mon Sep 17 00:00:00 2001 From: Pieter Jansen van Vuuren <pieter.jansenvanvuuren@netronome.com> Date: Fri, 29 Jun 2018 17:04:39 -0700 Subject: [PATCH 6/9] nfp: flower: ignore checksum actions when performing pedit actions Hardware 
will automatically update csum in headers when a set action has been performed. This means we could in the driver ignore the explicit checksum action when performing a set action. Signed-off-by: Pieter Jansen van Vuuren <pieter.jansenvanvuuren@netronome.com> Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com> Reviewed-by: Simon Horman <simon.horman@netronome.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- .../ethernet/netronome/nfp/flower/action.c | 80 +++++++++++++++++-- 1 file changed, 72 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c index 4a6d2db750719..61ba8d4f99f1a 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/action.c +++ b/drivers/net/ethernet/netronome/nfp/flower/action.c @@ -34,6 +34,7 @@ #include <linux/bitfield.h> #include <net/pkt_cls.h> #include <net/switchdev.h> +#include <net/tc_act/tc_csum.h> #include <net/tc_act/tc_gact.h> #include <net/tc_act/tc_mirred.h> #include <net/tc_act/tc_pedit.h> @@ -398,8 +399,27 @@ nfp_fl_set_tport(const struct tc_action *action, int idx, u32 off, return 0; } +static u32 nfp_fl_csum_l4_to_flag(u8 ip_proto) +{ + switch (ip_proto) { + case 0: + /* Filter doesn't force proto match, + * both TCP and UDP will be updated if encountered + */ + return TCA_CSUM_UPDATE_FLAG_TCP | TCA_CSUM_UPDATE_FLAG_UDP; + case IPPROTO_TCP: + return TCA_CSUM_UPDATE_FLAG_TCP; + case IPPROTO_UDP: + return TCA_CSUM_UPDATE_FLAG_UDP; + default: + /* All other protocols will be ignored by FW */ + return 0; + } +} + static int -nfp_fl_pedit(const struct tc_action *action, char *nfp_action, int *a_len) +nfp_fl_pedit(const struct tc_action *action, struct tc_cls_flower_offload *flow, + char *nfp_action, int *a_len, u32 *csum_updated) { struct nfp_fl_set_ipv6_addr set_ip6_dst, set_ip6_src; struct nfp_fl_set_ip4_addrs set_ip_addr; @@ -409,6 +429,7 @@ nfp_fl_pedit(const struct tc_action *action, char *nfp_action, int *a_len) int 
idx, nkeys, err; size_t act_size; u32 offset, cmd; + u8 ip_proto = 0; memset(&set_ip6_dst, 0, sizeof(set_ip6_dst)); memset(&set_ip6_src, 0, sizeof(set_ip6_src)); @@ -451,6 +472,15 @@ nfp_fl_pedit(const struct tc_action *action, char *nfp_action, int *a_len) return err; } + if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_BASIC)) { + struct flow_dissector_key_basic *basic; + + basic = skb_flow_dissector_target(flow->dissector, + FLOW_DISSECTOR_KEY_BASIC, + flow->key); + ip_proto = basic->ip_proto; + } + if (set_eth.head.len_lw) { act_size = sizeof(set_eth); memcpy(nfp_action, &set_eth, act_size); @@ -459,6 +489,10 @@ nfp_fl_pedit(const struct tc_action *action, char *nfp_action, int *a_len) act_size = sizeof(set_ip_addr); memcpy(nfp_action, &set_ip_addr, act_size); *a_len += act_size; + + /* Hardware will automatically fix IPv4 and TCP/UDP checksum. */ + *csum_updated |= TCA_CSUM_UPDATE_FLAG_IPV4HDR | + nfp_fl_csum_l4_to_flag(ip_proto); } else if (set_ip6_dst.head.len_lw && set_ip6_src.head.len_lw) { /* TC compiles set src and dst IPv6 address as a single action, * the hardware requires this to be 2 separate actions. @@ -471,18 +505,30 @@ nfp_fl_pedit(const struct tc_action *action, char *nfp_action, int *a_len) memcpy(&nfp_action[sizeof(set_ip6_src)], &set_ip6_dst, act_size); *a_len += act_size; + + /* Hardware will automatically fix TCP/UDP checksum. */ + *csum_updated |= nfp_fl_csum_l4_to_flag(ip_proto); } else if (set_ip6_dst.head.len_lw) { act_size = sizeof(set_ip6_dst); memcpy(nfp_action, &set_ip6_dst, act_size); *a_len += act_size; + + /* Hardware will automatically fix TCP/UDP checksum. */ + *csum_updated |= nfp_fl_csum_l4_to_flag(ip_proto); } else if (set_ip6_src.head.len_lw) { act_size = sizeof(set_ip6_src); memcpy(nfp_action, &set_ip6_src, act_size); *a_len += act_size; + + /* Hardware will automatically fix TCP/UDP checksum. 
*/ + *csum_updated |= nfp_fl_csum_l4_to_flag(ip_proto); } else if (set_tport.head.len_lw) { act_size = sizeof(set_tport); memcpy(nfp_action, &set_tport, act_size); *a_len += act_size; + + /* Hardware will automatically fix TCP/UDP checksum. */ + *csum_updated |= nfp_fl_csum_l4_to_flag(ip_proto); } return 0; @@ -493,12 +539,18 @@ nfp_flower_output_action(struct nfp_app *app, const struct tc_action *a, struct nfp_fl_payload *nfp_fl, int *a_len, struct net_device *netdev, bool last, enum nfp_flower_tun_type *tun_type, int *tun_out_cnt, - int *out_cnt) + int *out_cnt, u32 *csum_updated) { struct nfp_flower_priv *priv = app->priv; struct nfp_fl_output *output; int err, prelag_size; + /* If csum_updated has not been reset by now, it means HW will + * incorrectly update csums when they are not requested. + */ + if (*csum_updated) + return -EOPNOTSUPP; + if (*a_len + sizeof(struct nfp_fl_output) > NFP_FL_MAX_A_SIZ) return -EOPNOTSUPP; @@ -529,10 +581,11 @@ nfp_flower_output_action(struct nfp_app *app, const struct tc_action *a, static int nfp_flower_loop_action(struct nfp_app *app, const struct tc_action *a, + struct tc_cls_flower_offload *flow, struct nfp_fl_payload *nfp_fl, int *a_len, struct net_device *netdev, enum nfp_flower_tun_type *tun_type, int *tun_out_cnt, - int *out_cnt) + int *out_cnt, u32 *csum_updated) { struct nfp_fl_set_ipv4_udp_tun *set_tun; struct nfp_fl_pre_tunnel *pre_tun; @@ -545,14 +598,14 @@ nfp_flower_loop_action(struct nfp_app *app, const struct tc_action *a, } else if (is_tcf_mirred_egress_redirect(a)) { err = nfp_flower_output_action(app, a, nfp_fl, a_len, netdev, true, tun_type, tun_out_cnt, - out_cnt); + out_cnt, csum_updated); if (err) return err; } else if (is_tcf_mirred_egress_mirror(a)) { err = nfp_flower_output_action(app, a, nfp_fl, a_len, netdev, false, tun_type, tun_out_cnt, - out_cnt); + out_cnt, csum_updated); if (err) return err; @@ -602,8 +655,17 @@ nfp_flower_loop_action(struct nfp_app *app, const struct tc_action *a, /* Tunnel 
decap is handled by default so accept action. */ return 0; } else if (is_tcf_pedit(a)) { - if (nfp_fl_pedit(a, &nfp_fl->action_data[*a_len], a_len)) + if (nfp_fl_pedit(a, flow, &nfp_fl->action_data[*a_len], + a_len, csum_updated)) return -EOPNOTSUPP; + } else if (is_tcf_csum(a)) { + /* csum action requests recalc of something we have not fixed */ + if (tcf_csum_update_flags(a) & ~*csum_updated) + return -EOPNOTSUPP; + /* If we will correctly fix the csum we can remove it from the + * csum update list. Which will later be used to check support. + */ + *csum_updated &= ~tcf_csum_update_flags(a); } else { /* Currently we do not handle any other actions. */ return -EOPNOTSUPP; @@ -620,6 +682,7 @@ int nfp_flower_compile_action(struct nfp_app *app, int act_len, act_cnt, err, tun_out_cnt, out_cnt; enum nfp_flower_tun_type tun_type; const struct tc_action *a; + u32 csum_updated = 0; LIST_HEAD(actions); memset(nfp_flow->action_data, 0, NFP_FL_MAX_A_SIZ); @@ -632,8 +695,9 @@ int nfp_flower_compile_action(struct nfp_app *app, tcf_exts_to_list(flow->exts, &actions); list_for_each_entry(a, &actions, list) { - err = nfp_flower_loop_action(app, a, nfp_flow, &act_len, netdev, - &tun_type, &tun_out_cnt, &out_cnt); + err = nfp_flower_loop_action(app, a, flow, nfp_flow, &act_len, + netdev, &tun_type, &tun_out_cnt, + &out_cnt, &csum_updated); if (err) return err; act_cnt++; From ed21b637e940e7680fc52eb10a5d9ee74715cd38 Mon Sep 17 00:00:00 2001 From: John Hurley <john.hurley@netronome.com> Date: Fri, 29 Jun 2018 17:04:40 -0700 Subject: [PATCH 7/9] nfp: flower: extract ipv4 udp tunnel ttl from route Previously the ttl for ipv4 udp tunnels was set to the namespace default. Modify this to attempt to extract the ttl from a full route lookup on the tunnel destination. If this is not possible then resort to the default. 
Signed-off-by: John Hurley <john.hurley@netronome.com> Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com> Reviewed-by: Simon Horman <simon.horman@netronome.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- .../ethernet/netronome/nfp/flower/action.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c index 61ba8d4f99f1a..d421b7fbce967 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/action.c +++ b/drivers/net/ethernet/netronome/nfp/flower/action.c @@ -236,9 +236,12 @@ nfp_fl_set_ipv4_udp_tun(struct nfp_fl_set_ipv4_udp_tun *set_tun, size_t act_size = sizeof(struct nfp_fl_set_ipv4_udp_tun); struct ip_tunnel_info *ip_tun = tcf_tunnel_info(action); u32 tmp_set_ip_tun_type_index = 0; + struct flowi4 flow = {}; /* Currently support one pre-tunnel so index is always 0. */ int pretun_idx = 0; + struct rtable *rt; struct net *net; + int err; if (ip_tun->options_len) return -EOPNOTSUPP; @@ -255,7 +258,21 @@ nfp_fl_set_ipv4_udp_tun(struct nfp_fl_set_ipv4_udp_tun *set_tun, set_tun->tun_type_index = cpu_to_be32(tmp_set_ip_tun_type_index); set_tun->tun_id = ip_tun->key.tun_id; - set_tun->ttl = net->ipv4.sysctl_ip_default_ttl; + + /* Do a route lookup to determine ttl - if fails then use default. + * Note that CONFIG_INET is a requirement of CONFIG_NET_SWITCHDEV so + * must be defined here. + */ + flow.daddr = ip_tun->key.u.ipv4.dst; + flow.flowi4_proto = IPPROTO_UDP; + rt = ip_route_output_key(net, &flow); + err = PTR_ERR_OR_ZERO(rt); + if (!err) { + set_tun->ttl = ip4_dst_hoplimit(&rt->dst); + ip_rt_put(rt); + } else { + set_tun->ttl = net->ipv4.sysctl_ip_default_ttl; + } /* Complete pre_tunnel action. 
*/ pre_tun->ipv4_dst = ip_tun->key.u.ipv4.dst; From 51a8cefc6e3d8dd4bb18918b07faa5715f877aa6 Mon Sep 17 00:00:00 2001 From: John Hurley <john.hurley@netronome.com> Date: Fri, 29 Jun 2018 17:04:41 -0700 Subject: [PATCH 8/9] nfp: flower: offload tos and tunnel flags for ipv4 udp tunnels Extract the tos and the tunnel flags from the tunnel key and offload these action fields. Only the checksum and tunnel key flags are implemented in fw so reject offloads of other flags. The tunnel key flag is always considered set in the fw so enforce that it is set in the rule. Note that the compulsory setting of the tunnel key flag and optional setting of checksum is inline with how tc currently generates ipv4 udp tunnel actions. Signed-off-by: John Hurley <john.hurley@netronome.com> Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com> Reviewed-by: Simon Horman <simon.horman@netronome.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/net/ethernet/netronome/nfp/flower/action.c | 9 +++++++++ drivers/net/ethernet/netronome/nfp/flower/cmsg.h | 4 ++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c index d421b7fbce967..e56b815a8dc6c 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/action.c +++ b/drivers/net/ethernet/netronome/nfp/flower/action.c @@ -45,6 +45,8 @@ #include "main.h" #include "../nfp_net_repr.h" +#define NFP_FL_SUPPORTED_IPV4_UDP_TUN_FLAGS (TUNNEL_CSUM | TUNNEL_KEY) + static void nfp_fl_pop_vlan(struct nfp_fl_pop_vlan *pop_vlan) { size_t act_size = sizeof(struct nfp_fl_pop_vlan); @@ -274,6 +276,13 @@ nfp_fl_set_ipv4_udp_tun(struct nfp_fl_set_ipv4_udp_tun *set_tun, set_tun->ttl = net->ipv4.sysctl_ip_default_ttl; } + set_tun->tos = ip_tun->key.tos; + + if (!(ip_tun->key.tun_flags & TUNNEL_KEY) || + ip_tun->key.tun_flags & ~NFP_FL_SUPPORTED_IPV4_UDP_TUN_FLAGS) + return -EOPNOTSUPP; + set_tun->tun_flags = ip_tun->key.tun_flags; + 
/* Complete pre_tunnel action. */ pre_tun->ipv4_dst = ip_tun->key.u.ipv4.dst; diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h index 4a7f3510a2968..15f1eacd76b6d 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h +++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h @@ -203,9 +203,9 @@ struct nfp_fl_set_ipv4_udp_tun { __be16 reserved; __be64 tun_id __packed; __be32 tun_type_index; - __be16 reserved2; + __be16 tun_flags; u8 ttl; - u8 reserved3; + u8 tos; __be32 extra[2]; }; From 635cf43dbddd166cd702c7883c837b9a3ace4565 Mon Sep 17 00:00:00 2001 From: John Hurley <john.hurley@netronome.com> Date: Fri, 29 Jun 2018 17:04:42 -0700 Subject: [PATCH 9/9] nfp: flower: enabled offloading of Team LAG Currently the NFP fw only supports L3/L4 hashing so rejects the offload of filters that output to LAG ports implementing other hash algorithms. Team, however, uses a BPF function for the hash that is not defined. To support Team offload, accept hashes that are defined as 'unknown' (only Team defines such hash types). In this case, use the NFP default of L3/L4 hashing for egress port selection. Signed-off-by: John Hurley <john.hurley@netronome.com> Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com> Reviewed-by: Simon Horman <simon.horman@netronome.com> Signed-off-by: David S. 
Miller <davem@davemloft.net> --- drivers/net/ethernet/netronome/nfp/flower/lag_conf.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/flower/lag_conf.c b/drivers/net/ethernet/netronome/nfp/flower/lag_conf.c index 0c4c957717ea4..bf10598f66ae0 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/lag_conf.c +++ b/drivers/net/ethernet/netronome/nfp/flower/lag_conf.c @@ -564,8 +564,9 @@ nfp_fl_lag_changeupper_event(struct nfp_fl_lag *lag, if (lag_upper_info && lag_upper_info->tx_type != NETDEV_LAG_TX_TYPE_ACTIVEBACKUP && (lag_upper_info->tx_type != NETDEV_LAG_TX_TYPE_HASH || - (lag_upper_info->hash_type != NETDEV_LAG_HASH_L34 && - lag_upper_info->hash_type != NETDEV_LAG_HASH_E34))) { + (lag_upper_info->hash_type != NETDEV_LAG_HASH_L34 && + lag_upper_info->hash_type != NETDEV_LAG_HASH_E34 && + lag_upper_info->hash_type != NETDEV_LAG_HASH_UNKNOWN))) { can_offload = false; nfp_flower_cmsg_warn(priv->app, "Unable to offload tx_type %u hash %u\n",