From 59361316afcb08569af21e1af83e89c7051c055a Mon Sep 17 00:00:00 2001 From: Jeff Kirsher Date: Thu, 2 Aug 2018 10:13:10 -0700 Subject: [PATCH 01/10] igb: reduce CPU0 latency when updating statistics This change is based off of the work and suggestion of Jan Jablonsky . The Watchdog workqueue in igb driver is scheduled every 2s for each network interface. That includes updating a statistics protected by spinlock. Function igb_update_stats in this case will be protected against preemption. According to number of a statistics registers (cca 60), processing this function might cause additional cpu load on CPU0. In case of statistics spinlock may be replaced with mutex, which reduce latency on CPU0. CC: Bernhard Kaindl CC: Jan Jablonsky Signed-off-by: Jeff Kirsher Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/igb/igb.h | 2 +- drivers/net/ethernet/intel/igb/igb_ethtool.c | 4 ++-- drivers/net/ethernet/intel/igb/igb_main.c | 14 +++++++------- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h index ca54e268d157b..fe1592ae87690 100644 --- a/drivers/net/ethernet/intel/igb/igb.h +++ b/drivers/net/ethernet/intel/igb/igb.h @@ -515,7 +515,7 @@ struct igb_adapter { /* OS defined structs */ struct pci_dev *pdev; - spinlock_t stats64_lock; + struct mutex stats64_lock; struct rtnl_link_stats64 stats64; /* structs defined in e1000_hw.h */ diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c index c576710682452..7426060b678fd 100644 --- a/drivers/net/ethernet/intel/igb/igb_ethtool.c +++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c @@ -2295,7 +2295,7 @@ static void igb_get_ethtool_stats(struct net_device *netdev, int i, j; char *p; - spin_lock(&adapter->stats64_lock); + mutex_lock(&adapter->stats64_lock); igb_update_stats(adapter); for (i = 0; i < IGB_GLOBAL_STATS_LEN; i++) { @@ -2338,7 +2338,7 @@ static void igb_get_ethtool_stats(struct net_device *netdev, } while (u64_stats_fetch_retry_irq(&ring->rx_syncp, start)); i += IGB_RX_QUEUE_STATS_LEN; } - spin_unlock(&adapter->stats64_lock); + mutex_unlock(&adapter->stats64_lock); } static void igb_get_strings(struct net_device *netdev, u32 stringset, u8 *data) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 453ae1d9e5f32..bb4f3f64fbf01 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -2203,9 +2203,9 @@ void igb_down(struct igb_adapter *adapter) del_timer_sync(&adapter->phy_info_timer); /* record the stats before reset*/ - spin_lock(&adapter->stats64_lock); + mutex_lock(&adapter->stats64_lock); igb_update_stats(adapter); - spin_unlock(&adapter->stats64_lock); + mutex_unlock(&adapter->stats64_lock); adapter->link_speed = 0; adapter->link_duplex = 0; @@ -3840,7 +3840,7 @@ static int igb_sw_init(struct igb_adapter *adapter) adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN; spin_lock_init(&adapter->nfc_lock); - spin_lock_init(&adapter->stats64_lock); + mutex_init(&adapter->stats64_lock); #ifdef CONFIG_PCI_IOV switch (hw->mac.type) { case e1000_82576: @@ -5406,9 +5406,9 @@ static void igb_watchdog_task(struct work_struct *work) } } - spin_lock(&adapter->stats64_lock); + mutex_lock(&adapter->stats64_lock); igb_update_stats(adapter); - spin_unlock(&adapter->stats64_lock); + mutex_unlock(&adapter->stats64_lock); for (i = 0; i < adapter->num_tx_queues; i++) { struct igb_ring *tx_ring = adapter->tx_ring[i]; @@ -6235,10 +6235,10 @@ static void igb_get_stats64(struct net_device *netdev, { struct igb_adapter *adapter = netdev_priv(netdev); - spin_lock(&adapter->stats64_lock); + mutex_lock(&adapter->stats64_lock); igb_update_stats(adapter); memcpy(stats, &adapter->stats64, sizeof(*stats)); - spin_unlock(&adapter->stats64_lock); + mutex_unlock(&adapter->stats64_lock); } /** From 6f9ae17530f996e90268dae5ee843e432e40ccb3 Mon Sep 17 00:00:00 2001 From: Jesus Sanchez-Palencia Date: Fri, 16 Nov 2018 16:19:23 -0800 Subject: [PATCH 02/10] igb: Change RXPBSIZE size when setting Qav mode Section 4.5.9 of the datasheet says that the total size of all packet buffers combined (TxPB 0 + 1 + 2 + 3 + RxPB + BMC2OS + OS2BMC) must not exceed 60KB. Today we are configuring a total of 62KB, so reduce the RxPB from 32KB to 30KB in order to respect that. The choice of changing RxPBSIZE here is mainly because it seems more correct to give more priority to the transmit packet buffers over the receiver ones when running in Qav mode. Also, the BMC2OS and OS2BMC sizes are already too short. Signed-off-by: Jesus Sanchez-Palencia Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/igb/e1000_defines.h | 1 + drivers/net/ethernet/intel/igb/igb_main.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/igb/e1000_defines.h b/drivers/net/ethernet/intel/igb/e1000_defines.h index 8a28f3388f699..01fcfc6f34151 100644 --- a/drivers/net/ethernet/intel/igb/e1000_defines.h +++ b/drivers/net/ethernet/intel/igb/e1000_defines.h @@ -334,6 +334,7 @@ #define I210_RXPBSIZE_DEFAULT 0x000000A2 /* RXPBSIZE default */ #define I210_RXPBSIZE_MASK 0x0000003F +#define I210_RXPBSIZE_PB_30KB 0x0000001E #define I210_RXPBSIZE_PB_32KB 0x00000020 #define I210_TXPBSIZE_DEFAULT 0x04000014 /* TXPBSIZE default */ #define I210_TXPBSIZE_MASK 0xC0FFFFFF diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index bb4f3f64fbf01..e135adf46980b 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -1934,7 +1934,7 @@ static void igb_setup_tx_mode(struct igb_adapter *adapter) val = rd32(E1000_RXPBS); val &= ~I210_RXPBSIZE_MASK; - val |= I210_RXPBSIZE_PB_32KB; + val |= I210_RXPBSIZE_PB_30KB; wr32(E1000_RXPBS, val); /* Section 8.12.9 states that MAX_TPKT_SIZE from DTXMXPKTSZ From bad87ee82f74707913a27f16d40cd8a1c340b47e Mon Sep 17 00:00:00 2001 From: Vinicius Costa Gomes Date: Fri, 16 Nov 2018 16:19:24 -0800 Subject: [PATCH 03/10] Documentation: igb: Add a section about CBS Add some pointers to the definition of the CBS algorithm, and some notes about the limits of its implementation in the i210 family of controllers. Signed-off-by: Vinicius Costa Gomes Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- .../networking/device_drivers/intel/igb.rst | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/Documentation/networking/device_drivers/intel/igb.rst b/Documentation/networking/device_drivers/intel/igb.rst index ba16b86d55938..e87a4a72ea2da 100644 --- a/Documentation/networking/device_drivers/intel/igb.rst +++ b/Documentation/networking/device_drivers/intel/igb.rst @@ -177,6 +177,25 @@ rate limit using the IProute2 tool. Download the latest version of the IProute2 tool from Sourceforge if your version does not have all the features you require. +Credit Based Shaper (Qav Mode) +------------------------------ +When enabling the CBS qdisc in the hardware offload mode, traffic shaping using +the CBS (described in the IEEE 802.1Q-2018 Section 8.6.8.2 and discussed in the +Annex L) algorithm will run in the i210 controller, so it's more accurate and +uses less CPU. + +When using offloaded CBS, and the traffic rate obeys the configured rate +(doesn't go above it), CBS should have little to no effect in the latency. + +The offloaded version of the algorithm has some limits, caused by how the idle +slope is expressed in the adapter's registers. It can only represent idle slopes +in 16.38431 kbps units, which means that if a idle slope of 2576kbps is +requested, the controller will be configured to use a idle slope of ~2589 kbps, +because the driver rounds the value up. For more details, see the comments on +:c:func:`igb_config_tx_modes()`. + +NOTE: This feature is exclusive to i210 models. + Support ======= From cd0d465bb697a9c7bf66a9fe940f7981232f1676 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Mon, 19 Nov 2018 20:48:19 +0800 Subject: [PATCH 04/10] e100: Fix passing zero to 'PTR_ERR' warning in e100_load_ucode_wait Fix a static code checker warning: drivers/net/ethernet/intel/e100.c:1349 e100_load_ucode_wait() warn: passing zero to 'PTR_ERR' Signed-off-by: YueHaibing Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/e100.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/e100.c b/drivers/net/ethernet/intel/e100.c index 5e5c57db0d3fb..0fd268070fb4a 100644 --- a/drivers/net/ethernet/intel/e100.c +++ b/drivers/net/ethernet/intel/e100.c @@ -1345,8 +1345,8 @@ static inline int e100_load_ucode_wait(struct nic *nic) fw = e100_request_firmware(nic); /* If it's NULL, then no ucode is required */ - if (!fw || IS_ERR(fw)) - return PTR_ERR(fw); + if (IS_ERR_OR_NULL(fw)) + return PTR_ERR_OR_ZERO(fw); if ((err = e100_exec_cb(nic, (void *)fw, e100_setup_ucode))) netif_err(nic, probe, nic->netdev, From 31389b53b3e0b535867af9090a5d19ec64768d55 Mon Sep 17 00:00:00 2001 From: Konstantin Khorenko Date: Fri, 23 Nov 2018 19:10:28 +0300 Subject: [PATCH 05/10] i40e: define proper net_device::neigh_priv_len Out of bound read reported by KASan. i40iw_net_event() reads unconditionally 16 bytes from neigh->primary_key while the memory allocated for "neighbour" struct is evaluated in neigh_alloc() as tbl->entry_size + dev->neigh_priv_len where "dev" is a net_device. But the driver does not setup dev->neigh_priv_len and we read beyond the neigh entry allocated memory, so the patch in the next mail fixes this. Signed-off-by: Konstantin Khorenko Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index fbb21ac06c980..5824d74bb8dce 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -12339,6 +12339,9 @@ static int i40e_config_netdev(struct i40e_vsi *vsi) ether_addr_copy(netdev->dev_addr, mac_addr); ether_addr_copy(netdev->perm_addr, mac_addr); + /* i40iw_net_event() reads 16 bytes from neigh->primary_key */ + netdev->neigh_priv_len = sizeof(u32) * 4; + netdev->priv_flags |= IFF_UNICAST_FLT; netdev->priv_flags |= IFF_SUPP_NOFCS; /* Setup netdev TC information */ From 9a2d57a7a0626783a62d205f255d8227b212c14b Mon Sep 17 00:00:00 2001 From: Miroslav Lichvar Date: Wed, 28 Nov 2018 17:07:49 +0100 Subject: [PATCH 06/10] i40e: extend PTP gettime function to read system clock This adds support for the PTP_SYS_OFFSET_EXTENDED ioctl. Cc: Richard Cochran Cc: Jacob Keller Signed-off-by: Miroslav Lichvar Tested-by: Andrew Bowers Acked-by: Jacob Keller Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_ptp.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_ptp.c b/drivers/net/ethernet/intel/i40e/i40e_ptp.c index e6fc0aff8c995..5fb4353c742b9 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ptp.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ptp.c @@ -28,19 +28,23 @@ * i40e_ptp_read - Read the PHC time from the device * @pf: Board private structure * @ts: timespec structure to hold the current time value + * @sts: structure to hold the system time before and after reading the PHC * * This function reads the PRTTSYN_TIME registers and stores them in a * timespec. However, since the registers are 64 bits of nanoseconds, we must * convert the result to a timespec before we can return. **/ -static void i40e_ptp_read(struct i40e_pf *pf, struct timespec64 *ts) +static void i40e_ptp_read(struct i40e_pf *pf, struct timespec64 *ts, + struct ptp_system_timestamp *sts) { struct i40e_hw *hw = &pf->hw; u32 hi, lo; u64 ns; /* The timer latches on the lowest register read. */ + ptp_read_system_prets(sts); lo = rd32(hw, I40E_PRTTSYN_TIME_L); + ptp_read_system_postts(sts); hi = rd32(hw, I40E_PRTTSYN_TIME_H); ns = (((u64)hi) << 32) | lo; @@ -146,7 +150,7 @@ static int i40e_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta) mutex_lock(&pf->tmreg_lock); - i40e_ptp_read(pf, &now); + i40e_ptp_read(pf, &now, NULL); timespec64_add_ns(&now, delta); i40e_ptp_write(pf, (const struct timespec64 *)&now); @@ -156,19 +160,21 @@ static int i40e_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta) } /** - * i40e_ptp_gettime - Get the time of the PHC + * i40e_ptp_gettimex - Get the time of the PHC * @ptp: The PTP clock structure * @ts: timespec structure to hold the current time value + * @sts: structure to hold the system time before and after reading the PHC * * Read the device clock and return the correct value on ns, after converting it * into a timespec struct. **/ -static int i40e_ptp_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts) +static int i40e_ptp_gettimex(struct ptp_clock_info *ptp, struct timespec64 *ts, + struct ptp_system_timestamp *sts) { struct i40e_pf *pf = container_of(ptp, struct i40e_pf, ptp_caps); mutex_lock(&pf->tmreg_lock); - i40e_ptp_read(pf, ts); + i40e_ptp_read(pf, ts, sts); mutex_unlock(&pf->tmreg_lock); return 0; @@ -702,7 +708,7 @@ static long i40e_ptp_create_clock(struct i40e_pf *pf) pf->ptp_caps.pps = 0; pf->ptp_caps.adjfreq = i40e_ptp_adjfreq; pf->ptp_caps.adjtime = i40e_ptp_adjtime; - pf->ptp_caps.gettime64 = i40e_ptp_gettime; + pf->ptp_caps.gettimex64 = i40e_ptp_gettimex; pf->ptp_caps.settime64 = i40e_ptp_settime; pf->ptp_caps.enable = i40e_ptp_feature_enable; From eec903769b4ea476591ffff73bb7359f14f38c51 Mon Sep 17 00:00:00 2001 From: Young Xiao Date: Thu, 29 Nov 2018 01:54:10 +0000 Subject: [PATCH 07/10] ice: Do not enable NAPI on q_vectors that have no rings If ice driver has q_vectors w/ active NAPI that has no rings, then this will result in a divide by zero error. To correct it I am updating the driver code so that we only support NAPI on q_vectors that have 1 or more rings allocated to them. See commit 13a8cd191a2b ("i40e: Do not enable NAPI on q_vectors that have no rings") for detail. Signed-off-by: Young Xiao Acked-by: Anirudh Venkataramanan Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ice/ice_main.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index f9f0d470412bf..8725569d11f0a 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -2564,8 +2564,12 @@ static void ice_napi_enable_all(struct ice_vsi *vsi) if (!vsi->netdev) return; - for (q_idx = 0; q_idx < vsi->num_q_vectors; q_idx++) - napi_enable(&vsi->q_vectors[q_idx]->napi); + for (q_idx = 0; q_idx < vsi->num_q_vectors; q_idx++) { + struct ice_q_vector *q_vector = vsi->q_vectors[q_idx]; + + if (q_vector->rx.ring || q_vector->tx.ring) + napi_enable(&q_vector->napi); + } } /** @@ -2932,8 +2936,12 @@ static void ice_napi_disable_all(struct ice_vsi *vsi) if (!vsi->netdev) return; - for (q_idx = 0; q_idx < vsi->num_q_vectors; q_idx++) - napi_disable(&vsi->q_vectors[q_idx]->napi); + for (q_idx = 0; q_idx < vsi->num_q_vectors; q_idx++) { + struct ice_q_vector *q_vector = vsi->q_vectors[q_idx]; + + if (q_vector->rx.ring || q_vector->tx.ring) + napi_disable(&q_vector->napi); + } } /** From 1fb3a7a75e2efcc83ef21f2434069cddd6fae6f5 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Mon, 3 Dec 2018 13:54:38 +0800 Subject: [PATCH 08/10] igb: Fix an issue that PME is not enabled during runtime suspend I210 ethernet card doesn't wakeup when a cable gets plugged. It's because its PME is not set. Since commit 42eca2302146 ("PCI: Don't touch card regs after runtime suspend D3"), if the PCI state is saved, pci_pm_runtime_suspend() stops calling pci_finish_runtime_suspend(), which enables the PCI PME. To fix the issue, let's not to save PCI states when it's runtime suspend, to let the PCI subsystem enables PME. Fixes: 42eca2302146 ("PCI: Don't touch card regs after runtime suspend D3") Signed-off-by: Kai-Heng Feng Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/igb/igb_main.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index e135adf46980b..87bdf1604ae2c 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -8771,9 +8771,11 @@ static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake, rtnl_unlock(); #ifdef CONFIG_PM - retval = pci_save_state(pdev); - if (retval) - return retval; + if (!runtime) { + retval = pci_save_state(pdev); + if (retval) + return retval; + } #endif status = rd32(E1000_STATUS); From 8fa10ef01260937eb540b4e9bbc3efa023595993 Mon Sep 17 00:00:00 2001 From: Steve Douthit Date: Thu, 6 Dec 2018 15:50:39 +0000 Subject: [PATCH 09/10] ixgbe: register a mdiobus Most dsa devices expect a 'struct mii_bus' pointer to talk to switches via the MII interface. While this works for dsa devices, it will not work safely with Linux PHYs in all configurations since the firmware of the ixgbe device may be polling some PHY addresses in the background. Signed-off-by: Stephen Douthit Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/Kconfig | 1 + drivers/net/ethernet/intel/ixgbe/ixgbe.h | 2 + drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 5 + drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c | 299 ++++++++++++++++++ drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h | 2 + 5 files changed, 309 insertions(+) diff --git a/drivers/net/ethernet/intel/Kconfig b/drivers/net/ethernet/intel/Kconfig index f05c91d4c4696..31fb76ee9d826 100644 --- a/drivers/net/ethernet/intel/Kconfig +++ b/drivers/net/ethernet/intel/Kconfig @@ -159,6 +159,7 @@ config IXGBE tristate "Intel(R) 10GbE PCI Express adapters support" depends on PCI select MDIO + select MDIO_DEVICE imply PTP_1588_CLOCK ---help--- This driver supports Intel(R) 10GbE PCI Express family of diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index 143bdd5ee2a08..08d85e336bd43 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -561,6 +562,7 @@ struct ixgbe_adapter { struct net_device *netdev; struct bpf_prog *xdp_prog; struct pci_dev *pdev; + struct mii_bus *mii_bus; unsigned long state; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 2cd8c42d1403a..7a3798ff23660 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -39,6 +39,7 @@ #include "ixgbe.h" #include "ixgbe_common.h" #include "ixgbe_dcb_82599.h" +#include "ixgbe_phy.h" #include "ixgbe_sriov.h" #include "ixgbe_model.h" #include "ixgbe_txrx_common.h" @@ -11122,6 +11123,8 @@ static int ixgbe_probe(struct pci_dev *pdev, const struct pci_device_id *ent) IXGBE_LINK_SPEED_10GB_FULL | IXGBE_LINK_SPEED_1GB_FULL, true); + ixgbe_mii_bus_init(hw); + return 0; err_register: @@ -11172,6 +11175,8 @@ static void ixgbe_remove(struct pci_dev *pdev) set_bit(__IXGBE_REMOVING, &adapter->state); cancel_work_sync(&adapter->service_task); + if (adapter->mii_bus) + mdiobus_unregister(adapter->mii_bus); #ifdef CONFIG_IXGBE_DCA if (adapter->flags & IXGBE_FLAG_DCA_ENABLED) { diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c index 919a7af84b423..cc4907f9ff02c 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c @@ -3,6 +3,7 @@ #include #include +#include #include #include "ixgbe.h" @@ -658,6 +659,304 @@ s32 ixgbe_write_phy_reg_generic(struct ixgbe_hw *hw, u32 reg_addr, return status; } +#define IXGBE_HW_READ_REG(addr) IXGBE_READ_REG(hw, addr) + +/** + * ixgbe_msca_cmd - Write the command register and poll for completion/timeout + * @hw: pointer to hardware structure + * @cmd: command register value to write + **/ +static s32 ixgbe_msca_cmd(struct ixgbe_hw *hw, u32 cmd) +{ + IXGBE_WRITE_REG(hw, IXGBE_MSCA, cmd); + + return readx_poll_timeout(IXGBE_HW_READ_REG, IXGBE_MSCA, cmd, + !(cmd & IXGBE_MSCA_MDI_COMMAND), 10, + 10 * IXGBE_MDIO_COMMAND_TIMEOUT); +} + +/** + * ixgbe_mii_bus_read_generic - Read a clause 22/45 register with gssr flags + * @hw: pointer to hardware structure + * @addr: address + * @regnum: register number + * @gssr: semaphore flags to acquire + **/ +static s32 ixgbe_mii_bus_read_generic(struct ixgbe_hw *hw, int addr, + int regnum, u32 gssr) +{ + u32 hwaddr, cmd; + s32 data; + + if (hw->mac.ops.acquire_swfw_sync(hw, gssr)) + return -EBUSY; + + hwaddr = addr << IXGBE_MSCA_PHY_ADDR_SHIFT; + if (regnum & MII_ADDR_C45) { + hwaddr |= regnum & GENMASK(21, 0); + cmd = hwaddr | IXGBE_MSCA_ADDR_CYCLE | IXGBE_MSCA_MDI_COMMAND; + } else { + hwaddr |= (regnum & GENMASK(5, 0)) << IXGBE_MSCA_DEV_TYPE_SHIFT; + cmd = hwaddr | IXGBE_MSCA_OLD_PROTOCOL | + IXGBE_MSCA_READ_AUTOINC | IXGBE_MSCA_MDI_COMMAND; + } + + data = ixgbe_msca_cmd(hw, cmd); + if (data < 0) + goto mii_bus_read_done; + + /* For a clause 45 access the address cycle just completed, we still + * need to do the read command, otherwise just get the data + */ + if (!(regnum & MII_ADDR_C45)) + goto do_mii_bus_read; + + cmd = hwaddr | IXGBE_MSCA_READ | IXGBE_MSCA_MDI_COMMAND; + data = ixgbe_msca_cmd(hw, cmd); + if (data < 0) + goto mii_bus_read_done; + +do_mii_bus_read: + data = IXGBE_READ_REG(hw, IXGBE_MSRWD); + data = (data >> IXGBE_MSRWD_READ_DATA_SHIFT) & GENMASK(16, 0); + +mii_bus_read_done: + hw->mac.ops.release_swfw_sync(hw, gssr); + return data; +} + +/** + * ixgbe_mii_bus_write_generic - Write a clause 22/45 register with gssr flags + * @hw: pointer to hardware structure + * @addr: address + * @regnum: register number + * @val: value to write + * @gssr: semaphore flags to acquire + **/ +static s32 ixgbe_mii_bus_write_generic(struct ixgbe_hw *hw, int addr, + int regnum, u16 val, u32 gssr) +{ + u32 hwaddr, cmd; + s32 err; + + if (hw->mac.ops.acquire_swfw_sync(hw, gssr)) + return -EBUSY; + + IXGBE_WRITE_REG(hw, IXGBE_MSRWD, (u32)val); + + hwaddr = addr << IXGBE_MSCA_PHY_ADDR_SHIFT; + if (regnum & MII_ADDR_C45) { + hwaddr |= regnum & GENMASK(21, 0); + cmd = hwaddr | IXGBE_MSCA_ADDR_CYCLE | IXGBE_MSCA_MDI_COMMAND; + } else { + hwaddr |= (regnum & GENMASK(5, 0)) << IXGBE_MSCA_DEV_TYPE_SHIFT; + cmd = hwaddr | IXGBE_MSCA_OLD_PROTOCOL | IXGBE_MSCA_WRITE | + IXGBE_MSCA_MDI_COMMAND; + } + + /* For clause 45 this is an address cycle, for clause 22 this is the + * entire transaction + */ + err = ixgbe_msca_cmd(hw, cmd); + if (err < 0 || !(regnum & MII_ADDR_C45)) + goto mii_bus_write_done; + + cmd = hwaddr | IXGBE_MSCA_WRITE | IXGBE_MSCA_MDI_COMMAND; + err = ixgbe_msca_cmd(hw, cmd); + +mii_bus_write_done: + hw->mac.ops.release_swfw_sync(hw, gssr); + return err; +} + +/** + * ixgbe_mii_bus_read - Read a clause 22/45 register + * @hw: pointer to hardware structure + * @addr: address + * @regnum: register number + **/ +static s32 ixgbe_mii_bus_read(struct mii_bus *bus, int addr, int regnum) +{ + struct ixgbe_adapter *adapter = bus->priv; + struct ixgbe_hw *hw = &adapter->hw; + u32 gssr = hw->phy.phy_semaphore_mask; + + return ixgbe_mii_bus_read_generic(hw, addr, regnum, gssr); +} + +/** + * ixgbe_mii_bus_write - Write a clause 22/45 register + * @hw: pointer to hardware structure + * @addr: address + * @regnum: register number + * @val: value to write + **/ +static s32 ixgbe_mii_bus_write(struct mii_bus *bus, int addr, int regnum, + u16 val) +{ + struct ixgbe_adapter *adapter = bus->priv; + struct ixgbe_hw *hw = &adapter->hw; + u32 gssr = hw->phy.phy_semaphore_mask; + + return ixgbe_mii_bus_write_generic(hw, addr, regnum, val, gssr); +} + +/** + * ixgbe_x550em_a_mii_bus_read - Read a clause 22/45 register on x550em_a + * @hw: pointer to hardware structure + * @addr: address + * @regnum: register number + **/ +static s32 ixgbe_x550em_a_mii_bus_read(struct mii_bus *bus, int addr, + int regnum) +{ + struct ixgbe_adapter *adapter = bus->priv; + struct ixgbe_hw *hw = &adapter->hw; + u32 gssr = hw->phy.phy_semaphore_mask; + + gssr |= IXGBE_GSSR_TOKEN_SM | IXGBE_GSSR_PHY0_SM; + return ixgbe_mii_bus_read_generic(hw, addr, regnum, gssr); +} + +/** + * ixgbe_x550em_a_mii_bus_write - Write a clause 22/45 register on x550em_a + * @hw: pointer to hardware structure + * @addr: address + * @regnum: register number + * @val: value to write + **/ +static s32 ixgbe_x550em_a_mii_bus_write(struct mii_bus *bus, int addr, + int regnum, u16 val) +{ + struct ixgbe_adapter *adapter = bus->priv; + struct ixgbe_hw *hw = &adapter->hw; + u32 gssr = hw->phy.phy_semaphore_mask; + + gssr |= IXGBE_GSSR_TOKEN_SM | IXGBE_GSSR_PHY0_SM; + return ixgbe_mii_bus_write_generic(hw, addr, regnum, val, gssr); +} + +/** + * ixgbe_get_first_secondary_devfn - get first device downstream of root port + * @devfn: PCI_DEVFN of root port on domain 0, bus 0 + * + * Returns pci_dev pointer to PCI_DEVFN(0, 0) on subordinate side of root + * on domain 0, bus 0, devfn = 'devfn' + **/ +static struct pci_dev *ixgbe_get_first_secondary_devfn(unsigned int devfn) +{ + struct pci_dev *rp_pdev; + int bus; + + rp_pdev = pci_get_domain_bus_and_slot(0, 0, devfn); + if (rp_pdev && rp_pdev->subordinate) { + bus = rp_pdev->subordinate->number; + return pci_get_domain_bus_and_slot(0, bus, 0); + } + + return NULL; +} + +/** + * ixgbe_x550em_a_has_mii - is this the first ixgbe x550em_a PCI function? + * @hw: pointer to hardware structure + * + * Returns true if hw points to lowest numbered PCI B:D.F x550_em_a device in + * the SoC. There are up to 4 MACs sharing a single MDIO bus on the x550em_a, + * but we only want to register one MDIO bus. + **/ +static bool ixgbe_x550em_a_has_mii(struct ixgbe_hw *hw) +{ + struct ixgbe_adapter *adapter = hw->back; + struct pci_dev *pdev = adapter->pdev; + struct pci_dev *func0_pdev; + + /* For the C3000 family of SoCs (x550em_a) the internal ixgbe devices + * are always downstream of root ports @ 0000:00:16.0 & 0000:00:17.0 + * It's not valid for function 0 to be disabled and function 1 is up, + * so the lowest numbered ixgbe dev will be device 0 function 0 on one + * of those two root ports + */ + func0_pdev = ixgbe_get_first_secondary_devfn(PCI_DEVFN(0x16, 0)); + if (func0_pdev) { + if (func0_pdev == pdev) + return true; + else + return false; + } + func0_pdev = ixgbe_get_first_secondary_devfn(PCI_DEVFN(0x17, 0)); + if (func0_pdev == pdev) + return true; + + return false; +} + +/** + * ixgbe_mii_bus_init - mii_bus structure setup + * @hw: pointer to hardware structure + * + * Returns 0 on success, negative on failure + * + * ixgbe_mii_bus_init initializes a mii_bus structure in adapter + **/ +s32 ixgbe_mii_bus_init(struct ixgbe_hw *hw) +{ + struct ixgbe_adapter *adapter = hw->back; + struct pci_dev *pdev = adapter->pdev; + struct device *dev = &adapter->netdev->dev; + struct mii_bus *bus; + + adapter->mii_bus = devm_mdiobus_alloc(dev); + if (!adapter->mii_bus) + return -ENOMEM; + + bus = adapter->mii_bus; + + switch (hw->device_id) { + /* C3000 SoCs */ + case IXGBE_DEV_ID_X550EM_A_KR: + case IXGBE_DEV_ID_X550EM_A_KR_L: + case IXGBE_DEV_ID_X550EM_A_SFP_N: + case IXGBE_DEV_ID_X550EM_A_SGMII: + case IXGBE_DEV_ID_X550EM_A_SGMII_L: + case IXGBE_DEV_ID_X550EM_A_10G_T: + case IXGBE_DEV_ID_X550EM_A_SFP: + case IXGBE_DEV_ID_X550EM_A_1G_T: + case IXGBE_DEV_ID_X550EM_A_1G_T_L: + if (!ixgbe_x550em_a_has_mii(hw)) + goto ixgbe_no_mii_bus; + bus->read = &ixgbe_x550em_a_mii_bus_read; + bus->write = &ixgbe_x550em_a_mii_bus_write; + break; + default: + bus->read = &ixgbe_mii_bus_read; + bus->write = &ixgbe_mii_bus_write; + break; + } + + /* Use the position of the device in the PCI hierarchy as the id */ + snprintf(bus->id, MII_BUS_ID_SIZE, "%s-mdio-%s", ixgbe_driver_name, + pci_name(pdev)); + + bus->name = "ixgbe-mdio"; + bus->priv = adapter; + bus->parent = dev; + bus->phy_mask = GENMASK(31, 0); + + /* Support clause 22/45 natively. ixgbe_probe() sets MDIO_EMULATE_C22 + * unfortunately that causes some clause 22 frames to be sent with + * clause 45 addressing. We don't want that. + */ + hw->phy.mdio.mode_support = MDIO_SUPPORTS_C45 | MDIO_SUPPORTS_C22; + + return mdiobus_register(bus); + +ixgbe_no_mii_bus: + devm_mdiobus_free(dev, bus); + adapter->mii_bus = NULL; + return -ENODEV; +} + /** * ixgbe_setup_phy_link_generic - Set and restart autoneg * @hw: pointer to hardware structure diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h index 64e44e01c973f..214b01085718f 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h @@ -120,6 +120,8 @@ /* SFP+ SFF-8472 Compliance code */ #define IXGBE_SFF_SFF_8472_UNSUP 0x00 +s32 ixgbe_mii_bus_init(struct ixgbe_hw *hw); + s32 ixgbe_identify_phy_generic(struct ixgbe_hw *hw); s32 ixgbe_reset_phy_generic(struct ixgbe_hw *hw); s32 ixgbe_read_phy_reg_generic(struct ixgbe_hw *hw, u32 reg_addr, From 643bae17fd4ccb503cdc6d99c1b4fbd2c4ca6a78 Mon Sep 17 00:00:00 2001 From: Steve Douthit Date: Thu, 6 Dec 2018 15:50:43 +0000 Subject: [PATCH 10/10] ixgbe: use mii_bus to handle MII related ioctls Use the mii_bus callbacks to address the entire clause 22/45 address space. Enables userspace to poke switch registers instead of a single PHY address. The ixgbe firmware may be polling PHYs in a way that is not protected by the mii_bus lock. This isn't new behavior, but as Andrew Lunn pointed out there are more addresses available for conflicts. Signed-off-by: Stephen Douthit Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 7a3798ff23660..daff8183534b9 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -8791,6 +8791,15 @@ ixgbe_mdio_read(struct net_device *netdev, int prtad, int devad, u16 addr) u16 value; int rc; + if (adapter->mii_bus) { + int regnum = addr; + + if (devad != MDIO_DEVAD_NONE) + regnum |= (devad << 16) | MII_ADDR_C45; + + return mdiobus_read(adapter->mii_bus, prtad, regnum); + } + if (prtad != hw->phy.mdio.prtad) return -EINVAL; rc = hw->phy.ops.read_reg(hw, addr, devad, &value); @@ -8805,6 +8814,15 @@ static int ixgbe_mdio_write(struct net_device *netdev, int prtad, int devad, struct ixgbe_adapter *adapter = netdev_priv(netdev); struct ixgbe_hw *hw = &adapter->hw; + if (adapter->mii_bus) { + int regnum = addr; + + if (devad != MDIO_DEVAD_NONE) + regnum |= (devad << 16) | MII_ADDR_C45; + + return mdiobus_write(adapter->mii_bus, prtad, regnum, value); + } + if (prtad != hw->phy.mdio.prtad) return -EINVAL; return hw->phy.ops.write_reg(hw, addr, devad, value);