From f164b84529e3bf9ae43882fd3ac84bef94d104cf Mon Sep 17 00:00:00 2001 From: Mark Rustad Date: Fri, 16 Oct 2015 13:27:49 -0700 Subject: [PATCH 01/15] ixgbe: Prevent KR PHY reset in ixgbe_init_phy_ops_x550em This patch removes KR PHY reset from ixgbe_init_phy_ops_x550em, since this function is meant to initialize function pointers for the detected PHY type. Internal PHY reset was moved to ixgbe_setup_internal_phy_t_x550em which will now detect which mode the internal PHY operates in and set it up as required. Signed-off-by: Mark Rustad Tested-by: Darin Miller Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c | 21 +++++++------------ 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c index bf2ae8daf7175..a9c73b75a9020 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c @@ -1727,6 +1727,12 @@ static s32 ixgbe_setup_internal_phy_t_x550em(struct ixgbe_hw *hw) if (hw->mac.ops.get_media_type(hw) != ixgbe_media_type_copper) return IXGBE_ERR_CONFIG; + if (hw->phy.nw_mng_if_sel & IXGBE_NW_MNG_IF_SEL_INT_PHY_MODE) { + speed = IXGBE_LINK_SPEED_10GB_FULL | + IXGBE_LINK_SPEED_1GB_FULL; + return ixgbe_setup_kr_speed_x550em(hw, speed); + } + /* If link is not up, then there is no setup necessary so return */ status = ixgbe_ext_phy_t_x550em_get_link(hw, &link_up); if (status) @@ -1931,7 +1937,6 @@ static s32 ixgbe_enter_lplu_t_x550em(struct ixgbe_hw *hw) static s32 ixgbe_init_phy_ops_X550em(struct ixgbe_hw *hw) { struct ixgbe_phy_info *phy = &hw->phy; - ixgbe_link_speed speed; s32 ret_val; hw->mac.ops.set_lan_id(hw); @@ -1944,10 +1949,6 @@ static s32 ixgbe_init_phy_ops_X550em(struct ixgbe_hw *hw) * to determine internal PHY mode. */ phy->nw_mng_if_sel = IXGBE_READ_REG(hw, IXGBE_NW_MNG_IF_SEL); - if (phy->nw_mng_if_sel & IXGBE_NW_MNG_IF_SEL_INT_PHY_MODE) { - speed = IXGBE_LINK_SPEED_10GB_FULL | - IXGBE_LINK_SPEED_1GB_FULL; - } } /* Identify the PHY or SFP module */ @@ -1979,14 +1980,8 @@ static s32 ixgbe_init_phy_ops_X550em(struct ixgbe_hw *hw) /* If internal link mode is XFI, then setup iXFI internal link, * else setup KR now. */ - if (!(phy->nw_mng_if_sel & IXGBE_NW_MNG_IF_SEL_INT_PHY_MODE)) { - phy->ops.setup_internal_link = - ixgbe_setup_internal_phy_t_x550em; - } else { - speed = IXGBE_LINK_SPEED_10GB_FULL | - IXGBE_LINK_SPEED_1GB_FULL; - ret_val = ixgbe_setup_kr_speed_x550em(hw, speed); - } + phy->ops.setup_internal_link = + ixgbe_setup_internal_phy_t_x550em; /* setup SW LPLU only for first revision */ if (!(IXGBE_FUSES0_REV1 & IXGBE_READ_REG(hw, From 83a9fb20ecc4bb8b36a610ab833962fed52db64c Mon Sep 17 00:00:00 2001 From: Mark Rustad Date: Mon, 19 Oct 2015 09:22:14 -0700 Subject: [PATCH 02/15] ixgbe: Add support for newer thermal alarm The newer copper PHY implementation used with newer X550EM_x devices uses a different thermal alarm type than the earlier one. Make changes to support both types. Signed-off-by: Mark Rustad Tested-by: Darin Miller Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_type.h | 4 ++++ drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c | 22 ++++++++++++++++--- 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h index 995f03107eacd..927349da232ac 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h @@ -1345,7 +1345,10 @@ struct ixgbe_thermal_sensor_data { #define IXGBE_MDIO_GLOBAL_INT_CHIP_VEN_MASK 0xFF01 /* int chip-wide mask */ #define IXGBE_MDIO_GLOBAL_INT_CHIP_VEN_FLAG 0xFC01 /* int chip-wide mask */ #define IXGBE_MDIO_GLOBAL_ALARM_1 0xCC00 /* Global alarm 1 */ +#define IXGBE_MDIO_GLOBAL_ALM_1_DEV_FAULT 0x0010 /* device fault */ #define IXGBE_MDIO_GLOBAL_ALM_1_HI_TMP_FAIL 0x4000 /* high temp failure */ +#define IXGBE_MDIO_GLOBAL_FAULT_MSG 0xC850 /* global fault msg */ +#define IXGBE_MDIO_GLOBAL_FAULT_MSG_HI_TMP 0x8007 /* high temp failure */ #define IXGBE_MDIO_GLOBAL_INT_MASK 0xD400 /* Global int mask */ /* autoneg vendor alarm int enable */ #define IXGBE_MDIO_GLOBAL_AN_VEN_ALM_INT_EN 0x1000 @@ -1353,6 +1356,7 @@ struct ixgbe_thermal_sensor_data { #define IXGBE_MDIO_GLOBAL_VEN_ALM_INT_EN 0x1 /* vendor alarm int enable */ #define IXGBE_MDIO_GLOBAL_STD_ALM2_INT 0x200 /* vendor alarm2 int mask */ #define IXGBE_MDIO_GLOBAL_INT_HI_TEMP_EN 0x4000 /* int high temp enable */ +#define IXGBE_MDIO_GLOBAL_INT_DEV_FAULT_EN 0x0010 /*int dev fault enable */ #define IXGBE_MDIO_PMA_PMD_SDA_SCL_ADDR 0xC30A /* PHY_XS SDA/SCL Addr Reg */ #define IXGBE_MDIO_PMA_PMD_SDA_SCL_DATA 0xC30B /* PHY_XS SDA/SCL Data Reg */ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c index a9c73b75a9020..b8ad3f212e726 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c @@ -1444,7 +1444,7 @@ static s32 ixgbe_get_lasi_ext_t_x550em(struct ixgbe_hw *hw, bool *lsc) IXGBE_MDIO_GLOBAL_ALARM_1_INT))) return status; - /* High temperature failure alarm triggered */ + /* Global alarm triggered */ status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_GLOBAL_ALARM_1, IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, ®); @@ -1458,6 +1458,21 @@ static s32 ixgbe_get_lasi_ext_t_x550em(struct ixgbe_hw *hw, bool *lsc) ixgbe_set_copper_phy_power(hw, false); return IXGBE_ERR_OVERTEMP; } + if (reg & IXGBE_MDIO_GLOBAL_ALM_1_DEV_FAULT) { + /* device fault alarm triggered */ + status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_GLOBAL_FAULT_MSG, + IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, + ®); + if (status) + return status; + + /* if device fault was due to high temp alarm handle and exit */ + if (reg == IXGBE_MDIO_GLOBAL_FAULT_MSG_HI_TMP) { + /* power down the PHY in case the PHY FW didn't */ + ixgbe_set_copper_phy_power(hw, false); + return IXGBE_ERR_OVERTEMP; + } + } /* Vendor alarm 2 triggered */ status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_GLOBAL_CHIP_STD_INT_FLAG, @@ -1511,14 +1526,15 @@ static s32 ixgbe_enable_lasi_ext_t_x550em(struct ixgbe_hw *hw) if (status) return status; - /* Enables high temperature failure alarm */ + /* Enable high temperature failure and global fault alarms */ status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_GLOBAL_INT_MASK, IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, ®); if (status) return status; - reg |= IXGBE_MDIO_GLOBAL_INT_HI_TEMP_EN; + reg |= (IXGBE_MDIO_GLOBAL_INT_HI_TEMP_EN | + IXGBE_MDIO_GLOBAL_INT_DEV_FAULT_EN); status = hw->phy.ops.write_reg(hw, IXGBE_MDIO_GLOBAL_INT_MASK, IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, From 780484d853d096b4253b966e1789c4f338dd7301 Mon Sep 17 00:00:00 2001 From: Mark Rustad Date: Wed, 21 Oct 2015 17:21:10 -0700 Subject: [PATCH 03/15] ixgbe: Use private workqueue to avoid certain possible hangs Use a private workqueue to avoid hangs that were otherwise possible when performing stress tests, such as creating and destroying many VFS repeatedly. Signed-off-by: Mark Rustad Tested-by: Darin Miller Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index c95042ee30dec..0918e32012c21 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -172,6 +172,8 @@ MODULE_DESCRIPTION("Intel(R) 10 Gigabit PCI Express Network Driver"); MODULE_LICENSE("GPL"); MODULE_VERSION(DRV_VERSION); +static struct workqueue_struct *ixgbe_wq; + static bool ixgbe_check_cfg_remove(struct ixgbe_hw *hw, struct pci_dev *pdev); static int ixgbe_read_pci_cfg_word_parent(struct ixgbe_adapter *adapter, @@ -313,7 +315,7 @@ static void ixgbe_service_event_schedule(struct ixgbe_adapter *adapter) if (!test_bit(__IXGBE_DOWN, &adapter->state) && !test_bit(__IXGBE_REMOVING, &adapter->state) && !test_and_set_bit(__IXGBE_SERVICE_SCHED, &adapter->state)) - schedule_work(&adapter->service_task); + queue_work(ixgbe_wq, &adapter->service_task); } static void ixgbe_remove_adapter(struct ixgbe_hw *hw) @@ -9320,6 +9322,12 @@ static int __init ixgbe_init_module(void) pr_info("%s - version %s\n", ixgbe_driver_string, ixgbe_driver_version); pr_info("%s\n", ixgbe_copyright); + ixgbe_wq = create_singlethread_workqueue(ixgbe_driver_name); + if (!ixgbe_wq) { + pr_err("%s: Failed to create workqueue\n", ixgbe_driver_name); + return -ENOMEM; + } + ixgbe_dbg_init(); ret = pci_register_driver(&ixgbe_driver); @@ -9351,6 +9359,10 @@ static void __exit ixgbe_exit_module(void) pci_unregister_driver(&ixgbe_driver); ixgbe_dbg_exit(); + if (ixgbe_wq) { + destroy_workqueue(ixgbe_wq); + ixgbe_wq = NULL; + } } #ifdef CONFIG_IXGBE_DCA From 40a13e2493c9882cb4d09054d81a5063cd1589a2 Mon Sep 17 00:00:00 2001 From: Mark Rustad Date: Wed, 21 Oct 2015 17:21:15 -0700 Subject: [PATCH 04/15] ixgbevf: Use a private workqueue to avoid certain possible hangs Use a private workqueue to avoid hangs that were otherwise possible when performing stress tests, such as creating and destroying many VFS repeatedly. Signed-off-by: Mark Rustad Tested-by: Darin Miller Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index dbbd1be474626..18f1c2232cad0 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -59,7 +59,7 @@ static const char ixgbevf_driver_string[] = #define DRV_VERSION "2.12.1-k" const char ixgbevf_driver_version[] = DRV_VERSION; static char ixgbevf_copyright[] = - "Copyright (c) 2009 - 2012 Intel Corporation."; + "Copyright (c) 2009 - 2015 Intel Corporation."; static const struct ixgbevf_info *ixgbevf_info_tbl[] = { [board_82599_vf] = &ixgbevf_82599_vf_info, @@ -96,12 +96,14 @@ static int debug = -1; module_param(debug, int, 0); MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)"); +static struct workqueue_struct *ixgbevf_wq; + static void ixgbevf_service_event_schedule(struct ixgbevf_adapter *adapter) { if (!test_bit(__IXGBEVF_DOWN, &adapter->state) && !test_bit(__IXGBEVF_REMOVING, &adapter->state) && !test_and_set_bit(__IXGBEVF_SERVICE_SCHED, &adapter->state)) - schedule_work(&adapter->service_task); + queue_work(ixgbevf_wq, &adapter->service_task); } static void ixgbevf_service_event_complete(struct ixgbevf_adapter *adapter) @@ -4250,6 +4252,11 @@ static int __init ixgbevf_init_module(void) ixgbevf_driver_version); pr_info("%s\n", ixgbevf_copyright); + ixgbevf_wq = create_singlethread_workqueue(ixgbevf_driver_name); + if (!ixgbevf_wq) { + pr_err("%s: Failed to create workqueue\n", ixgbevf_driver_name); + return -ENOMEM; + } ret = pci_register_driver(&ixgbevf_driver); return ret; @@ -4266,6 +4273,10 @@ module_init(ixgbevf_init_module); static void __exit ixgbevf_exit_module(void) { pci_unregister_driver(&ixgbevf_driver); + if (ixgbevf_wq) { + destroy_workqueue(ixgbevf_wq); + ixgbevf_wq = NULL; + } } #ifdef DEBUG From 50985b5f62cc74e9e222f0ddf890e1ba87be371a Mon Sep 17 00:00:00 2001 From: Mark Rustad Date: Wed, 21 Oct 2015 17:21:20 -0700 Subject: [PATCH 05/15] ixgbevf: Minor cleanups Make some minor cleanups, such as simplifying return paths, deleting unneeded initializations, return values more directly and so forth. Signed-off-by: Mark Rustad Tested-by: Darin Miller Signed-off-by: Jeff Kirsher --- .../net/ethernet/intel/ixgbevf/ixgbevf_main.c | 36 +++++++------------ 1 file changed, 12 insertions(+), 24 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 18f1c2232cad0..42b971c0cc87e 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -1334,7 +1334,6 @@ static int ixgbevf_map_rings_to_vectors(struct ixgbevf_adapter *adapter) int txr_remaining = adapter->num_tx_queues; int i, j; int rqpv, tqpv; - int err = 0; q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS; @@ -1347,7 +1346,7 @@ static int ixgbevf_map_rings_to_vectors(struct ixgbevf_adapter *adapter) for (; txr_idx < txr_remaining; v_start++, txr_idx++) map_vector_to_txq(adapter, v_start, txr_idx); - goto out; + return 0; } /* If we don't have enough vectors for a 1-to-1 @@ -1372,8 +1371,7 @@ static int ixgbevf_map_rings_to_vectors(struct ixgbevf_adapter *adapter) } } -out: - return err; + return 0; } /** @@ -1471,9 +1469,7 @@ static inline void ixgbevf_reset_q_vectors(struct ixgbevf_adapter *adapter) **/ static int ixgbevf_request_irq(struct ixgbevf_adapter *adapter) { - int err = 0; - - err = ixgbevf_request_msix_irqs(adapter); + int err = ixgbevf_request_msix_irqs(adapter); if (err) hw_dbg(&adapter->hw, "request_irq failed, Error %d\n", err); @@ -1832,7 +1828,7 @@ static int ixgbevf_vlan_rx_kill_vid(struct net_device *netdev, { struct ixgbevf_adapter *adapter = netdev_priv(netdev); struct ixgbe_hw *hw = &adapter->hw; - int err = -EOPNOTSUPP; + int err; spin_lock_bh(&adapter->mbx_lock); @@ -2048,7 +2044,7 @@ static void ixgbevf_negotiate_api(struct ixgbevf_adapter *adapter) ixgbe_mbox_api_11, ixgbe_mbox_api_10, ixgbe_mbox_api_unknown }; - int err = 0, idx = 0; + int err, idx = 0; spin_lock_bh(&adapter->mbx_lock); @@ -2421,7 +2417,7 @@ static int ixgbevf_alloc_queues(struct ixgbevf_adapter *adapter) static int ixgbevf_set_interrupt_capability(struct ixgbevf_adapter *adapter) { struct net_device *netdev = adapter->netdev; - int err = 0; + int err; int vector, v_budget; /* It's easy to be greedy for MSI-X vectors, but it really @@ -2439,26 +2435,21 @@ static int ixgbevf_set_interrupt_capability(struct ixgbevf_adapter *adapter) */ adapter->msix_entries = kcalloc(v_budget, sizeof(struct msix_entry), GFP_KERNEL); - if (!adapter->msix_entries) { - err = -ENOMEM; - goto out; - } + if (!adapter->msix_entries) + return -ENOMEM; for (vector = 0; vector < v_budget; vector++) adapter->msix_entries[vector].entry = vector; err = ixgbevf_acquire_msix_vectors(adapter, v_budget); if (err) - goto out; + return err; err = netif_set_real_num_tx_queues(netdev, adapter->num_tx_queues); if (err) - goto out; - - err = netif_set_real_num_rx_queues(netdev, adapter->num_rx_queues); + return err; -out: - return err; + return netif_set_real_num_rx_queues(netdev, adapter->num_rx_queues); } /** @@ -4246,8 +4237,6 @@ static struct pci_driver ixgbevf_driver = { **/ static int __init ixgbevf_init_module(void) { - int ret; - pr_info("%s - version %s\n", ixgbevf_driver_string, ixgbevf_driver_version); @@ -4258,8 +4247,7 @@ static int __init ixgbevf_init_module(void) return -ENOMEM; } - ret = pci_register_driver(&ixgbevf_driver); - return ret; + return pci_register_driver(&ixgbevf_driver); } module_init(ixgbevf_init_module); From c9f53e63c2089d8154900ed06da0aa7be9f74201 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Thu, 22 Oct 2015 16:26:30 -0700 Subject: [PATCH 06/15] ixgbe: Refactor MAC address configuration code In the process of tracking down a memory leak when adding/removing FDB entries I had to go through the MAC address configuration code for ixgbe. In the process of doing so I found a number of issues that impacted readability and performance. This change updates the code in general to clean it up so it becomes clear what each step is doing. From what I can tell there a couple of bugs cleaned up in this code. First is the fact that the MAC addresses were being double counted for the PF. As a result once entries up to 63 had been used you could no longer add additional filters. A simple test case for this: for i in `seq 0 96` do ip link add link ens8 name mv$i type macvlan ip link set dev mv$i up done Test script: ethregs -s 0:8.0 | grep -e "RAH" | grep 8000....$ When things are working correctly RAL/H registers 1 - 97 will be consumed. In the failing case it will stop at 63 and prevent any further filters from being added. Signed-off-by: Alexander Duyck Tested-by: Darin Miller Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe.h | 7 +- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 163 +++++++++++------- 2 files changed, 100 insertions(+), 70 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index 1d2174526a4c9..9214c9d717182 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -587,9 +587,10 @@ static inline u16 ixgbe_desc_unused(struct ixgbe_ring *ring) struct ixgbe_mac_addr { u8 addr[ETH_ALEN]; - u16 queue; + u16 pool; u16 state; /* bitmask */ }; + #define IXGBE_MAC_STATE_DEFAULT 0x1 #define IXGBE_MAC_STATE_MODIFIED 0x2 #define IXGBE_MAC_STATE_IN_USE 0x4 @@ -883,9 +884,9 @@ int ixgbe_wol_supported(struct ixgbe_adapter *adapter, u16 device_id, void ixgbe_full_sync_mac_table(struct ixgbe_adapter *adapter); #endif int ixgbe_add_mac_filter(struct ixgbe_adapter *adapter, - u8 *addr, u16 queue); + const u8 *addr, u16 queue); int ixgbe_del_mac_filter(struct ixgbe_adapter *adapter, - u8 *addr, u16 queue); + const u8 *addr, u16 queue); void ixgbe_clear_interrupt_scheme(struct ixgbe_adapter *adapter); netdev_tx_t ixgbe_xmit_frame_ring(struct sk_buff *, struct ixgbe_adapter *, struct ixgbe_ring *); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 0918e32012c21..29f1a36224150 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -4031,124 +4031,156 @@ static int ixgbe_write_mc_addr_list(struct net_device *netdev) #ifdef CONFIG_PCI_IOV void ixgbe_full_sync_mac_table(struct ixgbe_adapter *adapter) { + struct ixgbe_mac_addr *mac_table = &adapter->mac_table[0]; struct ixgbe_hw *hw = &adapter->hw; int i; - for (i = 0; i < hw->mac.num_rar_entries; i++) { - if (adapter->mac_table[i].state & IXGBE_MAC_STATE_IN_USE) - hw->mac.ops.set_rar(hw, i, adapter->mac_table[i].addr, - adapter->mac_table[i].queue, + + for (i = 0; i < hw->mac.num_rar_entries; i++, mac_table++) { + mac_table->state &= ~IXGBE_MAC_STATE_MODIFIED; + + if (mac_table->state & IXGBE_MAC_STATE_IN_USE) + hw->mac.ops.set_rar(hw, i, + mac_table->addr, + mac_table->pool, IXGBE_RAH_AV); else hw->mac.ops.clear_rar(hw, i); - - adapter->mac_table[i].state &= ~(IXGBE_MAC_STATE_MODIFIED); } } -#endif +#endif static void ixgbe_sync_mac_table(struct ixgbe_adapter *adapter) { + struct ixgbe_mac_addr *mac_table = &adapter->mac_table[0]; struct ixgbe_hw *hw = &adapter->hw; int i; - for (i = 0; i < hw->mac.num_rar_entries; i++) { - if (adapter->mac_table[i].state & IXGBE_MAC_STATE_MODIFIED) { - if (adapter->mac_table[i].state & - IXGBE_MAC_STATE_IN_USE) - hw->mac.ops.set_rar(hw, i, - adapter->mac_table[i].addr, - adapter->mac_table[i].queue, - IXGBE_RAH_AV); - else - hw->mac.ops.clear_rar(hw, i); - adapter->mac_table[i].state &= - ~(IXGBE_MAC_STATE_MODIFIED); - } + for (i = 0; i < hw->mac.num_rar_entries; i++, mac_table++) { + if (!(mac_table->state & IXGBE_MAC_STATE_MODIFIED)) + continue; + + mac_table->state &= ~IXGBE_MAC_STATE_MODIFIED; + + if (mac_table->state & IXGBE_MAC_STATE_IN_USE) + hw->mac.ops.set_rar(hw, i, + mac_table->addr, + mac_table->pool, + IXGBE_RAH_AV); + else + hw->mac.ops.clear_rar(hw, i); } } static void ixgbe_flush_sw_mac_table(struct ixgbe_adapter *adapter) { - int i; + struct ixgbe_mac_addr *mac_table = &adapter->mac_table[0]; struct ixgbe_hw *hw = &adapter->hw; + int i; - for (i = 0; i < hw->mac.num_rar_entries; i++) { - adapter->mac_table[i].state |= IXGBE_MAC_STATE_MODIFIED; - adapter->mac_table[i].state &= ~IXGBE_MAC_STATE_IN_USE; - eth_zero_addr(adapter->mac_table[i].addr); - adapter->mac_table[i].queue = 0; + for (i = 0; i < hw->mac.num_rar_entries; i++, mac_table++) { + mac_table->state |= IXGBE_MAC_STATE_MODIFIED; + mac_table->state &= ~IXGBE_MAC_STATE_IN_USE; } + ixgbe_sync_mac_table(adapter); } -static int ixgbe_available_rars(struct ixgbe_adapter *adapter) +static int ixgbe_available_rars(struct ixgbe_adapter *adapter, u16 pool) { + struct ixgbe_mac_addr *mac_table = &adapter->mac_table[0]; struct ixgbe_hw *hw = &adapter->hw; int i, count = 0; - for (i = 0; i < hw->mac.num_rar_entries; i++) { - if (adapter->mac_table[i].state == 0) - count++; + for (i = 0; i < hw->mac.num_rar_entries; i++, mac_table++) { + /* do not count default RAR as available */ + if (mac_table->state & IXGBE_MAC_STATE_DEFAULT) + continue; + + /* only count unused and addresses that belong to us */ + if (mac_table->state & IXGBE_MAC_STATE_IN_USE) { + if (mac_table->pool != pool) + continue; + } + + count++; } + return count; } /* this function destroys the first RAR entry */ -static void ixgbe_mac_set_default_filter(struct ixgbe_adapter *adapter, - u8 *addr) +static void ixgbe_mac_set_default_filter(struct ixgbe_adapter *adapter) { + struct ixgbe_mac_addr *mac_table = &adapter->mac_table[0]; struct ixgbe_hw *hw = &adapter->hw; - memcpy(&adapter->mac_table[0].addr, addr, ETH_ALEN); - adapter->mac_table[0].queue = VMDQ_P(0); - adapter->mac_table[0].state = (IXGBE_MAC_STATE_DEFAULT | - IXGBE_MAC_STATE_IN_USE); - hw->mac.ops.set_rar(hw, 0, adapter->mac_table[0].addr, - adapter->mac_table[0].queue, + memcpy(&mac_table->addr, hw->mac.addr, ETH_ALEN); + mac_table->pool = VMDQ_P(0); + + mac_table->state = IXGBE_MAC_STATE_DEFAULT | IXGBE_MAC_STATE_IN_USE; + + hw->mac.ops.set_rar(hw, 0, mac_table->addr, mac_table->pool, IXGBE_RAH_AV); } -int ixgbe_add_mac_filter(struct ixgbe_adapter *adapter, u8 *addr, u16 queue) +int ixgbe_add_mac_filter(struct ixgbe_adapter *adapter, + const u8 *addr, u16 pool) { + struct ixgbe_mac_addr *mac_table = &adapter->mac_table[0]; struct ixgbe_hw *hw = &adapter->hw; int i; if (is_zero_ether_addr(addr)) return -EINVAL; - for (i = 0; i < hw->mac.num_rar_entries; i++) { - if (adapter->mac_table[i].state & IXGBE_MAC_STATE_IN_USE) + for (i = 0; i < hw->mac.num_rar_entries; i++, mac_table++) { + if (mac_table->state & IXGBE_MAC_STATE_IN_USE) continue; - adapter->mac_table[i].state |= (IXGBE_MAC_STATE_MODIFIED | - IXGBE_MAC_STATE_IN_USE); - ether_addr_copy(adapter->mac_table[i].addr, addr); - adapter->mac_table[i].queue = queue; + + ether_addr_copy(mac_table->addr, addr); + mac_table->pool = pool; + + mac_table->state |= IXGBE_MAC_STATE_MODIFIED | + IXGBE_MAC_STATE_IN_USE; + ixgbe_sync_mac_table(adapter); + return i; } + return -ENOMEM; } -int ixgbe_del_mac_filter(struct ixgbe_adapter *adapter, u8 *addr, u16 queue) +int ixgbe_del_mac_filter(struct ixgbe_adapter *adapter, + const u8 *addr, u16 pool) { - /* search table for addr, if found, set to 0 and sync */ - int i; + struct ixgbe_mac_addr *mac_table = &adapter->mac_table[0]; struct ixgbe_hw *hw = &adapter->hw; + int i; if (is_zero_ether_addr(addr)) return -EINVAL; - for (i = 0; i < hw->mac.num_rar_entries; i++) { - if (ether_addr_equal(addr, adapter->mac_table[i].addr) && - adapter->mac_table[i].queue == queue) { - adapter->mac_table[i].state |= IXGBE_MAC_STATE_MODIFIED; - adapter->mac_table[i].state &= ~IXGBE_MAC_STATE_IN_USE; - eth_zero_addr(adapter->mac_table[i].addr); - adapter->mac_table[i].queue = 0; - ixgbe_sync_mac_table(adapter); - return 0; - } + /* search table for addr, if found clear IN_USE flag and sync */ + for (i = 0; i < hw->mac.num_rar_entries; i++, mac_table++) { + /* we can only delete an entry if it is in use */ + if (!(mac_table->state & IXGBE_MAC_STATE_IN_USE)) + continue; + /* we only care about entries that belong to the given pool */ + if (mac_table->pool != pool) + continue; + /* we only care about a specific MAC address */ + if (!ether_addr_equal(addr, mac_table->addr)) + continue; + + mac_table->state |= IXGBE_MAC_STATE_MODIFIED; + mac_table->state &= ~IXGBE_MAC_STATE_IN_USE; + + ixgbe_sync_mac_table(adapter); + + return 0; } + return -ENOMEM; } /** @@ -4166,7 +4198,7 @@ static int ixgbe_write_uc_addr_list(struct net_device *netdev, int vfn) int count = 0; /* return ENOMEM indicating insufficient memory for addresses */ - if (netdev_uc_count(netdev) > ixgbe_available_rars(adapter)) + if (netdev_uc_count(netdev) > ixgbe_available_rars(adapter, vfn)) return -ENOMEM; if (!netdev_uc_empty(netdev)) { @@ -5039,7 +5071,6 @@ void ixgbe_reset(struct ixgbe_adapter *adapter) struct ixgbe_hw *hw = &adapter->hw; struct net_device *netdev = adapter->netdev; int err; - u8 old_addr[ETH_ALEN]; if (ixgbe_removed(hw->hw_addr)) return; @@ -5076,9 +5107,8 @@ void ixgbe_reset(struct ixgbe_adapter *adapter) clear_bit(__IXGBE_IN_SFP_INIT, &adapter->state); /* do not flush user set addresses */ - memcpy(old_addr, &adapter->mac_table[0].addr, netdev->addr_len); ixgbe_flush_sw_mac_table(adapter); - ixgbe_mac_set_default_filter(adapter, old_addr); + ixgbe_mac_set_default_filter(adapter); /* update SAN MAC vmdq pool selection */ if (hw->mac.san_mac_rar_index) @@ -7656,17 +7686,16 @@ static int ixgbe_set_mac(struct net_device *netdev, void *p) struct ixgbe_adapter *adapter = netdev_priv(netdev); struct ixgbe_hw *hw = &adapter->hw; struct sockaddr *addr = p; - int ret; if (!is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; - ixgbe_del_mac_filter(adapter, hw->mac.addr, VMDQ_P(0)); memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len); memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len); - ret = ixgbe_add_mac_filter(adapter, hw->mac.addr, VMDQ_P(0)); - return ret > 0 ? 0 : ret; + ixgbe_mac_set_default_filter(adapter); + + return 0; } static int @@ -8867,7 +8896,7 @@ static int ixgbe_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto err_sw_init; } - ixgbe_mac_set_default_filter(adapter, hw->mac.perm_addr); + ixgbe_mac_set_default_filter(adapter); setup_timer(&adapter->service_timer, &ixgbe_service_timer, (unsigned long) adapter); From 0f079d22834ac0529413bdee5b5aa52485942162 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Thu, 22 Oct 2015 16:26:36 -0700 Subject: [PATCH 07/15] ixgbe: Use __dev_uc_sync and __dev_uc_unsync for unicast addresses This change replaces the ixgbe_write_uc_addr_list call in ixgbe_set_rx_mode with a call to __dev_uc_sync instead. This works much better with the MAC addr list code that was already in place and solves an issue in which you couldn't remove an FDB address without having to reset the port. Signed-off-by: Alexander Duyck Tested-by: Darin Miller Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 28 +++++++++++++++++-- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 29f1a36224150..708e2109cdfa8 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -4212,6 +4212,25 @@ static int ixgbe_write_uc_addr_list(struct net_device *netdev, int vfn) return count; } +static int ixgbe_uc_sync(struct net_device *netdev, const unsigned char *addr) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + int ret; + + ret = ixgbe_add_mac_filter(adapter, addr, VMDQ_P(0)); + + return min_t(int, ret, 0); +} + +static int ixgbe_uc_unsync(struct net_device *netdev, const unsigned char *addr) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + + ixgbe_del_mac_filter(adapter, addr, VMDQ_P(0)); + + return 0; +} + /** * ixgbe_set_rx_mode - Unicast, Multicast and Promiscuous mode set * @netdev: network interface device structure @@ -4267,8 +4286,7 @@ void ixgbe_set_rx_mode(struct net_device *netdev) * sufficient space to store all the addresses then enable * unicast promiscuous mode */ - count = ixgbe_write_uc_addr_list(netdev, VMDQ_P(0)); - if (count < 0) { + if (__dev_uc_sync(netdev, ixgbe_uc_sync, ixgbe_uc_unsync)) { fctrl |= IXGBE_FCTRL_UPE; vmolr |= IXGBE_VMOLR_ROPE; } @@ -5106,8 +5124,12 @@ void ixgbe_reset(struct ixgbe_adapter *adapter) } clear_bit(__IXGBE_IN_SFP_INIT, &adapter->state); - /* do not flush user set addresses */ + + /* flush entries out of MAC table */ ixgbe_flush_sw_mac_table(adapter); + __dev_uc_unsync(netdev, NULL); + + /* do not flush user set addresses */ ixgbe_mac_set_default_filter(adapter); /* update SAN MAC vmdq pool selection */ From 2f9be1665585a3757a00a6d1b8201d0ede937a34 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Thu, 22 Oct 2015 16:26:42 -0700 Subject: [PATCH 08/15] ixgbe: Allow FDB entries access to more RAR filters This change makes it so that we allow the PF to make use of all free RAR entries for FDB use if needed. Previously the code limited us to 16 unicast entries, however this was shared between MACVLAN which wasn't limited and the FDB code which was. So instead of treating the FDB code as a second class citizen I have updated it so that it has access to just as many entries as the MACVLAN filters. Signed-off-by: Alexander Duyck Tested-by: Darin Miller Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 708e2109cdfa8..378c44d0f0170 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -8200,7 +8200,10 @@ static int ixgbe_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], { /* guarantee we can provide a unique filter for the unicast address */ if (is_unicast_ether_addr(addr) || is_link_local_ether_addr(addr)) { - if (IXGBE_MAX_PF_MACVLANS <= netdev_uc_count(dev)) + struct ixgbe_adapter *adapter = netdev_priv(dev); + u16 pool = VMDQ_P(0); + + if (netdev_uc_count(dev) >= ixgbe_available_rars(adapter, pool)) return -ENOMEM; } From a9763f3cb54c7f1c6a47962c814935654476d09f Mon Sep 17 00:00:00 2001 From: Mark Rustad Date: Tue, 27 Oct 2015 09:58:07 -0700 Subject: [PATCH 09/15] ixgbe: Update PTP to support X550EM_x devices The X550EM_x devices handle clocking differently, so update the PTP implementation to accommodate them. This involves significant changes to ixgbe's PTP code to accommodate the new range of behaviors including things like non-power-of-2 clock wrapping. Signed-off-by: Mark Rustad Tested-by: Darin Miller Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe.h | 38 +- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 9 +- drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c | 720 +++++++++++++----- drivers/net/ethernet/intel/ixgbe/ixgbe_type.h | 8 + 4 files changed, 578 insertions(+), 197 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index 9214c9d717182..518f339476f8e 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -224,6 +224,8 @@ struct ixgbe_rx_queue_stats { u64 csum_err; }; +#define IXGBE_TS_HDR_LEN 8 + enum ixgbe_ring_state_t { __IXGBE_TX_FDIR_INIT_DONE, __IXGBE_TX_XPS_INIT_DONE, @@ -282,6 +284,8 @@ struct ixgbe_ring { u16 next_to_use; u16 next_to_clean; + unsigned long last_rx_timestamp; + union { u16 next_to_alloc; struct { @@ -640,6 +644,8 @@ struct ixgbe_adapter { #define IXGBE_FLAG_SRIOV_CAPABLE (u32)(1 << 22) #define IXGBE_FLAG_SRIOV_ENABLED (u32)(1 << 23) #define IXGBE_FLAG_VXLAN_OFFLOAD_CAPABLE BIT(24) +#define IXGBE_FLAG_RX_HWTSTAMP_ENABLED BIT(25) +#define IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER BIT(26) u32 flags2; #define IXGBE_FLAG2_RSC_CAPABLE (u32)(1 << 0) @@ -756,9 +762,12 @@ struct ixgbe_adapter { unsigned long last_rx_ptp_check; unsigned long last_rx_timestamp; spinlock_t tmreg_lock; - struct cyclecounter cc; - struct timecounter tc; + struct cyclecounter hw_cc; + struct timecounter hw_tc; u32 base_incval; + u32 tx_hwtstamp_timeouts; + u32 rx_hwtstamp_cleared; + void (*ptp_setup_sdp)(struct ixgbe_adapter *); /* SR-IOV */ DECLARE_BITMAP(active_vfs, IXGBE_MAX_VF_FUNCTIONS); @@ -969,12 +978,33 @@ void ixgbe_ptp_suspend(struct ixgbe_adapter *adapter); void ixgbe_ptp_stop(struct ixgbe_adapter *adapter); void ixgbe_ptp_overflow_check(struct ixgbe_adapter *adapter); void ixgbe_ptp_rx_hang(struct ixgbe_adapter *adapter); -void ixgbe_ptp_rx_hwtstamp(struct ixgbe_adapter *adapter, struct sk_buff *skb); +void ixgbe_ptp_rx_pktstamp(struct ixgbe_q_vector *, struct sk_buff *); +void ixgbe_ptp_rx_rgtstamp(struct ixgbe_q_vector *, struct sk_buff *skb); +static inline void ixgbe_ptp_rx_hwtstamp(struct ixgbe_ring *rx_ring, + union ixgbe_adv_rx_desc *rx_desc, + struct sk_buff *skb) +{ + if (unlikely(ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_TSIP))) { + ixgbe_ptp_rx_pktstamp(rx_ring->q_vector, skb); + return; + } + + if (unlikely(!ixgbe_test_staterr(rx_desc, IXGBE_RXDADV_STAT_TS))) + return; + + ixgbe_ptp_rx_rgtstamp(rx_ring->q_vector, skb); + + /* Update the last_rx_timestamp timer in order to enable watchdog check + * for error case of latched timestamp on a dropped packet. + */ + rx_ring->last_rx_timestamp = jiffies; +} + int ixgbe_ptp_set_ts_config(struct ixgbe_adapter *adapter, struct ifreq *ifr); int ixgbe_ptp_get_ts_config(struct ixgbe_adapter *adapter, struct ifreq *ifr); void ixgbe_ptp_start_cyclecounter(struct ixgbe_adapter *adapter); void ixgbe_ptp_reset(struct ixgbe_adapter *adapter); -void ixgbe_ptp_check_pps_event(struct ixgbe_adapter *adapter, u32 eicr); +void ixgbe_ptp_check_pps_event(struct ixgbe_adapter *adapter); #ifdef CONFIG_PCI_IOV void ixgbe_sriov_reinit(struct ixgbe_adapter *adapter); #endif diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 378c44d0f0170..61926283259a9 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -1634,6 +1634,7 @@ static void ixgbe_process_skb_fields(struct ixgbe_ring *rx_ring, struct sk_buff *skb) { struct net_device *dev = rx_ring->netdev; + u32 flags = rx_ring->q_vector->adapter->flags; ixgbe_update_rsc_stats(rx_ring, skb); @@ -1641,8 +1642,8 @@ static void ixgbe_process_skb_fields(struct ixgbe_ring *rx_ring, ixgbe_rx_checksum(rx_ring, rx_desc, skb); - if (unlikely(ixgbe_test_staterr(rx_desc, IXGBE_RXDADV_STAT_TS))) - ixgbe_ptp_rx_hwtstamp(rx_ring->q_vector->adapter, skb); + if (unlikely(flags & IXGBE_FLAG_RX_HWTSTAMP_ENABLED)) + ixgbe_ptp_rx_hwtstamp(rx_ring, rx_desc, skb); if ((dev->features & NETIF_F_HW_VLAN_CTAG_RX) && ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_VP)) { @@ -2740,7 +2741,7 @@ static irqreturn_t ixgbe_msix_other(int irq, void *data) ixgbe_check_fan_failure(adapter, eicr); if (unlikely(eicr & IXGBE_EICR_TIMESYNC)) - ixgbe_ptp_check_pps_event(adapter, eicr); + ixgbe_ptp_check_pps_event(adapter); /* re-enable the original interrupt state, no lsc, no queues */ if (!test_bit(__IXGBE_DOWN, &adapter->state)) @@ -2947,7 +2948,7 @@ static irqreturn_t ixgbe_intr(int irq, void *data) ixgbe_check_fan_failure(adapter, eicr); if (unlikely(eicr & IXGBE_EICR_TIMESYNC)) - ixgbe_ptp_check_pps_event(adapter, eicr); + ixgbe_ptp_check_pps_event(adapter); /* would disable interrupts here but EIAM disabled it */ napi_schedule_irqoff(&q_vector->napi); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c index e5ba04025e2b9..ef1504d41890f 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c @@ -1,7 +1,7 @@ /******************************************************************************* Intel 10 Gigabit PCI Express Linux driver - Copyright(c) 1999 - 2013 Intel Corporation. + Copyright(c) 1999 - 2015 Intel Corporation. This program is free software; you can redistribute it and/or modify it under the terms and conditions of the GNU General Public License, @@ -27,6 +27,7 @@ *******************************************************************************/ #include "ixgbe.h" #include +#include /* * The 82599 and the X540 do not have true 64bit nanosecond scale @@ -93,7 +94,6 @@ #define IXGBE_INCVAL_SHIFT_82599 7 #define IXGBE_INCPER_SHIFT_82599 24 -#define IXGBE_MAX_TIMEADJ_VALUE 0x7FFFFFFFFFFFFFFFULL #define IXGBE_OVERFLOW_PERIOD (HZ * 30) #define IXGBE_PTP_TX_TIMEOUT (HZ * 15) @@ -104,8 +104,68 @@ */ #define IXGBE_PTP_PPS_HALF_SECOND 500000000ULL +/* In contrast, the X550 controller has two registers, SYSTIMEH and SYSTIMEL + * which contain measurements of seconds and nanoseconds respectively. This + * matches the standard linux representation of time in the kernel. In addition, + * the X550 also has a SYSTIMER register which represents residue, or + * subnanosecond overflow adjustments. To control clock adjustment, the TIMINCA + * register is used, but it is unlike the X540 and 82599 devices. TIMINCA + * represents units of 2^-32 nanoseconds, and uses 31 bits for this, with the + * high bit representing whether the adjustent is positive or negative. Every + * clock cycle, the X550 will add 12.5 ns + TIMINCA which can result in a range + * of 12 to 13 nanoseconds adjustment. Unlike the 82599 and X540 devices, the + * X550's clock for purposes of SYSTIME generation is constant and not dependent + * on the link speed. + * + * SYSTIMEH SYSTIMEL SYSTIMER + * +--------------+ +--------------+ +-------------+ + * X550 | 32 | | 32 | | 32 | + * *--------------+ +--------------+ +-------------+ + * \____seconds___/ \_nanoseconds_/ \__2^-32 ns__/ + * + * This results in a full 96 bits to represent the clock, with 32 bits for + * seconds, 32 bits for nanoseconds (largest value is 0d999999999 or just under + * 1 second) and an additional 32 bits to measure sub nanosecond adjustments for + * underflow of adjustments. + * + * The 32 bits of seconds for the X550 overflows every + * 2^32 / ( 365.25 * 24 * 60 * 60 ) = ~136 years. + * + * In order to adjust the clock frequency for the X550, the TIMINCA register is + * provided. This register represents a + or minus nearly 0.5 ns adjustment to + * the base frequency. It is measured in 2^-32 ns units, with the high bit being + * the sign bit. This register enables software to calculate frequency + * adjustments and apply them directly to the clock rate. + * + * The math for converting ppb into TIMINCA values is fairly straightforward. + * TIMINCA value = ( Base_Frequency * ppb ) / 1000000000ULL + * + * This assumes that ppb is never high enough to create a value bigger than + * TIMINCA's 31 bits can store. This is ensured by the stack. Calculating this + * value is also simple. + * Max ppb = ( Max Adjustment / Base Frequency ) / 1000000000ULL + * + * For the X550, the Max adjustment is +/- 0.5 ns, and the base frequency is + * 12.5 nanoseconds. This means that the Max ppb is 39999999 + * Note: We subtract one in order to ensure no overflow, because the TIMINCA + * register can only hold slightly under 0.5 nanoseconds. + * + * Because TIMINCA is measured in 2^-32 ns units, we have to convert 12.5 ns + * into 2^-32 units, which is + * + * 12.5 * 2^32 = C80000000 + * + * Some revisions of hardware have a faster base frequency than the registers + * were defined for. To fix this, we use a timecounter structure with the + * proper mult and shift to convert the cycles into nanoseconds of time. + */ +#define IXGBE_X550_BASE_PERIOD 0xC80000000ULL +#define INCVALUE_MASK 0x7FFFFFFF +#define ISGN 0x80000000 +#define MAX_TIMADJ 0x7FFFFFFF + /** - * ixgbe_ptp_setup_sdp + * ixgbe_ptp_setup_sdp_x540 * @hw: the hardware private structure * * this function enables or disables the clock out feature on SDP0 for @@ -116,83 +176,116 @@ * aligns the start of the PPS signal to that value. The shift is * necessary because it can change based on the link speed. */ -static void ixgbe_ptp_setup_sdp(struct ixgbe_adapter *adapter) +static void ixgbe_ptp_setup_sdp_x540(struct ixgbe_adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; - int shift = adapter->cc.shift; + int shift = adapter->hw_cc.shift; u32 esdp, tsauxc, clktiml, clktimh, trgttiml, trgttimh, rem; u64 ns = 0, clock_edge = 0; - if ((adapter->flags2 & IXGBE_FLAG2_PTP_PPS_ENABLED) && - (hw->mac.type == ixgbe_mac_X540)) { + /* disable the pin first */ + IXGBE_WRITE_REG(hw, IXGBE_TSAUXC, 0x0); + IXGBE_WRITE_FLUSH(hw); - /* disable the pin first */ - IXGBE_WRITE_REG(hw, IXGBE_TSAUXC, 0x0); - IXGBE_WRITE_FLUSH(hw); + if (!(adapter->flags2 & IXGBE_FLAG2_PTP_PPS_ENABLED)) + return; - esdp = IXGBE_READ_REG(hw, IXGBE_ESDP); + esdp = IXGBE_READ_REG(hw, IXGBE_ESDP); - /* - * enable the SDP0 pin as output, and connected to the - * native function for Timesync (ClockOut) - */ - esdp |= (IXGBE_ESDP_SDP0_DIR | - IXGBE_ESDP_SDP0_NATIVE); + /* enable the SDP0 pin as output, and connected to the + * native function for Timesync (ClockOut) + */ + esdp |= IXGBE_ESDP_SDP0_DIR | + IXGBE_ESDP_SDP0_NATIVE; - /* - * enable the Clock Out feature on SDP0, and allow - * interrupts to occur when the pin changes - */ - tsauxc = (IXGBE_TSAUXC_EN_CLK | - IXGBE_TSAUXC_SYNCLK | - IXGBE_TSAUXC_SDP0_INT); + /* enable the Clock Out feature on SDP0, and allow + * interrupts to occur when the pin changes + */ + tsauxc = IXGBE_TSAUXC_EN_CLK | + IXGBE_TSAUXC_SYNCLK | + IXGBE_TSAUXC_SDP0_INT; - /* clock period (or pulse length) */ - clktiml = (u32)(IXGBE_PTP_PPS_HALF_SECOND << shift); - clktimh = (u32)((IXGBE_PTP_PPS_HALF_SECOND << shift) >> 32); + /* clock period (or pulse length) */ + clktiml = (u32)(IXGBE_PTP_PPS_HALF_SECOND << shift); + clktimh = (u32)((IXGBE_PTP_PPS_HALF_SECOND << shift) >> 32); - /* - * Account for the cyclecounter wrap-around value by - * using the converted ns value of the current time to - * check for when the next aligned second would occur. - */ - clock_edge |= (u64)IXGBE_READ_REG(hw, IXGBE_SYSTIML); - clock_edge |= (u64)IXGBE_READ_REG(hw, IXGBE_SYSTIMH) << 32; - ns = timecounter_cyc2time(&adapter->tc, clock_edge); + /* Account for the cyclecounter wrap-around value by + * using the converted ns value of the current time to + * check for when the next aligned second would occur. + */ + clock_edge |= (u64)IXGBE_READ_REG(hw, IXGBE_SYSTIML); + clock_edge |= (u64)IXGBE_READ_REG(hw, IXGBE_SYSTIMH) << 32; + ns = timecounter_cyc2time(&adapter->hw_tc, clock_edge); - div_u64_rem(ns, IXGBE_PTP_PPS_HALF_SECOND, &rem); - clock_edge += ((IXGBE_PTP_PPS_HALF_SECOND - (u64)rem) << shift); + div_u64_rem(ns, IXGBE_PTP_PPS_HALF_SECOND, &rem); + clock_edge += ((IXGBE_PTP_PPS_HALF_SECOND - (u64)rem) << shift); - /* specify the initial clock start time */ - trgttiml = (u32)clock_edge; - trgttimh = (u32)(clock_edge >> 32); + /* specify the initial clock start time */ + trgttiml = (u32)clock_edge; + trgttimh = (u32)(clock_edge >> 32); - IXGBE_WRITE_REG(hw, IXGBE_CLKTIML, clktiml); - IXGBE_WRITE_REG(hw, IXGBE_CLKTIMH, clktimh); - IXGBE_WRITE_REG(hw, IXGBE_TRGTTIML0, trgttiml); - IXGBE_WRITE_REG(hw, IXGBE_TRGTTIMH0, trgttimh); + IXGBE_WRITE_REG(hw, IXGBE_CLKTIML, clktiml); + IXGBE_WRITE_REG(hw, IXGBE_CLKTIMH, clktimh); + IXGBE_WRITE_REG(hw, IXGBE_TRGTTIML0, trgttiml); + IXGBE_WRITE_REG(hw, IXGBE_TRGTTIMH0, trgttimh); - IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp); - IXGBE_WRITE_REG(hw, IXGBE_TSAUXC, tsauxc); - } else { - IXGBE_WRITE_REG(hw, IXGBE_TSAUXC, 0x0); - } + IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp); + IXGBE_WRITE_REG(hw, IXGBE_TSAUXC, tsauxc); IXGBE_WRITE_FLUSH(hw); } /** - * ixgbe_ptp_read - read raw cycle counter (to be used by time counter) + * ixgbe_ptp_read_X550 - read cycle counter value + * @hw_cc: cyclecounter structure + * + * This function reads SYSTIME registers. It is called by the cyclecounter + * structure to convert from internal representation into nanoseconds. We need + * this for X550 since some skews do not have expected clock frequency and + * result of SYSTIME is 32bits of "billions of cycles" and 32 bits of + * "cycles", rather than seconds and nanoseconds. + */ +static cycle_t ixgbe_ptp_read_X550(const struct cyclecounter *hw_cc) +{ + struct ixgbe_adapter *adapter = + container_of(hw_cc, struct ixgbe_adapter, hw_cc); + struct ixgbe_hw *hw = &adapter->hw; + struct timespec64 ts; + + /* storage is 32 bits of 'billions of cycles' and 32 bits of 'cycles'. + * Some revisions of hardware run at a higher frequency and so the + * cycles are not guaranteed to be nanoseconds. The timespec64 created + * here is used for its math/conversions but does not necessarily + * represent nominal time. + * + * It should be noted that this cyclecounter will overflow at a + * non-bitmask field since we have to convert our billions of cycles + * into an actual cycles count. This results in some possible weird + * situations at high cycle counter stamps. However given that 32 bits + * of "seconds" is ~138 years this isn't a problem. Even at the + * increased frequency of some revisions, this is still ~103 years. + * Since the SYSTIME values start at 0 and we never write them, it is + * highly unlikely for the cyclecounter to overflow in practice. + */ + IXGBE_READ_REG(hw, IXGBE_SYSTIMR); + ts.tv_nsec = IXGBE_READ_REG(hw, IXGBE_SYSTIML); + ts.tv_sec = IXGBE_READ_REG(hw, IXGBE_SYSTIMH); + + return (u64)timespec64_to_ns(&ts); +} + +/** + * ixgbe_ptp_read_82599 - read raw cycle counter (to be used by time counter) * @cc: the cyclecounter structure * * this function reads the cyclecounter registers and is called by the * cyclecounter structure used to construct a ns counter from the * arbitrary fixed point registers */ -static cycle_t ixgbe_ptp_read(const struct cyclecounter *cc) +static cycle_t ixgbe_ptp_read_82599(const struct cyclecounter *cc) { struct ixgbe_adapter *adapter = - container_of(cc, struct ixgbe_adapter, cc); + container_of(cc, struct ixgbe_adapter, hw_cc); struct ixgbe_hw *hw = &adapter->hw; u64 stamp = 0; @@ -203,20 +296,79 @@ static cycle_t ixgbe_ptp_read(const struct cyclecounter *cc) } /** - * ixgbe_ptp_adjfreq + * ixgbe_ptp_convert_to_hwtstamp - convert register value to hw timestamp + * @adapter: private adapter structure + * @hwtstamp: stack timestamp structure + * @systim: unsigned 64bit system time value + * + * We need to convert the adapter's RX/TXSTMP registers into a hwtstamp value + * which can be used by the stack's ptp functions. + * + * The lock is used to protect consistency of the cyclecounter and the SYSTIME + * registers. However, it does not need to protect against the Rx or Tx + * timestamp registers, as there can't be a new timestamp until the old one is + * unlatched by reading. + * + * In addition to the timestamp in hardware, some controllers need a software + * overflow cyclecounter, and this function takes this into account as well. + **/ +static void ixgbe_ptp_convert_to_hwtstamp(struct ixgbe_adapter *adapter, + struct skb_shared_hwtstamps *hwtstamp, + u64 timestamp) +{ + unsigned long flags; + struct timespec64 systime; + u64 ns; + + memset(hwtstamp, 0, sizeof(*hwtstamp)); + + switch (adapter->hw.mac.type) { + /* X550 and later hardware supposedly represent time using a seconds + * and nanoseconds counter, instead of raw 64bits nanoseconds. We need + * to convert the timestamp into cycles before it can be fed to the + * cyclecounter. We need an actual cyclecounter because some revisions + * of hardware run at a higher frequency and thus the counter does + * not represent seconds/nanoseconds. Instead it can be thought of as + * cycles and billions of cycles. + */ + case ixgbe_mac_X550: + case ixgbe_mac_X550EM_x: + /* Upper 32 bits represent billions of cycles, lower 32 bits + * represent cycles. However, we use timespec64_to_ns for the + * correct math even though the units haven't been corrected + * yet. + */ + systime.tv_sec = timestamp >> 32; + systime.tv_nsec = timestamp & 0xFFFFFFFF; + + timestamp = timespec64_to_ns(&systime); + break; + default: + break; + } + + spin_lock_irqsave(&adapter->tmreg_lock, flags); + ns = timecounter_cyc2time(&adapter->hw_tc, timestamp); + spin_unlock_irqrestore(&adapter->tmreg_lock, flags); + + hwtstamp->hwtstamp = ns_to_ktime(ns); +} + +/** + * ixgbe_ptp_adjfreq_82599 * @ptp: the ptp clock structure * @ppb: parts per billion adjustment from base * * adjust the frequency of the ptp cycle counter by the * indicated ppb from the base frequency. */ -static int ixgbe_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb) +static int ixgbe_ptp_adjfreq_82599(struct ptp_clock_info *ptp, s32 ppb) { struct ixgbe_adapter *adapter = container_of(ptp, struct ixgbe_adapter, ptp_caps); struct ixgbe_hw *hw = &adapter->hw; - u64 freq; - u32 diff, incval; + u64 freq, incval; + u32 diff; int neg_adj = 0; if (ppb < 0) { @@ -235,12 +387,16 @@ static int ixgbe_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb) switch (hw->mac.type) { case ixgbe_mac_X540: - IXGBE_WRITE_REG(hw, IXGBE_TIMINCA, incval); + if (incval > 0xFFFFFFFFULL) + e_dev_warn("PTP ppb adjusted SYSTIME rate overflowed!\n"); + IXGBE_WRITE_REG(hw, IXGBE_TIMINCA, (u32)incval); break; case ixgbe_mac_82599EB: + if (incval > 0x00FFFFFFULL) + e_dev_warn("PTP ppb adjusted SYSTIME rate overflowed!\n"); IXGBE_WRITE_REG(hw, IXGBE_TIMINCA, (1 << IXGBE_INCPER_SHIFT_82599) | - incval); + ((u32)incval & 0x00FFFFFFUL)); break; default: break; @@ -249,6 +405,43 @@ static int ixgbe_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb) return 0; } +/** + * ixgbe_ptp_adjfreq_X550 + * @ptp: the ptp clock structure + * @ppb: parts per billion adjustment from base + * + * adjust the frequency of the SYSTIME registers by the indicated ppb from base + * frequency + */ +static int ixgbe_ptp_adjfreq_X550(struct ptp_clock_info *ptp, s32 ppb) +{ + struct ixgbe_adapter *adapter = + container_of(ptp, struct ixgbe_adapter, ptp_caps); + struct ixgbe_hw *hw = &adapter->hw; + int neg_adj = 0; + u64 rate = IXGBE_X550_BASE_PERIOD; + u32 inca; + + if (ppb < 0) { + neg_adj = 1; + ppb = -ppb; + } + rate *= ppb; + rate = div_u64(rate, 1000000000ULL); + + /* warn if rate is too large */ + if (rate >= INCVALUE_MASK) + e_dev_warn("PTP ppb adjusted SYSTIME rate overflowed!\n"); + + inca = rate & INCVALUE_MASK; + if (neg_adj) + inca |= ISGN; + + IXGBE_WRITE_REG(hw, IXGBE_TIMINCA, inca); + + return 0; +} + /** * ixgbe_ptp_adjtime * @ptp: the ptp clock structure @@ -263,10 +456,11 @@ static int ixgbe_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta) unsigned long flags; spin_lock_irqsave(&adapter->tmreg_lock, flags); - timecounter_adjtime(&adapter->tc, delta); + timecounter_adjtime(&adapter->hw_tc, delta); spin_unlock_irqrestore(&adapter->tmreg_lock, flags); - ixgbe_ptp_setup_sdp(adapter); + if (adapter->ptp_setup_sdp) + adapter->ptp_setup_sdp(adapter); return 0; } @@ -283,11 +477,11 @@ static int ixgbe_ptp_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts) { struct ixgbe_adapter *adapter = container_of(ptp, struct ixgbe_adapter, ptp_caps); - u64 ns; unsigned long flags; + u64 ns; spin_lock_irqsave(&adapter->tmreg_lock, flags); - ns = timecounter_read(&adapter->tc); + ns = timecounter_read(&adapter->hw_tc); spin_unlock_irqrestore(&adapter->tmreg_lock, flags); *ts = ns_to_timespec64(ns); @@ -308,17 +502,16 @@ static int ixgbe_ptp_settime(struct ptp_clock_info *ptp, { struct ixgbe_adapter *adapter = container_of(ptp, struct ixgbe_adapter, ptp_caps); - u64 ns; unsigned long flags; - - ns = timespec64_to_ns(ts); + u64 ns = timespec64_to_ns(ts); /* reset the timecounter */ spin_lock_irqsave(&adapter->tmreg_lock, flags); - timecounter_init(&adapter->tc, &adapter->cc, ns); + timecounter_init(&adapter->hw_tc, &adapter->hw_cc, ns); spin_unlock_irqrestore(&adapter->tmreg_lock, flags); - ixgbe_ptp_setup_sdp(adapter); + if (adapter->ptp_setup_sdp) + adapter->ptp_setup_sdp(adapter); return 0; } @@ -343,33 +536,26 @@ static int ixgbe_ptp_feature_enable(struct ptp_clock_info *ptp, * event when the clock SDP triggers. Clear mask when PPS is * disabled */ - if (rq->type == PTP_CLK_REQ_PPS) { - switch (adapter->hw.mac.type) { - case ixgbe_mac_X540: - if (on) - adapter->flags2 |= IXGBE_FLAG2_PTP_PPS_ENABLED; - else - adapter->flags2 &= ~IXGBE_FLAG2_PTP_PPS_ENABLED; - - ixgbe_ptp_setup_sdp(adapter); - return 0; - default: - break; - } - } + if (rq->type != PTP_CLK_REQ_PPS || !adapter->ptp_setup_sdp) + return -ENOTSUPP; + + if (on) + adapter->flags2 |= IXGBE_FLAG2_PTP_PPS_ENABLED; + else + adapter->flags2 &= ~IXGBE_FLAG2_PTP_PPS_ENABLED; - return -ENOTSUPP; + adapter->ptp_setup_sdp(adapter); + return 0; } /** * ixgbe_ptp_check_pps_event * @adapter: the private adapter structure - * @eicr: the interrupt cause register value * * This function is called by the interrupt routine when checking for * interrupts. It will check and handle a pps event. */ -void ixgbe_ptp_check_pps_event(struct ixgbe_adapter *adapter, u32 eicr) +void ixgbe_ptp_check_pps_event(struct ixgbe_adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; struct ptp_clock_event event; @@ -425,7 +611,9 @@ void ixgbe_ptp_rx_hang(struct ixgbe_adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; u32 tsyncrxctl = IXGBE_READ_REG(hw, IXGBE_TSYNCRXCTL); + struct ixgbe_ring *rx_ring; unsigned long rx_event; + int n; /* if we don't have a valid timestamp in the registers, just update the * timeout counter and exit @@ -437,18 +625,42 @@ void ixgbe_ptp_rx_hang(struct ixgbe_adapter *adapter) /* determine the most recent watchdog or rx_timestamp event */ rx_event = adapter->last_rx_ptp_check; - if (time_after(adapter->last_rx_timestamp, rx_event)) - rx_event = adapter->last_rx_timestamp; + for (n = 0; n < adapter->num_rx_queues; n++) { + rx_ring = adapter->rx_ring[n]; + if (time_after(rx_ring->last_rx_timestamp, rx_event)) + rx_event = rx_ring->last_rx_timestamp; + } /* only need to read the high RXSTMP register to clear the lock */ - if (time_is_before_jiffies(rx_event + 5*HZ)) { + if (time_is_before_jiffies(rx_event + 5 * HZ)) { IXGBE_READ_REG(hw, IXGBE_RXSTMPH); adapter->last_rx_ptp_check = jiffies; + adapter->rx_hwtstamp_cleared++; e_warn(drv, "clearing RX Timestamp hang\n"); } } +/** + * ixgbe_ptp_clear_tx_timestamp - utility function to clear Tx timestamp state + * @adapter: the private adapter structure + * + * This function should be called whenever the state related to a Tx timestamp + * needs to be cleared. This helps ensure that all related bits are reset for + * the next Tx timestamp event. + */ +static void ixgbe_ptp_clear_tx_timestamp(struct ixgbe_adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + + IXGBE_READ_REG(hw, IXGBE_TXSTMPH); + if (adapter->ptp_tx_skb) { + dev_kfree_skb_any(adapter->ptp_tx_skb); + adapter->ptp_tx_skb = NULL; + } + clear_bit_unlock(__IXGBE_PTP_TX_IN_PROGRESS, &adapter->state); +} + /** * ixgbe_ptp_tx_hwtstamp - utility function which checks for TX time stamp * @adapter: the private adapter struct @@ -461,23 +673,15 @@ static void ixgbe_ptp_tx_hwtstamp(struct ixgbe_adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; struct skb_shared_hwtstamps shhwtstamps; - u64 regval = 0, ns; - unsigned long flags; + u64 regval = 0; regval |= (u64)IXGBE_READ_REG(hw, IXGBE_TXSTMPL); regval |= (u64)IXGBE_READ_REG(hw, IXGBE_TXSTMPH) << 32; - spin_lock_irqsave(&adapter->tmreg_lock, flags); - ns = timecounter_cyc2time(&adapter->tc, regval); - spin_unlock_irqrestore(&adapter->tmreg_lock, flags); - - memset(&shhwtstamps, 0, sizeof(shhwtstamps)); - shhwtstamps.hwtstamp = ns_to_ktime(ns); + ixgbe_ptp_convert_to_hwtstamp(adapter, &shhwtstamps, regval); skb_tstamp_tx(adapter->ptp_tx_skb, &shhwtstamps); - dev_kfree_skb_any(adapter->ptp_tx_skb); - adapter->ptp_tx_skb = NULL; - clear_bit_unlock(__IXGBE_PTP_TX_IN_PROGRESS, &adapter->state); + ixgbe_ptp_clear_tx_timestamp(adapter); } /** @@ -497,38 +701,85 @@ static void ixgbe_ptp_tx_hwtstamp_work(struct work_struct *work) IXGBE_PTP_TX_TIMEOUT); u32 tsynctxctl; - if (timeout) { - dev_kfree_skb_any(adapter->ptp_tx_skb); - adapter->ptp_tx_skb = NULL; - clear_bit_unlock(__IXGBE_PTP_TX_IN_PROGRESS, &adapter->state); - e_warn(drv, "clearing Tx Timestamp hang\n"); + /* we have to have a valid skb to poll for a timestamp */ + if (!adapter->ptp_tx_skb) { + ixgbe_ptp_clear_tx_timestamp(adapter); return; } + /* stop polling once we have a valid timestamp */ tsynctxctl = IXGBE_READ_REG(hw, IXGBE_TSYNCTXCTL); - if (tsynctxctl & IXGBE_TSYNCTXCTL_VALID) + if (tsynctxctl & IXGBE_TSYNCTXCTL_VALID) { ixgbe_ptp_tx_hwtstamp(adapter); - else + return; + } + + if (timeout) { + ixgbe_ptp_clear_tx_timestamp(adapter); + adapter->tx_hwtstamp_timeouts++; + e_warn(drv, "clearing Tx Timestamp hang\n"); + } else { /* reschedule to keep checking if it's not available yet */ schedule_work(&adapter->ptp_tx_work); + } } /** - * ixgbe_ptp_rx_hwtstamp - utility function which checks for RX time stamp - * @adapter: pointer to adapter struct + * ixgbe_ptp_rx_pktstamp - utility function to get RX time stamp from buffer + * @q_vector: structure containing interrupt and ring information + * @skb: the packet + * + * This function will be called by the Rx routine of the timestamp for this + * packet is stored in the buffer. The value is stored in little endian format + * starting at the end of the packet data. + */ +void ixgbe_ptp_rx_pktstamp(struct ixgbe_q_vector *q_vector, + struct sk_buff *skb) +{ + __le64 regval; + + /* copy the bits out of the skb, and then trim the skb length */ + skb_copy_bits(skb, skb->len - IXGBE_TS_HDR_LEN, ®val, + IXGBE_TS_HDR_LEN); + __pskb_trim(skb, skb->len - IXGBE_TS_HDR_LEN); + + /* The timestamp is recorded in little endian format, and is stored at + * the end of the packet. + * + * DWORD: N N + 1 N + 2 + * Field: End of Packet SYSTIMH SYSTIML + */ + ixgbe_ptp_convert_to_hwtstamp(q_vector->adapter, skb_hwtstamps(skb), + le64_to_cpu(regval)); +} + +/** + * ixgbe_ptp_rx_rgtstamp - utility function which checks for RX time stamp + * @q_vector: structure containing interrupt and ring information * @skb: particular skb to send timestamp with * * if the timestamp is valid, we convert it into the timecounter ns * value, then store that result into the shhwtstamps structure which * is passed up the network stack */ -void ixgbe_ptp_rx_hwtstamp(struct ixgbe_adapter *adapter, struct sk_buff *skb) +void ixgbe_ptp_rx_rgtstamp(struct ixgbe_q_vector *q_vector, + struct sk_buff *skb) { - struct ixgbe_hw *hw = &adapter->hw; - struct skb_shared_hwtstamps *shhwtstamps; - u64 regval = 0, ns; + struct ixgbe_adapter *adapter; + struct ixgbe_hw *hw; + u64 regval = 0; u32 tsyncrxctl; - unsigned long flags; + + /* we cannot process timestamps on a ring without a q_vector */ + if (!q_vector || !q_vector->adapter) + return; + + adapter = q_vector->adapter; + hw = &adapter->hw; + + /* Read the tsyncrxctl register afterwards in order to prevent taking an + * I/O hit on every packet. + */ tsyncrxctl = IXGBE_READ_REG(hw, IXGBE_TSYNCRXCTL); if (!(tsyncrxctl & IXGBE_TSYNCRXCTL_VALID)) @@ -537,17 +788,7 @@ void ixgbe_ptp_rx_hwtstamp(struct ixgbe_adapter *adapter, struct sk_buff *skb) regval |= (u64)IXGBE_READ_REG(hw, IXGBE_RXSTMPL); regval |= (u64)IXGBE_READ_REG(hw, IXGBE_RXSTMPH) << 32; - spin_lock_irqsave(&adapter->tmreg_lock, flags); - ns = timecounter_cyc2time(&adapter->tc, regval); - spin_unlock_irqrestore(&adapter->tmreg_lock, flags); - - shhwtstamps = skb_hwtstamps(skb); - shhwtstamps->hwtstamp = ns_to_ktime(ns); - - /* Update the last_rx_timestamp timer in order to enable watchdog check - * for error case of latched timestamp on a dropped packet. - */ - adapter->last_rx_timestamp = jiffies; + ixgbe_ptp_convert_to_hwtstamp(adapter, skb_hwtstamps(skb), regval); } int ixgbe_ptp_get_ts_config(struct ixgbe_adapter *adapter, struct ifreq *ifr) @@ -610,14 +851,20 @@ static int ixgbe_ptp_set_timestamp_mode(struct ixgbe_adapter *adapter, case HWTSTAMP_FILTER_NONE: tsync_rx_ctl = 0; tsync_rx_mtrl = 0; + adapter->flags &= ~(IXGBE_FLAG_RX_HWTSTAMP_ENABLED | + IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER); break; case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: tsync_rx_ctl |= IXGBE_TSYNCRXCTL_TYPE_L4_V1; tsync_rx_mtrl |= IXGBE_RXMTRL_V1_SYNC_MSG; + adapter->flags &= ~(IXGBE_FLAG_RX_HWTSTAMP_ENABLED | + IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER); break; case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: tsync_rx_ctl |= IXGBE_TSYNCRXCTL_TYPE_L4_V1; tsync_rx_mtrl |= IXGBE_RXMTRL_V1_DELAY_REQ_MSG; + adapter->flags &= ~(IXGBE_FLAG_RX_HWTSTAMP_ENABLED | + IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER); break; case HWTSTAMP_FILTER_PTP_V2_EVENT: case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: @@ -631,9 +878,21 @@ static int ixgbe_ptp_set_timestamp_mode(struct ixgbe_adapter *adapter, tsync_rx_ctl |= IXGBE_TSYNCRXCTL_TYPE_EVENT_V2; is_l2 = true; config->rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT; + adapter->flags &= ~(IXGBE_FLAG_RX_HWTSTAMP_ENABLED | + IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER); break; case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: case HWTSTAMP_FILTER_ALL: + /* The X550 controller is capable of timestamping all packets, + * which allows it to accept any filter. + */ + if (hw->mac.type >= ixgbe_mac_X550) { + tsync_rx_ctl |= IXGBE_TSYNCRXCTL_TYPE_ALL; + config->rx_filter = HWTSTAMP_FILTER_ALL; + adapter->flags |= IXGBE_FLAG_RX_HWTSTAMP_ENABLED; + break; + } + /* fall through */ default: /* * register RXMTRL must be set in order to do V1 packets, @@ -641,16 +900,46 @@ static int ixgbe_ptp_set_timestamp_mode(struct ixgbe_adapter *adapter, * Delay_Req messages and hardware does not support * timestamping all packets => return error */ + adapter->flags &= ~(IXGBE_FLAG_RX_HWTSTAMP_ENABLED | + IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER); config->rx_filter = HWTSTAMP_FILTER_NONE; return -ERANGE; } if (hw->mac.type == ixgbe_mac_82598EB) { + adapter->flags &= ~(IXGBE_FLAG_RX_HWTSTAMP_ENABLED | + IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER); if (tsync_rx_ctl | tsync_tx_ctl) return -ERANGE; return 0; } + /* Per-packet timestamping only works if the filter is set to all + * packets. Since this is desired, always timestamp all packets as long + * as any Rx filter was configured. + */ + switch (hw->mac.type) { + case ixgbe_mac_X550: + case ixgbe_mac_X550EM_x: + /* enable timestamping all packets only if at least some + * packets were requested. Otherwise, play nice and disable + * timestamping + */ + if (config->rx_filter == HWTSTAMP_FILTER_NONE) + break; + + tsync_rx_ctl = IXGBE_TSYNCRXCTL_ENABLED | + IXGBE_TSYNCRXCTL_TYPE_ALL | + IXGBE_TSYNCRXCTL_TSIP_UT_EN; + config->rx_filter = HWTSTAMP_FILTER_ALL; + adapter->flags |= IXGBE_FLAG_RX_HWTSTAMP_ENABLED; + adapter->flags &= ~IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER; + is_l2 = true; + break; + default: + break; + } + /* define ethertype filter for timestamping L2 packets */ if (is_l2) IXGBE_WRITE_REG(hw, IXGBE_ETQF(IXGBE_ETQF_FILTER_1588), @@ -678,8 +967,8 @@ static int ixgbe_ptp_set_timestamp_mode(struct ixgbe_adapter *adapter, IXGBE_WRITE_FLUSH(hw); /* clear TX/RX time stamp registers, just to be sure */ - regval = IXGBE_READ_REG(hw, IXGBE_TXSTMPH); - regval = IXGBE_READ_REG(hw, IXGBE_RXSTMPH); + ixgbe_ptp_clear_tx_timestamp(adapter); + IXGBE_READ_REG(hw, IXGBE_RXSTMPH); return 0; } @@ -712,23 +1001,9 @@ int ixgbe_ptp_set_ts_config(struct ixgbe_adapter *adapter, struct ifreq *ifr) -EFAULT : 0; } -/** - * ixgbe_ptp_start_cyclecounter - create the cycle counter from hw - * @adapter: pointer to the adapter structure - * - * This function should be called to set the proper values for the TIMINCA - * register and tell the cyclecounter structure what the tick rate of SYSTIME - * is. It does not directly modify SYSTIME registers or the timecounter - * structure. It should be called whenever a new TIMINCA value is necessary, - * such as during initialization or when the link speed changes. - */ -void ixgbe_ptp_start_cyclecounter(struct ixgbe_adapter *adapter) +static void ixgbe_ptp_link_speed_adjust(struct ixgbe_adapter *adapter, + u32 *shift, u32 *incval) { - struct ixgbe_hw *hw = &adapter->hw; - u32 incval = 0; - u32 shift = 0; - unsigned long flags; - /** * Scale the NIC cycle counter by a large factor so that * relatively small corrections to the frequency can be added @@ -745,36 +1020,98 @@ void ixgbe_ptp_start_cyclecounter(struct ixgbe_adapter *adapter) */ switch (adapter->link_speed) { case IXGBE_LINK_SPEED_100_FULL: - incval = IXGBE_INCVAL_100; - shift = IXGBE_INCVAL_SHIFT_100; + *shift = IXGBE_INCVAL_SHIFT_100; + *incval = IXGBE_INCVAL_100; break; case IXGBE_LINK_SPEED_1GB_FULL: - incval = IXGBE_INCVAL_1GB; - shift = IXGBE_INCVAL_SHIFT_1GB; + *shift = IXGBE_INCVAL_SHIFT_1GB; + *incval = IXGBE_INCVAL_1GB; break; case IXGBE_LINK_SPEED_10GB_FULL: default: - incval = IXGBE_INCVAL_10GB; - shift = IXGBE_INCVAL_SHIFT_10GB; + *shift = IXGBE_INCVAL_SHIFT_10GB; + *incval = IXGBE_INCVAL_10GB; break; } +} - /** - * Modify the calculated values to fit within the correct - * number of bits specified by the hardware. The 82599 doesn't - * have the same space as the X540, so bitshift the calculated - * values to fit. +/** + * ixgbe_ptp_start_cyclecounter - create the cycle counter from hw + * @adapter: pointer to the adapter structure + * + * This function should be called to set the proper values for the TIMINCA + * register and tell the cyclecounter structure what the tick rate of SYSTIME + * is. It does not directly modify SYSTIME registers or the timecounter + * structure. It should be called whenever a new TIMINCA value is necessary, + * such as during initialization or when the link speed changes. + */ +void ixgbe_ptp_start_cyclecounter(struct ixgbe_adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + struct cyclecounter cc; + unsigned long flags; + u32 incval = 0; + u32 tsauxc = 0; + u32 fuse0 = 0; + + /* For some of the boards below this mask is technically incorrect. + * The timestamp mask overflows at approximately 61bits. However the + * particular hardware does not overflow on an even bitmask value. + * Instead, it overflows due to conversion of upper 32bits billions of + * cycles. Timecounters are not really intended for this purpose so + * they do not properly function if the overflow point isn't 2^N-1. + * However, the actual SYSTIME values in question take ~138 years to + * overflow. In practice this means they won't actually overflow. A + * proper fix to this problem would require modification of the + * timecounter delta calculations. */ + cc.mask = CLOCKSOURCE_MASK(64); + cc.mult = 1; + cc.shift = 0; + switch (hw->mac.type) { + case ixgbe_mac_X550EM_x: + /* SYSTIME assumes X550EM_x board frequency is 300Mhz, and is + * designed to represent seconds and nanoseconds when this is + * the case. However, some revisions of hardware have a 400Mhz + * clock and we have to compensate for this frequency + * variation using corrected mult and shift values. + */ + fuse0 = IXGBE_READ_REG(hw, IXGBE_FUSES0_GROUP(0)); + if (!(fuse0 & IXGBE_FUSES0_300MHZ)) { + cc.mult = 3; + cc.shift = 2; + } + /* fallthrough */ + case ixgbe_mac_X550: + cc.read = ixgbe_ptp_read_X550; + + /* enable SYSTIME counter */ + IXGBE_WRITE_REG(hw, IXGBE_SYSTIMR, 0); + IXGBE_WRITE_REG(hw, IXGBE_SYSTIML, 0); + IXGBE_WRITE_REG(hw, IXGBE_SYSTIMH, 0); + tsauxc = IXGBE_READ_REG(hw, IXGBE_TSAUXC); + IXGBE_WRITE_REG(hw, IXGBE_TSAUXC, + tsauxc & ~IXGBE_TSAUXC_DISABLE_SYSTIME); + IXGBE_WRITE_REG(hw, IXGBE_TSIM, IXGBE_TSIM_TXTS); + IXGBE_WRITE_REG(hw, IXGBE_EIMS, IXGBE_EIMS_TIMESYNC); + + IXGBE_WRITE_FLUSH(hw); + break; case ixgbe_mac_X540: + cc.read = ixgbe_ptp_read_82599; + + ixgbe_ptp_link_speed_adjust(adapter, &cc.shift, &incval); IXGBE_WRITE_REG(hw, IXGBE_TIMINCA, incval); break; case ixgbe_mac_82599EB: + cc.read = ixgbe_ptp_read_82599; + + ixgbe_ptp_link_speed_adjust(adapter, &cc.shift, &incval); incval >>= IXGBE_INCVAL_SHIFT_82599; - shift -= IXGBE_INCVAL_SHIFT_82599; + cc.shift -= IXGBE_INCVAL_SHIFT_82599; IXGBE_WRITE_REG(hw, IXGBE_TIMINCA, - (1 << IXGBE_INCPER_SHIFT_82599) | - incval); + (1 << IXGBE_INCPER_SHIFT_82599) | incval); break; default: /* other devices aren't supported */ @@ -787,13 +1124,7 @@ void ixgbe_ptp_start_cyclecounter(struct ixgbe_adapter *adapter) /* need lock to prevent incorrect read while modifying cyclecounter */ spin_lock_irqsave(&adapter->tmreg_lock, flags); - - memset(&adapter->cc, 0, sizeof(adapter->cc)); - adapter->cc.read = ixgbe_ptp_read; - adapter->cc.mask = CYCLECOUNTER_MASK(64); - adapter->cc.shift = shift; - adapter->cc.mult = 1; - + memcpy(&adapter->hw_cc, &cc, sizeof(adapter->hw_cc)); spin_unlock_irqrestore(&adapter->tmreg_lock, flags); } @@ -814,29 +1145,27 @@ void ixgbe_ptp_reset(struct ixgbe_adapter *adapter) struct ixgbe_hw *hw = &adapter->hw; unsigned long flags; - /* set SYSTIME registers to 0 just in case */ - IXGBE_WRITE_REG(hw, IXGBE_SYSTIML, 0x00000000); - IXGBE_WRITE_REG(hw, IXGBE_SYSTIMH, 0x00000000); - IXGBE_WRITE_FLUSH(hw); - /* reset the hardware timestamping mode */ ixgbe_ptp_set_timestamp_mode(adapter, &adapter->tstamp_config); + /* 82598 does not support PTP */ + if (hw->mac.type == ixgbe_mac_82598EB) + return; + ixgbe_ptp_start_cyclecounter(adapter); spin_lock_irqsave(&adapter->tmreg_lock, flags); - - /* reset the ns time counter */ - timecounter_init(&adapter->tc, &adapter->cc, + timecounter_init(&adapter->hw_tc, &adapter->hw_cc, ktime_to_ns(ktime_get_real())); - spin_unlock_irqrestore(&adapter->tmreg_lock, flags); - /* - * Now that the shift has been calculated and the systime + adapter->last_overflow_check = jiffies; + + /* Now that the shift has been calculated and the systime * registers reset, (re-)enable the Clock out feature */ - ixgbe_ptp_setup_sdp(adapter); + if (adapter->ptp_setup_sdp) + adapter->ptp_setup_sdp(adapter); } /** @@ -845,11 +1174,11 @@ void ixgbe_ptp_reset(struct ixgbe_adapter *adapter) * * This function performs setup of the user entry point function table and * initializes the PTP clock device, which is used to access the clock-like - * features of the PTP core. It will be called by ixgbe_ptp_init, only if - * there isn't already a clock device (such as after a suspend/resume cycle, - * where the clock device wasn't destroyed). + * features of the PTP core. It will be called by ixgbe_ptp_init, and may + * reuse a previously initialized clock (such as during a suspend/resume + * cycle). */ -static int ixgbe_ptp_create_clock(struct ixgbe_adapter *adapter) +static long ixgbe_ptp_create_clock(struct ixgbe_adapter *adapter) { struct net_device *netdev = adapter->netdev; long err; @@ -869,11 +1198,12 @@ static int ixgbe_ptp_create_clock(struct ixgbe_adapter *adapter) adapter->ptp_caps.n_ext_ts = 0; adapter->ptp_caps.n_per_out = 0; adapter->ptp_caps.pps = 1; - adapter->ptp_caps.adjfreq = ixgbe_ptp_adjfreq; + adapter->ptp_caps.adjfreq = ixgbe_ptp_adjfreq_82599; adapter->ptp_caps.adjtime = ixgbe_ptp_adjtime; adapter->ptp_caps.gettime64 = ixgbe_ptp_gettime; adapter->ptp_caps.settime64 = ixgbe_ptp_settime; adapter->ptp_caps.enable = ixgbe_ptp_feature_enable; + adapter->ptp_setup_sdp = ixgbe_ptp_setup_sdp_x540; break; case ixgbe_mac_82599EB: snprintf(adapter->ptp_caps.name, @@ -885,14 +1215,31 @@ static int ixgbe_ptp_create_clock(struct ixgbe_adapter *adapter) adapter->ptp_caps.n_ext_ts = 0; adapter->ptp_caps.n_per_out = 0; adapter->ptp_caps.pps = 0; - adapter->ptp_caps.adjfreq = ixgbe_ptp_adjfreq; + adapter->ptp_caps.adjfreq = ixgbe_ptp_adjfreq_82599; + adapter->ptp_caps.adjtime = ixgbe_ptp_adjtime; + adapter->ptp_caps.gettime64 = ixgbe_ptp_gettime; + adapter->ptp_caps.settime64 = ixgbe_ptp_settime; + adapter->ptp_caps.enable = ixgbe_ptp_feature_enable; + break; + case ixgbe_mac_X550: + case ixgbe_mac_X550EM_x: + snprintf(adapter->ptp_caps.name, 16, "%s", netdev->name); + adapter->ptp_caps.owner = THIS_MODULE; + adapter->ptp_caps.max_adj = 30000000; + adapter->ptp_caps.n_alarm = 0; + adapter->ptp_caps.n_ext_ts = 0; + adapter->ptp_caps.n_per_out = 0; + adapter->ptp_caps.pps = 0; + adapter->ptp_caps.adjfreq = ixgbe_ptp_adjfreq_X550; adapter->ptp_caps.adjtime = ixgbe_ptp_adjtime; adapter->ptp_caps.gettime64 = ixgbe_ptp_gettime; adapter->ptp_caps.settime64 = ixgbe_ptp_settime; adapter->ptp_caps.enable = ixgbe_ptp_feature_enable; + adapter->ptp_setup_sdp = NULL; break; default: adapter->ptp_clock = NULL; + adapter->ptp_setup_sdp = NULL; return -EOPNOTSUPP; } @@ -961,18 +1308,13 @@ void ixgbe_ptp_suspend(struct ixgbe_adapter *adapter) if (!test_and_clear_bit(__IXGBE_PTP_RUNNING, &adapter->state)) return; - /* since this might be called in suspend, we don't clear the state, - * but simply reset the auxiliary PPS signal control register - */ - IXGBE_WRITE_REG(&adapter->hw, IXGBE_TSAUXC, 0x0); + adapter->flags2 &= ~IXGBE_FLAG2_PTP_PPS_ENABLED; + if (adapter->ptp_setup_sdp) + adapter->ptp_setup_sdp(adapter); /* ensure that we cancel any pending PTP Tx work item in progress */ cancel_work_sync(&adapter->ptp_tx_work); - if (adapter->ptp_tx_skb) { - dev_kfree_skb_any(adapter->ptp_tx_skb); - adapter->ptp_tx_skb = NULL; - clear_bit_unlock(__IXGBE_PTP_TX_IN_PROGRESS, &adapter->state); - } + ixgbe_ptp_clear_tx_timestamp(adapter); } /** diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h index 927349da232ac..1329eddfc9ce7 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h @@ -1020,6 +1020,7 @@ struct ixgbe_thermal_sensor_data { #define IXGBE_TXSTMPH 0x08C08 /* Tx timestamp value High - RO */ #define IXGBE_SYSTIML 0x08C0C /* System time register Low - RO */ #define IXGBE_SYSTIMH 0x08C10 /* System time register High - RO */ +#define IXGBE_SYSTIMR 0x08C58 /* System time register Residue - RO */ #define IXGBE_TIMINCA 0x08C14 /* Increment attributes register - RW */ #define IXGBE_TIMADJL 0x08C18 /* Time Adjustment Offset register Low - RW */ #define IXGBE_TIMADJH 0x08C1C /* Time Adjustment Offset register High - RW */ @@ -1036,6 +1037,7 @@ struct ixgbe_thermal_sensor_data { #define IXGBE_AUXSTMPH0 0x08C40 /* Auxiliary Time Stamp 0 register High - RO */ #define IXGBE_AUXSTMPL1 0x08C44 /* Auxiliary Time Stamp 1 register Low - RO */ #define IXGBE_AUXSTMPH1 0x08C48 /* Auxiliary Time Stamp 1 register High - RO */ +#define IXGBE_TSIM 0x08C68 /* TimeSync Interrupt Mask Register - RW */ /* Diagnostic Registers */ #define IXGBE_RDSTATCTL 0x02C20 @@ -2213,6 +2215,7 @@ enum { #define IXGBE_TSAUXC_EN_CLK 0x00000004 #define IXGBE_TSAUXC_SYNCLK 0x00000008 #define IXGBE_TSAUXC_SDP0_INT 0x00000040 +#define IXGBE_TSAUXC_DISABLE_SYSTIME 0x80000000 #define IXGBE_TSYNCTXCTL_VALID 0x00000001 /* Tx timestamp valid */ #define IXGBE_TSYNCTXCTL_ENABLED 0x00000010 /* Tx timestamping enabled */ @@ -2222,8 +2225,12 @@ enum { #define IXGBE_TSYNCRXCTL_TYPE_L2_V2 0x00 #define IXGBE_TSYNCRXCTL_TYPE_L4_V1 0x02 #define IXGBE_TSYNCRXCTL_TYPE_L2_L4_V2 0x04 +#define IXGBE_TSYNCRXCTL_TYPE_ALL 0x08 #define IXGBE_TSYNCRXCTL_TYPE_EVENT_V2 0x0A #define IXGBE_TSYNCRXCTL_ENABLED 0x00000010 /* Rx Timestamping enabled */ +#define IXGBE_TSYNCRXCTL_TSIP_UT_EN 0x00800000 /* Rx Timestamp in Packet */ + +#define IXGBE_TSIM_TXTS 0x00000002 #define IXGBE_RXMTRL_V1_CTRLT_MASK 0x000000FF #define IXGBE_RXMTRL_V1_SYNC_MSG 0x00 @@ -2336,6 +2343,7 @@ enum { #define IXGBE_RXD_STAT_UDPV 0x400 /* Valid UDP checksum */ #define IXGBE_RXD_STAT_DYNINT 0x800 /* Pkt caused INT via DYNINT */ #define IXGBE_RXD_STAT_LLINT 0x800 /* Pkt caused Low Latency Interrupt */ +#define IXGBE_RXD_STAT_TSIP 0x08000 /* Time Stamp in packet buffer */ #define IXGBE_RXD_STAT_TS 0x10000 /* Time Stamp */ #define IXGBE_RXD_STAT_SECP 0x20000 /* Security Processing */ #define IXGBE_RXD_STAT_LB 0x40000 /* Loopback Status */ From efff2e027758fd5cc739d500397f729591f32a94 Mon Sep 17 00:00:00 2001 From: Mark Rustad Date: Tue, 27 Oct 2015 13:23:14 -0700 Subject: [PATCH 10/15] ixgbe: Correct spec violations by waiting after reset The ixgbe driver was violating the specification in the datasheet by not waiting 1ms before checking for the reset bit clearing. This is called out for devices supported by ixgbe, so implement the required delay. Reported-by: Dan Streetman Signed-off-by: Mark Rustad Tested-by: Darin Miller Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c | 5 +++-- drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c | 3 ++- drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c | 3 ++- drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c | 3 ++- 4 files changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c index 65db69b862fb9..8f09d291a0430 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c @@ -1,7 +1,7 @@ /******************************************************************************* Intel 10 Gigabit PCI Express Linux driver - Copyright(c) 1999 - 2014 Intel Corporation. + Copyright(c) 1999 - 2015 Intel Corporation. This program is free software; you can redistribute it and/or modify it under the terms and conditions of the GNU General Public License, @@ -765,13 +765,14 @@ static s32 ixgbe_reset_hw_82598(struct ixgbe_hw *hw) ctrl = IXGBE_READ_REG(hw, IXGBE_CTRL) | IXGBE_CTRL_RST; IXGBE_WRITE_REG(hw, IXGBE_CTRL, ctrl); IXGBE_WRITE_FLUSH(hw); + usleep_range(1000, 1200); /* Poll for reset bit to self-clear indicating reset is complete */ for (i = 0; i < 10; i++) { - udelay(1); ctrl = IXGBE_READ_REG(hw, IXGBE_CTRL); if (!(ctrl & IXGBE_CTRL_RST)) break; + udelay(1); } if (ctrl & IXGBE_CTRL_RST) { status = IXGBE_ERR_RESET_FAILED; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c index a39afcf03e2c4..b8bd72589f729 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c @@ -990,13 +990,14 @@ static s32 ixgbe_reset_hw_82599(struct ixgbe_hw *hw) ctrl |= IXGBE_READ_REG(hw, IXGBE_CTRL); IXGBE_WRITE_REG(hw, IXGBE_CTRL, ctrl); IXGBE_WRITE_FLUSH(hw); + usleep_range(1000, 1200); /* Poll for reset bit to self-clear indicating reset is complete */ for (i = 0; i < 10; i++) { - udelay(1); ctrl = IXGBE_READ_REG(hw, IXGBE_CTRL); if (!(ctrl & IXGBE_CTRL_RST_MASK)) break; + udelay(1); } if (ctrl & IXGBE_CTRL_RST_MASK) { diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c index c1d4584f6469d..b9e9b0c173988 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c @@ -110,13 +110,14 @@ s32 ixgbe_reset_hw_X540(struct ixgbe_hw *hw) ctrl |= IXGBE_READ_REG(hw, IXGBE_CTRL); IXGBE_WRITE_REG(hw, IXGBE_CTRL, ctrl); IXGBE_WRITE_FLUSH(hw); + usleep_range(1000, 1200); /* Poll for reset bit to self-clear indicating reset is complete */ for (i = 0; i < 10; i++) { - udelay(1); ctrl = IXGBE_READ_REG(hw, IXGBE_CTRL); if (!(ctrl & IXGBE_CTRL_RST_MASK)) break; + udelay(1); } if (ctrl & IXGBE_CTRL_RST_MASK) { diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c index b8ad3f212e726..f4ef0d1a5dbea 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c @@ -2146,13 +2146,14 @@ static s32 ixgbe_reset_hw_X550em(struct ixgbe_hw *hw) ctrl |= IXGBE_READ_REG(hw, IXGBE_CTRL); IXGBE_WRITE_REG(hw, IXGBE_CTRL, ctrl); IXGBE_WRITE_FLUSH(hw); + usleep_range(1000, 1200); /* Poll for reset bit to self-clear meaning reset is complete */ for (i = 0; i < 10; i++) { - udelay(1); ctrl = IXGBE_READ_REG(hw, IXGBE_CTRL); if (!(ctrl & IXGBE_CTRL_RST_MASK)) break; + udelay(1); } if (ctrl & IXGBE_CTRL_RST_MASK) { From 48b44612738793252c97c548f3d0bd56543d5273 Mon Sep 17 00:00:00 2001 From: Mark Rustad Date: Tue, 27 Oct 2015 13:23:23 -0700 Subject: [PATCH 11/15] ixgbe: Wait for master disable to be set According to the datasheets, the driver should wait for the master disable bit to read as being set before checking the status register for master disable. Reported-by: Dan Streetman Signed-off-by: Mark Rustad Tested-by: Darin Miller Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_common.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c index ce61b36b94f10..daec6aef5dc8b 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c @@ -1,7 +1,7 @@ /******************************************************************************* Intel 10 Gigabit PCI Express Linux driver - Copyright(c) 1999 - 2014 Intel Corporation. + Copyright(c) 1999 - 2015 Intel Corporation. This program is free software; you can redistribute it and/or modify it under the terms and conditions of the GNU General Public License, @@ -2454,6 +2454,17 @@ static s32 ixgbe_disable_pcie_master(struct ixgbe_hw *hw) /* Always set this bit to ensure any future transactions are blocked */ IXGBE_WRITE_REG(hw, IXGBE_CTRL, IXGBE_CTRL_GIO_DIS); + /* Poll for bit to read as set */ + for (i = 0; i < IXGBE_PCI_MASTER_DISABLE_TIMEOUT; i++) { + if (IXGBE_READ_REG(hw, IXGBE_CTRL) & IXGBE_CTRL_GIO_DIS) + break; + usleep_range(100, 120); + } + if (i >= IXGBE_PCI_MASTER_DISABLE_TIMEOUT) { + hw_dbg(hw, "GIO disable did not set - requesting resets\n"); + goto gio_disable_fail; + } + /* Exit if master requests are blocked */ if (!(IXGBE_READ_REG(hw, IXGBE_STATUS) & IXGBE_STATUS_GIO) || ixgbe_removed(hw->hw_addr)) @@ -2475,6 +2486,7 @@ static s32 ixgbe_disable_pcie_master(struct ixgbe_hw *hw) * again to clear out any effects they may have had on our device. */ hw_dbg(hw, "GIO Master Disable bit didn't clear - requesting resets\n"); +gio_disable_fail: hw->mac.flags |= IXGBE_FLAGS_DOUBLE_RESET_REQUIRED; if (hw->mac.type >= ixgbe_mac_X550) From 988d13073fe122f0b6a2b80b5f2aa1b0717f9edb Mon Sep 17 00:00:00 2001 From: Mark Rustad Date: Fri, 30 Oct 2015 15:29:34 -0700 Subject: [PATCH 12/15] ixgbe: Save VF info and take references Save VF device pointers and take references to speed accesses used to monitor the device behavior to avoid slot resets. The saved information avoids lock contention during the search used to access each of the VFs. Signed-off-by: Mark Rustad Tested-by: Darin Miller Tested-by: Phil Schmitt Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe.h | 1 + drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 31 ++++-------- .../net/ethernet/intel/ixgbe/ixgbe_sriov.c | 50 ++++++++++++++++++- 3 files changed, 59 insertions(+), 23 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index 518f339476f8e..445b4c9169b61 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -139,6 +139,7 @@ enum ixgbe_tx_flags { #define IXGBE_X540_VF_DEVICE_ID 0x1515 struct vf_data_storage { + struct pci_dev *vfdev; unsigned char vf_mac_addresses[ETH_ALEN]; u16 vf_mc_hashes[IXGBE_MAX_VF_MC_ENTRIES]; u16 num_vf_mc_hashes; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 61926283259a9..a10a0facb4e86 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -6666,10 +6666,8 @@ static void ixgbe_check_for_bad_vf(struct ixgbe_adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; struct pci_dev *pdev = adapter->pdev; - struct pci_dev *vfdev; + unsigned int vf; u32 gpc; - int pos; - unsigned short vf_id; if (!(netif_carrier_ok(adapter->netdev))) return; @@ -6686,26 +6684,17 @@ static void ixgbe_check_for_bad_vf(struct ixgbe_adapter *adapter) if (!pdev) return; - pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_SRIOV); - if (!pos) - return; - - /* get the device ID for the VF */ - pci_read_config_word(pdev, pos + PCI_SRIOV_VF_DID, &vf_id); - /* check status reg for all VFs owned by this PF */ - vfdev = pci_get_device(pdev->vendor, vf_id, NULL); - while (vfdev) { - if (vfdev->is_virtfn && (vfdev->physfn == pdev)) { - u16 status_reg; - - pci_read_config_word(vfdev, PCI_STATUS, &status_reg); - if (status_reg & PCI_STATUS_REC_MASTER_ABORT) - /* issue VFLR */ - ixgbe_issue_vf_flr(adapter, vfdev); - } + for (vf = 0; vf < adapter->num_vfs; ++vf) { + struct pci_dev *vfdev = adapter->vfinfo[vf].vfdev; + u16 status_reg; - vfdev = pci_get_device(pdev->vendor, vf_id, vfdev); + if (!vfdev) + continue; + pci_read_config_word(vfdev, PCI_STATUS, &status_reg); + if (status_reg != IXGBE_FAILED_READ_CFG_WORD && + status_reg & PCI_STATUS_REC_MASTER_ABORT) + ixgbe_issue_vf_flr(adapter, vfdev); } } diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c index fcd8b27a0ccba..31de6cf7adb01 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c @@ -1,7 +1,7 @@ /******************************************************************************* Intel 10 Gigabit PCI Express Linux driver - Copyright(c) 1999 - 2014 Intel Corporation. + Copyright(c) 1999 - 2015 Intel Corporation. This program is free software; you can redistribute it and/or modify it under the terms and conditions of the GNU General Public License, @@ -130,6 +130,38 @@ static int __ixgbe_enable_sriov(struct ixgbe_adapter *adapter) return -ENOMEM; } +/** + * ixgbe_get_vfs - Find and take references to all vf devices + * @adapter: Pointer to adapter struct + */ +static void ixgbe_get_vfs(struct ixgbe_adapter *adapter) +{ + struct pci_dev *pdev = adapter->pdev; + u16 vendor = pdev->vendor; + struct pci_dev *vfdev; + int vf = 0; + u16 vf_id; + int pos; + + pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_SRIOV); + if (!pos) + return; + pci_read_config_word(pdev, pos + PCI_SRIOV_VF_DID, &vf_id); + + vfdev = pci_get_device(vendor, vf_id, NULL); + for (; vfdev; vfdev = pci_get_device(vendor, vf_id, vfdev)) { + if (!vfdev->is_virtfn) + continue; + if (vfdev->physfn != pdev) + continue; + if (vf >= adapter->num_vfs) + continue; + pci_dev_get(vfdev); + adapter->vfinfo[vf].vfdev = vfdev; + ++vf; + } +} + /* Note this function is called when the user wants to enable SR-IOV * VFs using the now deprecated module parameter */ @@ -170,8 +202,10 @@ void ixgbe_enable_sriov(struct ixgbe_adapter *adapter) } } - if (!__ixgbe_enable_sriov(adapter)) + if (!__ixgbe_enable_sriov(adapter)) { + ixgbe_get_vfs(adapter); return; + } /* If we have gotten to this point then there is no memory available * to manage the VF devices - print message and bail. @@ -184,6 +218,7 @@ void ixgbe_enable_sriov(struct ixgbe_adapter *adapter) #endif /* #ifdef CONFIG_PCI_IOV */ int ixgbe_disable_sriov(struct ixgbe_adapter *adapter) { + unsigned int num_vfs = adapter->num_vfs, vf; struct ixgbe_hw *hw = &adapter->hw; u32 gpie; u32 vmdctl; @@ -192,6 +227,16 @@ int ixgbe_disable_sriov(struct ixgbe_adapter *adapter) /* set num VFs to 0 to prevent access to vfinfo */ adapter->num_vfs = 0; + /* put the reference to all of the vf devices */ + for (vf = 0; vf < num_vfs; ++vf) { + struct pci_dev *vfdev = adapter->vfinfo[vf].vfdev; + + if (!vfdev) + continue; + adapter->vfinfo[vf].vfdev = NULL; + pci_dev_put(vfdev); + } + /* free VF control structures */ kfree(adapter->vfinfo); adapter->vfinfo = NULL; @@ -289,6 +334,7 @@ static int ixgbe_pci_sriov_enable(struct pci_dev *dev, int num_vfs) e_dev_warn("Failed to enable PCI sriov: %d\n", err); return err; } + ixgbe_get_vfs(adapter); ixgbe_sriov_reinit(adapter); return num_vfs; From 36a92d7190e68e9387347695fe4625eb2c9e7e1c Mon Sep 17 00:00:00 2001 From: Mark Rustad Date: Wed, 18 Nov 2015 09:21:28 -0800 Subject: [PATCH 13/15] ixgbe: Handle extended IPv6 headers in Tx path Check for and handle IPv6 extended headers so that Tx checksum offload can be done. Also use skb_checksum_help for unexpected cases. Thanks to Tom Herbert for noticing these problems. Thanks to Alexander Duyck for recognizing problems with the first version of this patch and recognizing how to coalesce error conditions into a single location. Reported-by: Tom Herbert Signed-off-by: Mark Rustad Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 23 ++++++++++++------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index a10a0facb4e86..ebd4522e7879d 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -7063,6 +7063,7 @@ static void ixgbe_tx_csum(struct ixgbe_ring *tx_ring, struct tcphdr *tcphdr; u8 *raw; } transport_hdr; + __be16 frag_off; if (skb->encapsulation) { network_hdr.raw = skb_inner_network_header(skb); @@ -7086,13 +7087,17 @@ static void ixgbe_tx_csum(struct ixgbe_ring *tx_ring, case 6: vlan_macip_lens |= transport_hdr.raw - network_hdr.raw; l4_hdr = network_hdr.ipv6->nexthdr; + if (likely((transport_hdr.raw - network_hdr.raw) == + sizeof(struct ipv6hdr))) + break; + ipv6_skip_exthdr(skb, network_hdr.raw - skb->data + + sizeof(struct ipv6hdr), + &l4_hdr, &frag_off); + if (unlikely(frag_off)) + l4_hdr = NEXTHDR_FRAGMENT; break; default: - if (unlikely(net_ratelimit())) { - dev_warn(tx_ring->dev, - "partial checksum but version=%d\n", - network_hdr.ipv4->version); - } + break; } switch (l4_hdr) { @@ -7113,16 +7118,18 @@ static void ixgbe_tx_csum(struct ixgbe_ring *tx_ring, default: if (unlikely(net_ratelimit())) { dev_warn(tx_ring->dev, - "partial checksum but l4 proto=%x!\n", - l4_hdr); + "partial checksum, version=%d, l4 proto=%x\n", + network_hdr.ipv4->version, l4_hdr); } - break; + skb_checksum_help(skb); + goto no_csum; } /* update TX checksum flag */ first->tx_flags |= IXGBE_TX_FLAGS_CSUM; } +no_csum: /* vlan_macip_lens: MACLEN, VLAN tag */ vlan_macip_lens |= first->tx_flags & IXGBE_TX_FLAGS_VLAN_MASK; From 3c2f2b77a917488b56b2676b99adb5d3c07d6e68 Mon Sep 17 00:00:00 2001 From: Mark Rustad Date: Thu, 5 Nov 2015 11:02:14 -0800 Subject: [PATCH 14/15] ixgbe: Always turn PHY power on when requested Instead of inhibiting PHY power control when manageability is present, only inhibit turning PHY power off when manageability is present. Consequently, PHY power will always be turned on when requested. Without this patch, some systems with X540 or X550 devices in some conditions will never get link. Signed-off-by: Mark Rustad Tested-by: Phil Schmitt Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c | 3 +++ drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c | 3 +-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c index fb8673d638068..db0731e05401e 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c @@ -2393,6 +2393,9 @@ s32 ixgbe_set_copper_phy_power(struct ixgbe_hw *hw, bool on) if (hw->mac.ops.get_media_type(hw) != ixgbe_media_type_copper) return 0; + if (!on && ixgbe_mng_present(hw)) + return 0; + status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_VENDOR_SPECIFIC_1_CONTROL, IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, ®); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c index b9e9b0c173988..bf8225ceab8e4 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c @@ -57,8 +57,7 @@ s32 ixgbe_get_invariants_X540(struct ixgbe_hw *hw) struct ixgbe_phy_info *phy = &hw->phy; /* set_phy_power was set by default to NULL */ - if (!ixgbe_mng_present(hw)) - phy->ops.set_phy_power = ixgbe_set_copper_phy_power; + phy->ops.set_phy_power = ixgbe_set_copper_phy_power; mac->mcft_size = IXGBE_X540_MC_TBL_SIZE; mac->vft_size = IXGBE_X540_VFT_TBL_SIZE; From d34a614adfb16a560ddb6759d532eb32b6651eae Mon Sep 17 00:00:00 2001 From: Mark Rustad Date: Thu, 19 Nov 2015 13:56:30 -0800 Subject: [PATCH 15/15] ixgbevf: Handle extended IPv6 headers in Tx path Check for and handle IPv6 extended headers so that Tx checksum offload can be done. Also use skb_checksum_help for unexpected cases. Thanks to Tom Herbert for noticing these problems. Thanks to Alexander Duyck for seeing how to coalesce the error handling into one location. Reported-by: Tom Herbert Signed-off-by: Mark Rustad Tested-by: Darin Miller Signed-off-by: Jeff Kirsher --- .../net/ethernet/intel/ixgbevf/ixgbevf_main.c | 22 ++++++++++++------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 42b971c0cc87e..f098952d4fb4a 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -3344,6 +3344,7 @@ static void ixgbevf_tx_csum(struct ixgbevf_ring *tx_ring, if (skb->ip_summed == CHECKSUM_PARTIAL) { u8 l4_hdr = 0; + __be16 frag_off; switch (first->protocol) { case htons(ETH_P_IP): @@ -3354,13 +3355,16 @@ static void ixgbevf_tx_csum(struct ixgbevf_ring *tx_ring, case htons(ETH_P_IPV6): vlan_macip_lens |= skb_network_header_len(skb); l4_hdr = ipv6_hdr(skb)->nexthdr; + if (likely(skb_network_header_len(skb) == + sizeof(struct ipv6hdr))) + break; + ipv6_skip_exthdr(skb, skb_network_offset(skb) + + sizeof(struct ipv6hdr), + &l4_hdr, &frag_off); + if (unlikely(frag_off)) + l4_hdr = NEXTHDR_FRAGMENT; break; default: - if (unlikely(net_ratelimit())) { - dev_warn(tx_ring->dev, - "partial checksum but proto=%x!\n", - first->protocol); - } break; } @@ -3382,16 +3386,18 @@ static void ixgbevf_tx_csum(struct ixgbevf_ring *tx_ring, default: if (unlikely(net_ratelimit())) { dev_warn(tx_ring->dev, - "partial checksum but l4 proto=%x!\n", - l4_hdr); + "partial checksum, l3 proto=%x, l4 proto=%x\n", + first->protocol, l4_hdr); } - break; + skb_checksum_help(skb); + goto no_csum; } /* update TX checksum flag */ first->tx_flags |= IXGBE_TX_FLAGS_CSUM; } +no_csum: /* vlan_macip_lens: MACLEN, VLAN tag */ vlan_macip_lens |= skb_network_offset(skb) << IXGBE_ADVTXD_MACLEN_SHIFT; vlan_macip_lens |= first->tx_flags & IXGBE_TX_FLAGS_VLAN_MASK;