From c793fb17a4559eba1cabac9a52365d7e67aa8a7c Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 17 Jul 2012 11:00:09 -0700 Subject: [PATCH] --- yaml --- r: 315790 b: refs/heads/master c: 89aef8921bfbac22f00e04f8450f6e447db13e42 h: refs/heads/master v: v3 --- [refs] | 2 +- trunk/Documentation/networking/bonding.txt | 6 +- trunk/Documentation/networking/ip-sysctl.txt | 14 - .../Documentation/networking/openvswitch.txt | 2 +- trunk/drivers/net/dummy.c | 4 +- .../ethernet/broadcom/bnx2x/bnx2x_ethtool.c | 1 - .../net/ethernet/intel/e1000/e1000_ethtool.c | 1 - .../net/ethernet/intel/e1000e/ethtool.c | 1 - .../drivers/net/ethernet/intel/igb/igb_main.c | 3 +- .../drivers/net/ethernet/intel/ixgbe/ixgbe.h | 16 +- .../net/ethernet/intel/ixgbe/ixgbe_dcb.c | 12 +- .../net/ethernet/intel/ixgbe/ixgbe_lib.c | 41 +- .../net/ethernet/intel/ixgbe/ixgbe_main.c | 140 ++- .../net/ethernet/intel/ixgbe/ixgbe_sriov.c | 151 +-- .../net/ethernet/intel/ixgbe/ixgbe_sriov.h | 1 + .../net/ethernet/intel/ixgbe/ixgbe_type.h | 1 - .../net/ethernet/intel/ixgbevf/ixgbevf.h | 3 +- .../net/ethernet/intel/ixgbevf/ixgbevf_main.c | 164 +-- .../net/ethernet/neterion/vxge/vxge-main.c | 8 +- trunk/drivers/net/ethernet/nvidia/forcedeth.c | 5 +- trunk/drivers/net/ethernet/ti/davinci_emac.c | 45 +- trunk/drivers/net/loopback.c | 4 +- trunk/drivers/net/tun.c | 148 +-- trunk/drivers/net/virtio_net.c | 8 +- trunk/drivers/net/wimax/i2400m/control.c | 4 +- trunk/drivers/net/wimax/i2400m/usb-fw.c | 2 +- trunk/drivers/vhost/net.c | 4 +- trunk/drivers/vhost/test.c | 4 +- trunk/drivers/vhost/vhost.c | 5 +- trunk/drivers/vhost/vhost.h | 6 +- trunk/include/linux/net.h | 1 - trunk/include/linux/skbuff.h | 16 - trunk/include/net/arp.h | 3 + trunk/include/net/netprio_cgroup.h | 4 +- trunk/include/net/route.h | 1 - trunk/include/net/sctp/constants.h | 1 - trunk/include/net/sctp/structs.h | 20 +- trunk/include/net/sctp/user.h | 11 - trunk/net/bridge/br_device.c | 4 +- trunk/net/core/dev.c | 7 +- trunk/net/core/netprio_cgroup.c | 53 - trunk/net/core/rtnetlink.c | 2 - trunk/net/core/skbuff.c | 24 +- trunk/net/core/sock.c | 6 +- trunk/net/ipv4/fib_frontend.c | 5 - trunk/net/ipv4/ip_output.c | 3 +- trunk/net/ipv4/route.c | 940 +----------------- trunk/net/openvswitch/actions.c | 2 +- trunk/net/openvswitch/datapath.c | 13 +- trunk/net/openvswitch/datapath.h | 2 +- trunk/net/openvswitch/dp_notify.c | 2 +- trunk/net/openvswitch/flow.c | 5 +- trunk/net/openvswitch/flow.h | 2 +- trunk/net/openvswitch/vport-internal_dev.c | 10 +- trunk/net/openvswitch/vport-internal_dev.h | 2 +- trunk/net/openvswitch/vport-netdev.c | 2 +- trunk/net/openvswitch/vport-netdev.h | 2 +- trunk/net/openvswitch/vport.c | 2 +- trunk/net/openvswitch/vport.h | 2 +- trunk/net/sctp/associola.c | 37 +- trunk/net/sctp/outqueue.c | 6 +- trunk/net/sctp/sm_sideeffect.c | 33 +- trunk/net/sctp/socket.c | 101 -- trunk/net/sctp/sysctl.c | 9 - trunk/net/sctp/transport.c | 4 +- trunk/net/socket.c | 5 +- 66 files changed, 349 insertions(+), 1799 deletions(-) diff --git a/[refs] b/[refs] index 07a9ef405cff..d4b86eb8d167 100644 --- a/[refs] +++ b/[refs] @@ -1,2 +1,2 @@ --- -refs/heads/master: 3ba97381343b271296487bf073eb670d5465a8b8 +refs/heads/master: 89aef8921bfbac22f00e04f8450f6e447db13e42 diff --git a/trunk/Documentation/networking/bonding.txt b/trunk/Documentation/networking/bonding.txt index 6b1c7110534e..bfea8a338901 100644 --- a/trunk/Documentation/networking/bonding.txt +++ b/trunk/Documentation/networking/bonding.txt @@ -1210,7 +1210,7 @@ options, you may wish to use the "max_bonds" 
module parameter, documented above. To create multiple bonding devices with differing options, it is -preferable to use bonding parameters exported by sysfs, documented in the +preferrable to use bonding parameters exported by sysfs, documented in the section below. For versions of bonding without sysfs support, the only means to @@ -1950,7 +1950,7 @@ access to fail over to. Additionally, the bonding load balance modes support link monitoring of their members, so if individual links fail, the load will be rebalanced across the remaining devices. - See Section 12, "Configuring Bonding for Maximum Throughput" + See Section 13, "Configuring Bonding for Maximum Throughput" for information on configuring bonding with one peer device. 11.2 High Availability in a Multiple Switch Topology @@ -2620,7 +2620,7 @@ be found at: https://lists.sourceforge.net/lists/listinfo/bonding-devel - Discussions regarding the development of the bonding driver take place + Discussions regarding the developpement of the bonding driver take place on the main Linux network mailing list, hosted at vger.kernel.org. The list address is: diff --git a/trunk/Documentation/networking/ip-sysctl.txt b/trunk/Documentation/networking/ip-sysctl.txt index 406a5226220d..5f3ef7f7fcec 100644 --- a/trunk/Documentation/networking/ip-sysctl.txt +++ b/trunk/Documentation/networking/ip-sysctl.txt @@ -1440,20 +1440,6 @@ path_max_retrans - INTEGER Default: 5 -pf_retrans - INTEGER - The number of retransmissions that will be attempted on a given path - before traffic is redirected to an alternate transport (should one - exist). Note this is distinct from path_max_retrans, as a path that - passes the pf_retrans threshold can still be used. Its only - deprioritized when a transmission path is selected by the stack. This - setting is primarily used to enable fast failover mechanisms without - having to reduce path_max_retrans to a very low value. See: - http://www.ietf.org/id/draft-nishida-tsvwg-sctp-failover-05.txt - for details. Note also that a value of pf_retrans > path_max_retrans - disables this feature - - Default: 0 - rto_initial - INTEGER The initial round trip timeout value in milliseconds that will be used in calculating round trip times. This is the initial time interval diff --git a/trunk/Documentation/networking/openvswitch.txt b/trunk/Documentation/networking/openvswitch.txt index 8fa2dd1e792e..b8a048b8df3a 100644 --- a/trunk/Documentation/networking/openvswitch.txt +++ b/trunk/Documentation/networking/openvswitch.txt @@ -118,7 +118,7 @@ essentially like this, ignoring metadata: Naively, to add VLAN support, it makes sense to add a new "vlan" flow key attribute to contain the VLAN tag, then continue to decode the encapsulated headers beyond the VLAN tag using the existing field -definitions. With this change, a TCP packet in VLAN 10 would have a +definitions. With this change, an TCP packet in VLAN 10 would have a flow key much like this: eth(...), vlan(vid=10, pcp=0), eth_type(0x0800), ip(proto=6, ...), tcp(...) 
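The stats hunks that follow (dummy here, and vxge, loopback and virtio_net further down) switch the per-CPU readers from the u64_stats_fetch_begin_bh()/u64_stats_fetch_retry_bh() pair back to the plain variants. The retry-loop pattern is the same either way: snapshot the counters, then loop if a writer raced with the read. A minimal sketch of such a reader, assuming a per-CPU stats struct whose u64_stats_sync member is named syncp (struct and function names here are illustrative, not taken from this patch):

	#include <linux/u64_stats_sync.h>

	struct pcpu_dstats {
		u64			packets;
		u64			bytes;
		struct u64_stats_sync	syncp;
	};

	/* Snapshot one CPU's counters; retry if a writer raced with us. */
	static void dstats_read(const struct pcpu_dstats *d,
				u64 *packets, u64 *bytes)
	{
		unsigned int start;

		do {
			start = u64_stats_fetch_begin(&d->syncp);
			*packets = d->packets;
			*bytes = d->bytes;
		} while (u64_stats_fetch_retry(&d->syncp, start));
	}

On 64-bit kernels the sync object is empty and the loop body runs exactly once; on 32-bit it is a seqcount that makes the paired 64-bit loads appear atomic.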
diff --git a/trunk/drivers/net/dummy.c b/trunk/drivers/net/dummy.c index c260af5411d0..9d6a0677466b 100644 --- a/trunk/drivers/net/dummy.c +++ b/trunk/drivers/net/dummy.c @@ -63,10 +63,10 @@ static struct rtnl_link_stats64 *dummy_get_stats64(struct net_device *dev, dstats = per_cpu_ptr(dev->dstats, i); do { - start = u64_stats_fetch_begin_bh(&dstats->syncp); + start = u64_stats_fetch_begin(&dstats->syncp); tbytes = dstats->tx_bytes; tpackets = dstats->tx_packets; - } while (u64_stats_fetch_retry_bh(&dstats->syncp, start)); + } while (u64_stats_fetch_retry(&dstats->syncp, start)); stats->tx_bytes += tbytes; stats->tx_packets += tpackets; } diff --git a/trunk/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c b/trunk/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c index fc4e0e3885b0..bff31290198b 100644 --- a/trunk/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c +++ b/trunk/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c @@ -2978,7 +2978,6 @@ static const struct ethtool_ops bnx2x_ethtool_ops = { .get_module_eeprom = bnx2x_get_module_eeprom, .get_eee = bnx2x_get_eee, .set_eee = bnx2x_set_eee, - .get_ts_info = ethtool_op_get_ts_info, }; void bnx2x_set_ethtool_ops(struct net_device *netdev) diff --git a/trunk/drivers/net/ethernet/intel/e1000/e1000_ethtool.c b/trunk/drivers/net/ethernet/intel/e1000/e1000_ethtool.c index 736a7d987db5..3103f0b6bf5e 100644 --- a/trunk/drivers/net/ethernet/intel/e1000/e1000_ethtool.c +++ b/trunk/drivers/net/ethernet/intel/e1000/e1000_ethtool.c @@ -1851,7 +1851,6 @@ static const struct ethtool_ops e1000_ethtool_ops = { .get_sset_count = e1000_get_sset_count, .get_coalesce = e1000_get_coalesce, .set_coalesce = e1000_set_coalesce, - .get_ts_info = ethtool_op_get_ts_info, }; void e1000_set_ethtool_ops(struct net_device *netdev) diff --git a/trunk/drivers/net/ethernet/intel/e1000e/ethtool.c b/trunk/drivers/net/ethernet/intel/e1000e/ethtool.c index 0349e2478df8..105d554ea9db 100644 --- a/trunk/drivers/net/ethernet/intel/e1000e/ethtool.c +++ b/trunk/drivers/net/ethernet/intel/e1000e/ethtool.c @@ -2061,7 +2061,6 @@ static const struct ethtool_ops e1000_ethtool_ops = { .get_coalesce = e1000_get_coalesce, .set_coalesce = e1000_set_coalesce, .get_rxnfc = e1000_get_rxnfc, - .get_ts_info = ethtool_op_get_ts_info, }; void e1000e_set_ethtool_ops(struct net_device *netdev) diff --git a/trunk/drivers/net/ethernet/intel/igb/igb_main.c b/trunk/drivers/net/ethernet/intel/igb/igb_main.c index 1050411e7ca3..8adeca9787ca 100644 --- a/trunk/drivers/net/ethernet/intel/igb/igb_main.c +++ b/trunk/drivers/net/ethernet/intel/igb/igb_main.c @@ -1500,12 +1500,11 @@ static void igb_configure(struct igb_adapter *adapter) **/ void igb_power_up_link(struct igb_adapter *adapter) { - igb_reset_phy(&adapter->hw); - if (adapter->hw.phy.media_type == e1000_media_type_copper) igb_power_up_phy_copper(&adapter->hw); else igb_power_up_serdes_link_82575(&adapter->hw); + igb_reset_phy(&adapter->hw); } /** diff --git a/trunk/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/trunk/drivers/net/ethernet/intel/ixgbe/ixgbe.h index b9623e9ea895..5a286adc65c0 100644 --- a/trunk/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/trunk/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -77,18 +77,17 @@ #define IXGBE_MAX_FCPAUSE 0xFFFF /* Supported Rx Buffer Sizes */ -#define IXGBE_RXBUFFER_256 256 /* Used for skb receive header */ +#define IXGBE_RXBUFFER_512 512 /* Used for packet split */ #define IXGBE_MAX_RXBUFFER 16384 /* largest size for a single descriptor */ /* - * NOTE: netdev_alloc_skb reserves up to 64 bytes, 
NET_IP_ALIGN means we - * reserve 64 more, and skb_shared_info adds an additional 320 bytes more, - * this adds up to 448 bytes of extra data. - * - * Since netdev_alloc_skb now allocates a page fragment we can use a value - * of 256 and the resultant skb will have a truesize of 960 or less. + * NOTE: netdev_alloc_skb reserves up to 64 bytes, NET_IP_ALIGN mans we + * reserve 2 more, and skb_shared_info adds an additional 384 bytes more, + * this adds up to 512 bytes of extra data meaning the smallest allocation + * we could have is 1K. + * i.e. RXBUFFER_512 --> size-1024 slab */ -#define IXGBE_RX_HDR_SIZE IXGBE_RXBUFFER_256 +#define IXGBE_RX_HDR_SIZE IXGBE_RXBUFFER_512 #define MAXIMUM_ETHERNET_VLAN_SIZE (ETH_FRAME_LEN + ETH_FCS_LEN + VLAN_HLEN) @@ -131,6 +130,7 @@ struct vf_data_storage { u16 tx_rate; u16 vlan_count; u8 spoofchk_enabled; + struct pci_dev *vfdev; }; struct vf_macvlans { diff --git a/trunk/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb.c b/trunk/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb.c index 9bc17c0cb972..5442b359141e 100644 --- a/trunk/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb.c +++ b/trunk/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb.c @@ -232,22 +232,18 @@ u8 ixgbe_dcb_get_tc_from_up(struct ixgbe_dcb_config *cfg, int direction, u8 up) { struct tc_configuration *tc_config = &cfg->tc_config[0]; u8 prio_mask = 1 << up; - u8 tc = cfg->num_tcs.pg_tcs; - - /* If tc is 0 then DCB is likely not enabled or supported */ - if (!tc) - goto out; + u8 tc; /* - * Test from maximum TC to 1 and report the first match we find. If + * Test for TCs 7 through 1 and report the first match we find. If * we find no match we can assume that the TC is 0 since the TC must * be set for all user priorities */ - for (tc--; tc; tc--) { + for (tc = MAX_TRAFFIC_CLASS - 1; tc; tc--) { if (prio_mask & tc_config[tc].path[direction].up_to_tc_bitmap) break; } -out: + return tc; } diff --git a/trunk/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c b/trunk/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c index 17ecbcedd548..38d1b65777ad 100644 --- a/trunk/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c +++ b/trunk/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c @@ -370,9 +370,6 @@ static bool ixgbe_set_dcb_sriov_queues(struct ixgbe_adapter *adapter) adapter->ring_feature[RING_F_RSS].indices = 1; adapter->ring_feature[RING_F_RSS].mask = IXGBE_RSS_DISABLED_MASK; - /* disable ATR as it is not supported when VMDq is enabled */ - adapter->flags &= ~IXGBE_FLAG_FDIR_HASH_CAPABLE; - adapter->num_rx_pools = vmdq_i; adapter->num_rx_queues_per_pool = tcs; @@ -453,9 +450,6 @@ static bool ixgbe_set_dcb_queues(struct ixgbe_adapter *adapter) f->indices = rss_i; f->mask = rss_m; - /* disable ATR as it is not supported when multiple TCs are enabled */ - adapter->flags &= ~IXGBE_FLAG_FDIR_HASH_CAPABLE; - #ifdef IXGBE_FCOE /* FCoE enabled queues require special configuration indexed * by feature specific indices and offset. Here we map FCoE @@ -612,22 +606,16 @@ static bool ixgbe_set_rss_queues(struct ixgbe_adapter *adapter) f->indices = rss_i; f->mask = IXGBE_RSS_16Q_MASK; - /* disable ATR by default, it will be configured below */ - adapter->flags &= ~IXGBE_FLAG_FDIR_HASH_CAPABLE; - /* * Use Flow Director in addition to RSS to ensure the best * distribution of flows across cores, even when an FDIR flow * isn't matched. 
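	 *
	 * (ATR, the sampled hash-filter mode kept enabled here, records
	 * transmitted flows and programs the hardware to steer matching
	 * receive traffic back to the same queue, and hence the same CPU.)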
*/ - if (rss_i > 1 && adapter->atr_sample_rate) { + if (adapter->flags & IXGBE_FLAG_FDIR_HASH_CAPABLE) { f = &adapter->ring_feature[RING_F_FDIR]; f->indices = min_t(u16, num_online_cpus(), f->limit); rss_i = max_t(u16, rss_i, f->indices); - - if (!(adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE)) - adapter->flags |= IXGBE_FLAG_FDIR_HASH_CAPABLE; } #ifdef IXGBE_FCOE @@ -1065,27 +1053,18 @@ static void ixgbe_set_interrupt_capability(struct ixgbe_adapter *adapter) return; } - /* disable DCB if number of TCs exceeds 1 */ - if (netdev_get_num_tc(adapter->netdev) > 1) { - e_err(probe, "num TCs exceeds number of queues - disabling DCB\n"); - netdev_reset_tc(adapter->netdev); - - if (adapter->hw.mac.type == ixgbe_mac_82598EB) - adapter->hw.fc.requested_mode = adapter->last_lfc_mode; - - adapter->flags &= ~IXGBE_FLAG_DCB_ENABLED; - adapter->temp_dcb_cfg.pfc_mode_enable = false; - adapter->dcb_cfg.pfc_mode_enable = false; + adapter->flags &= ~IXGBE_FLAG_DCB_ENABLED; + if (adapter->flags & IXGBE_FLAG_FDIR_HASH_CAPABLE) { + e_err(probe, + "ATR is not supported while multiple " + "queues are disabled. Disabling Flow Director\n"); } - adapter->dcb_cfg.num_tcs.pg_tcs = 1; - adapter->dcb_cfg.num_tcs.pfc_tcs = 1; - - /* disable SR-IOV */ - ixgbe_disable_sriov(adapter); + adapter->flags &= ~IXGBE_FLAG_FDIR_HASH_CAPABLE; + adapter->atr_sample_rate = 0; + if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) + ixgbe_disable_sriov(adapter); - /* disable RSS */ adapter->ring_feature[RING_F_RSS].limit = 1; - ixgbe_set_num_queues(adapter); adapter->num_q_vectors = 1; diff --git a/trunk/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/trunk/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 3b6784cf134a..f4e53c1a7338 100644 --- a/trunk/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/trunk/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -1517,8 +1517,8 @@ static bool ixgbe_cleanup_headers(struct ixgbe_ring *rx_ring, * 60 bytes if the skb->len is less than 60 for skb_pad. 
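	 *
	 * (Only the packet headers are copied into the linear area below;
	 * the remaining payload stays in the page fragment and is
	 * referenced, not copied.)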
*/ pull_len = skb_frag_size(frag); - if (pull_len > IXGBE_RX_HDR_SIZE) - pull_len = ixgbe_get_headlen(va, IXGBE_RX_HDR_SIZE); + if (pull_len > 256) + pull_len = ixgbe_get_headlen(va, pull_len); /* align pull length to size of long to optimize memcpy performance */ skb_copy_to_linear_data(skb, va, ALIGN(pull_len, sizeof(long))); @@ -2688,7 +2688,8 @@ void ixgbe_configure_tx_ring(struct ixgbe_adapter *adapter, 32; /* PTHRESH = 32 */ /* reinitialize flowdirector state */ - if (adapter->flags & IXGBE_FLAG_FDIR_HASH_CAPABLE) { + if ((adapter->flags & IXGBE_FLAG_FDIR_HASH_CAPABLE) && + adapter->atr_sample_rate) { ring->atr_sample_rate = adapter->atr_sample_rate; ring->atr_count = 0; set_bit(__IXGBE_TX_FDIR_INIT_DONE, &ring->state); @@ -3441,18 +3442,14 @@ static int ixgbe_write_uc_addr_list(struct net_device *netdev) { struct ixgbe_adapter *adapter = netdev_priv(netdev); struct ixgbe_hw *hw = &adapter->hw; - unsigned int rar_entries = hw->mac.num_rar_entries - 1; + unsigned int rar_entries = IXGBE_MAX_PF_MACVLANS; int count = 0; - /* In SR-IOV mode significantly less RAR entries are available */ - if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) - rar_entries = IXGBE_MAX_PF_MACVLANS - 1; - /* return ENOMEM indicating insufficient memory for addresses */ if (netdev_uc_count(netdev) > rar_entries) return -ENOMEM; - if (!netdev_uc_empty(netdev)) { + if (!netdev_uc_empty(netdev) && rar_entries) { struct netdev_hw_addr *ha; /* return error if we do not support writing to RAR table */ if (!hw->mac.ops.set_rar) @@ -4422,6 +4419,7 @@ static int __devinit ixgbe_sw_init(struct ixgbe_adapter *adapter) if (hw->device_id == IXGBE_DEV_ID_82599_T3_LOM) adapter->flags2 |= IXGBE_FLAG2_TEMP_SENSOR_CAPABLE; /* Flow Director hash filters enabled */ + adapter->flags |= IXGBE_FLAG_FDIR_HASH_CAPABLE; adapter->atr_sample_rate = 20; adapter->ring_feature[RING_F_FDIR].limit = IXGBE_MAX_FDIR_INDICES; @@ -4492,12 +4490,6 @@ static int __devinit ixgbe_sw_init(struct ixgbe_adapter *adapter) hw->fc.send_xon = true; hw->fc.disable_fc_autoneg = false; -#ifdef CONFIG_PCI_IOV - /* assign number of SR-IOV VFs */ - if (hw->mac.type != ixgbe_mac_82598EB) - adapter->num_vfs = (max_vfs > 63) ? 0 : max_vfs; - -#endif /* enable itr by default in dynamic mode */ adapter->rx_itr_setting = 1; adapter->tx_itr_setting = 1; @@ -6703,6 +6695,12 @@ int ixgbe_setup_tc(struct net_device *dev, u8 tc) struct ixgbe_adapter *adapter = netdev_priv(dev); struct ixgbe_hw *hw = &adapter->hw; + /* Multiple traffic classes requires multiple queues */ + if (!(adapter->flags & IXGBE_FLAG_MSIX_ENABLED)) { + e_err(drv, "Enable failed, needs MSI-X\n"); + return -EINVAL; + } + /* Hardware supports up to 8 traffic classes */ if (tc > adapter->dcb_cfg.num_tcs.pg_tcs || (hw->mac.type == ixgbe_mac_82598EB && @@ -6722,6 +6720,7 @@ int ixgbe_setup_tc(struct net_device *dev, u8 tc) ixgbe_set_prio_tc_map(adapter); adapter->flags |= IXGBE_FLAG_DCB_ENABLED; + adapter->flags &= ~IXGBE_FLAG_FDIR_HASH_CAPABLE; if (adapter->hw.mac.type == ixgbe_mac_82598EB) { adapter->last_lfc_mode = adapter->hw.fc.requested_mode; @@ -6734,6 +6733,7 @@ int ixgbe_setup_tc(struct net_device *dev, u8 tc) adapter->hw.fc.requested_mode = adapter->last_lfc_mode; adapter->flags &= ~IXGBE_FLAG_DCB_ENABLED; + adapter->flags |= IXGBE_FLAG_FDIR_HASH_CAPABLE; adapter->temp_dcb_cfg.pfc_mode_enable = false; adapter->dcb_cfg.pfc_mode_enable = false; @@ -6802,40 +6802,20 @@ static int ixgbe_set_features(struct net_device *netdev, * Check if Flow Director n-tuple support was enabled or disabled. 
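	 * ("n-tuple" here means the exact-match "perfect" filters set via
	 * ethtool; they are mutually exclusive with ATR's sampled hash
	 * filters, which is why the flags below toggle in opposition.)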
If * the state changed, we need to reset. */ - switch (features & NETIF_F_NTUPLE) { - case NETIF_F_NTUPLE: - /* turn off ATR, enable perfect filters and reset */ - if (!(adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE)) + if (!(features & NETIF_F_NTUPLE)) { + if (adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE) { + /* turn off Flow Director, set ATR and reset */ + if (!(adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) && + !(adapter->flags & IXGBE_FLAG_DCB_ENABLED)) + adapter->flags |= IXGBE_FLAG_FDIR_HASH_CAPABLE; need_reset = true; - + } + adapter->flags &= ~IXGBE_FLAG_FDIR_PERFECT_CAPABLE; + } else if (!(adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE)) { + /* turn off ATR, enable perfect filters and reset */ adapter->flags &= ~IXGBE_FLAG_FDIR_HASH_CAPABLE; adapter->flags |= IXGBE_FLAG_FDIR_PERFECT_CAPABLE; - break; - default: - /* turn off perfect filters, enable ATR and reset */ - if (adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE) - need_reset = true; - - adapter->flags &= ~IXGBE_FLAG_FDIR_PERFECT_CAPABLE; - - /* We cannot enable ATR if SR-IOV is enabled */ - if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) - break; - - /* We cannot enable ATR if we have 2 or more traffic classes */ - if (netdev_get_num_tc(netdev) > 1) - break; - - /* We cannot enable ATR if RSS is disabled */ - if (adapter->ring_feature[RING_F_RSS].limit <= 1) - break; - - /* A sample rate of 0 indicates ATR disabled */ - if (!adapter->atr_sample_rate) - break; - - adapter->flags |= IXGBE_FLAG_FDIR_HASH_CAPABLE; - break; + need_reset = true; } if (features & NETIF_F_HW_VLAN_RX) @@ -6859,10 +6839,7 @@ static int ixgbe_ndo_fdb_add(struct ndmsg *ndm, u16 flags) { struct ixgbe_adapter *adapter = netdev_priv(dev); - int err; - - if (!(adapter->flags & IXGBE_FLAG_SRIOV_ENABLED)) - return -EOPNOTSUPP; + int err = -EOPNOTSUPP; if (ndm->ndm_state & NUD_PERMANENT) { pr_info("%s: FDB only supports static addresses\n", @@ -6870,17 +6847,13 @@ static int ixgbe_ndo_fdb_add(struct ndmsg *ndm, return -EINVAL; } - if (is_unicast_ether_addr(addr)) { - u32 rar_uc_entries = IXGBE_MAX_PF_MACVLANS; - - if (netdev_uc_count(dev) < rar_uc_entries) + if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) { + if (is_unicast_ether_addr(addr)) err = dev_uc_add_excl(dev, addr); + else if (is_multicast_ether_addr(addr)) + err = dev_mc_add_excl(dev, addr); else - err = -ENOMEM; - } else if (is_multicast_ether_addr(addr)) { - err = dev_mc_add_excl(dev, addr); - } else { - err = -EINVAL; + err = -EINVAL; } /* Only return duplicate errors if NLM_F_EXCL is set */ @@ -6969,6 +6942,26 @@ static const struct net_device_ops ixgbe_netdev_ops = { .ndo_fdb_dump = ixgbe_ndo_fdb_dump, }; +static void __devinit ixgbe_probe_vf(struct ixgbe_adapter *adapter, + const struct ixgbe_info *ii) +{ +#ifdef CONFIG_PCI_IOV + struct ixgbe_hw *hw = &adapter->hw; + + if (hw->mac.type == ixgbe_mac_82598EB) + return; + + /* The 82599 supports up to 64 VFs per physical function + * but this implementation limits allocation to 63 so that + * basic networking resources are still available to the + * physical function. If the user requests greater thn + * 63 VFs then it is an error - reset to default of zero. + */ + adapter->num_vfs = (max_vfs > 63) ? 
0 : max_vfs; + ixgbe_enable_sriov(adapter, ii); +#endif /* CONFIG_PCI_IOV */ +} + /** * ixgbe_wol_supported - Check whether device supports WoL * @hw: hw specific details @@ -6995,7 +6988,6 @@ int ixgbe_wol_supported(struct ixgbe_adapter *adapter, u16 device_id, if (hw->bus.func != 0) break; case IXGBE_SUBDEV_ID_82599_SFP: - case IXGBE_SUBDEV_ID_82599_RNDC: is_wol_supported = 1; break; } @@ -7043,7 +7035,6 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev, int i, err, pci_using_dac; u8 part_str[IXGBE_PBANUM_LENGTH]; unsigned int indices = num_possible_cpus(); - unsigned int dcb_max = 0; #ifdef IXGBE_FCOE u16 device_caps; #endif @@ -7093,16 +7084,15 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev, pci_save_state(pdev); #ifdef CONFIG_IXGBE_DCB - if (ii->mac == ixgbe_mac_82598EB) - dcb_max = min_t(unsigned int, indices * MAX_TRAFFIC_CLASS, - IXGBE_MAX_RSS_INDICES); - else - dcb_max = min_t(unsigned int, indices * MAX_TRAFFIC_CLASS, - IXGBE_MAX_FDIR_INDICES); + indices *= MAX_TRAFFIC_CLASS; #endif if (ii->mac == ixgbe_mac_82598EB) +#ifdef CONFIG_IXGBE_DCB + indices = min_t(unsigned int, indices, MAX_TRAFFIC_CLASS * 4); +#else indices = min_t(unsigned int, indices, IXGBE_MAX_RSS_INDICES); +#endif else indices = min_t(unsigned int, indices, IXGBE_MAX_FDIR_INDICES); @@ -7110,7 +7100,6 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev, indices += min_t(unsigned int, num_possible_cpus(), IXGBE_MAX_FCOE_INDICES); #endif - indices = max_t(unsigned int, dcb_max, indices); netdev = alloc_etherdev_mq(sizeof(struct ixgbe_adapter), indices); if (!netdev) { err = -ENOMEM; @@ -7217,10 +7206,8 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev, goto err_sw_init; } -#ifdef CONFIG_PCI_IOV - ixgbe_enable_sriov(adapter, ii); + ixgbe_probe_vf(adapter, ii); -#endif netdev->features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | @@ -7424,7 +7411,8 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev, ixgbe_release_hw_control(adapter); ixgbe_clear_interrupt_scheme(adapter); err_sw_init: - ixgbe_disable_sriov(adapter); + if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) + ixgbe_disable_sriov(adapter); adapter->flags2 &= ~IXGBE_FLAG2_SEARCH_FOR_SFP; iounmap(hw->hw_addr); err_ioremap: @@ -7477,7 +7465,13 @@ static void __devexit ixgbe_remove(struct pci_dev *pdev) if (netdev->reg_state == NETREG_REGISTERED) unregister_netdev(netdev); - ixgbe_disable_sriov(adapter); + if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) { + if (!(ixgbe_check_vf_assignment(adapter))) + ixgbe_disable_sriov(adapter); + else + e_dev_warn("Unloading driver while VFs are assigned " + "- VFs will not be deallocated\n"); + } ixgbe_clear_interrupt_scheme(adapter); diff --git a/trunk/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/trunk/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c index 4fea8716ab64..a825d4808cd2 100644 --- a/trunk/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c +++ b/trunk/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c @@ -44,15 +44,50 @@ #include "ixgbe_sriov.h" #ifdef CONFIG_PCI_IOV +static int ixgbe_find_enabled_vfs(struct ixgbe_adapter *adapter) +{ + struct pci_dev *pdev = adapter->pdev; + struct pci_dev *pvfdev; + u16 vf_devfn = 0; + int device_id; + int vfs_found = 0; + + switch (adapter->hw.mac.type) { + case ixgbe_mac_82599EB: + device_id = IXGBE_DEV_ID_82599_VF; + break; + case ixgbe_mac_X540: + device_id = IXGBE_DEV_ID_X540_VF; + break; + default: + device_id = 0; + break; + } + + vf_devfn = pdev->devfn + 0x80; + pvfdev = pci_get_device(PCI_VENDOR_ID_INTEL, device_id, NULL); + while 
(pvfdev) { + if (pvfdev->devfn == vf_devfn && + (pvfdev->bus->number >= pdev->bus->number)) + vfs_found++; + vf_devfn += 2; + pvfdev = pci_get_device(PCI_VENDOR_ID_INTEL, + device_id, pvfdev); + } + + return vfs_found; +} + void ixgbe_enable_sriov(struct ixgbe_adapter *adapter, const struct ixgbe_info *ii) { struct ixgbe_hw *hw = &adapter->hw; + int err = 0; int num_vf_macvlans, i; struct vf_macvlans *mv_list; int pre_existing_vfs = 0; - pre_existing_vfs = pci_num_vf(adapter->pdev); + pre_existing_vfs = ixgbe_find_enabled_vfs(adapter); if (!pre_existing_vfs && !adapter->num_vfs) return; @@ -71,21 +106,10 @@ void ixgbe_enable_sriov(struct ixgbe_adapter *adapter, "enabled for this device - Please reload all " "VF drivers to avoid spoofed packet errors\n"); } else { - int err; - /* - * The 82599 supports up to 64 VFs per physical function - * but this implementation limits allocation to 63 so that - * basic networking resources are still available to the - * physical function. If the user requests greater thn - * 63 VFs then it is an error - reset to default of zero. - */ - adapter->num_vfs = min_t(unsigned int, adapter->num_vfs, 63); - err = pci_enable_sriov(adapter->pdev, adapter->num_vfs); if (err) { e_err(probe, "Failed to enable PCI sriov: %d\n", err); - adapter->num_vfs = 0; - return; + goto err_novfs; } } @@ -169,48 +193,20 @@ void ixgbe_enable_sriov(struct ixgbe_adapter *adapter, /* Oh oh */ e_err(probe, "Unable to allocate memory for VF Data Storage - " "SRIOV disabled\n"); - ixgbe_disable_sriov(adapter); -} - -static bool ixgbe_vfs_are_assigned(struct ixgbe_adapter *adapter) -{ - struct pci_dev *pdev = adapter->pdev; - struct pci_dev *vfdev; - int dev_id; - - switch (adapter->hw.mac.type) { - case ixgbe_mac_82599EB: - dev_id = IXGBE_DEV_ID_82599_VF; - break; - case ixgbe_mac_X540: - dev_id = IXGBE_DEV_ID_X540_VF; - break; - default: - return false; - } - - /* loop through all the VFs to see if we own any that are assigned */ - vfdev = pci_get_device(PCI_VENDOR_ID_INTEL, dev_id, NULL); - while (vfdev) { - /* if we don't own it we don't care */ - if (vfdev->is_virtfn && vfdev->physfn == pdev) { - /* if it is assigned we cannot release it */ - if (vfdev->dev_flags & PCI_DEV_FLAGS_ASSIGNED) - return true; - } - - vfdev = pci_get_device(PCI_VENDOR_ID_INTEL, dev_id, vfdev); - } + pci_disable_sriov(adapter->pdev); - return false; +err_novfs: + adapter->flags &= ~IXGBE_FLAG_SRIOV_ENABLED; + adapter->num_vfs = 0; } - #endif /* #ifdef CONFIG_PCI_IOV */ + void ixgbe_disable_sriov(struct ixgbe_adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; u32 gpie; u32 vmdctl; + int i; /* set num VFs to 0 to prevent access to vfinfo */ adapter->num_vfs = 0; @@ -223,20 +219,7 @@ void ixgbe_disable_sriov(struct ixgbe_adapter *adapter) kfree(adapter->mv_list); adapter->mv_list = NULL; - /* if SR-IOV is already disabled then there is nothing to do */ - if (!(adapter->flags & IXGBE_FLAG_SRIOV_ENABLED)) - return; - #ifdef CONFIG_PCI_IOV - /* - * If our VFs are assigned we cannot shut down SR-IOV - * without causing issues, so just leave the hardware - * available but disabled - */ - if (ixgbe_vfs_are_assigned(adapter)) { - e_dev_warn("Unloading driver while VFs are assigned - VFs will not be deallocated\n"); - return; - } /* disable iov and allow time for transactions to clear */ pci_disable_sriov(adapter->pdev); #endif @@ -261,6 +244,12 @@ void ixgbe_disable_sriov(struct ixgbe_adapter *adapter) /* take a breather then clean up driver data */ msleep(100); + /* Release reference to VF devices */ + for (i = 
0; i < adapter->num_vfs; i++) { + if (adapter->vfinfo[i].vfdev) + pci_dev_put(adapter->vfinfo[i].vfdev); + } + adapter->flags &= ~IXGBE_FLAG_SRIOV_ENABLED; } @@ -494,11 +483,28 @@ static int ixgbe_set_vf_macvlan(struct ixgbe_adapter *adapter, return 0; } +int ixgbe_check_vf_assignment(struct ixgbe_adapter *adapter) +{ +#ifdef CONFIG_PCI_IOV + int i; + for (i = 0; i < adapter->num_vfs; i++) { + if (adapter->vfinfo[i].vfdev->dev_flags & + PCI_DEV_FLAGS_ASSIGNED) + return true; + } +#endif + return false; +} + int ixgbe_vf_configuration(struct pci_dev *pdev, unsigned int event_mask) { unsigned char vf_mac_addr[6]; struct ixgbe_adapter *adapter = pci_get_drvdata(pdev); unsigned int vfn = (event_mask & 0x3f); + struct pci_dev *pvfdev; + unsigned int device_id; + u16 thisvf_devfn = (pdev->devfn + 0x80 + (vfn << 1)) | + (pdev->devfn & 1); bool enable = ((event_mask & 0x10000000U) != 0); @@ -511,6 +517,31 @@ int ixgbe_vf_configuration(struct pci_dev *pdev, unsigned int event_mask) * for it later. */ memcpy(adapter->vfinfo[vfn].vf_mac_addresses, vf_mac_addr, 6); + + switch (adapter->hw.mac.type) { + case ixgbe_mac_82599EB: + device_id = IXGBE_DEV_ID_82599_VF; + break; + case ixgbe_mac_X540: + device_id = IXGBE_DEV_ID_X540_VF; + break; + default: + device_id = 0; + break; + } + + pvfdev = pci_get_device(PCI_VENDOR_ID_INTEL, device_id, NULL); + while (pvfdev) { + if (pvfdev->devfn == thisvf_devfn) + break; + pvfdev = pci_get_device(PCI_VENDOR_ID_INTEL, + device_id, pvfdev); + } + if (pvfdev) + adapter->vfinfo[vfn].vfdev = pvfdev; + else + e_err(drv, "Couldn't find pci dev ptr for VF %4.4x\n", + thisvf_devfn); } return 0; diff --git a/trunk/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h b/trunk/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h index 1be1d30e4e78..2ab38d5fda92 100644 --- a/trunk/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h +++ b/trunk/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h @@ -42,6 +42,7 @@ int ixgbe_ndo_get_vf_config(struct net_device *netdev, int vf, struct ifla_vf_info *ivi); void ixgbe_check_vf_rate_limit(struct ixgbe_adapter *adapter); void ixgbe_disable_sriov(struct ixgbe_adapter *adapter); +int ixgbe_check_vf_assignment(struct ixgbe_adapter *adapter); #ifdef CONFIG_PCI_IOV void ixgbe_enable_sriov(struct ixgbe_adapter *adapter, const struct ixgbe_info *ii); diff --git a/trunk/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h b/trunk/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h index 400f86a31174..fe0a19d91d4a 100644 --- a/trunk/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h +++ b/trunk/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h @@ -54,7 +54,6 @@ #define IXGBE_DEV_ID_82599_BACKPLANE_FCOE 0x152a #define IXGBE_DEV_ID_82599_SFP_FCOE 0x1529 #define IXGBE_SUBDEV_ID_82599_SFP 0x11A9 -#define IXGBE_SUBDEV_ID_82599_RNDC 0x1F72 #define IXGBE_SUBDEV_ID_82599_560FLR 0x17D0 #define IXGBE_DEV_ID_82599_SFP_EM 0x1507 #define IXGBE_DEV_ID_82599_SFP_SF2 0x154D diff --git a/trunk/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h b/trunk/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h index 98cadb0c4dab..e167d1bb6dea 100644 --- a/trunk/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h +++ b/trunk/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h @@ -249,8 +249,6 @@ struct ixgbevf_adapter { bool link_up; struct work_struct watchdog_task; - - spinlock_t mbx_lock; }; enum ixbgevf_state_t { @@ -286,6 +284,7 @@ extern void ixgbevf_free_rx_resources(struct ixgbevf_adapter *, extern void ixgbevf_free_tx_resources(struct ixgbevf_adapter *, struct ixgbevf_ring *); extern void ixgbevf_update_stats(struct ixgbevf_adapter 
*adapter); +void ixgbevf_write_eitr(struct ixgbevf_q_vector *); extern int ethtool_ioctl(struct ifreq *ifr); extern void ixgbe_napi_add_all(struct ixgbevf_adapter *adapter); diff --git a/trunk/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/trunk/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 3f9841d619ad..2dc78d7e297a 100644 --- a/trunk/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/trunk/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -540,25 +540,6 @@ static int ixgbevf_poll(struct napi_struct *napi, int budget) return 0; } -/** - * ixgbevf_write_eitr - write VTEITR register in hardware specific way - * @q_vector: structure containing interrupt and ring information - */ -static void ixgbevf_write_eitr(struct ixgbevf_q_vector *q_vector) -{ - struct ixgbevf_adapter *adapter = q_vector->adapter; - struct ixgbe_hw *hw = &adapter->hw; - int v_idx = q_vector->v_idx; - u32 itr_reg = q_vector->itr & IXGBE_MAX_EITR; - - /* - * set the WDIS bit to not clear the timer bits and cause an - * immediate assertion of the interrupt - */ - itr_reg |= IXGBE_EITR_CNT_WDIS; - - IXGBE_WRITE_REG(hw, IXGBE_VTEITR(v_idx), itr_reg); -} /** * ixgbevf_configure_msix - Configure MSI-X hardware @@ -681,6 +662,30 @@ static void ixgbevf_update_itr(struct ixgbevf_q_vector *q_vector, ring_container->itr = itr_setting; } +/** + * ixgbevf_write_eitr - write VTEITR register in hardware specific way + * @q_vector: structure containing interrupt and ring information + * + * This function is made to be called by ethtool and by the driver + * when it needs to update VTEITR registers at runtime. Hardware + * specific quirks/differences are taken care of here. + */ +void ixgbevf_write_eitr(struct ixgbevf_q_vector *q_vector) +{ + struct ixgbevf_adapter *adapter = q_vector->adapter; + struct ixgbe_hw *hw = &adapter->hw; + int v_idx = q_vector->v_idx; + u32 itr_reg = q_vector->itr & IXGBE_MAX_EITR; + + /* + * set the WDIS bit to not clear the timer bits and cause an + * immediate assertion of the interrupt + */ + itr_reg |= IXGBE_EITR_CNT_WDIS; + + IXGBE_WRITE_REG(hw, IXGBE_VTEITR(v_idx), itr_reg); +} + static void ixgbevf_set_itr(struct ixgbevf_q_vector *q_vector) { u32 new_itr = q_vector->itr; @@ -1115,14 +1120,9 @@ static int ixgbevf_vlan_rx_add_vid(struct net_device *netdev, u16 vid) struct ixgbevf_adapter *adapter = netdev_priv(netdev); struct ixgbe_hw *hw = &adapter->hw; - spin_lock(&adapter->mbx_lock); - /* add VID to filter table */ if (hw->mac.ops.set_vfta) hw->mac.ops.set_vfta(hw, vid, 0, true); - - spin_unlock(&adapter->mbx_lock); - set_bit(vid, adapter->active_vlans); return 0; @@ -1133,14 +1133,9 @@ static int ixgbevf_vlan_rx_kill_vid(struct net_device *netdev, u16 vid) struct ixgbevf_adapter *adapter = netdev_priv(netdev); struct ixgbe_hw *hw = &adapter->hw; - spin_lock(&adapter->mbx_lock); - /* remove VID from filter table */ if (hw->mac.ops.set_vfta) hw->mac.ops.set_vfta(hw, vid, 0, false); - - spin_unlock(&adapter->mbx_lock); - clear_bit(vid, adapter->active_vlans); return 0; @@ -1195,15 +1190,11 @@ static void ixgbevf_set_rx_mode(struct net_device *netdev) struct ixgbevf_adapter *adapter = netdev_priv(netdev); struct ixgbe_hw *hw = &adapter->hw; - spin_lock(&adapter->mbx_lock); - /* reprogram multicast list */ if (hw->mac.ops.update_mc_addr_list) hw->mac.ops.update_mc_addr_list(hw, netdev); ixgbevf_write_uc_addr_list(netdev); - - spin_unlock(&adapter->mbx_lock); } static void ixgbevf_napi_enable_all(struct ixgbevf_adapter *adapter) @@ -1348,8 +1339,6 @@ static void 
ixgbevf_up_complete(struct ixgbevf_adapter *adapter) ixgbevf_configure_msix(adapter); - spin_lock(&adapter->mbx_lock); - if (hw->mac.ops.set_rar) { if (is_valid_ether_addr(hw->mac.addr)) hw->mac.ops.set_rar(hw, 0, hw->mac.addr, 0); @@ -1361,8 +1350,6 @@ static void ixgbevf_up_complete(struct ixgbevf_adapter *adapter) msg[1] = netdev->mtu + ETH_HLEN + ETH_FCS_LEN; hw->mbx.ops.write_posted(hw, msg, 2); - spin_unlock(&adapter->mbx_lock); - clear_bit(__IXGBEVF_DOWN, &adapter->state); ixgbevf_napi_enable_all(adapter); @@ -1575,15 +1562,11 @@ void ixgbevf_reset(struct ixgbevf_adapter *adapter) struct ixgbe_hw *hw = &adapter->hw; struct net_device *netdev = adapter->netdev; - spin_lock(&adapter->mbx_lock); - if (hw->mac.ops.reset_hw(hw)) hw_dbg(hw, "PF still resetting\n"); else hw->mac.ops.init_hw(hw); - spin_unlock(&adapter->mbx_lock); - if (is_valid_ether_addr(adapter->hw.mac.addr)) { memcpy(netdev->dev_addr, adapter->hw.mac.addr, netdev->addr_len); @@ -1910,9 +1893,6 @@ static int __devinit ixgbevf_sw_init(struct ixgbevf_adapter *adapter) adapter->netdev->addr_len); } - /* lock to protect mailbox accesses */ - spin_lock_init(&adapter->mbx_lock); - /* Enable dynamic interrupt throttling rates */ adapter->rx_itr_setting = 1; adapter->tx_itr_setting = 1; @@ -2052,16 +2032,8 @@ static void ixgbevf_watchdog_task(struct work_struct *work) * no LSC interrupt */ if (hw->mac.ops.check_link) { - s32 need_reset; - - spin_lock(&adapter->mbx_lock); - - need_reset = hw->mac.ops.check_link(hw, &link_speed, - &link_up, false); - - spin_unlock(&adapter->mbx_lock); - - if (need_reset) { + if ((hw->mac.ops.check_link(hw, &link_speed, + &link_up, false)) != 0) { adapter->link_up = link_up; adapter->link_speed = link_speed; netif_carrier_off(netdev); @@ -2841,13 +2813,9 @@ static int ixgbevf_set_mac(struct net_device *netdev, void *p) memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len); memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len); - spin_lock(&adapter->mbx_lock); - if (hw->mac.ops.set_rar) hw->mac.ops.set_rar(hw, 0, hw->mac.addr, 0); - spin_unlock(&adapter->mbx_lock); - return 0; } @@ -3184,92 +3152,12 @@ static void __devexit ixgbevf_remove(struct pci_dev *pdev) pci_disable_device(pdev); } -/** - * ixgbevf_io_error_detected - called when PCI error is detected - * @pdev: Pointer to PCI device - * @state: The current pci connection state - * - * This function is called after a PCI bus error affecting - * this device has been detected. - */ -static pci_ers_result_t ixgbevf_io_error_detected(struct pci_dev *pdev, - pci_channel_state_t state) -{ - struct net_device *netdev = pci_get_drvdata(pdev); - struct ixgbevf_adapter *adapter = netdev_priv(netdev); - - netif_device_detach(netdev); - - if (state == pci_channel_io_perm_failure) - return PCI_ERS_RESULT_DISCONNECT; - - if (netif_running(netdev)) - ixgbevf_down(adapter); - - pci_disable_device(pdev); - - /* Request a slot slot reset. */ - return PCI_ERS_RESULT_NEED_RESET; -} - -/** - * ixgbevf_io_slot_reset - called after the pci bus has been reset. - * @pdev: Pointer to PCI device - * - * Restart the card from scratch, as if from a cold-boot. Implementation - * resembles the first-half of the ixgbevf_resume routine. 
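 * (The PCI AER core invokes these handlers in order: error_detected,
 * then slot_reset, then resume; returning PCI_ERS_RESULT_NEED_RESET
 * from error_detected is what requests the slot reset.)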
- */ -static pci_ers_result_t ixgbevf_io_slot_reset(struct pci_dev *pdev) -{ - struct net_device *netdev = pci_get_drvdata(pdev); - struct ixgbevf_adapter *adapter = netdev_priv(netdev); - - if (pci_enable_device_mem(pdev)) { - dev_err(&pdev->dev, - "Cannot re-enable PCI device after reset.\n"); - return PCI_ERS_RESULT_DISCONNECT; - } - - pci_set_master(pdev); - - ixgbevf_reset(adapter); - - return PCI_ERS_RESULT_RECOVERED; -} - -/** - * ixgbevf_io_resume - called when traffic can start flowing again. - * @pdev: Pointer to PCI device - * - * This callback is called when the error recovery driver tells us that - * its OK to resume normal operation. Implementation resembles the - * second-half of the ixgbevf_resume routine. - */ -static void ixgbevf_io_resume(struct pci_dev *pdev) -{ - struct net_device *netdev = pci_get_drvdata(pdev); - struct ixgbevf_adapter *adapter = netdev_priv(netdev); - - if (netif_running(netdev)) - ixgbevf_up(adapter); - - netif_device_attach(netdev); -} - -/* PCI Error Recovery (ERS) */ -static struct pci_error_handlers ixgbevf_err_handler = { - .error_detected = ixgbevf_io_error_detected, - .slot_reset = ixgbevf_io_slot_reset, - .resume = ixgbevf_io_resume, -}; - static struct pci_driver ixgbevf_driver = { .name = ixgbevf_driver_name, .id_table = ixgbevf_pci_tbl, .probe = ixgbevf_probe, .remove = __devexit_p(ixgbevf_remove), .shutdown = ixgbevf_shutdown, - .err_handler = &ixgbevf_err_handler }; /** diff --git a/trunk/drivers/net/ethernet/neterion/vxge/vxge-main.c b/trunk/drivers/net/ethernet/neterion/vxge/vxge-main.c index de2190443510..4e20c5f02712 100644 --- a/trunk/drivers/net/ethernet/neterion/vxge/vxge-main.c +++ b/trunk/drivers/net/ethernet/neterion/vxge/vxge-main.c @@ -3131,12 +3131,12 @@ vxge_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *net_stats) u64 packets, bytes, multicast; do { - start = u64_stats_fetch_begin_bh(&rxstats->syncp); + start = u64_stats_fetch_begin(&rxstats->syncp); packets = rxstats->rx_frms; multicast = rxstats->rx_mcast; bytes = rxstats->rx_bytes; - } while (u64_stats_fetch_retry_bh(&rxstats->syncp, start)); + } while (u64_stats_fetch_retry(&rxstats->syncp, start)); net_stats->rx_packets += packets; net_stats->rx_bytes += bytes; @@ -3146,11 +3146,11 @@ vxge_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *net_stats) net_stats->rx_dropped += rxstats->rx_dropped; do { - start = u64_stats_fetch_begin_bh(&txstats->syncp); + start = u64_stats_fetch_begin(&txstats->syncp); packets = txstats->tx_frms; bytes = txstats->tx_bytes; - } while (u64_stats_fetch_retry_bh(&txstats->syncp, start)); + } while (u64_stats_fetch_retry(&txstats->syncp, start)); net_stats->tx_packets += packets; net_stats->tx_bytes += bytes; diff --git a/trunk/drivers/net/ethernet/nvidia/forcedeth.c b/trunk/drivers/net/ethernet/nvidia/forcedeth.c index f45def01a98e..8b7c5129c7e1 100644 --- a/trunk/drivers/net/ethernet/nvidia/forcedeth.c +++ b/trunk/drivers/net/ethernet/nvidia/forcedeth.c @@ -3775,7 +3775,7 @@ static irqreturn_t nv_nic_irq_other(int foo, void *data) np->link_timeout = jiffies + LINK_TIMEOUT; } if (events & NVREG_IRQ_RECOVER_ERROR) { - spin_lock_irqsave(&np->lock, flags); + spin_lock_irq(&np->lock); /* disable interrupts on the nic */ writel(NVREG_IRQ_OTHER, base + NvRegIrqMask); pci_push(base); @@ -3785,7 +3785,7 @@ static irqreturn_t nv_nic_irq_other(int foo, void *data) np->recover_error = 1; mod_timer(&np->nic_poll, jiffies + POLL_WAIT); } - spin_unlock_irqrestore(&np->lock, flags); + spin_unlock_irq(&np->lock); break; } if 
(unlikely(i > max_interrupt_work)) { @@ -5182,7 +5182,6 @@ static const struct ethtool_ops ops = { .get_ethtool_stats = nv_get_ethtool_stats, .get_sset_count = nv_get_sset_count, .self_test = nv_self_test, - .get_ts_info = ethtool_op_get_ts_info, }; /* The mgmt unit and driver use a semaphore to access the phy during init */ diff --git a/trunk/drivers/net/ethernet/ti/davinci_emac.c b/trunk/drivers/net/ethernet/ti/davinci_emac.c index fce89a0ab06e..b298ab071e3d 100644 --- a/trunk/drivers/net/ethernet/ti/davinci_emac.c +++ b/trunk/drivers/net/ethernet/ti/davinci_emac.c @@ -57,13 +57,14 @@ #include #include #include -#include #include #include #include #include #include +#include + #include #include @@ -354,6 +355,10 @@ struct emac_priv { void (*int_disable) (void); }; +/* clock frequency for EMAC */ +static struct clk *emac_clk; +static unsigned long emac_bus_frequency; + /* EMAC TX Host Error description strings */ static char *emac_txhost_errcodes[16] = { "No error", "SOP error", "Ownership bit not set in SOP buffer", @@ -1537,8 +1542,6 @@ static int emac_dev_open(struct net_device *ndev) int k = 0; struct emac_priv *priv = netdev_priv(ndev); - pm_runtime_get(&priv->pdev->dev); - netif_carrier_off(ndev); for (cnt = 0; cnt < ETH_ALEN; cnt++) ndev->dev_addr[cnt] = priv->mac_addr[cnt]; @@ -1608,7 +1611,7 @@ static int emac_dev_open(struct net_device *ndev) priv->phy_id); ret = PTR_ERR(priv->phydev); priv->phydev = NULL; - goto err; + return ret; } priv->link = 0; @@ -1649,11 +1652,7 @@ static int emac_dev_open(struct net_device *ndev) res = platform_get_resource(priv->pdev, IORESOURCE_IRQ, k-1); m = res->end; } - - ret = -EBUSY; -err: - pm_runtime_put(&priv->pdev->dev); - return ret; + return -EBUSY; } /** @@ -1695,7 +1694,6 @@ static int emac_dev_stop(struct net_device *ndev) if (netif_msg_drv(priv)) dev_notice(emac_dev, "DaVinci EMAC: %s stopped\n", ndev->name); - pm_runtime_put(&priv->pdev->dev); return 0; } @@ -1860,9 +1858,6 @@ static int __devinit davinci_emac_probe(struct platform_device *pdev) struct emac_platform_data *pdata; struct device *emac_dev; struct cpdma_params dma_params; - struct clk *emac_clk; - unsigned long emac_bus_frequency; - /* obtain emac clock from kernel */ emac_clk = clk_get(&pdev->dev, NULL); @@ -1871,14 +1866,12 @@ static int __devinit davinci_emac_probe(struct platform_device *pdev) return -EBUSY; } emac_bus_frequency = clk_get_rate(emac_clk); - clk_put(emac_clk); - /* TODO: Probe PHY here if possible */ ndev = alloc_etherdev(sizeof(struct emac_priv)); if (!ndev) { rc = -ENOMEM; - goto no_ndev; + goto free_clk; } platform_set_drvdata(pdev, ndev); @@ -1994,13 +1987,15 @@ static int __devinit davinci_emac_probe(struct platform_device *pdev) SET_ETHTOOL_OPS(ndev, ðtool_ops); netif_napi_add(ndev, &priv->napi, emac_poll, EMAC_POLL_WEIGHT); + clk_enable(emac_clk); + /* register the network device */ SET_NETDEV_DEV(ndev, &pdev->dev); rc = register_netdev(ndev); if (rc) { dev_err(&pdev->dev, "error in register_netdev\n"); rc = -ENODEV; - goto no_irq_res; + goto netdev_reg_err; } @@ -2009,12 +2004,10 @@ static int __devinit davinci_emac_probe(struct platform_device *pdev) "(regs: %p, irq: %d)\n", (void *)priv->emac_base_phys, ndev->irq); } - - pm_runtime_enable(&pdev->dev); - pm_runtime_resume(&pdev->dev); - return 0; +netdev_reg_err: + clk_disable(emac_clk); no_irq_res: if (priv->txchan) cpdma_chan_destroy(priv->txchan); @@ -2028,7 +2021,8 @@ static int __devinit davinci_emac_probe(struct platform_device *pdev) probe_quit: free_netdev(ndev); -no_ndev: +free_clk: + 
clk_put(emac_clk); return rc; } @@ -2062,6 +2056,9 @@ static int __devexit davinci_emac_remove(struct platform_device *pdev) iounmap(priv->remap_addr); free_netdev(ndev); + clk_disable(emac_clk); + clk_put(emac_clk); + return 0; } @@ -2073,6 +2070,8 @@ static int davinci_emac_suspend(struct device *dev) if (netif_running(ndev)) emac_dev_stop(ndev); + clk_disable(emac_clk); + return 0; } @@ -2081,6 +2080,8 @@ static int davinci_emac_resume(struct device *dev) struct platform_device *pdev = to_platform_device(dev); struct net_device *ndev = platform_get_drvdata(pdev); + clk_enable(emac_clk); + if (netif_running(ndev)) emac_dev_open(ndev); diff --git a/trunk/drivers/net/loopback.c b/trunk/drivers/net/loopback.c index e2a06fd996d5..32eb94ece6c1 100644 --- a/trunk/drivers/net/loopback.c +++ b/trunk/drivers/net/loopback.c @@ -107,10 +107,10 @@ static struct rtnl_link_stats64 *loopback_get_stats64(struct net_device *dev, lb_stats = per_cpu_ptr(dev->lstats, i); do { - start = u64_stats_fetch_begin_bh(&lb_stats->syncp); + start = u64_stats_fetch_begin(&lb_stats->syncp); tbytes = lb_stats->bytes; tpackets = lb_stats->packets; - } while (u64_stats_fetch_retry_bh(&lb_stats->syncp, start)); + } while (u64_stats_fetch_retry(&lb_stats->syncp, start)); bytes += tbytes; packets += tpackets; } diff --git a/trunk/drivers/net/tun.c b/trunk/drivers/net/tun.c index c62163e272cd..f3a454c3295a 100644 --- a/trunk/drivers/net/tun.c +++ b/trunk/drivers/net/tun.c @@ -100,8 +100,6 @@ do { \ } while (0) #endif -#define GOODCOPY_LEN 128 - #define FLT_EXACT_COUNT 8 struct tap_filter { unsigned int count; /* Number of addrs. Zero means disabled */ @@ -418,8 +416,6 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) /* Orphan the skb - required as we might hang on to it * for indefinite time. 
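	 *
	 * (skb_orphan() detaches the skb from its owning socket and runs
	 * the destructor now, so the sender's sk_wmem accounting is
	 * released even though the packet may sit in the tun queue
	 * indefinitely.)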
*/ - if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC))) - goto drop; skb_orphan(skb); /* Enqueue packet */ @@ -606,100 +602,19 @@ static struct sk_buff *tun_alloc_skb(struct tun_struct *tun, return skb; } -/* set skb frags from iovec, this can move to core network code for reuse */ -static int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *from, - int offset, size_t count) -{ - int len = iov_length(from, count) - offset; - int copy = skb_headlen(skb); - int size, offset1 = 0; - int i = 0; - - /* Skip over from offset */ - while (count && (offset >= from->iov_len)) { - offset -= from->iov_len; - ++from; - --count; - } - - /* copy up to skb headlen */ - while (count && (copy > 0)) { - size = min_t(unsigned int, copy, from->iov_len - offset); - if (copy_from_user(skb->data + offset1, from->iov_base + offset, - size)) - return -EFAULT; - if (copy > size) { - ++from; - --count; - offset = 0; - } else - offset += size; - copy -= size; - offset1 += size; - } - - if (len == offset1) - return 0; - - while (count--) { - struct page *page[MAX_SKB_FRAGS]; - int num_pages; - unsigned long base; - unsigned long truesize; - - len = from->iov_len - offset; - if (!len) { - offset = 0; - ++from; - continue; - } - base = (unsigned long)from->iov_base + offset; - size = ((base & ~PAGE_MASK) + len + ~PAGE_MASK) >> PAGE_SHIFT; - if (i + size > MAX_SKB_FRAGS) - return -EMSGSIZE; - num_pages = get_user_pages_fast(base, size, 0, &page[i]); - if (num_pages != size) { - for (i = 0; i < num_pages; i++) - put_page(page[i]); - return -EFAULT; - } - truesize = size * PAGE_SIZE; - skb->data_len += len; - skb->len += len; - skb->truesize += truesize; - atomic_add(truesize, &skb->sk->sk_wmem_alloc); - while (len) { - int off = base & ~PAGE_MASK; - int size = min_t(int, len, PAGE_SIZE - off); - __skb_fill_page_desc(skb, i, page[i], off, size); - skb_shinfo(skb)->nr_frags++; - /* increase sk_wmem_alloc */ - base += size; - len -= size; - i++; - } - offset = 0; - ++from; - } - return 0; -} - /* Get packet from user space buffer */ -static ssize_t tun_get_user(struct tun_struct *tun, void *msg_control, - const struct iovec *iv, size_t total_len, - size_t count, int noblock) +static ssize_t tun_get_user(struct tun_struct *tun, + const struct iovec *iv, size_t count, + int noblock) { struct tun_pi pi = { 0, cpu_to_be16(ETH_P_IP) }; struct sk_buff *skb; - size_t len = total_len, align = NET_SKB_PAD; + size_t len = count, align = NET_SKB_PAD; struct virtio_net_hdr gso = { 0 }; int offset = 0; - int copylen; - bool zerocopy = false; - int err; if (!(tun->flags & TUN_NO_PI)) { - if ((len -= sizeof(pi)) > total_len) + if ((len -= sizeof(pi)) > count) return -EINVAL; if (memcpy_fromiovecend((void *)&pi, iv, 0, sizeof(pi))) @@ -708,7 +623,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, void *msg_control, } if (tun->flags & TUN_VNET_HDR) { - if ((len -= tun->vnet_hdr_sz) > total_len) + if ((len -= tun->vnet_hdr_sz) > count) return -EINVAL; if (memcpy_fromiovecend((void *)&gso, iv, offset, sizeof(gso))) @@ -730,46 +645,14 @@ static ssize_t tun_get_user(struct tun_struct *tun, void *msg_control, return -EINVAL; } - if (msg_control) - zerocopy = true; - - if (zerocopy) { - /* Userspace may produce vectors with count greater than - * MAX_SKB_FRAGS, so we need to linearize parts of the skb - * to let the rest of data to be fit in the frags. 
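	 * (In the removed logic below, copylen is the number of head bytes
	 * linearized into the skb; everything past it is pinned user memory
	 * mapped in via get_user_pages_fast().)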
- */ - if (count > MAX_SKB_FRAGS) { - copylen = iov_length(iv, count - MAX_SKB_FRAGS); - if (copylen < offset) - copylen = 0; - else - copylen -= offset; - } else - copylen = 0; - /* There are 256 bytes to be copied in skb, so there is enough - * room for skb expand head in case it is used. - * The rest of the buffer is mapped from userspace. - */ - if (copylen < gso.hdr_len) - copylen = gso.hdr_len; - if (!copylen) - copylen = GOODCOPY_LEN; - } else - copylen = len; - - skb = tun_alloc_skb(tun, align, copylen, gso.hdr_len, noblock); + skb = tun_alloc_skb(tun, align, len, gso.hdr_len, noblock); if (IS_ERR(skb)) { if (PTR_ERR(skb) != -EAGAIN) tun->dev->stats.rx_dropped++; return PTR_ERR(skb); } - if (zerocopy) - err = zerocopy_sg_from_iovec(skb, iv, offset, count); - else - err = skb_copy_datagram_from_iovec(skb, 0, iv, offset, len); - - if (err) { + if (skb_copy_datagram_from_iovec(skb, 0, iv, offset, len)) { tun->dev->stats.rx_dropped++; kfree_skb(skb); return -EFAULT; @@ -843,18 +726,12 @@ static ssize_t tun_get_user(struct tun_struct *tun, void *msg_control, skb_shinfo(skb)->gso_segs = 0; } - /* copy skb_ubuf_info for callback when skb has no error */ - if (zerocopy) { - skb_shinfo(skb)->destructor_arg = msg_control; - skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY; - } - netif_rx_ni(skb); tun->dev->stats.rx_packets++; tun->dev->stats.rx_bytes += len; - return total_len; + return count; } static ssize_t tun_chr_aio_write(struct kiocb *iocb, const struct iovec *iv, @@ -869,7 +746,7 @@ static ssize_t tun_chr_aio_write(struct kiocb *iocb, const struct iovec *iv, tun_debug(KERN_INFO, tun, "tun_chr_write %ld\n", count); - result = tun_get_user(tun, NULL, iv, iov_length(iv, count), count, + result = tun_get_user(tun, iv, iov_length(iv, count), file->f_flags & O_NONBLOCK); tun_put(tun); @@ -1083,8 +960,8 @@ static int tun_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m, size_t total_len) { struct tun_struct *tun = container_of(sock, struct tun_struct, socket); - return tun_get_user(tun, m->msg_control, m->msg_iov, total_len, - m->msg_iovlen, m->msg_flags & MSG_DONTWAIT); + return tun_get_user(tun, m->msg_iov, total_len, + m->msg_flags & MSG_DONTWAIT); } static int tun_recvmsg(struct kiocb *iocb, struct socket *sock, @@ -1254,7 +1131,6 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) sock_init_data(&tun->socket, sk); sk->sk_write_space = tun_sock_write_space; sk->sk_sndbuf = INT_MAX; - sock_set_flag(sk, SOCK_ZEROCOPY); tun_sk(sk)->tun = tun; diff --git a/trunk/drivers/net/virtio_net.c b/trunk/drivers/net/virtio_net.c index 83d2b0c34c5e..1db445b2ecc7 100644 --- a/trunk/drivers/net/virtio_net.c +++ b/trunk/drivers/net/virtio_net.c @@ -704,16 +704,16 @@ static struct rtnl_link_stats64 *virtnet_stats(struct net_device *dev, u64 tpackets, tbytes, rpackets, rbytes; do { - start = u64_stats_fetch_begin_bh(&stats->tx_syncp); + start = u64_stats_fetch_begin(&stats->tx_syncp); tpackets = stats->tx_packets; tbytes = stats->tx_bytes; - } while (u64_stats_fetch_retry_bh(&stats->tx_syncp, start)); + } while (u64_stats_fetch_retry(&stats->tx_syncp, start)); do { - start = u64_stats_fetch_begin_bh(&stats->rx_syncp); + start = u64_stats_fetch_begin(&stats->rx_syncp); rpackets = stats->rx_packets; rbytes = stats->rx_bytes; - } while (u64_stats_fetch_retry_bh(&stats->rx_syncp, start)); + } while (u64_stats_fetch_retry(&stats->rx_syncp, start)); tot->rx_packets += rpackets; tot->tx_packets += tpackets; diff --git a/trunk/drivers/net/wimax/i2400m/control.c 
b/trunk/drivers/net/wimax/i2400m/control.c index 4a01e5c7fe09..2fea02b35b2d 100644 --- a/trunk/drivers/net/wimax/i2400m/control.c +++ b/trunk/drivers/net/wimax/i2400m/control.c @@ -130,7 +130,7 @@ ssize_t i2400m_tlv_match(const struct i2400m_tlv_hdr *tlv, && le16_to_cpu(tlv->length) + sizeof(*tlv) != tlv_size) { size_t size = le16_to_cpu(tlv->length) + sizeof(*tlv); printk(KERN_WARNING "W: tlv type 0x%x mismatched because of " - "size (got %zu vs %zd expected)\n", + "size (got %zu vs %zu expected)\n", tlv_type, size, tlv_size); return size; } @@ -235,7 +235,7 @@ const struct i2400m_tlv_hdr *i2400m_tlv_find( break; if (match > 0) dev_warn(dev, "TLV type 0x%04x found with size " - "mismatch (%zu vs %zd needed)\n", + "mismatch (%zu vs %zu needed)\n", tlv_type, match, tlv_size); } return tlv; diff --git a/trunk/drivers/net/wimax/i2400m/usb-fw.c b/trunk/drivers/net/wimax/i2400m/usb-fw.c index e74664b84925..1fda46c55eb3 100644 --- a/trunk/drivers/net/wimax/i2400m/usb-fw.c +++ b/trunk/drivers/net/wimax/i2400m/usb-fw.c @@ -212,7 +212,7 @@ ssize_t i2400mu_bus_bm_cmd_send(struct i2400m *i2400m, } if (result != cmd_size) { /* all was transferred? */ dev_err(dev, "boot-mode cmd %d: incomplete transfer " - "(%zd vs %zu submitted)\n", opcode, result, cmd_size); + "(%zu vs %zu submitted)\n", opcode, result, cmd_size); result = -EIO; goto error_cmd_size; } diff --git a/trunk/drivers/vhost/net.c b/trunk/drivers/vhost/net.c index 072cbbadbc36..f82a7394756e 100644 --- a/trunk/drivers/vhost/net.c +++ b/trunk/drivers/vhost/net.c @@ -823,14 +823,14 @@ static long vhost_net_ioctl(struct file *f, unsigned int ioctl, return -EFAULT; return vhost_net_set_backend(n, backend.index, backend.fd); case VHOST_GET_FEATURES: - features = VHOST_NET_FEATURES; + features = VHOST_FEATURES; if (copy_to_user(featurep, &features, sizeof features)) return -EFAULT; return 0; case VHOST_SET_FEATURES: if (copy_from_user(&features, featurep, sizeof features)) return -EFAULT; - if (features & ~VHOST_NET_FEATURES) + if (features & ~VHOST_FEATURES) return -EOPNOTSUPP; return vhost_net_set_features(n, features); case VHOST_RESET_OWNER: diff --git a/trunk/drivers/vhost/test.c b/trunk/drivers/vhost/test.c index 91d6f060aade..3de00d9fae2e 100644 --- a/trunk/drivers/vhost/test.c +++ b/trunk/drivers/vhost/test.c @@ -261,14 +261,14 @@ static long vhost_test_ioctl(struct file *f, unsigned int ioctl, return -EFAULT; return vhost_test_run(n, test); case VHOST_GET_FEATURES: - features = VHOST_NET_FEATURES; + features = VHOST_FEATURES; if (copy_to_user(featurep, &features, sizeof features)) return -EFAULT; return 0; case VHOST_SET_FEATURES: if (copy_from_user(&features, featurep, sizeof features)) return -EFAULT; - if (features & ~VHOST_NET_FEATURES) + if (features & ~VHOST_FEATURES) return -EOPNOTSUPP; return vhost_test_set_features(n, features); case VHOST_RESET_OWNER: diff --git a/trunk/drivers/vhost/vhost.c b/trunk/drivers/vhost/vhost.c index ef82a0d18489..112156f68afb 100644 --- a/trunk/drivers/vhost/vhost.c +++ b/trunk/drivers/vhost/vhost.c @@ -64,7 +64,7 @@ static int vhost_poll_wakeup(wait_queue_t *wait, unsigned mode, int sync, return 0; } -void vhost_work_init(struct vhost_work *work, vhost_work_fn_t fn) +static void vhost_work_init(struct vhost_work *work, vhost_work_fn_t fn) { INIT_LIST_HEAD(&work->node); work->fn = fn; @@ -137,7 +137,8 @@ void vhost_poll_flush(struct vhost_poll *poll) vhost_work_flush(poll->dev, &poll->work); } -void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work) +static inline void 
vhost_work_queue(struct vhost_dev *dev, + struct vhost_work *work) { unsigned long flags; diff --git a/trunk/drivers/vhost/vhost.h b/trunk/drivers/vhost/vhost.h index 1125af3d27d1..8de1fd5b8efb 100644 --- a/trunk/drivers/vhost/vhost.h +++ b/trunk/drivers/vhost/vhost.h @@ -43,9 +43,6 @@ struct vhost_poll { struct vhost_dev *dev; }; -void vhost_work_init(struct vhost_work *work, vhost_work_fn_t fn); -void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work); - void vhost_poll_init(struct vhost_poll *poll, vhost_work_fn_t fn, unsigned long mask, struct vhost_dev *dev); void vhost_poll_start(struct vhost_poll *poll, struct file *file); @@ -204,8 +201,7 @@ enum { VHOST_FEATURES = (1ULL << VIRTIO_F_NOTIFY_ON_EMPTY) | (1ULL << VIRTIO_RING_F_INDIRECT_DESC) | (1ULL << VIRTIO_RING_F_EVENT_IDX) | - (1ULL << VHOST_F_LOG_ALL), - VHOST_NET_FEATURES = VHOST_FEATURES | + (1ULL << VHOST_F_LOG_ALL) | (1ULL << VHOST_NET_F_VIRTIO_NET_HDR) | (1ULL << VIRTIO_NET_F_MRG_RXBUF), }; diff --git a/trunk/include/linux/net.h b/trunk/include/linux/net.h index 99276c3dc89a..dc95700de5df 100644 --- a/trunk/include/linux/net.h +++ b/trunk/include/linux/net.h @@ -248,7 +248,6 @@ extern int sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int flags); extern int sock_map_fd(struct socket *sock, int flags); extern struct socket *sockfd_lookup(int fd, int *err); -extern struct socket *sock_from_file(struct file *file, int *err); #define sockfd_put(sock) fput(sock->file) extern int net_ratelimit(void); diff --git a/trunk/include/linux/skbuff.h b/trunk/include/linux/skbuff.h index d205c4be7f5b..642cb7355df3 100644 --- a/trunk/include/linux/skbuff.h +++ b/trunk/include/linux/skbuff.h @@ -1666,22 +1666,6 @@ static inline void skb_orphan(struct sk_buff *skb) skb->sk = NULL; } -/** - * skb_orphan_frags - orphan the frags contained in a buffer - * @skb: buffer to orphan frags from - * @gfp_mask: allocation mask for replacement pages - * - * For each frag in the SKB which needs a destructor (i.e. has an - * owner) create a copy of that frag and release the original - * page by calling the destructor. 
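The VHOST_GET_FEATURES/VHOST_SET_FEATURES handlers above follow the usual virtio feature negotiation: the kernel advertises a bitmap of supported features, and a SET request carrying any bit outside that bitmap fails with -EOPNOTSUPP. A minimal userspace sketch of the same check; the SUPPORTED_FEATURES mask and the bit names are illustrative stand-ins, not values from this patch:

#include <errno.h>
#include <stdint.h>
#include <stdio.h>

#define FEAT_NOTIFY_ON_EMPTY (1ULL << 24)
#define FEAT_INDIRECT_DESC   (1ULL << 28)
#define FEAT_EVENT_IDX       (1ULL << 29)

/* Everything the "device" side is willing to accept. */
static const uint64_t SUPPORTED_FEATURES =
	FEAT_NOTIFY_ON_EMPTY | FEAT_INDIRECT_DESC | FEAT_EVENT_IDX;

static int set_features(uint64_t requested, uint64_t *acked)
{
	/* Reject any bit we never advertised. */
	if (requested & ~SUPPORTED_FEATURES)
		return -EOPNOTSUPP;
	*acked = requested;
	return 0;
}

int main(void)
{
	uint64_t acked;

	if (set_features(FEAT_EVENT_IDX, &acked) == 0)
		printf("negotiated 0x%llx\n", (unsigned long long)acked);
	if (set_features(1ULL << 63, &acked) == -EOPNOTSUPP)
		printf("unknown feature bit rejected\n");
	return 0;
}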
- */ -static inline int skb_orphan_frags(struct sk_buff *skb, gfp_t gfp_mask) -{ - if (likely(!(skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY))) - return 0; - return skb_copy_ubufs(skb, gfp_mask); -} - /** * __skb_queue_purge - empty a list * @list: list to empty diff --git a/trunk/include/net/arp.h b/trunk/include/net/arp.h index 7f7df93f37cd..4617d9841132 100644 --- a/trunk/include/net/arp.h +++ b/trunk/include/net/arp.h @@ -21,6 +21,9 @@ static inline struct neighbour *__ipv4_neigh_lookup_noref(struct net_device *dev struct neighbour *n; u32 hash_val; + if (dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT)) + key = 0; + hash_val = arp_hashfn(key, dev, nht->hash_rnd[0]) >> (32 - nht->hash_shift); for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]); n != NULL; diff --git a/trunk/include/net/netprio_cgroup.h b/trunk/include/net/netprio_cgroup.h index 2719dec6b5a8..d58fdec47597 100644 --- a/trunk/include/net/netprio_cgroup.h +++ b/trunk/include/net/netprio_cgroup.h @@ -35,7 +35,7 @@ struct cgroup_netprio_state { extern int net_prio_subsys_id; #endif -extern void sock_update_netprioidx(struct sock *sk, struct task_struct *task); +extern void sock_update_netprioidx(struct sock *sk); #if IS_BUILTIN(CONFIG_NETPRIO_CGROUP) @@ -82,7 +82,7 @@ static inline u32 task_netprioidx(struct task_struct *p) #endif /* CONFIG_NETPRIO_CGROUP */ #else -#define sock_update_netprioidx(sk, task) +#define sock_update_netprioidx(sk) #endif #endif /* _NET_CLS_CGROUP_H */ diff --git a/trunk/include/net/route.h b/trunk/include/net/route.h index ace3cb442519..5dcfeb621e06 100644 --- a/trunk/include/net/route.h +++ b/trunk/include/net/route.h @@ -109,7 +109,6 @@ extern struct ip_rt_acct __percpu *ip_rt_acct; struct in_device; extern int ip_rt_init(void); extern void rt_cache_flush(struct net *net, int how); -extern void rt_cache_flush_batch(struct net *net); extern struct rtable *__ip_route_output_key(struct net *, struct flowi4 *flp); extern struct rtable *ip_route_output_flow(struct net *, struct flowi4 *flp, struct sock *sk); diff --git a/trunk/include/net/sctp/constants.h b/trunk/include/net/sctp/constants.h index d053d2e99876..942b864f6135 100644 --- a/trunk/include/net/sctp/constants.h +++ b/trunk/include/net/sctp/constants.h @@ -334,7 +334,6 @@ typedef enum { typedef enum { SCTP_TRANSPORT_UP, SCTP_TRANSPORT_DOWN, - SCTP_TRANSPORT_PF, } sctp_transport_cmd_t; /* These are the address scopes defined mainly for IPv4 addresses diff --git a/trunk/include/net/sctp/structs.h b/trunk/include/net/sctp/structs.h index fc5e60016e37..536e439ddf1d 100644 --- a/trunk/include/net/sctp/structs.h +++ b/trunk/include/net/sctp/structs.h @@ -161,12 +161,6 @@ extern struct sctp_globals { int max_retrans_path; int max_retrans_init; - /* Potentially-Failed.Max.Retrans sysctl value - * taken from: - * http://tools.ietf.org/html/draft-nishida-tsvwg-sctp-failover-05 - */ - int pf_retrans; - /* * Policy for preforming sctp/socket accounting * 0 - do socket level accounting, all assocs share sk_sndbuf @@ -264,7 +258,6 @@ extern struct sctp_globals { #define sctp_sndbuf_policy (sctp_globals.sndbuf_policy) #define sctp_rcvbuf_policy (sctp_globals.rcvbuf_policy) #define sctp_max_retrans_path (sctp_globals.max_retrans_path) -#define sctp_pf_retrans (sctp_globals.pf_retrans) #define sctp_max_retrans_init (sctp_globals.max_retrans_init) #define sctp_sack_timeout (sctp_globals.sack_timeout) #define sctp_hb_interval (sctp_globals.hb_interval) @@ -997,15 +990,10 @@ struct sctp_transport { /* This is the max_retrans value for the transport and will 
* be initialized from the assocs value. This can be changed - * using the SCTP_SET_PEER_ADDR_PARAMS socket option. + * using SCTP_SET_PEER_ADDR_PARAMS socket option. */ __u16 pathmaxrxt; - /* This is the partially failed retrans value for the transport - * and will be initialized from the assocs value. This can be changed - * using the SCTP_PEER_ADDR_THLDS socket option - */ - int pf_retrans; /* PMTU : The current known path MTU. */ __u32 pathmtu; @@ -1676,12 +1664,6 @@ struct sctp_association { */ int max_retrans; - /* This is the partially failed retrans value for the transport - * and will be initialized from the assocs value. This can be - * changed using the SCTP_PEER_ADDR_THLDS socket option - */ - int pf_retrans; - /* Maximum number of times the endpoint will retransmit INIT */ __u16 max_init_attempts; diff --git a/trunk/include/net/sctp/user.h b/trunk/include/net/sctp/user.h index 1b02d7ad453b..0842ef00b2fe 100644 --- a/trunk/include/net/sctp/user.h +++ b/trunk/include/net/sctp/user.h @@ -93,7 +93,6 @@ typedef __s32 sctp_assoc_t; #define SCTP_GET_ASSOC_NUMBER 28 /* Read only */ #define SCTP_GET_ASSOC_ID_LIST 29 /* Read only */ #define SCTP_AUTO_ASCONF 30 -#define SCTP_PEER_ADDR_THLDS 31 /* Internal Socket Options. Some of the sctp library functions are * implemented using these socket options. @@ -650,7 +649,6 @@ struct sctp_paddrinfo { */ enum sctp_spinfo_state { SCTP_INACTIVE, - SCTP_PF, SCTP_ACTIVE, SCTP_UNCONFIRMED, SCTP_UNKNOWN = 0xffff /* Value used for transport state unknown */ @@ -743,13 +741,4 @@ typedef struct { int sd; } sctp_peeloff_arg_t; -/* - * Peer Address Thresholds socket option - */ -struct sctp_paddrthlds { - sctp_assoc_t spt_assoc_id; - struct sockaddr_storage spt_address; - __u16 spt_pathmaxrxt; - __u16 spt_pathpfthld; -}; #endif /* __net_sctp_user_h__ */ diff --git a/trunk/net/bridge/br_device.c b/trunk/net/bridge/br_device.c index 333484537600..f4be1bbfef26 100644 --- a/trunk/net/bridge/br_device.c +++ b/trunk/net/bridge/br_device.c @@ -127,9 +127,9 @@ static struct rtnl_link_stats64 *br_get_stats64(struct net_device *dev, const struct br_cpu_netstats *bstats = per_cpu_ptr(br->stats, cpu); do { - start = u64_stats_fetch_begin_bh(&bstats->syncp); + start = u64_stats_fetch_begin(&bstats->syncp); memcpy(&tmp, bstats, sizeof(tmp)); - } while (u64_stats_fetch_retry_bh(&bstats->syncp, start)); + } while (u64_stats_fetch_retry(&bstats->syncp, start)); sum.tx_bytes += tmp.tx_bytes; sum.tx_packets += tmp.tx_packets; sum.rx_bytes += tmp.rx_bytes; diff --git a/trunk/net/core/dev.c b/trunk/net/core/dev.c index cca02ae7a844..d70e4a3a49f2 100644 --- a/trunk/net/core/dev.c +++ b/trunk/net/core/dev.c @@ -1632,8 +1632,6 @@ static inline int deliver_skb(struct sk_buff *skb, struct packet_type *pt_prev, struct net_device *orig_dev) { - if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC))) - return -ENOMEM; atomic_inc(&skb->users); return pt_prev->func(skb, skb->dev, pt_prev, orig_dev); } @@ -3264,10 +3262,7 @@ static int __netif_receive_skb(struct sk_buff *skb) } if (pt_prev) { - if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC))) - ret = -ENOMEM; - else - ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev); + ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev); } else { atomic_long_inc(&skb->dev->rx_dropped); kfree_skb(skb); diff --git a/trunk/net/core/netprio_cgroup.c b/trunk/net/core/netprio_cgroup.c index 63d15e8f80e9..b2e9caa1ad1a 100644 --- a/trunk/net/core/netprio_cgroup.c +++ b/trunk/net/core/netprio_cgroup.c @@ -25,8 +25,6 @@ #include #include -#include - #define 
PRIOIDX_SZ 128 static unsigned long prioidx_map[PRIOIDX_SZ]; @@ -274,56 +272,6 @@ static int write_priomap(struct cgroup *cgrp, struct cftype *cft, return ret; } -void net_prio_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) -{ - struct task_struct *p; - char *tmp = kzalloc(sizeof(char) * PATH_MAX, GFP_KERNEL); - - if (!tmp) { - pr_warn("Unable to attach cgrp due to alloc failure!\n"); - return; - } - - cgroup_taskset_for_each(p, cgrp, tset) { - unsigned int fd; - struct fdtable *fdt; - struct files_struct *files; - - task_lock(p); - files = p->files; - if (!files) { - task_unlock(p); - continue; - } - - rcu_read_lock(); - fdt = files_fdtable(files); - for (fd = 0; fd < fdt->max_fds; fd++) { - char *path; - struct file *file; - struct socket *sock; - unsigned long s; - int rv, err = 0; - - file = fcheck_files(files, fd); - if (!file) - continue; - - path = d_path(&file->f_path, tmp, PAGE_SIZE); - rv = sscanf(path, "socket:[%lu]", &s); - if (rv <= 0) - continue; - - sock = sock_from_file(file, &err); - if (!err) - sock_update_netprioidx(sock->sk, p); - } - rcu_read_unlock(); - task_unlock(p); - } - kfree(tmp); -} - static struct cftype ss_files[] = { { .name = "prioidx", @@ -341,7 +289,6 @@ struct cgroup_subsys net_prio_subsys = { .name = "net_prio", .create = cgrp_create, .destroy = cgrp_destroy, - .attach = net_prio_attach, #ifdef CONFIG_NETPRIO_CGROUP .subsys_id = net_prio_subsys_id, #endif diff --git a/trunk/net/core/rtnetlink.c b/trunk/net/core/rtnetlink.c index 334b930e0de3..5bb1ebca2eb0 100644 --- a/trunk/net/core/rtnetlink.c +++ b/trunk/net/core/rtnetlink.c @@ -892,9 +892,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, nla_put_u32(skb, IFLA_GROUP, dev->group) || nla_put_u32(skb, IFLA_PROMISCUITY, dev->promiscuity) || nla_put_u32(skb, IFLA_NUM_TX_QUEUES, dev->num_tx_queues) || -#ifdef CONFIG_RPS nla_put_u32(skb, IFLA_NUM_RX_QUEUES, dev->num_rx_queues) || -#endif (dev->ifindex != dev->iflink && nla_put_u32(skb, IFLA_LINK, dev->iflink)) || (dev->master && diff --git a/trunk/net/core/skbuff.c b/trunk/net/core/skbuff.c index 368f65c15e4f..ccfcb7d8711e 100644 --- a/trunk/net/core/skbuff.c +++ b/trunk/net/core/skbuff.c @@ -784,7 +784,7 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask) skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY; return 0; } -EXPORT_SYMBOL_GPL(skb_copy_ubufs); + /** * skb_clone - duplicate an sk_buff @@ -804,8 +804,10 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask) { struct sk_buff *n; - if (skb_orphan_frags(skb, gfp_mask)) - return NULL; + if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) { + if (skb_copy_ubufs(skb, gfp_mask)) + return NULL; + } n = skb + 1; if (skb->fclone == SKB_FCLONE_ORIG && @@ -925,10 +927,12 @@ struct sk_buff *__pskb_copy(struct sk_buff *skb, int headroom, gfp_t gfp_mask) if (skb_shinfo(skb)->nr_frags) { int i; - if (skb_orphan_frags(skb, gfp_mask)) { - kfree_skb(n); - n = NULL; - goto out; + if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) { + if (skb_copy_ubufs(skb, gfp_mask)) { + kfree_skb(n); + n = NULL; + goto out; + } } for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i]; @@ -1001,8 +1005,10 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, */ if (skb_cloned(skb)) { /* copy this zero copy skb frags */ - if (skb_orphan_frags(skb, gfp_mask)) - goto nofrags; + if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) { + if (skb_copy_ubufs(skb, gfp_mask)) + goto nofrags; + } for (i = 0; i < 
skb_shinfo(skb)->nr_frags; i++) skb_frag_ref(skb, i); diff --git a/trunk/net/core/sock.c b/trunk/net/core/sock.c index 2676a88f533e..24039ac12426 100644 --- a/trunk/net/core/sock.c +++ b/trunk/net/core/sock.c @@ -1180,12 +1180,12 @@ void sock_update_classid(struct sock *sk) } EXPORT_SYMBOL(sock_update_classid); -void sock_update_netprioidx(struct sock *sk, struct task_struct *task) +void sock_update_netprioidx(struct sock *sk) { if (in_interrupt()) return; - sk->sk_cgrp_prioidx = task_netprioidx(task); + sk->sk_cgrp_prioidx = task_netprioidx(current); } EXPORT_SYMBOL_GPL(sock_update_netprioidx); #endif @@ -1215,7 +1215,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, atomic_set(&sk->sk_wmem_alloc, 1); sock_update_classid(sk); - sock_update_netprioidx(sk, current); + sock_update_netprioidx(sk); } return sk; diff --git a/trunk/net/ipv4/fib_frontend.c b/trunk/net/ipv4/fib_frontend.c index b83203658ee3..f277cf0e6321 100644 --- a/trunk/net/ipv4/fib_frontend.c +++ b/trunk/net/ipv4/fib_frontend.c @@ -1072,11 +1072,6 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo rt_cache_flush(dev_net(dev), 0); break; case NETDEV_UNREGISTER_BATCH: - /* The batch unregister is only called on the first - * device in the list of devices being unregistered. - * Therefore we should not pass dev_net(dev) in here. - */ - rt_cache_flush_batch(NULL); break; } return NOTIFY_DONE; diff --git a/trunk/net/ipv4/ip_output.c b/trunk/net/ipv4/ip_output.c index 665abbb7122a..c528f841ca4b 100644 --- a/trunk/net/ipv4/ip_output.c +++ b/trunk/net/ipv4/ip_output.c @@ -1476,8 +1476,7 @@ static DEFINE_PER_CPU(struct inet_sock, unicast_sock) = { .sk_allocation = GFP_ATOMIC, .sk_flags = (1UL << SOCK_USE_WRITE_QUEUE), }, - .pmtudisc = IP_PMTUDISC_WANT, - .uc_ttl = -1, + .pmtudisc = IP_PMTUDISC_WANT, }; void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, __be32 daddr, diff --git a/trunk/net/ipv4/route.c b/trunk/net/ipv4/route.c index d547f6fae20d..6d6146d31f22 100644 --- a/trunk/net/ipv4/route.c +++ b/trunk/net/ipv4/route.c @@ -133,10 +133,6 @@ static int ip_rt_gc_elasticity __read_mostly = 8; static int ip_rt_mtu_expires __read_mostly = 10 * 60 * HZ; static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20; static int ip_rt_min_advmss __read_mostly = 256; -static int rt_chain_length_max __read_mostly = 20; - -static struct delayed_work expires_work; -static unsigned long expires_ljiffies; /* * Interface to generic destination cache. @@ -152,7 +148,6 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb, u32 mtu); static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb); -static int rt_garbage_collect(struct dst_ops *ops); static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev, int how) @@ -172,7 +167,6 @@ static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, static struct dst_ops ipv4_dst_ops = { .family = AF_INET, .protocol = cpu_to_be16(ETH_P_IP), - .gc = rt_garbage_collect, .check = ipv4_dst_check, .default_advmss = ipv4_default_advmss, .mtu = ipv4_mtu, @@ -209,184 +203,30 @@ const __u8 ip_tos2prio[16] = { }; EXPORT_SYMBOL(ip_tos2prio); -/* - * Route cache. - */ - -/* The locking scheme is rather straight forward: - * - * 1) Read-Copy Update protects the buckets of the central route hash. - * 2) Only writers remove entries, and they hold the lock - * as they look at rtable reference counts. 
- * 3) Only readers acquire references to rtable entries, - * they do so with atomic increments and with the - * lock held. - */ - -struct rt_hash_bucket { - struct rtable __rcu *chain; -}; - -#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) || \ - defined(CONFIG_PROVE_LOCKING) -/* - * Instead of using one spinlock for each rt_hash_bucket, we use a table of spinlocks - * The size of this table is a power of two and depends on the number of CPUS. - * (on lockdep we have a quite big spinlock_t, so keep the size down there) - */ -#ifdef CONFIG_LOCKDEP -# define RT_HASH_LOCK_SZ 256 -#else -# if NR_CPUS >= 32 -# define RT_HASH_LOCK_SZ 4096 -# elif NR_CPUS >= 16 -# define RT_HASH_LOCK_SZ 2048 -# elif NR_CPUS >= 8 -# define RT_HASH_LOCK_SZ 1024 -# elif NR_CPUS >= 4 -# define RT_HASH_LOCK_SZ 512 -# else -# define RT_HASH_LOCK_SZ 256 -# endif -#endif - -static spinlock_t *rt_hash_locks; -# define rt_hash_lock_addr(slot) &rt_hash_locks[(slot) & (RT_HASH_LOCK_SZ - 1)] - -static __init void rt_hash_lock_init(void) -{ - int i; - - rt_hash_locks = kmalloc(sizeof(spinlock_t) * RT_HASH_LOCK_SZ, - GFP_KERNEL); - if (!rt_hash_locks) - panic("IP: failed to allocate rt_hash_locks\n"); - - for (i = 0; i < RT_HASH_LOCK_SZ; i++) - spin_lock_init(&rt_hash_locks[i]); -} -#else -# define rt_hash_lock_addr(slot) NULL - -static inline void rt_hash_lock_init(void) -{ -} -#endif - -static struct rt_hash_bucket *rt_hash_table __read_mostly; -static unsigned int rt_hash_mask __read_mostly; -static unsigned int rt_hash_log __read_mostly; - static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat); #define RT_CACHE_STAT_INC(field) __this_cpu_inc(rt_cache_stat.field) -static inline unsigned int rt_hash(__be32 daddr, __be32 saddr, int idx, - int genid) -{ - return jhash_3words((__force u32)daddr, (__force u32)saddr, - idx, genid) - & rt_hash_mask; -} - static inline int rt_genid(struct net *net) { return atomic_read(&net->ipv4.rt_genid); } #ifdef CONFIG_PROC_FS -struct rt_cache_iter_state { - struct seq_net_private p; - int bucket; - int genid; -}; - -static struct rtable *rt_cache_get_first(struct seq_file *seq) -{ - struct rt_cache_iter_state *st = seq->private; - struct rtable *r = NULL; - - for (st->bucket = rt_hash_mask; st->bucket >= 0; --st->bucket) { - if (!rcu_access_pointer(rt_hash_table[st->bucket].chain)) - continue; - rcu_read_lock_bh(); - r = rcu_dereference_bh(rt_hash_table[st->bucket].chain); - while (r) { - if (dev_net(r->dst.dev) == seq_file_net(seq) && - r->rt_genid == st->genid) - return r; - r = rcu_dereference_bh(r->dst.rt_next); - } - rcu_read_unlock_bh(); - } - return r; -} - -static struct rtable *__rt_cache_get_next(struct seq_file *seq, - struct rtable *r) -{ - struct rt_cache_iter_state *st = seq->private; - - r = rcu_dereference_bh(r->dst.rt_next); - while (!r) { - rcu_read_unlock_bh(); - do { - if (--st->bucket < 0) - return NULL; - } while (!rcu_access_pointer(rt_hash_table[st->bucket].chain)); - rcu_read_lock_bh(); - r = rcu_dereference_bh(rt_hash_table[st->bucket].chain); - } - return r; -} - -static struct rtable *rt_cache_get_next(struct seq_file *seq, - struct rtable *r) -{ - struct rt_cache_iter_state *st = seq->private; - while ((r = __rt_cache_get_next(seq, r)) != NULL) { - if (dev_net(r->dst.dev) != seq_file_net(seq)) - continue; - if (r->rt_genid == st->genid) - break; - } - return r; -} - -static struct rtable *rt_cache_get_idx(struct seq_file *seq, loff_t pos) -{ - struct rtable *r = rt_cache_get_first(seq); - - if (r) - while (pos && (r = rt_cache_get_next(seq, r))) - 
--pos; - return pos ? NULL : r; -} - static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos) { - struct rt_cache_iter_state *st = seq->private; if (*pos) - return rt_cache_get_idx(seq, *pos - 1); - st->genid = rt_genid(seq_file_net(seq)); + return NULL; return SEQ_START_TOKEN; } static void *rt_cache_seq_next(struct seq_file *seq, void *v, loff_t *pos) { - struct rtable *r; - - if (v == SEQ_START_TOKEN) - r = rt_cache_get_first(seq); - else - r = rt_cache_get_next(seq, v); ++*pos; - return r; + return NULL; } static void rt_cache_seq_stop(struct seq_file *seq, void *v) { - if (v && v != SEQ_START_TOKEN) - rcu_read_unlock_bh(); } static int rt_cache_seq_show(struct seq_file *seq, void *v) @@ -396,24 +236,6 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v) "Iface\tDestination\tGateway \tFlags\t\tRefCnt\tUse\t" "Metric\tSource\t\tMTU\tWindow\tIRTT\tTOS\tHHRef\t" "HHUptod\tSpecDst"); - else { - struct rtable *r = v; - int len; - - seq_printf(seq, "%s\t%08X\t%08X\t%8X\t%d\t%u\t%d\t" - "%08X\t%d\t%u\t%u\t%02X\t%d\t%1d\t%08X%n", - r->dst.dev ? r->dst.dev->name : "*", - (__force u32)r->rt_dst, - (__force u32)r->rt_gateway, - r->rt_flags, atomic_read(&r->dst.__refcnt), - r->dst.__use, 0, (__force u32)r->rt_src, - dst_metric_advmss(&r->dst) + 40, - dst_metric(&r->dst, RTAX_WINDOW), 0, - r->rt_key_tos, - -1, 0, 0, &len); - - seq_printf(seq, "%*s\n", 127 - len, ""); - } return 0; } @@ -426,8 +248,7 @@ static const struct seq_operations rt_cache_seq_ops = { static int rt_cache_seq_open(struct inode *inode, struct file *file) { - return seq_open_net(inode, file, &rt_cache_seq_ops, - sizeof(struct rt_cache_iter_state)); + return seq_open(file, &rt_cache_seq_ops); } static const struct file_operations rt_cache_seq_fops = { @@ -435,7 +256,7 @@ static const struct file_operations rt_cache_seq_fops = { .open = rt_cache_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_net, + .release = seq_release, }; @@ -625,262 +446,11 @@ static inline int ip_rt_proc_init(void) } #endif /* CONFIG_PROC_FS */ -static inline void rt_free(struct rtable *rt) -{ - call_rcu_bh(&rt->dst.rcu_head, dst_rcu_free); -} - -static inline void rt_drop(struct rtable *rt) -{ - ip_rt_put(rt); - call_rcu_bh(&rt->dst.rcu_head, dst_rcu_free); -} - -static inline int rt_fast_clean(struct rtable *rth) -{ - /* Kill broadcast/multicast entries very aggresively, if they - collide in hash table with more useful entries */ - return (rth->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) && - rt_is_input_route(rth) && rth->dst.rt_next; -} - -static inline int rt_valuable(struct rtable *rth) -{ - return (rth->rt_flags & (RTCF_REDIRECTED | RTCF_NOTIFY)) || - rth->dst.expires; -} - -static int rt_may_expire(struct rtable *rth, unsigned long tmo1, unsigned long tmo2) -{ - unsigned long age; - int ret = 0; - - if (atomic_read(&rth->dst.__refcnt)) - goto out; - - age = jiffies - rth->dst.lastuse; - if ((age <= tmo1 && !rt_fast_clean(rth)) || - (age <= tmo2 && rt_valuable(rth))) - goto out; - ret = 1; -out: return ret; -} - -/* Bits of score are: - * 31: very valuable - * 30: not quite useless - * 29..0: usage counter - */ -static inline u32 rt_score(struct rtable *rt) -{ - u32 score = jiffies - rt->dst.lastuse; - - score = ~score & ~(3<<30); - - if (rt_valuable(rt)) - score |= (1<<31); - - if (rt_is_output_route(rt) || - !(rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST|RTCF_LOCAL))) - score |= (1<<30); - - return score; -} - -static inline bool rt_caching(const struct net *net) -{ - return 
net->ipv4.current_rt_cache_rebuild_count <= - net->ipv4.sysctl_rt_cache_rebuild_count; -} - -static inline bool compare_hash_inputs(const struct rtable *rt1, - const struct rtable *rt2) -{ - return ((((__force u32)rt1->rt_key_dst ^ (__force u32)rt2->rt_key_dst) | - ((__force u32)rt1->rt_key_src ^ (__force u32)rt2->rt_key_src) | - (rt1->rt_route_iif ^ rt2->rt_route_iif)) == 0); -} - -static inline int compare_keys(struct rtable *rt1, struct rtable *rt2) -{ - return (((__force u32)rt1->rt_key_dst ^ (__force u32)rt2->rt_key_dst) | - ((__force u32)rt1->rt_key_src ^ (__force u32)rt2->rt_key_src) | - (rt1->rt_mark ^ rt2->rt_mark) | - (rt1->rt_key_tos ^ rt2->rt_key_tos) | - (rt1->rt_route_iif ^ rt2->rt_route_iif) | - (rt1->rt_oif ^ rt2->rt_oif)) == 0; -} - -static inline int compare_netns(struct rtable *rt1, struct rtable *rt2) -{ - return net_eq(dev_net(rt1->dst.dev), dev_net(rt2->dst.dev)); -} - static inline int rt_is_expired(struct rtable *rth) { return rth->rt_genid != rt_genid(dev_net(rth->dst.dev)); } -/* - * Perform a full scan of hash table and free all entries. - * Can be called by a softirq or a process. - * In the later case, we want to be reschedule if necessary - */ -static void rt_do_flush(struct net *net, int process_context) -{ - unsigned int i; - struct rtable *rth, *next; - - for (i = 0; i <= rt_hash_mask; i++) { - struct rtable __rcu **pprev; - struct rtable *list; - - if (process_context && need_resched()) - cond_resched(); - rth = rcu_access_pointer(rt_hash_table[i].chain); - if (!rth) - continue; - - spin_lock_bh(rt_hash_lock_addr(i)); - - list = NULL; - pprev = &rt_hash_table[i].chain; - rth = rcu_dereference_protected(*pprev, - lockdep_is_held(rt_hash_lock_addr(i))); - - while (rth) { - next = rcu_dereference_protected(rth->dst.rt_next, - lockdep_is_held(rt_hash_lock_addr(i))); - - if (!net || - net_eq(dev_net(rth->dst.dev), net)) { - rcu_assign_pointer(*pprev, next); - rcu_assign_pointer(rth->dst.rt_next, list); - list = rth; - } else { - pprev = &rth->dst.rt_next; - } - rth = next; - } - - spin_unlock_bh(rt_hash_lock_addr(i)); - - for (; list; list = next) { - next = rcu_dereference_protected(list->dst.rt_next, 1); - rt_free(list); - } - } -} - -/* - * While freeing expired entries, we compute average chain length - * and standard deviation, using fixed-point arithmetic. - * This to have an estimation of rt_chain_length_max - * rt_chain_length_max = max(elasticity, AVG + 4*SD) - * We use 3 bits for frational part, and 29 (or 61) for magnitude. - */ - -#define FRACT_BITS 3 -#define ONE (1UL << FRACT_BITS) - -/* - * Given a hash chain and an item in this hash chain, - * find if a previous entry has the same hash_inputs - * (but differs on tos, mark or oif) - * Returns 0 if an alias is found. - * Returns ONE if rth has no alias before itself. 
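compare_keys() and compare_hash_inputs(), restored above, test several flow fields at once by OR-ing the XOR of each field pair and comparing the result with zero, trading a chain of conditional branches for straight-line arithmetic in the lookup hot path. The same idiom lifted into a standalone program, with an illustrative flow_key layout:

#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>

struct flow_key {
	uint32_t dst, src, mark, tos, iif, oif;
};

/* Branch-free equality: any differing field leaves a nonzero bit. */
static bool keys_equal(const struct flow_key *a, const struct flow_key *b)
{
	return ((a->dst ^ b->dst) |
		(a->src ^ b->src) |
		(a->mark ^ b->mark) |
		(a->tos ^ b->tos) |
		(a->iif ^ b->iif) |
		(a->oif ^ b->oif)) == 0;
}

int main(void)
{
	struct flow_key k1 = { 1, 2, 0, 0x10, 3, 0 };
	struct flow_key k2 = k1;

	printf("%d\n", keys_equal(&k1, &k2));	/* 1 */
	k2.tos = 0x0c;
	printf("%d\n", keys_equal(&k1, &k2));	/* 0 */
	return 0;
}

The XOR form also lets all six comparisons issue independently on out-of-order hardware, which is why it tends to appear in per-packet paths.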
- */ -static int has_noalias(const struct rtable *head, const struct rtable *rth) -{ - const struct rtable *aux = head; - - while (aux != rth) { - if (compare_hash_inputs(aux, rth)) - return 0; - aux = rcu_dereference_protected(aux->dst.rt_next, 1); - } - return ONE; -} - -static void rt_check_expire(void) -{ - static unsigned int rover; - unsigned int i = rover, goal; - struct rtable *rth; - struct rtable __rcu **rthp; - unsigned long samples = 0; - unsigned long sum = 0, sum2 = 0; - unsigned long delta; - u64 mult; - - delta = jiffies - expires_ljiffies; - expires_ljiffies = jiffies; - mult = ((u64)delta) << rt_hash_log; - if (ip_rt_gc_timeout > 1) - do_div(mult, ip_rt_gc_timeout); - goal = (unsigned int)mult; - if (goal > rt_hash_mask) - goal = rt_hash_mask + 1; - for (; goal > 0; goal--) { - unsigned long tmo = ip_rt_gc_timeout; - unsigned long length; - - i = (i + 1) & rt_hash_mask; - rthp = &rt_hash_table[i].chain; - - if (need_resched()) - cond_resched(); - - samples++; - - if (rcu_dereference_raw(*rthp) == NULL) - continue; - length = 0; - spin_lock_bh(rt_hash_lock_addr(i)); - while ((rth = rcu_dereference_protected(*rthp, - lockdep_is_held(rt_hash_lock_addr(i)))) != NULL) { - prefetch(rth->dst.rt_next); - if (rt_is_expired(rth) || - rt_may_expire(rth, tmo, ip_rt_gc_timeout)) { - *rthp = rth->dst.rt_next; - rt_free(rth); - continue; - } - - /* We only count entries on a chain with equal - * hash inputs once so that entries for - * different QOS levels, and other non-hash - * input attributes don't unfairly skew the - * length computation - */ - tmo >>= 1; - rthp = &rth->dst.rt_next; - length += has_noalias(rt_hash_table[i].chain, rth); - } - spin_unlock_bh(rt_hash_lock_addr(i)); - sum += length; - sum2 += length*length; - } - if (samples) { - unsigned long avg = sum / samples; - unsigned long sd = int_sqrt(sum2 / samples - avg*avg); - rt_chain_length_max = max_t(unsigned long, - ip_rt_gc_elasticity, - (avg + 4*sd) >> FRACT_BITS); - } - rover = i; -} - -/* - * rt_worker_func() is run in process context. - * we call rt_check_expire() to scan part of the hash table - */ -static void rt_worker_func(struct work_struct *work) -{ - rt_check_expire(); - schedule_delayed_work(&expires_work, ip_rt_gc_interval); -} - /* * Perturbation of rt_genid by a small quantity [1..256] * Using 8 bits of shuffling ensure we can call rt_cache_invalidate() @@ -902,167 +472,6 @@ static void rt_cache_invalidate(struct net *net) void rt_cache_flush(struct net *net, int delay) { rt_cache_invalidate(net); - if (delay >= 0) - rt_do_flush(net, !in_softirq()); -} - -/* Flush previous cache invalidated entries from the cache */ -void rt_cache_flush_batch(struct net *net) -{ - rt_do_flush(net, !in_softirq()); -} - -static void rt_emergency_hash_rebuild(struct net *net) -{ - net_warn_ratelimited("Route hash chain too long!\n"); - rt_cache_invalidate(net); -} - -/* - Short description of GC goals. - - We want to build algorithm, which will keep routing cache - at some equilibrium point, when number of aged off entries - is kept approximately equal to newly generated ones. - - Current expiration strength is variable "expire". - We try to adjust it dynamically, so that if networking - is idle expires is large enough to keep enough of warm entries, - and when load increases it reduces to limit cache size. 
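The expiry scan above tracks chain lengths in fixed point, with FRACT_BITS == 3 fractional bits (ONE == 8), so the mean and standard deviation stay in integer arithmetic; the chain-length cap is then max(elasticity, avg + 4*sd) shifted back to whole entries. A self-contained sketch of that computation, with the kernel's int_sqrt() replaced by a naive integer square root:

#include <stdio.h>

#define FRACT_BITS 3
#define ONE (1UL << FRACT_BITS)

static unsigned long int_sqrt(unsigned long x)
{
	unsigned long r = 0;

	while ((r + 1) * (r + 1) <= x)
		r++;
	return r;
}

/* lengths[] are chain lengths in ONE units, as has_noalias() reports them. */
static unsigned long chain_length_max(const unsigned long *lengths,
				      unsigned long samples,
				      unsigned long elasticity)
{
	unsigned long sum = 0, sum2 = 0, avg, sd, cap, i;

	for (i = 0; i < samples; i++) {
		sum += lengths[i];
		sum2 += lengths[i] * lengths[i];
	}
	avg = sum / samples;
	sd = int_sqrt(sum2 / samples - avg * avg);
	cap = (avg + 4 * sd) >> FRACT_BITS;	/* back to whole entries */
	return cap > elasticity ? cap : elasticity;
}

int main(void)
{
	unsigned long lengths[] = { 2 * ONE, 3 * ONE, 2 * ONE, 5 * ONE };

	printf("max chain length: %lu\n", chain_length_max(lengths, 4, 8));
	return 0;
}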
- */ - -static int rt_garbage_collect(struct dst_ops *ops) -{ - static unsigned long expire = RT_GC_TIMEOUT; - static unsigned long last_gc; - static int rover; - static int equilibrium; - struct rtable *rth; - struct rtable __rcu **rthp; - unsigned long now = jiffies; - int goal; - int entries = dst_entries_get_fast(&ipv4_dst_ops); - - /* - * Garbage collection is pretty expensive, - * do not make it too frequently. - */ - - RT_CACHE_STAT_INC(gc_total); - - if (now - last_gc < ip_rt_gc_min_interval && - entries < ip_rt_max_size) { - RT_CACHE_STAT_INC(gc_ignored); - goto out; - } - - entries = dst_entries_get_slow(&ipv4_dst_ops); - /* Calculate number of entries, which we want to expire now. */ - goal = entries - (ip_rt_gc_elasticity << rt_hash_log); - if (goal <= 0) { - if (equilibrium < ipv4_dst_ops.gc_thresh) - equilibrium = ipv4_dst_ops.gc_thresh; - goal = entries - equilibrium; - if (goal > 0) { - equilibrium += min_t(unsigned int, goal >> 1, rt_hash_mask + 1); - goal = entries - equilibrium; - } - } else { - /* We are in dangerous area. Try to reduce cache really - * aggressively. - */ - goal = max_t(unsigned int, goal >> 1, rt_hash_mask + 1); - equilibrium = entries - goal; - } - - if (now - last_gc >= ip_rt_gc_min_interval) - last_gc = now; - - if (goal <= 0) { - equilibrium += goal; - goto work_done; - } - - do { - int i, k; - - for (i = rt_hash_mask, k = rover; i >= 0; i--) { - unsigned long tmo = expire; - - k = (k + 1) & rt_hash_mask; - rthp = &rt_hash_table[k].chain; - spin_lock_bh(rt_hash_lock_addr(k)); - while ((rth = rcu_dereference_protected(*rthp, - lockdep_is_held(rt_hash_lock_addr(k)))) != NULL) { - if (!rt_is_expired(rth) && - !rt_may_expire(rth, tmo, expire)) { - tmo >>= 1; - rthp = &rth->dst.rt_next; - continue; - } - *rthp = rth->dst.rt_next; - rt_free(rth); - goal--; - } - spin_unlock_bh(rt_hash_lock_addr(k)); - if (goal <= 0) - break; - } - rover = k; - - if (goal <= 0) - goto work_done; - - /* Goal is not achieved. We stop process if: - - - if expire reduced to zero. Otherwise, expire is halfed. - - if table is not full. - - if we are called from interrupt. - - jiffies check is just fallback/debug loop breaker. - We will not spin here for long time in any case. 
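The collector above tunes its aging threshold adaptively: a pass that misses its goal halves expire, so entries age out faster under pressure, while a successful pass adds the minimum GC interval back, capped at the configured timeout. The control loop in isolation, with illustrative constants and a canned sequence of pass outcomes:

#include <stdbool.h>
#include <stdio.h>

#define GC_TIMEOUT	300UL	/* illustrative, in jiffies */
#define GC_MIN_INTERVAL	 50UL

static unsigned long expire = GC_TIMEOUT;

/* One GC pass: adjust the threshold based on whether the goal was met. */
static void gc_pass(bool reached_goal)
{
	if (reached_goal) {
		/* Relax: let entries live longer again, up to the cap. */
		expire += GC_MIN_INTERVAL;
		if (expire > GC_TIMEOUT)
			expire = GC_TIMEOUT;
	} else {
		/* Under pressure: halve so more entries become eligible. */
		expire >>= 1;
	}
}

int main(void)
{
	bool outcomes[] = { false, false, true, false, true, true };
	unsigned long i;

	for (i = 0; i < sizeof(outcomes) / sizeof(outcomes[0]); i++) {
		gc_pass(outcomes[i]);
		printf("pass %lu: expire=%lu\n", i, expire);
	}
	return 0;
}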
- */ - - RT_CACHE_STAT_INC(gc_goal_miss); - - if (expire == 0) - break; - - expire >>= 1; - - if (dst_entries_get_fast(&ipv4_dst_ops) < ip_rt_max_size) - goto out; - } while (!in_softirq() && time_before_eq(jiffies, now)); - - if (dst_entries_get_fast(&ipv4_dst_ops) < ip_rt_max_size) - goto out; - if (dst_entries_get_slow(&ipv4_dst_ops) < ip_rt_max_size) - goto out; - net_warn_ratelimited("dst cache overflow\n"); - RT_CACHE_STAT_INC(gc_dst_overflow); - return 1; - -work_done: - expire += ip_rt_gc_min_interval; - if (expire > ip_rt_gc_timeout || - dst_entries_get_fast(&ipv4_dst_ops) < ipv4_dst_ops.gc_thresh || - dst_entries_get_slow(&ipv4_dst_ops) < ipv4_dst_ops.gc_thresh) - expire = ip_rt_gc_timeout; -out: return 0; -} - -/* - * Returns number of entries in a hash chain that have different hash_inputs - */ -static int slow_chain_length(const struct rtable *head) -{ - int length = 0; - const struct rtable *rth = head; - - while (rth) { - length += has_noalias(head, rth); - rth = rcu_dereference_protected(rth->dst.rt_next, 1); - } - return length >> FRACT_BITS; } static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, @@ -1086,139 +495,6 @@ static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, return neigh_create(&arp_tbl, pkey, dev); } -static struct rtable *rt_intern_hash(unsigned int hash, struct rtable *rt, - struct sk_buff *skb, int ifindex) -{ - struct rtable *rth, *cand; - struct rtable __rcu **rthp, **candp; - unsigned long now; - u32 min_score; - int chain_length; - -restart: - chain_length = 0; - min_score = ~(u32)0; - cand = NULL; - candp = NULL; - now = jiffies; - - if (!rt_caching(dev_net(rt->dst.dev)) || (rt->dst.flags & DST_NOCACHE)) { - /* - * If we're not caching, just tell the caller we - * were successful and don't touch the route. The - * caller hold the sole reference to the cache entry, and - * it will be released when the caller is done with it. - * If we drop it here, the callers have no way to resolve routes - * when we're not caching. Instead, just point *rp at rt, so - * the caller gets a single use out of the route - * Note that we do rt_free on this new route entry, so that - * once its refcount hits zero, we are still able to reap it - * (Thanks Alexey) - * Note: To avoid expensive rcu stuff for this uncached dst, - * we set DST_NOCACHE so that dst_release() can free dst without - * waiting a grace period. - */ - - rt->dst.flags |= DST_NOCACHE; - goto skip_hashing; - } - - rthp = &rt_hash_table[hash].chain; - - spin_lock_bh(rt_hash_lock_addr(hash)); - while ((rth = rcu_dereference_protected(*rthp, - lockdep_is_held(rt_hash_lock_addr(hash)))) != NULL) { - if (rt_is_expired(rth)) { - *rthp = rth->dst.rt_next; - rt_free(rth); - continue; - } - if (compare_keys(rth, rt) && compare_netns(rth, rt)) { - /* Put it first */ - *rthp = rth->dst.rt_next; - /* - * Since lookup is lockfree, the deletion - * must be visible to another weakly ordered CPU before - * the insertion at the start of the hash chain. - */ - rcu_assign_pointer(rth->dst.rt_next, - rt_hash_table[hash].chain); - /* - * Since lookup is lockfree, the update writes - * must be ordered for consistency on SMP. 
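The move-to-front logic above only stays safe for lock-free readers because both pointer writes are ordered publishes: the entry's next pointer must point at the old chain head before the head is repointed at the entry. A simplified C11 model of that ordering; real RCU also defers freeing and brackets readers with rcu_read_lock(), both omitted here:

#include <stdatomic.h>
#include <stddef.h>
#include <stdio.h>

struct node {
	int key;
	_Atomic(struct node *) next;
};

static _Atomic(struct node *) head;

/* Writer side (bucket lock held by caller): move *prevp to the front. */
static void move_to_front(_Atomic(struct node *) *prevp)
{
	struct node *n = atomic_load_explicit(prevp, memory_order_relaxed);

	/* Unlink: readers past this point simply skip n. */
	atomic_store_explicit(prevp,
		atomic_load_explicit(&n->next, memory_order_relaxed),
		memory_order_release);
	/* n->next must be visible before n becomes the new head. */
	atomic_store_explicit(&n->next,
		atomic_load_explicit(&head, memory_order_relaxed),
		memory_order_release);
	atomic_store_explicit(&head, n, memory_order_release);
}

/* Reader side: no locks, only dependency-ordered loads. */
static struct node *lookup(int key)
{
	struct node *n = atomic_load_explicit(&head, memory_order_consume);

	while (n) {
		if (n->key == key)
			return n;
		n = atomic_load_explicit(&n->next, memory_order_consume);
	}
	return NULL;
}

int main(void)
{
	static struct node c = { 3, NULL }, b = { 2, &c }, a = { 1, &b };

	atomic_store(&head, &a);
	move_to_front(&b.next);		/* move c to the front */
	printf("found: %d\n", lookup(3) ? 3 : -1);
	return 0;
}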
- */ - rcu_assign_pointer(rt_hash_table[hash].chain, rth); - - dst_use(&rth->dst, now); - spin_unlock_bh(rt_hash_lock_addr(hash)); - - rt_drop(rt); - if (skb) - skb_dst_set(skb, &rth->dst); - return rth; - } - - if (!atomic_read(&rth->dst.__refcnt)) { - u32 score = rt_score(rth); - - if (score <= min_score) { - cand = rth; - candp = rthp; - min_score = score; - } - } - - chain_length++; - - rthp = &rth->dst.rt_next; - } - - if (cand) { - /* ip_rt_gc_elasticity used to be average length of chain - * length, when exceeded gc becomes really aggressive. - * - * The second limit is less certain. At the moment it allows - * only 2 entries per bucket. We will see. - */ - if (chain_length > ip_rt_gc_elasticity) { - *candp = cand->dst.rt_next; - rt_free(cand); - } - } else { - if (chain_length > rt_chain_length_max && - slow_chain_length(rt_hash_table[hash].chain) > rt_chain_length_max) { - struct net *net = dev_net(rt->dst.dev); - int num = ++net->ipv4.current_rt_cache_rebuild_count; - if (!rt_caching(net)) { - pr_warn("%s: %d rebuilds is over limit, route caching disabled\n", - rt->dst.dev->name, num); - } - rt_emergency_hash_rebuild(net); - spin_unlock_bh(rt_hash_lock_addr(hash)); - - hash = rt_hash(rt->rt_key_dst, rt->rt_key_src, - ifindex, rt_genid(net)); - goto restart; - } - } - - rt->dst.rt_next = rt_hash_table[hash].chain; - - /* - * Since lookup is lockfree, we must make sure - * previous writes to rt are committed to memory - * before making rt visible to other CPUS. - */ - rcu_assign_pointer(rt_hash_table[hash].chain, rt); - - spin_unlock_bh(rt_hash_lock_addr(hash)); - -skip_hashing: - if (skb) - skb_dst_set(skb, &rt->dst); - return rt; -} - /* * Peer allocation may fail only in serious out-of-memory conditions. However * we still can generate some output. @@ -1255,26 +531,6 @@ void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more) } EXPORT_SYMBOL(__ip_select_ident); -static void rt_del(unsigned int hash, struct rtable *rt) -{ - struct rtable __rcu **rthp; - struct rtable *aux; - - rthp = &rt_hash_table[hash].chain; - spin_lock_bh(rt_hash_lock_addr(hash)); - ip_rt_put(rt); - while ((aux = rcu_dereference_protected(*rthp, - lockdep_is_held(rt_hash_lock_addr(hash)))) != NULL) { - if (aux == rt || rt_is_expired(aux)) { - *rthp = aux->dst.rt_next; - rt_free(aux); - continue; - } - rthp = &aux->dst.rt_next; - } - spin_unlock_bh(rt_hash_lock_addr(hash)); -} - static void __build_flow_key(struct flowi4 *fl4, const struct sock *sk, const struct iphdr *iph, int oif, u8 tos, @@ -1518,10 +774,7 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) ret = NULL; } else if ((rt->rt_flags & RTCF_REDIRECTED) || rt->dst.expires) { - unsigned int hash = rt_hash(rt->rt_key_dst, rt->rt_key_src, - rt->rt_oif, - rt_genid(dev_net(dst->dev))); - rt_del(hash, rt); + ip_rt_put(rt); ret = NULL; } } @@ -1969,7 +1222,7 @@ static struct rtable *rt_dst_alloc(struct net_device *dev, bool nopolicy, bool noxfrm) { return dst_alloc(&ipv4_dst_ops, dev, 1, -1, - DST_HOST | + DST_HOST | DST_NOCACHE | (nopolicy ? DST_NOPOLICY : 0) | (noxfrm ? 
DST_NOXFRM : 0)); } @@ -1978,7 +1231,6 @@ static struct rtable *rt_dst_alloc(struct net_device *dev, static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, u8 tos, struct net_device *dev, int our) { - unsigned int hash; struct rtable *rth; struct in_device *in_dev = __in_dev_get_rcu(dev); u32 itag = 0; @@ -2042,9 +1294,8 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, #endif RT_CACHE_STAT_INC(in_slow_mc); - hash = rt_hash(daddr, saddr, dev->ifindex, rt_genid(dev_net(dev))); - rth = rt_intern_hash(hash, rth, skb, dev->ifindex); - return IS_ERR(rth) ? PTR_ERR(rth) : 0; + skb_dst_set(skb, &rth->dst); + return 0; e_nobufs: return -ENOBUFS; @@ -2176,7 +1427,6 @@ static int ip_mkroute_input(struct sk_buff *skb, { struct rtable *rth = NULL; int err; - unsigned int hash; #ifdef CONFIG_IP_ROUTE_MULTIPATH if (res->fi && res->fi->fib_nhs > 1) @@ -2188,12 +1438,7 @@ static int ip_mkroute_input(struct sk_buff *skb, if (err) return err; - /* put it into the cache */ - hash = rt_hash(daddr, saddr, fl4->flowi4_iif, - rt_genid(dev_net(rth->dst.dev))); - rth = rt_intern_hash(hash, rth, skb, fl4->flowi4_iif); - if (IS_ERR(rth)) - return PTR_ERR(rth); + skb_dst_set(skb, &rth->dst); return 0; } @@ -2217,7 +1462,6 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, unsigned int flags = 0; u32 itag = 0; struct rtable *rth; - unsigned int hash; int err = -EINVAL; struct net *net = dev_net(dev); @@ -2339,11 +1583,8 @@ out: return err; rth->dst.error= -err; rth->rt_flags &= ~RTCF_LOCAL; } - hash = rt_hash(daddr, saddr, fl4.flowi4_iif, rt_genid(net)); - rth = rt_intern_hash(hash, rth, skb, fl4.flowi4_iif); + skb_dst_set(skb, &rth->dst); err = 0; - if (IS_ERR(rth)) - err = PTR_ERR(rth); goto out; no_route: @@ -2382,46 +1623,10 @@ out: return err; int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr, u8 tos, struct net_device *dev, bool noref) { - struct rtable *rth; - unsigned int hash; - int iif = dev->ifindex; - struct net *net; int res; - net = dev_net(dev); - rcu_read_lock(); - if (!rt_caching(net)) - goto skip_cache; - - tos &= IPTOS_RT_MASK; - hash = rt_hash(daddr, saddr, iif, rt_genid(net)); - - for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; - rth = rcu_dereference(rth->dst.rt_next)) { - if ((((__force u32)rth->rt_key_dst ^ (__force u32)daddr) | - ((__force u32)rth->rt_key_src ^ (__force u32)saddr) | - (rth->rt_route_iif ^ iif) | - (rth->rt_key_tos ^ tos)) == 0 && - rth->rt_mark == skb->mark && - net_eq(dev_net(rth->dst.dev), net) && - !rt_is_expired(rth)) { - if (noref) { - dst_use_noref(&rth->dst, jiffies); - skb_dst_set_noref(skb, &rth->dst); - } else { - dst_use(&rth->dst, jiffies); - skb_dst_set(skb, &rth->dst); - } - RT_CACHE_STAT_INC(in_hit); - rcu_read_unlock(); - return 0; - } - RT_CACHE_STAT_INC(in_hlist_search); - } - -skip_cache: /* Multicast recognition logic is moved from route cache to here. The problem was that too many Ethernet cards have broken/missing hardware multicast filters :-( As result the host on multicasting @@ -2563,10 +1768,9 @@ static struct rtable *__mkroute_output(const struct fib_result *res, /* * Major route resolver routine. 
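With the cache gone, the input paths above hand the freshly built rtable straight to skb_dst_set(), and rt_dst_alloc() now marks every route DST_NOCACHE; since such a dst is never published to lock-free readers, dropping the last reference can free it immediately instead of waiting out an RCU grace period. A sketch of the release policy that flag selects; rcu_defer_free() is a stand-in for call_rcu(), and the struct layout is illustrative:

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct dst {
	int refcnt;
	bool nocache;	/* never visible to lock-free readers */
};

static void rcu_defer_free(struct dst *d)
{
	/* Stand-in for call_rcu(): free once all readers are done. */
	printf("deferred free\n");
	free(d);
}

static void dst_release(struct dst *d)
{
	if (--d->refcnt > 0)
		return;
	if (d->nocache) {
		printf("immediate free\n");	/* sole owner */
		free(d);
	} else {
		rcu_defer_free(d);	/* cached: wait out readers */
	}
}

int main(void)
{
	struct dst *d = malloc(sizeof(*d));

	d->refcnt = 1;
	d->nocache = true;
	dst_release(d);		/* freed immediately */
	return 0;
}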
- * called with rcu_read_lock(); */ -static struct rtable *ip_route_output_slow(struct net *net, struct flowi4 *fl4) +struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4) { struct net_device *dev_out = NULL; __u8 tos = RT_FL_TOS(fl4); @@ -2746,57 +1950,11 @@ static struct rtable *ip_route_output_slow(struct net *net, struct flowi4 *fl4) make_route: rth = __mkroute_output(&res, fl4, orig_daddr, orig_saddr, orig_oif, tos, dev_out, flags); - if (!IS_ERR(rth)) { - unsigned int hash; - - hash = rt_hash(orig_daddr, orig_saddr, orig_oif, - rt_genid(dev_net(dev_out))); - rth = rt_intern_hash(hash, rth, NULL, orig_oif); - } out: rcu_read_unlock(); return rth; } - -struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *flp4) -{ - struct rtable *rth; - unsigned int hash; - - if (!rt_caching(net)) - goto slow_output; - - hash = rt_hash(flp4->daddr, flp4->saddr, flp4->flowi4_oif, rt_genid(net)); - - rcu_read_lock_bh(); - for (rth = rcu_dereference_bh(rt_hash_table[hash].chain); rth; - rth = rcu_dereference_bh(rth->dst.rt_next)) { - if (rth->rt_key_dst == flp4->daddr && - rth->rt_key_src == flp4->saddr && - rt_is_output_route(rth) && - rth->rt_oif == flp4->flowi4_oif && - rth->rt_mark == flp4->flowi4_mark && - !((rth->rt_key_tos ^ flp4->flowi4_tos) & - (IPTOS_RT_MASK | RTO_ONLINK)) && - net_eq(dev_net(rth->dst.dev), net) && - !rt_is_expired(rth)) { - dst_use(&rth->dst, jiffies); - RT_CACHE_STAT_INC(out_hit); - rcu_read_unlock_bh(); - if (!flp4->saddr) - flp4->saddr = rth->rt_src; - if (!flp4->daddr) - flp4->daddr = rth->rt_dst; - return rth; - } - RT_CACHE_STAT_INC(out_hlist_search); - } - rcu_read_unlock_bh(); - -slow_output: - return ip_route_output_slow(net, flp4); -} EXPORT_SYMBOL_GPL(__ip_route_output_key); static struct dst_entry *ipv4_blackhole_dst_check(struct dst_entry *dst, u32 cookie) @@ -3106,43 +2264,6 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb) { - struct rtable *rt; - int h, s_h; - int idx, s_idx; - struct net *net; - - net = sock_net(skb->sk); - - s_h = cb->args[0]; - if (s_h < 0) - s_h = 0; - s_idx = idx = cb->args[1]; - for (h = s_h; h <= rt_hash_mask; h++, s_idx = 0) { - if (!rt_hash_table[h].chain) - continue; - rcu_read_lock_bh(); - for (rt = rcu_dereference_bh(rt_hash_table[h].chain), idx = 0; rt; - rt = rcu_dereference_bh(rt->dst.rt_next), idx++) { - if (!net_eq(dev_net(rt->dst.dev), net) || idx < s_idx) - continue; - if (rt_is_expired(rt)) - continue; - skb_dst_set_noref(skb, &rt->dst); - if (rt_fill_info(net, skb, NETLINK_CB(cb->skb).pid, - cb->nlh->nlmsg_seq, RTM_NEWROUTE, - 1, NLM_F_MULTI) <= 0) { - skb_dst_drop(skb); - rcu_read_unlock_bh(); - goto done; - } - skb_dst_drop(skb); - } - rcu_read_unlock_bh(); - } - -done: - cb->args[0] = h; - cb->args[1] = idx; return skb->len; } @@ -3376,22 +2497,6 @@ static __net_initdata struct pernet_operations ipv4_inetpeer_ops = { struct ip_rt_acct __percpu *ip_rt_acct __read_mostly; #endif /* CONFIG_IP_ROUTE_CLASSID */ -static __initdata unsigned long rhash_entries; -static int __init set_rhash_entries(char *str) -{ - ssize_t ret; - - if (!str) - return 0; - - ret = kstrtoul(str, 0, &rhash_entries); - if (ret) - return 0; - - return 1; -} -__setup("rhash_entries=", set_rhash_entries); - int __init ip_rt_init(void) { int rc = 0; @@ -3414,31 +2519,12 @@ int __init ip_rt_init(void) if (dst_entries_init(&ipv4_dst_blackhole_ops) < 0) panic("IP: failed to allocate ipv4_dst_blackhole_ops counter\n"); - 
rt_hash_table = (struct rt_hash_bucket *) - alloc_large_system_hash("IP route cache", - sizeof(struct rt_hash_bucket), - rhash_entries, - (totalram_pages >= 128 * 1024) ? - 15 : 17, - 0, - &rt_hash_log, - &rt_hash_mask, - 0, - rhash_entries ? 0 : 512 * 1024); - memset(rt_hash_table, 0, (rt_hash_mask + 1) * sizeof(struct rt_hash_bucket)); - rt_hash_lock_init(); - - ipv4_dst_ops.gc_thresh = (rt_hash_mask + 1); - ip_rt_max_size = (rt_hash_mask + 1) * 16; + ipv4_dst_ops.gc_thresh = ~0; + ip_rt_max_size = INT_MAX; devinet_init(); ip_fib_init(); - INIT_DELAYED_WORK_DEFERRABLE(&expires_work, rt_worker_func); - expires_ljiffies = jiffies; - schedule_delayed_work(&expires_work, - net_random() % ip_rt_gc_interval + ip_rt_gc_interval); - if (ip_rt_proc_init()) pr_err("Unable to create route proc files\n"); #ifdef CONFIG_XFRM diff --git a/trunk/net/openvswitch/actions.c b/trunk/net/openvswitch/actions.c index f3f96badf5aa..48badffaafc1 100644 --- a/trunk/net/openvswitch/actions.c +++ b/trunk/net/openvswitch/actions.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007-2012 Nicira, Inc. + * Copyright (c) 2007-2012 Nicira Networks. * * This program is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public diff --git a/trunk/net/openvswitch/datapath.c b/trunk/net/openvswitch/datapath.c index d8277d29e710..2c74daa5aca5 100644 --- a/trunk/net/openvswitch/datapath.c +++ b/trunk/net/openvswitch/datapath.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007-2012 Nicira, Inc. + * Copyright (c) 2007-2012 Nicira Networks. * * This program is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public @@ -263,15 +263,14 @@ int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb, static int queue_gso_packets(int dp_ifindex, struct sk_buff *skb, const struct dp_upcall_info *upcall_info) { - unsigned short gso_type = skb_shinfo(skb)->gso_type; struct dp_upcall_info later_info; struct sw_flow_key later_key; struct sk_buff *segs, *nskb; int err; segs = skb_gso_segment(skb, NETIF_F_SG | NETIF_F_HW_CSUM); - if (IS_ERR(segs)) - return PTR_ERR(segs); + if (IS_ERR(skb)) + return PTR_ERR(skb); /* Queue all of the segments. */ skb = segs; @@ -280,7 +279,7 @@ static int queue_gso_packets(int dp_ifindex, struct sk_buff *skb, if (err) break; - if (skb == segs && gso_type & SKB_GSO_UDP) { + if (skb == segs && skb_shinfo(skb)->gso_type & SKB_GSO_UDP) { /* The initial flow key extracted by ovs_flow_extract() * in this case is for a first fragment, so we need to * properly mark later fragments. @@ -1650,9 +1649,7 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info) if (!err && a[OVS_VPORT_ATTR_OPTIONS]) err = ovs_vport_set_options(vport, a[OVS_VPORT_ATTR_OPTIONS]); - if (err) - goto exit_unlock; - if (a[OVS_VPORT_ATTR_UPCALL_PID]) + if (!err && a[OVS_VPORT_ATTR_UPCALL_PID]) vport->upcall_pid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]); reply = ovs_vport_cmd_build_info(vport, info->snd_pid, info->snd_seq, diff --git a/trunk/net/openvswitch/datapath.h b/trunk/net/openvswitch/datapath.h index c1105c147531..c73370cc1f02 100644 --- a/trunk/net/openvswitch/datapath.h +++ b/trunk/net/openvswitch/datapath.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007-2012 Nicira, Inc. + * Copyright (c) 2007-2011 Nicira Networks. 
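Several call sites in this patch, tun_alloc_skb() and skb_gso_segment() among them, report failure through the returned pointer itself, so the caller must test IS_ERR() on exactly the value just returned and recover the errno with PTR_ERR(). The convention reimplemented as a small standalone program; alloc_buf() is an invented example, not a kernel function:

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

#define MAX_ERRNO 4095

/* Negative errnos live in the top page of the address space. */
static inline void *ERR_PTR(long error)
{
	return (void *)error;
}

static inline long PTR_ERR(const void *ptr)
{
	return (long)ptr;
}

static inline int IS_ERR(const void *ptr)
{
	return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

static void *alloc_buf(size_t len)
{
	void *p;

	if (len == 0)
		return ERR_PTR(-EINVAL);
	p = malloc(len);
	if (!p)
		return ERR_PTR(-ENOMEM);
	return p;
}

int main(void)
{
	void *buf = alloc_buf(0);

	if (IS_ERR(buf)) {	/* test the value just returned */
		printf("error: %ld\n", PTR_ERR(buf));
		return 1;
	}
	free(buf);
	return 0;
}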
* * This program is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public diff --git a/trunk/net/openvswitch/dp_notify.c b/trunk/net/openvswitch/dp_notify.c index 36dcee8fc84a..46736518c453 100644 --- a/trunk/net/openvswitch/dp_notify.c +++ b/trunk/net/openvswitch/dp_notify.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007-2012 Nicira, Inc. + * Copyright (c) 2007-2011 Nicira Networks. * * This program is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public diff --git a/trunk/net/openvswitch/flow.c b/trunk/net/openvswitch/flow.c index b7f38b161909..6d4d8097cf96 100644 --- a/trunk/net/openvswitch/flow.c +++ b/trunk/net/openvswitch/flow.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007-2011 Nicira, Inc. + * Copyright (c) 2007-2011 Nicira Networks. * * This program is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public @@ -182,8 +182,7 @@ void ovs_flow_used(struct sw_flow *flow, struct sk_buff *skb) { u8 tcp_flags = 0; - if ((flow->key.eth.type == htons(ETH_P_IP) || - flow->key.eth.type == htons(ETH_P_IPV6)) && + if (flow->key.eth.type == htons(ETH_P_IP) && flow->key.ip.proto == IPPROTO_TCP && likely(skb->len >= skb_transport_offset(skb) + sizeof(struct tcphdr))) { u8 *tcp = (u8 *)tcp_hdr(skb); diff --git a/trunk/net/openvswitch/flow.h b/trunk/net/openvswitch/flow.h index 9b75617ca4e0..2747dc2c4ac1 100644 --- a/trunk/net/openvswitch/flow.h +++ b/trunk/net/openvswitch/flow.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007-2011 Nicira, Inc. + * Copyright (c) 2007-2011 Nicira Networks. * * This program is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public diff --git a/trunk/net/openvswitch/vport-internal_dev.c b/trunk/net/openvswitch/vport-internal_dev.c index 4061b9ee07f7..b6b1d7daa3cb 100644 --- a/trunk/net/openvswitch/vport-internal_dev.c +++ b/trunk/net/openvswitch/vport-internal_dev.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007-2012 Nicira, Inc. + * Copyright (c) 2007-2011 Nicira Networks. * * This program is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public @@ -24,9 +24,6 @@ #include #include -#include -#include - #include "datapath.h" #include "vport-internal_dev.h" #include "vport-netdev.h" @@ -212,11 +209,6 @@ static int internal_dev_recv(struct vport *vport, struct sk_buff *skb) int len; len = skb->len; - - skb_dst_drop(skb); - nf_reset(skb); - secpath_reset(skb); - skb->dev = netdev; skb->pkt_type = PACKET_HOST; skb->protocol = eth_type_trans(skb, netdev); diff --git a/trunk/net/openvswitch/vport-internal_dev.h b/trunk/net/openvswitch/vport-internal_dev.h index 9a7d30ecc6a2..3454447c5f11 100644 --- a/trunk/net/openvswitch/vport-internal_dev.h +++ b/trunk/net/openvswitch/vport-internal_dev.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007-2011 Nicira, Inc. + * Copyright (c) 2007-2011 Nicira Networks. * * This program is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public diff --git a/trunk/net/openvswitch/vport-netdev.c b/trunk/net/openvswitch/vport-netdev.c index 6ea3551cc78c..3fd6c0d88e12 100644 --- a/trunk/net/openvswitch/vport-netdev.c +++ b/trunk/net/openvswitch/vport-netdev.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007-2012 Nicira, Inc. + * Copyright (c) 2007-2011 Nicira Networks. 
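ovs_flow_used() above samples TCP flags only after checking the EtherType, the IP protocol, and that the packet really extends past the transport header; byte 13 of the TCP header holds the flag bits. The same guarded read over a raw buffer; the fixed offsets assume an untagged IPv4 packet with no IP options:

#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

#define ETH_HLEN	 14
#define IP_HLEN		 20	/* assumption: no IP options */
#define TCP_HLEN	 20
#define TCP_FLAGS_OFFSET 13	/* flag byte within the TCP header */

static int tcp_flags(const uint8_t *pkt, size_t len, uint8_t *flags)
{
	/* EtherType IPv4? */
	if (len < ETH_HLEN || pkt[12] != 0x08 || pkt[13] != 0x00)
		return -1;
	/* Transport protocol TCP? */
	if (len < ETH_HLEN + IP_HLEN || pkt[ETH_HLEN + 9] != 6)
		return -1;
	/* Whole TCP header actually present? */
	if (len < ETH_HLEN + IP_HLEN + TCP_HLEN)
		return -1;
	*flags = pkt[ETH_HLEN + IP_HLEN + TCP_FLAGS_OFFSET];
	return 0;
}

int main(void)
{
	uint8_t pkt[ETH_HLEN + IP_HLEN + TCP_HLEN] = { 0 };
	uint8_t flags;

	pkt[12] = 0x08; pkt[13] = 0x00;			/* ETH_P_IP */
	pkt[ETH_HLEN + 9] = 6;				/* IPPROTO_TCP */
	pkt[ETH_HLEN + IP_HLEN + TCP_FLAGS_OFFSET] = 0x12; /* SYN|ACK */

	if (tcp_flags(pkt, sizeof(pkt), &flags) == 0)
		printf("flags: 0x%02x\n", flags);
	return 0;
}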
* * This program is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public diff --git a/trunk/net/openvswitch/vport-netdev.h b/trunk/net/openvswitch/vport-netdev.h index f7072a25c604..fd9b008a0e6e 100644 --- a/trunk/net/openvswitch/vport-netdev.h +++ b/trunk/net/openvswitch/vport-netdev.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007-2011 Nicira, Inc. + * Copyright (c) 2007-2011 Nicira Networks. * * This program is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public diff --git a/trunk/net/openvswitch/vport.c b/trunk/net/openvswitch/vport.c index 6140336e79d7..6c066ba25dc7 100644 --- a/trunk/net/openvswitch/vport.c +++ b/trunk/net/openvswitch/vport.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007-2012 Nicira, Inc. + * Copyright (c) 2007-2011 Nicira Networks. * * This program is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public diff --git a/trunk/net/openvswitch/vport.h b/trunk/net/openvswitch/vport.h index aac680ca2b06..19609629dabd 100644 --- a/trunk/net/openvswitch/vport.h +++ b/trunk/net/openvswitch/vport.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007-2012 Nicira, Inc. + * Copyright (c) 2007-2011 Nicira Networks. * * This program is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public diff --git a/trunk/net/sctp/associola.c b/trunk/net/sctp/associola.c index ebaef3ed6065..8cf348e62e74 100644 --- a/trunk/net/sctp/associola.c +++ b/trunk/net/sctp/associola.c @@ -124,8 +124,6 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a * socket values. */ asoc->max_retrans = sp->assocparams.sasoc_asocmaxrxt; - asoc->pf_retrans = sctp_pf_retrans; - asoc->rto_initial = msecs_to_jiffies(sp->rtoinfo.srto_initial); asoc->rto_max = msecs_to_jiffies(sp->rtoinfo.srto_max); asoc->rto_min = msecs_to_jiffies(sp->rtoinfo.srto_min); @@ -688,9 +686,6 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc, /* Set the path max_retrans. */ peer->pathmaxrxt = asoc->pathmaxrxt; - /* And the partial failure retrnas threshold */ - peer->pf_retrans = asoc->pf_retrans; - /* Initialize the peer's SACK delay timeout based on the * association configured value. */ @@ -846,7 +841,6 @@ void sctp_assoc_control_transport(struct sctp_association *asoc, struct sctp_ulpevent *event; struct sockaddr_storage addr; int spc_state = 0; - bool ulp_notify = true; /* Record the transition on the transport. */ switch (command) { @@ -860,14 +854,6 @@ void sctp_assoc_control_transport(struct sctp_association *asoc, spc_state = SCTP_ADDR_CONFIRMED; else spc_state = SCTP_ADDR_AVAILABLE; - /* Don't inform ULP about transition from PF to - * active state and set cwnd to 1, see SCTP - * Quick failover draft section 5.1, point 5 - */ - if (transport->state == SCTP_PF) { - ulp_notify = false; - transport->cwnd = 1; - } transport->state = SCTP_ACTIVE; break; @@ -886,11 +872,6 @@ void sctp_assoc_control_transport(struct sctp_association *asoc, spc_state = SCTP_ADDR_UNREACHABLE; break; - case SCTP_TRANSPORT_PF: - transport->state = SCTP_PF; - ulp_notify = false; - break; - default: return; } @@ -898,15 +879,12 @@ void sctp_assoc_control_transport(struct sctp_association *asoc, /* Generate and send a SCTP_PEER_ADDR_CHANGE notification to the * user. 
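sctp_assoc_control_transport() maps each transport command onto a new state plus the SCTP_PEER_ADDR_CHANGE event delivered just below: UP makes the path ACTIVE and reports it CONFIRMED on first contact or AVAILABLE otherwise, while DOWN reports it UNREACHABLE and demotes it to INACTIVE unless it was never confirmed. A condensed approximation of that mapping, with abbreviated enum names:

#include <stdio.h>

enum state { UNCONFIRMED, INACTIVE, ACTIVE };
enum event { ADDR_AVAILABLE, ADDR_CONFIRMED, ADDR_UNREACHABLE, ADDR_NONE };
enum cmd   { TRANSPORT_UP, TRANSPORT_DOWN };

static enum event transition(enum state *st, enum cmd c)
{
	switch (c) {
	case TRANSPORT_UP: {
		/* First confirmation vs. ordinary recovery. */
		enum event ev = (*st == UNCONFIRMED) ? ADDR_CONFIRMED
						     : ADDR_AVAILABLE;
		*st = ACTIVE;
		return ev;
	}
	case TRANSPORT_DOWN:
		/* A never-confirmed path keeps its state; either way the
		 * user is told the address became unreachable. */
		if (*st != UNCONFIRMED)
			*st = INACTIVE;
		return ADDR_UNREACHABLE;
	}
	return ADDR_NONE;
}

int main(void)
{
	enum state st = UNCONFIRMED;

	printf("event %d, state %d\n", transition(&st, TRANSPORT_UP), st);
	printf("event %d, state %d\n", transition(&st, TRANSPORT_DOWN), st);
	return 0;
}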
*/ - if (ulp_notify) { - memset(&addr, 0, sizeof(struct sockaddr_storage)); - memcpy(&addr, &transport->ipaddr, - transport->af_specific->sockaddr_len); - event = sctp_ulpevent_make_peer_addr_change(asoc, &addr, - 0, spc_state, error, GFP_ATOMIC); - if (event) - sctp_ulpq_tail_event(&asoc->ulpq, event); - } + memset(&addr, 0, sizeof(struct sockaddr_storage)); + memcpy(&addr, &transport->ipaddr, transport->af_specific->sockaddr_len); + event = sctp_ulpevent_make_peer_addr_change(asoc, &addr, + 0, spc_state, error, GFP_ATOMIC); + if (event) + sctp_ulpq_tail_event(&asoc->ulpq, event); /* Select new active and retran paths. */ @@ -922,8 +900,7 @@ void sctp_assoc_control_transport(struct sctp_association *asoc, transports) { if ((t->state == SCTP_INACTIVE) || - (t->state == SCTP_UNCONFIRMED) || - (t->state == SCTP_PF)) + (t->state == SCTP_UNCONFIRMED)) continue; if (!first || t->last_time_heard > first->last_time_heard) { second = first; diff --git a/trunk/net/sctp/outqueue.c b/trunk/net/sctp/outqueue.c index e7aa177c9522..a0fa19f5650c 100644 --- a/trunk/net/sctp/outqueue.c +++ b/trunk/net/sctp/outqueue.c @@ -792,8 +792,7 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) if (!new_transport) new_transport = asoc->peer.active_path; } else if ((new_transport->state == SCTP_INACTIVE) || - (new_transport->state == SCTP_UNCONFIRMED) || - (new_transport->state == SCTP_PF)) { + (new_transport->state == SCTP_UNCONFIRMED)) { /* If the chunk is Heartbeat or Heartbeat Ack, * send it to chunk->transport, even if it's * inactive. @@ -988,8 +987,7 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) new_transport = chunk->transport; if (!new_transport || ((new_transport->state == SCTP_INACTIVE) || - (new_transport->state == SCTP_UNCONFIRMED) || - (new_transport->state == SCTP_PF))) + (new_transport->state == SCTP_UNCONFIRMED))) new_transport = asoc->peer.active_path; if (new_transport->state == SCTP_UNCONFIRMED) continue; diff --git a/trunk/net/sctp/sm_sideeffect.c b/trunk/net/sctp/sm_sideeffect.c index fe99628e1257..8716da1a8592 100644 --- a/trunk/net/sctp/sm_sideeffect.c +++ b/trunk/net/sctp/sm_sideeffect.c @@ -76,8 +76,6 @@ static int sctp_side_effects(sctp_event_t event_type, sctp_subtype_t subtype, sctp_cmd_seq_t *commands, gfp_t gfp); -static void sctp_cmd_hb_timer_update(sctp_cmd_seq_t *cmds, - struct sctp_transport *t); /******************************************************************** * Helper functions ********************************************************************/ @@ -472,8 +470,7 @@ sctp_timer_event_t *sctp_timer_events[SCTP_NUM_TIMEOUT_TYPES] = { * notification SHOULD be sent to the upper layer. 
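The loop above re-elects the active and retransmit paths by walking every peer transport, skipping INACTIVE and UNCONFIRMED entries, and keeping the most recently heard-from candidate; the runner-up is simply whatever the new best displaced, exactly as in the hunk. The same selection in isolation, using plain integers for timestamps:

#include <stdio.h>
#include <stddef.h>

enum state { UNCONFIRMED, INACTIVE, ACTIVE };

struct transport {
	enum state state;
	long last_time_heard;
};

/* Pick the two most recently heard usable paths: *best and *second. */
static void pick_paths(struct transport *t, size_t n,
		       struct transport **best, struct transport **second)
{
	size_t i;

	*best = *second = NULL;
	for (i = 0; i < n; i++) {
		if (t[i].state == INACTIVE || t[i].state == UNCONFIRMED)
			continue;
		if (!*best ||
		    t[i].last_time_heard > (*best)->last_time_heard) {
			*second = *best;	/* displaced previous best */
			*best = &t[i];
		}
	}
}

int main(void)
{
	struct transport t[] = {
		{ ACTIVE, 100 }, { INACTIVE, 500 }, { ACTIVE, 300 },
	};
	struct transport *best, *second;

	pick_paths(t, 3, &best, &second);
	printf("best=%ld second=%ld\n", best->last_time_heard,
	       second ? second->last_time_heard : -1);
	return 0;
}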
* */ -static void sctp_do_8_2_transport_strike(sctp_cmd_seq_t *commands, - struct sctp_association *asoc, +static void sctp_do_8_2_transport_strike(struct sctp_association *asoc, struct sctp_transport *transport, int is_hb) { @@ -498,23 +495,6 @@ static void sctp_do_8_2_transport_strike(sctp_cmd_seq_t *commands, transport->error_count++; } - /* If the transport error count is greater than the pf_retrans - * threshold, and less than pathmaxrtx, then mark this transport - * as Partially Failed, ee SCTP Quick Failover Draft, secon 5.1, - * point 1 - */ - if ((transport->state != SCTP_PF) && - (asoc->pf_retrans < transport->pathmaxrxt) && - (transport->error_count > asoc->pf_retrans)) { - - sctp_assoc_control_transport(asoc, transport, - SCTP_TRANSPORT_PF, - 0); - - /* Update the hb timer to resend a heartbeat every rto */ - sctp_cmd_hb_timer_update(commands, transport); - } - if (transport->state != SCTP_INACTIVE && (transport->error_count > transport->pathmaxrxt)) { SCTP_DEBUG_PRINTK_IPADDR("transport_strike:association %p", @@ -719,10 +699,6 @@ static void sctp_cmd_transport_on(sctp_cmd_seq_t *cmds, SCTP_HEARTBEAT_SUCCESS); } - if (t->state == SCTP_PF) - sctp_assoc_control_transport(asoc, t, SCTP_TRANSPORT_UP, - SCTP_HEARTBEAT_SUCCESS); - /* The receiver of the HEARTBEAT ACK should also perform an * RTT measurement for that destination transport address * using the time value carried in the HEARTBEAT ACK chunk. @@ -1589,8 +1565,8 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, case SCTP_CMD_STRIKE: /* Mark one strike against a transport. */ - sctp_do_8_2_transport_strike(commands, asoc, - cmd->obj.transport, 0); + sctp_do_8_2_transport_strike(asoc, cmd->obj.transport, + 0); break; case SCTP_CMD_TRANSPORT_IDLE: @@ -1600,8 +1576,7 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, case SCTP_CMD_TRANSPORT_HB_SENT: t = cmd->obj.transport; - sctp_do_8_2_transport_strike(commands, asoc, - t, 1); + sctp_do_8_2_transport_strike(asoc, t, 1); t->hb_sent = 1; break; diff --git a/trunk/net/sctp/socket.c b/trunk/net/sctp/socket.c index 5e259817a7f3..5d488cdcf679 100644 --- a/trunk/net/sctp/socket.c +++ b/trunk/net/sctp/socket.c @@ -3478,56 +3478,6 @@ static int sctp_setsockopt_auto_asconf(struct sock *sk, char __user *optval, } -/* - * SCTP_PEER_ADDR_THLDS - * - * This option allows us to alter the partially failed threshold for one or all - * transports in an association. 
See Section 6.1 of: - * http://www.ietf.org/id/draft-nishida-tsvwg-sctp-failover-05.txt - */ -static int sctp_setsockopt_paddr_thresholds(struct sock *sk, - char __user *optval, - unsigned int optlen) -{ - struct sctp_paddrthlds val; - struct sctp_transport *trans; - struct sctp_association *asoc; - - if (optlen < sizeof(struct sctp_paddrthlds)) - return -EINVAL; - if (copy_from_user(&val, (struct sctp_paddrthlds __user *)optval, - sizeof(struct sctp_paddrthlds))) - return -EFAULT; - - - if (sctp_is_any(sk, (const union sctp_addr *)&val.spt_address)) { - asoc = sctp_id2assoc(sk, val.spt_assoc_id); - if (!asoc) - return -ENOENT; - list_for_each_entry(trans, &asoc->peer.transport_addr_list, - transports) { - if (val.spt_pathmaxrxt) - trans->pathmaxrxt = val.spt_pathmaxrxt; - trans->pf_retrans = val.spt_pathpfthld; - } - - if (val.spt_pathmaxrxt) - asoc->pathmaxrxt = val.spt_pathmaxrxt; - asoc->pf_retrans = val.spt_pathpfthld; - } else { - trans = sctp_addr_id2transport(sk, &val.spt_address, - val.spt_assoc_id); - if (!trans) - return -ENOENT; - - if (val.spt_pathmaxrxt) - trans->pathmaxrxt = val.spt_pathmaxrxt; - trans->pf_retrans = val.spt_pathpfthld; - } - - return 0; -} - /* API 6.2 setsockopt(), getsockopt() * * Applications use setsockopt() and getsockopt() to set or retrieve @@ -3677,9 +3627,6 @@ SCTP_STATIC int sctp_setsockopt(struct sock *sk, int level, int optname, case SCTP_AUTO_ASCONF: retval = sctp_setsockopt_auto_asconf(sk, optval, optlen); break; - case SCTP_PEER_ADDR_THLDS: - retval = sctp_setsockopt_paddr_thresholds(sk, optval, optlen); - break; default: retval = -ENOPROTOOPT; break; @@ -5551,51 +5498,6 @@ static int sctp_getsockopt_assoc_ids(struct sock *sk, int len, return 0; } -/* - * SCTP_PEER_ADDR_THLDS - * - * This option allows us to fetch the partially failed threshold for one or all - * transports in an association. 
See Section 6.1 of: - * http://www.ietf.org/id/draft-nishida-tsvwg-sctp-failover-05.txt - */ -static int sctp_getsockopt_paddr_thresholds(struct sock *sk, - char __user *optval, - int len, - int __user *optlen) -{ - struct sctp_paddrthlds val; - struct sctp_transport *trans; - struct sctp_association *asoc; - - if (len < sizeof(struct sctp_paddrthlds)) - return -EINVAL; - len = sizeof(struct sctp_paddrthlds); - if (copy_from_user(&val, (struct sctp_paddrthlds __user *)optval, len)) - return -EFAULT; - - if (sctp_is_any(sk, (const union sctp_addr *)&val.spt_address)) { - asoc = sctp_id2assoc(sk, val.spt_assoc_id); - if (!asoc) - return -ENOENT; - - val.spt_pathpfthld = asoc->pf_retrans; - val.spt_pathmaxrxt = asoc->pathmaxrxt; - } else { - trans = sctp_addr_id2transport(sk, &val.spt_address, - val.spt_assoc_id); - if (!trans) - return -ENOENT; - - val.spt_pathmaxrxt = trans->pathmaxrxt; - val.spt_pathpfthld = trans->pf_retrans; - } - - if (put_user(len, optlen) || copy_to_user(optval, &val, len)) - return -EFAULT; - - return 0; -} - SCTP_STATIC int sctp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { @@ -5734,9 +5636,6 @@ SCTP_STATIC int sctp_getsockopt(struct sock *sk, int level, int optname, case SCTP_AUTO_ASCONF: retval = sctp_getsockopt_auto_asconf(sk, len, optval, optlen); break; - case SCTP_PEER_ADDR_THLDS: - retval = sctp_getsockopt_paddr_thresholds(sk, optval, len, optlen); - break; default: retval = -ENOPROTOOPT; break; diff --git a/trunk/net/sctp/sysctl.c b/trunk/net/sctp/sysctl.c index 2b2bfe933ff1..e5fe639c89e7 100644 --- a/trunk/net/sctp/sysctl.c +++ b/trunk/net/sctp/sysctl.c @@ -140,15 +140,6 @@ static ctl_table sctp_table[] = { .extra1 = &one, .extra2 = &int_max }, - { - .procname = "pf_retrans", - .data = &sctp_pf_retrans, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, - .extra2 = &int_max - }, { .procname = "max_init_retransmits", .data = &sctp_max_retrans_init, diff --git a/trunk/net/sctp/transport.c b/trunk/net/sctp/transport.c index d1c652ed2f3d..a6b7ee9ce28a 100644 --- a/trunk/net/sctp/transport.c +++ b/trunk/net/sctp/transport.c @@ -87,7 +87,6 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer, /* Initialize the default path max_retrans. 
*/ peer->pathmaxrxt = sctp_max_retrans_path; - peer->pf_retrans = sctp_pf_retrans; INIT_LIST_HEAD(&peer->transmitted); INIT_LIST_HEAD(&peer->send_ready); @@ -596,8 +595,7 @@ unsigned long sctp_transport_timeout(struct sctp_transport *t) { unsigned long timeout; timeout = t->rto + sctp_jitter(t->rto); - if ((t->state != SCTP_UNCONFIRMED) && - (t->state != SCTP_PF)) + if (t->state != SCTP_UNCONFIRMED) timeout += t->hbinterval; timeout += jiffies; return timeout; diff --git a/trunk/net/socket.c b/trunk/net/socket.c index dfe5b66c97e0..0452dca4cd24 100644 --- a/trunk/net/socket.c +++ b/trunk/net/socket.c @@ -398,7 +398,7 @@ int sock_map_fd(struct socket *sock, int flags) } EXPORT_SYMBOL(sock_map_fd); -struct socket *sock_from_file(struct file *file, int *err) +static struct socket *sock_from_file(struct file *file, int *err) { if (file->f_op == &socket_file_ops) return file->private_data; /* set in sock_map_fd */ @@ -406,7 +406,6 @@ struct socket *sock_from_file(struct file *file, int *err) *err = -ENOTSOCK; return NULL; } -EXPORT_SYMBOL(sock_from_file); /** * sockfd_lookup - Go from a file number to its socket slot @@ -555,6 +554,8 @@ static inline int __sock_sendmsg_nosec(struct kiocb *iocb, struct socket *sock, sock_update_classid(sock->sk); + sock_update_netprioidx(sock->sk); + si->sock = sock; si->scm = NULL; si->msg = msg;
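
For readers following the revert: the core of the Quick Failover logic removed
from sctp_do_8_2_transport_strike() above is a single threshold test. Restated
as a standalone predicate it reads as below. This is an illustrative sketch
only, not kernel code: the helper name should_enter_pf is hypothetical, and it
presumes the kernel-internal sctp_transport/sctp_association fields visible in
the removed hunks.

/*
 * Sketch of the removed Partial Failure test: a transport that is not
 * already PF moves to SCTP_PF once its error count exceeds pf_retrans,
 * provided pf_retrans sits below pathmaxrxt (so configuring
 * pf_retrans >= pathmaxrxt effectively disables the feature).
 */
static bool should_enter_pf(const struct sctp_transport *t,
			    const struct sctp_association *asoc)
{
	return t->state != SCTP_PF &&
	       asoc->pf_retrans < t->pathmaxrxt &&
	       t->error_count > asoc->pf_retrans;
}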
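
Similarly, the SCTP_PEER_ADDR_THLDS option whose set/get handlers are deleted
from net/sctp/socket.c above was meant to be driven from userspace along the
following lines. A minimal sketch, assuming struct sctp_paddrthlds and the
option constant are exported to applications via <netinet/sctp.h>; the header
location, the AF_INET wildcard encoding and the example values are assumptions
of this sketch, not something the patch defines.

#include <string.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/sctp.h>	/* assumed to carry SCTP_PEER_ADDR_THLDS */

/*
 * Lower the PF threshold on every transport of an association by passing
 * the wildcard address, mirroring the sctp_is_any() branch of the removed
 * sctp_setsockopt_paddr_thresholds().
 */
static int set_pf_threshold(int sd, sctp_assoc_t assoc_id)
{
	struct sctp_paddrthlds thlds;
	struct sockaddr_in *sin = (struct sockaddr_in *)&thlds.spt_address;

	memset(&thlds, 0, sizeof(thlds));
	thlds.spt_assoc_id = assoc_id;
	sin->sin_family = AF_INET;	/* zeroed address == wildcard */
	thlds.spt_pathpfthld = 2;	/* strikes before the path goes PF */
	thlds.spt_pathmaxrxt = 0;	/* 0: leave path_max_retrans as is */

	return setsockopt(sd, IPPROTO_SCTP, SCTP_PEER_ADDR_THLDS,
			  &thlds, sizeof(thlds));
}

With this revert applied, such a setsockopt() call falls through to the
default case and fails with ENOPROTOOPT, and the net.sctp.pf_retrans sysctl
disappears along with the ctl_table entry removed from net/sctp/sysctl.c.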