diff --git a/[refs] b/[refs]
index 04aa6429bd19..0e8894b49af5 100644
--- a/[refs]
+++ b/[refs]
@@ -1,2 +1,2 @@
 ---
-refs/heads/master: 4b4f54670859074cf7670cc6fa96fe34a65846d9
+refs/heads/master: 895950c2a6565d9eefda4a38b00fa28537e39fcb
diff --git a/trunk/drivers/net/bnx2.c b/trunk/drivers/net/bnx2.c
index 85fc2c88af8e..5c811f3fa11a 100644
--- a/trunk/drivers/net/bnx2.c
+++ b/trunk/drivers/net/bnx2.c
@@ -8393,6 +8393,8 @@ bnx2_remove_one(struct pci_dev *pdev)
 	struct net_device *dev = pci_get_drvdata(pdev);
 	struct bnx2 *bp = netdev_priv(dev);
 
+	cancel_work_sync(&bp->reset_task);
+
 	unregister_netdev(dev);
 
 	if (bp->mips_firmware)
diff --git a/trunk/drivers/net/bnx2x/bnx2x_link.c b/trunk/drivers/net/bnx2x/bnx2x_link.c
index 43b0de24f391..97cbee2927fc 100644
--- a/trunk/drivers/net/bnx2x/bnx2x_link.c
+++ b/trunk/drivers/net/bnx2x/bnx2x_link.c
@@ -354,6 +354,9 @@ u8 bnx2x_ets_strict(const struct link_params *params, const u8 strict_cos)
 	struct bnx2x *bp = params->bp;
 	u32 val = 0;
 
+	if ((1 < strict_cos) && (NULL == params))
+		return -EINVAL;
+
 	DP(NETIF_MSG_LINK, "ETS enabled strict configuration\n");
 	/**
 	 * Bitmap of 5bits length. Each bit specifies whether the entry behaves
diff --git a/trunk/drivers/net/sfc/efx.c b/trunk/drivers/net/sfc/efx.c
index 711449c6e675..2166c1d0a533 100644
--- a/trunk/drivers/net/sfc/efx.c
+++ b/trunk/drivers/net/sfc/efx.c
@@ -461,6 +461,9 @@ efx_alloc_channel(struct efx_nic *efx, int i, struct efx_channel *old_channel)
 		}
 	}
 
+	spin_lock_init(&channel->tx_stop_lock);
+	atomic_set(&channel->tx_stop_count, 1);
+
 	rx_queue = &channel->rx_queue;
 	rx_queue->efx = efx;
 	setup_timer(&rx_queue->slow_fill, efx_rx_slow_fill,
@@ -1403,11 +1406,11 @@ static void efx_start_all(struct efx_nic *efx)
 	 * restart the transmit interface early so the watchdog timer stops */
 	efx_start_port(efx);
 
-	if (efx_dev_registered(efx))
-		netif_tx_wake_all_queues(efx->net_dev);
-
-	efx_for_each_channel(channel, efx)
+	efx_for_each_channel(channel, efx) {
+		if (efx_dev_registered(efx))
+			efx_wake_queue(channel);
 		efx_start_channel(channel);
+	}
 
 	if (efx->legacy_irq)
 		efx->legacy_irq_enabled = true;
@@ -1495,7 +1498,9 @@ static void efx_stop_all(struct efx_nic *efx)
 	/* Stop the kernel transmit interface late, so the watchdog
 	 * timer isn't ticking over the flush */
 	if (efx_dev_registered(efx)) {
-		netif_tx_stop_all_queues(efx->net_dev);
+		struct efx_channel *channel;
+		efx_for_each_channel(channel, efx)
+			efx_stop_queue(channel);
 		netif_tx_lock_bh(efx->net_dev);
 		netif_tx_unlock_bh(efx->net_dev);
 	}
@@ -1891,7 +1896,6 @@ static DEVICE_ATTR(phy_type, 0644, show_phy_type, NULL);
 static int efx_register_netdev(struct efx_nic *efx)
 {
 	struct net_device *net_dev = efx->net_dev;
-	struct efx_channel *channel;
 	int rc;
 
 	net_dev->watchdog_timeo = 5 * HZ;
@@ -1914,14 +1918,6 @@ static int efx_register_netdev(struct efx_nic *efx)
 	if (rc)
 		goto fail_locked;
 
-	efx_for_each_channel(channel, efx) {
-		struct efx_tx_queue *tx_queue;
-		efx_for_each_channel_tx_queue(tx_queue, channel) {
-			tx_queue->core_txq = netdev_get_tx_queue(
-				efx->net_dev, tx_queue->queue / EFX_TXQ_TYPES);
-		}
-	}
-
 	/* Always start with carrier off; PHY events will detect the link */
 	netif_carrier_off(efx->net_dev);
diff --git a/trunk/drivers/net/sfc/efx.h b/trunk/drivers/net/sfc/efx.h
index d43a7e5212b1..003fdb35b4bb 100644
--- a/trunk/drivers/net/sfc/efx.h
+++ b/trunk/drivers/net/sfc/efx.h
@@ -36,6 +36,8 @@ efx_hard_start_xmit(struct sk_buff *skb, struct net_device *net_dev);
 extern netdev_tx_t
 efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb);
 extern void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index);
+extern void efx_stop_queue(struct efx_channel *channel);
+extern void efx_wake_queue(struct efx_channel *channel);
 
 /* RX */
 extern int efx_probe_rx_queue(struct efx_rx_queue *rx_queue);
diff --git a/trunk/drivers/net/sfc/net_driver.h b/trunk/drivers/net/sfc/net_driver.h
index bdce66ddf93a..76f2fb197f0a 100644
--- a/trunk/drivers/net/sfc/net_driver.h
+++ b/trunk/drivers/net/sfc/net_driver.h
@@ -136,7 +136,6 @@ struct efx_tx_buffer {
 * @efx: The associated Efx NIC
 * @queue: DMA queue number
 * @channel: The associated channel
- * @core_txq: The networking core TX queue structure
 * @buffer: The software buffer ring
 * @txd: The hardware descriptor ring
 * @ptr_mask: The size of the ring minus 1.
@@ -149,6 +148,8 @@ struct efx_tx_buffer {
 *	variable indicates that the queue is empty.  This is to
 *	avoid cache-line ping-pong between the xmit path and the
 *	completion path.
+ * @stopped: Stopped count.
+ *	Set if this TX queue is currently stopping its port.
 * @insert_count: Current insert pointer
 *	This is the number of buffers that have been added to the
 *	software ring.
@@ -178,7 +179,7 @@ struct efx_tx_queue {
 	struct efx_nic *efx ____cacheline_aligned_in_smp;
 	unsigned queue;
 	struct efx_channel *channel;
-	struct netdev_queue *core_txq;
+	struct efx_nic *nic;
 	struct efx_tx_buffer *buffer;
 	struct efx_special_buffer txd;
 	unsigned int ptr_mask;
@@ -187,6 +188,7 @@ struct efx_tx_queue {
 	/* Members used mainly on the completion path */
 	unsigned int read_count ____cacheline_aligned_in_smp;
 	unsigned int old_write_count;
+	int stopped;
 
 	/* Members used only on the xmit path */
 	unsigned int insert_count ____cacheline_aligned_in_smp;
@@ -319,6 +321,7 @@ enum efx_rx_alloc_method {
 * @irq_moderation: IRQ moderation value (in hardware ticks)
 * @napi_dev: Net device used with NAPI
 * @napi_str: NAPI control structure
+ * @reset_work: Scheduled reset work thread
 * @work_pending: Is work pending via NAPI?
 * @eventq: Event queue buffer
 * @eventq_mask: Event queue pointer mask
@@ -339,6 +342,8 @@ enum efx_rx_alloc_method {
 * @n_rx_overlength: Count of RX_OVERLENGTH errors
 * @n_skbuff_leaks: Count of skbuffs leaked due to RX overrun
 * @rx_queue: RX queue for this channel
+ * @tx_stop_count: Core TX queue stop count
+ * @tx_stop_lock: Core TX queue stop lock
 * @tx_queue: TX queues for this channel
 */
 struct efx_channel {
@@ -377,6 +382,10 @@ struct efx_channel {
 	bool rx_pkt_csummed;
 
 	struct efx_rx_queue rx_queue;
+
+	atomic_t tx_stop_count;
+	spinlock_t tx_stop_lock;
+
 	struct efx_tx_queue tx_queue[2];
 };
diff --git a/trunk/drivers/net/sfc/tx.c b/trunk/drivers/net/sfc/tx.c
index 2f5e9da657bf..bdb92b4af683 100644
--- a/trunk/drivers/net/sfc/tx.c
+++ b/trunk/drivers/net/sfc/tx.c
@@ -30,6 +30,50 @@
 */
 #define EFX_TXQ_THRESHOLD(_efx) ((_efx)->txq_entries / 2u)
 
+/* We need to be able to nest calls to netif_tx_stop_queue(), partly
+ * because of the 2 hardware queues associated with each core queue,
+ * but also so that we can inhibit TX for reasons other than a full
+ * hardware queue.
+ */
+void efx_stop_queue(struct efx_channel *channel)
+{
+	struct efx_nic *efx = channel->efx;
+	struct efx_tx_queue *tx_queue = efx_channel_get_tx_queue(channel, 0);
+
+	if (!tx_queue)
+		return;
+
+	spin_lock_bh(&channel->tx_stop_lock);
+	netif_vdbg(efx, tx_queued, efx->net_dev, "stop TX queue\n");
+
+	atomic_inc(&channel->tx_stop_count);
+	netif_tx_stop_queue(
+		netdev_get_tx_queue(efx->net_dev,
+				    tx_queue->queue / EFX_TXQ_TYPES));
+
+	spin_unlock_bh(&channel->tx_stop_lock);
+}
+
+/* Decrement core TX queue stop count and wake it if the count is 0 */
+void efx_wake_queue(struct efx_channel *channel)
+{
+	struct efx_nic *efx = channel->efx;
+	struct efx_tx_queue *tx_queue = efx_channel_get_tx_queue(channel, 0);
+
+	if (!tx_queue)
+		return;
+
+	local_bh_disable();
+	if (atomic_dec_and_lock(&channel->tx_stop_count,
+				&channel->tx_stop_lock)) {
+		netif_vdbg(efx, tx_queued, efx->net_dev, "waking TX queue\n");
+		netif_tx_wake_queue(
+			netdev_get_tx_queue(efx->net_dev,
+					    tx_queue->queue / EFX_TXQ_TYPES));
+		spin_unlock(&channel->tx_stop_lock);
+	}
+	local_bh_enable();
+}
+
 static void efx_dequeue_buffer(struct efx_tx_queue *tx_queue,
 			       struct efx_tx_buffer *buffer)
 {
@@ -190,9 +234,9 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
 				 * checked.  Update the xmit path's
 				 * copy of read_count.
 				 */
-				netif_tx_stop_queue(tx_queue->core_txq);
+				++tx_queue->stopped;
 				/* This memory barrier protects the
-				 * change of queue state from the access
+				 * change of stopped from the access
 				 * of read_count. */
 				smp_mb();
 				tx_queue->old_read_count =
@@ -200,12 +244,10 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
 				fill_level = (tx_queue->insert_count
 					      - tx_queue->old_read_count);
 				q_space = efx->txq_entries - 1 - fill_level;
-				if (unlikely(q_space-- <= 0)) {
-					rc = NETDEV_TX_BUSY;
-					goto unwind;
-				}
+				if (unlikely(q_space-- <= 0))
+					goto stop;
 				smp_mb();
-				netif_tx_start_queue(tx_queue->core_txq);
+				--tx_queue->stopped;
 			}
 
 			insert_ptr = tx_queue->insert_count & tx_queue->ptr_mask;
@@ -265,6 +307,13 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
 
 	/* Mark the packet as transmitted, and free the SKB ourselves */
 	dev_kfree_skb_any(skb);
+	goto unwind;
+
+ stop:
+	rc = NETDEV_TX_BUSY;
+
+	if (tx_queue->stopped == 1)
+		efx_stop_queue(tx_queue->channel);
 
  unwind:
 	/* Work backwards until we hit the original insert pointer value */
@@ -351,21 +400,32 @@ void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index)
 {
 	unsigned fill_level;
 	struct efx_nic *efx = tx_queue->efx;
+	struct netdev_queue *queue;
 
 	EFX_BUG_ON_PARANOID(index > tx_queue->ptr_mask);
 	efx_dequeue_buffers(tx_queue, index);
 
 	/* See if we need to restart the netif queue.  This barrier
-	 * separates the update of read_count from the test of the
-	 * queue state. */
+	 * separates the update of read_count from the test of
+	 * stopped. */
 	smp_mb();
-	if (unlikely(netif_tx_queue_stopped(tx_queue->core_txq)) &&
-	    likely(efx->port_enabled)) {
+	if (unlikely(tx_queue->stopped) && likely(efx->port_enabled)) {
 		fill_level = tx_queue->insert_count - tx_queue->read_count;
 		if (fill_level < EFX_TXQ_THRESHOLD(efx)) {
 			EFX_BUG_ON_PARANOID(!efx_dev_registered(efx));
-			netif_tx_wake_queue(tx_queue->core_txq);
+
+			/* Do this under netif_tx_lock(), to avoid racing
+			 * with efx_xmit().
+			 */
+			queue = netdev_get_tx_queue(
+				efx->net_dev,
+				tx_queue->queue / EFX_TXQ_TYPES);
+			__netif_tx_lock(queue, smp_processor_id());
+			if (tx_queue->stopped) {
+				tx_queue->stopped = 0;
+				efx_wake_queue(tx_queue->channel);
+			}
+			__netif_tx_unlock(queue);
 		}
 	}
 
@@ -427,6 +487,7 @@ void efx_init_tx_queue(struct efx_tx_queue *tx_queue)
 	tx_queue->read_count = 0;
 	tx_queue->old_read_count = 0;
 	tx_queue->empty_read_count = 0 | EFX_EMPTY_COUNT_VALID;
+	BUG_ON(tx_queue->stopped);
 
 	/* Set up TX descriptor ring */
 	efx_nic_init_tx(tx_queue);
@@ -462,6 +523,12 @@ void efx_fini_tx_queue(struct efx_tx_queue *tx_queue)
 
 	/* Free up TSO header cache */
 	efx_fini_tso(tx_queue);
+
+	/* Release queue's stop on port, if any */
+	if (tx_queue->stopped) {
+		tx_queue->stopped = 0;
+		efx_wake_queue(tx_queue->channel);
+	}
 }
 
 void efx_remove_tx_queue(struct efx_tx_queue *tx_queue)
@@ -703,9 +770,9 @@ static int efx_tx_queue_insert(struct efx_tx_queue *tx_queue,
 			 * since the xmit path last checked.  Update
 			 * the xmit path's copy of read_count.
 			 */
-			netif_tx_stop_queue(tx_queue->core_txq);
+			++tx_queue->stopped;
 			/* This memory barrier protects the change of
-			 * queue state from the access of read_count. */
+			 * stopped from the access of read_count. */
 			smp_mb();
 			tx_queue->old_read_count =
 				ACCESS_ONCE(tx_queue->read_count);
@@ -717,7 +784,7 @@ static int efx_tx_queue_insert(struct efx_tx_queue *tx_queue,
 				return 1;
 			}
 			smp_mb();
-			netif_tx_start_queue(tx_queue->core_txq);
+			--tx_queue->stopped;
 		}
 
 		insert_ptr = tx_queue->insert_count & tx_queue->ptr_mask;
@@ -1057,10 +1124,8 @@ static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue,
 
 	while (1) {
 		rc = tso_fill_packet_with_fragment(tx_queue, skb, &state);
-		if (unlikely(rc)) {
-			rc2 = NETDEV_TX_BUSY;
-			goto unwind;
-		}
+		if (unlikely(rc))
+			goto stop;
 
 		/* Move onto the next fragment? */
 		if (state.in_len == 0) {
@@ -1089,6 +1154,14 @@ static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue,
 	netif_err(efx, tx_err, efx->net_dev,
 		  "Out of memory for TSO headers, or PCI mapping error\n");
 	dev_kfree_skb_any(skb);
+	goto unwind;
+
+ stop:
+	rc2 = NETDEV_TX_BUSY;
+
+	/* Stop the queue if it wasn't stopped before. */
+	if (tx_queue->stopped == 1)
+		efx_stop_queue(tx_queue->channel);
 
  unwind:
 	/* Free the DMA mapping we were in the process of writing out */
diff --git a/trunk/drivers/net/stmmac/stmmac_main.c b/trunk/drivers/net/stmmac/stmmac_main.c
index 34a0af3837f9..20f803df8681 100644
--- a/trunk/drivers/net/stmmac/stmmac_main.c
+++ b/trunk/drivers/net/stmmac/stmmac_main.c
@@ -1647,8 +1647,10 @@ static int stmmac_dvr_probe(struct platform_device *pdev)
 	pr_info("STMMAC driver:\n\tplatform registration... ");
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!res)
-		return -ENODEV;
+	if (!res) {
+		ret = -ENODEV;
+		goto out;
+	}
 	pr_info("\tdone!\n");
 
 	if (!request_mem_region(res->start, resource_size(res),
@@ -1656,21 +1658,22 @@ static int stmmac_dvr_probe(struct platform_device *pdev)
 		pr_err("%s: ERROR: memory allocation failed"
 		       "cannot get the I/O addr 0x%x\n",
 		       __func__, (unsigned int)res->start);
-		return -EBUSY;
+		ret = -EBUSY;
+		goto out;
 	}
 
 	addr = ioremap(res->start, resource_size(res));
 	if (!addr) {
 		pr_err("%s: ERROR: memory mapping failed\n", __func__);
 		ret = -ENOMEM;
-		goto out_release_region;
+		goto out;
 	}
 
 	ndev = alloc_etherdev(sizeof(struct stmmac_priv));
 	if (!ndev) {
 		pr_err("%s: ERROR: allocating the device\n", __func__);
 		ret = -ENOMEM;
-		goto out_unmap;
+		goto out;
 	}
 
 	SET_NETDEV_DEV(ndev, &pdev->dev);
@@ -1680,8 +1683,8 @@ static int stmmac_dvr_probe(struct platform_device *pdev)
 	if (ndev->irq == -ENXIO) {
 		pr_err("%s: ERROR: MAC IRQ configuration "
 		       "information not found\n", __func__);
-		ret = -ENXIO;
-		goto out_free_ndev;
+		ret = -ENODEV;
+		goto out;
 	}
 
 	priv = netdev_priv(ndev);
@@ -1708,18 +1711,18 @@ static int stmmac_dvr_probe(struct platform_device *pdev)
 	if (priv->plat->init) {
 		ret = priv->plat->init(pdev);
 		if (unlikely(ret))
-			goto out_free_ndev;
+			goto out;
 	}
 
 	/* MAC HW revice detection */
 	ret = stmmac_mac_device_setup(ndev);
 	if (ret < 0)
-		goto out_plat_exit;
+		goto out;
 
 	/* Network Device Registration */
 	ret = stmmac_probe(ndev);
 	if (ret < 0)
-		goto out_plat_exit;
+		goto out;
 
 	/* associate a PHY - it is provided by another platform bus */
 	if (!driver_for_each_device
@@ -1727,7 +1730,7 @@ static int stmmac_dvr_probe(struct platform_device *pdev)
 	    stmmac_associate_phy)) {
 		pr_err("No PHY device is associated with this MAC!\n");
 		ret = -ENODEV;
-		goto out_unregister;
+		goto out;
 	}
 
 	pr_info("\t%s - (dev. name: %s - id: %d, IRQ #%d\n"
@@ -1738,22 +1741,19 @@ static int stmmac_dvr_probe(struct platform_device *pdev)
 	pr_debug("\tMDIO bus (id: %d)...", priv->plat->bus_id);
 	ret = stmmac_mdio_register(ndev);
 	if (ret < 0)
-		goto out_unregister;
+		goto out;
 	pr_debug("registered!\n");
-	return 0;
 
-out_unregister:
-	unregister_netdev(ndev);
-out_plat_exit:
-	if (priv->plat->exit)
-		priv->plat->exit(pdev);
-out_free_ndev:
-	free_netdev(ndev);
-	platform_set_drvdata(pdev, NULL);
-out_unmap:
-	iounmap(addr);
-out_release_region:
-	release_mem_region(res->start, resource_size(res));
+out:
+	if (ret < 0) {
+		if (priv->plat->exit)
+			priv->plat->exit(pdev);
+
+		platform_set_drvdata(pdev, NULL);
+		release_mem_region(res->start, resource_size(res));
+		if (addr != NULL)
+			iounmap(addr);
+	}
 
 	return ret;
 }
diff --git a/trunk/drivers/net/sundance.c b/trunk/drivers/net/sundance.c
index e5662962c7bf..3ed2a67bd6d3 100644
--- a/trunk/drivers/net/sundance.c
+++ b/trunk/drivers/net/sundance.c
@@ -294,9 +294,6 @@ enum alta_offsets {
 	/* Aliased and bogus values! */
 	RxStatus = 0x0c,
 };
-
-#define ASIC_HI_WORD(x)	((x) + 2)
-
 enum ASICCtrl_HiWord_bit {
 	GlobalReset = 0x0001,
 	RxReset = 0x0002,
@@ -434,7 +431,6 @@ static void netdev_error(struct net_device *dev, int intr_status);
 static void netdev_error(struct net_device *dev, int intr_status);
 static void set_rx_mode(struct net_device *dev);
 static int __set_mac_addr(struct net_device *dev);
-static int sundance_set_mac_addr(struct net_device *dev, void *data);
 static struct net_device_stats *get_stats(struct net_device *dev);
 static int netdev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
 static int netdev_close(struct net_device *dev);
@@ -468,7 +464,7 @@ static const struct net_device_ops netdev_ops = {
 	.ndo_do_ioctl = netdev_ioctl,
 	.ndo_tx_timeout = tx_timeout,
 	.ndo_change_mtu = change_mtu,
-	.ndo_set_mac_address = sundance_set_mac_addr,
+	.ndo_set_mac_address = eth_mac_addr,
 	.ndo_validate_addr = eth_validate_addr,
 };
@@ -1596,19 +1592,6 @@ static int __set_mac_addr(struct net_device *dev)
 	return 0;
 }
 
-/* Invoked with rtnl_lock held */
-static int sundance_set_mac_addr(struct net_device *dev, void *data)
-{
-	const struct sockaddr *addr = data;
-
-	if (!is_valid_ether_addr(addr->sa_data))
-		return -EINVAL;
-	memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN);
-	__set_mac_addr(dev);
-
-	return 0;
-}
-
 static const struct {
 	const char name[ETH_GSTRING_LEN];
 } sundance_stats[] = {
@@ -1789,10 +1772,10 @@ static int netdev_close(struct net_device *dev)
 	}
 
 	iowrite16(GlobalReset | DMAReset | FIFOReset | NetworkReset,
-			ioaddr + ASIC_HI_WORD(ASICCtrl));
+			ioaddr +ASICCtrl + 2);
 
 	for (i = 2000; i > 0; i--) {
-		if ((ioread16(ioaddr + ASIC_HI_WORD(ASICCtrl)) & ResetBusy) == 0)
+		if ((ioread16(ioaddr + ASICCtrl +2) & ResetBusy) == 0)
 			break;
 		mdelay(1);
 	}
diff --git a/trunk/drivers/net/tg3.c b/trunk/drivers/net/tg3.c
index 57e19fb1324f..92fc29910c2d 100644
--- a/trunk/drivers/net/tg3.c
+++ b/trunk/drivers/net/tg3.c
@@ -13086,17 +13086,15 @@ static inline u32 tg3_rx_ret_ring_size(struct tg3 *tp)
 		return 512;
 }
 
+DEFINE_PCI_DEVICE_TABLE(write_reorder_chipsets) = {
+	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_FE_GATE_700C) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_8131_BRIDGE) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8385_0) },
+	{ },
+};
+
 static int __devinit tg3_get_invariants(struct tg3 *tp)
 {
-	static struct pci_device_id write_reorder_chipsets[] = {
-		{ PCI_DEVICE(PCI_VENDOR_ID_AMD,
-			     PCI_DEVICE_ID_AMD_FE_GATE_700C) },
-		{ PCI_DEVICE(PCI_VENDOR_ID_AMD,
-			     PCI_DEVICE_ID_AMD_8131_BRIDGE) },
-		{ PCI_DEVICE(PCI_VENDOR_ID_VIA,
-			     PCI_DEVICE_ID_VIA_8385_0) },
-		{ },
-	};
 	u32 misc_ctrl_reg;
 	u32 pci_state_reg, grc_misc_cfg;
 	u32 val;
@@ -14229,6 +14227,11 @@ static int __devinit tg3_do_test_dma(struct tg3 *tp, u32 *buf, dma_addr_t buf_dm
 
 #define TEST_BUFFER_SIZE	0x2000
 
+DEFINE_PCI_DEVICE_TABLE(dma_wait_state_chipsets) = {
+	{ PCI_DEVICE(PCI_VENDOR_ID_APPLE, PCI_DEVICE_ID_APPLE_UNI_N_PCI15) },
+	{ },
+};
+
 static int __devinit tg3_test_dma(struct tg3 *tp)
 {
 	dma_addr_t buf_dma;
@@ -14398,11 +14401,6 @@ static int __devinit tg3_test_dma(struct tg3 *tp)
 	}
 	if ((tp->dma_rwctrl & DMA_RWCTRL_WRITE_BNDRY_MASK) !=
 	    DMA_RWCTRL_WRITE_BNDRY_16) {
-		static struct pci_device_id dma_wait_state_chipsets[] = {
-			{ PCI_DEVICE(PCI_VENDOR_ID_APPLE,
-				     PCI_DEVICE_ID_APPLE_UNI_N_PCI15) },
-			{ },
-		};
-
 		/* DMA test passed without adjusting DMA boundary,
 		 * now look for chipsets that are known to expose the
diff --git a/trunk/include/net/tcp.h b/trunk/include/net/tcp.h
index 38509f047382..b4480300cadf 100644
--- a/trunk/include/net/tcp.h
+++ b/trunk/include/net/tcp.h
@@ -60,9 +60,6 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo);
 */
 #define MAX_TCP_WINDOW		32767U
 
-/* Offer an initial receive window of 10 mss. */
-#define TCP_DEFAULT_INIT_RCVWND	10
-
 /* Minimal accepted MSS. It is (60+60+8) - (20+20). */
 #define TCP_MIN_MSS		88U
diff --git a/trunk/net/core/dev.c b/trunk/net/core/dev.c
index a215269d2e35..59877290bca7 100644
--- a/trunk/net/core/dev.c
+++ b/trunk/net/core/dev.c
@@ -1547,6 +1547,13 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
 	struct sk_buff *skb2 = NULL;
 	struct packet_type *pt_prev = NULL;
 
+#ifdef CONFIG_NET_CLS_ACT
+	if (!(skb->tstamp.tv64 && (G_TC_FROM(skb->tc_verd) & AT_INGRESS)))
+		net_timestamp_set(skb);
+#else
+	net_timestamp_set(skb);
+#endif
+
 	rcu_read_lock();
 	list_for_each_entry_rcu(ptype, &ptype_all, list) {
 		/* Never send packets back to the socket
@@ -1565,8 +1572,6 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
 			if (!skb2)
 				break;
 
-			net_timestamp_set(skb2);
-
 			/* skb->nh should be correctly
 			   set by sender, so that the second statement is
 			   just protection against buggy protocols.
diff --git a/trunk/net/core/filter.c b/trunk/net/core/filter.c
index 2b27d4efdd48..e8a6ac411ffb 100644
--- a/trunk/net/core/filter.c
+++ b/trunk/net/core/filter.c
@@ -85,17 +85,6 @@ enum {
 	BPF_S_JMP_JGT_X,
 	BPF_S_JMP_JSET_K,
 	BPF_S_JMP_JSET_X,
-	/* Ancillary data */
-	BPF_S_ANC_PROTOCOL,
-	BPF_S_ANC_PKTTYPE,
-	BPF_S_ANC_IFINDEX,
-	BPF_S_ANC_NLATTR,
-	BPF_S_ANC_NLATTR_NEST,
-	BPF_S_ANC_MARK,
-	BPF_S_ANC_QUEUE,
-	BPF_S_ANC_HATYPE,
-	BPF_S_ANC_RXHASH,
-	BPF_S_ANC_CPU,
 };
 
 /* No hurry in this branch */
@@ -118,7 +107,11 @@ static inline void *load_pointer(const struct sk_buff *skb, int k,
 {
 	if (k >= 0)
 		return skb_header_pointer(skb, k, size, buffer);
-	return __load_pointer(skb, k, size);
+	else {
+		if (k >= SKF_AD_OFF)
+			return NULL;
+		return __load_pointer(skb, k, size);
+	}
 }
 
 /**
@@ -276,7 +269,7 @@ unsigned int sk_run_filter(const struct sk_buff *skb,
 				A = get_unaligned_be32(ptr);
 				continue;
 			}
-			return 0;
+			break;
 		case BPF_S_LD_H_ABS:
 			k = K;
 load_h:
@@ -285,7 +278,7 @@ unsigned int sk_run_filter(const struct sk_buff *skb,
 				A = get_unaligned_be16(ptr);
 				continue;
 			}
-			return 0;
+			break;
 		case BPF_S_LD_B_ABS:
 			k = K;
 load_b:
@@ -294,7 +287,7 @@ unsigned int sk_run_filter(const struct sk_buff *skb,
 				A = *(u8 *)ptr;
 				continue;
 			}
-			return 0;
+			break;
 		case BPF_S_LD_W_LEN:
 			A = skb->len;
 			continue;
@@ -345,35 +338,45 @@ unsigned int sk_run_filter(const struct sk_buff *skb,
 		case BPF_S_STX:
 			mem[K] = X;
 			continue;
-		case BPF_S_ANC_PROTOCOL:
+		default:
+			WARN_ON(1);
+			return 0;
+		}
+
+		/*
+		 * Handle ancillary data, which are impossible
+		 * (or very difficult) to get parsing packet contents.
+		 */
+		switch (k-SKF_AD_OFF) {
+		case SKF_AD_PROTOCOL:
 			A = ntohs(skb->protocol);
 			continue;
-		case BPF_S_ANC_PKTTYPE:
+		case SKF_AD_PKTTYPE:
 			A = skb->pkt_type;
 			continue;
-		case BPF_S_ANC_IFINDEX:
+		case SKF_AD_IFINDEX:
 			if (!skb->dev)
 				return 0;
 			A = skb->dev->ifindex;
 			continue;
-		case BPF_S_ANC_MARK:
+		case SKF_AD_MARK:
 			A = skb->mark;
 			continue;
-		case BPF_S_ANC_QUEUE:
+		case SKF_AD_QUEUE:
 			A = skb->queue_mapping;
 			continue;
-		case BPF_S_ANC_HATYPE:
+		case SKF_AD_HATYPE:
 			if (!skb->dev)
 				return 0;
 			A = skb->dev->type;
 			continue;
-		case BPF_S_ANC_RXHASH:
+		case SKF_AD_RXHASH:
 			A = skb->rxhash;
 			continue;
-		case BPF_S_ANC_CPU:
+		case SKF_AD_CPU:
 			A = raw_smp_processor_id();
 			continue;
-		case BPF_S_ANC_NLATTR: {
+		case SKF_AD_NLATTR: {
 			struct nlattr *nla;
 
 			if (skb_is_nonlinear(skb))
@@ -389,7 +392,7 @@ unsigned int sk_run_filter(const struct sk_buff *skb,
 				A = 0;
 			continue;
 		}
-		case BPF_S_ANC_NLATTR_NEST: {
+		case SKF_AD_NLATTR_NEST: {
 			struct nlattr *nla;
 
 			if (skb_is_nonlinear(skb))
@@ -409,7 +412,6 @@ unsigned int sk_run_filter(const struct sk_buff *skb,
 			continue;
 		}
 		default:
-			WARN_ON(1);
 			return 0;
 		}
 	}
@@ -598,24 +600,6 @@ int sk_chk_filter(struct sock_filter *filter, int flen)
 			    pc + ftest->jf + 1 >= flen)
 				return -EINVAL;
 			break;
-		case BPF_S_LD_W_ABS:
-		case BPF_S_LD_H_ABS:
-		case BPF_S_LD_B_ABS:
-#define ANCILLARY(CODE) case SKF_AD_OFF + SKF_AD_##CODE:	\
-				code = BPF_S_ANC_##CODE;	\
-				break
-			switch (ftest->k) {
-			ANCILLARY(PROTOCOL);
-			ANCILLARY(PKTTYPE);
-			ANCILLARY(IFINDEX);
-			ANCILLARY(NLATTR);
-			ANCILLARY(NLATTR_NEST);
-			ANCILLARY(MARK);
-			ANCILLARY(QUEUE);
-			ANCILLARY(HATYPE);
-			ANCILLARY(RXHASH);
-			ANCILLARY(CPU);
-			}
 		}
 		ftest->code = code;
 	}
diff --git a/trunk/net/ipv4/tcp_output.c b/trunk/net/ipv4/tcp_output.c
index dc7c096ddfef..2d390669d406 100644
--- a/trunk/net/ipv4/tcp_output.c
+++ b/trunk/net/ipv4/tcp_output.c
@@ -228,15 +228,10 @@ void tcp_select_initial_window(int __space, __u32 mss,
 		}
 	}
 
-	/* Set initial window to a value enough for senders starting with
-	 * initial congestion window of TCP_DEFAULT_INIT_RCVWND. Place
-	 * a limit on the initial window when mss is larger than 1460.
-	 */
+	/* Set initial window to value enough for senders, following RFC5681. */
 	if (mss > (1 << *rcv_wscale)) {
-		int init_cwnd = TCP_DEFAULT_INIT_RCVWND;
-		if (mss > 1460)
-			init_cwnd =
-			max_t(u32, (1460 * TCP_DEFAULT_INIT_RCVWND) / mss, 2);
+		int init_cwnd = rfc3390_bytes_to_packets(mss);
+
 		/* when initializing use the value from init_rcv_wnd
 		 * rather than the default from above */
diff --git a/trunk/net/sched/sch_sfq.c b/trunk/net/sched/sch_sfq.c
index 13322e8a0456..42396c965dd6 100644
--- a/trunk/net/sched/sch_sfq.c
+++ b/trunk/net/sched/sch_sfq.c
@@ -67,42 +67,27 @@
 
 	IMPLEMENTATION:
 	This implementation limits maximal queue length to 128;
-	maximal mtu to 2^15-1; max 128 flows, number of hash buckets to 1024.
+	maximal mtu to 2^15-1; number of hash buckets to 1024.
 	The only goal of this restrictions was that all data
-	fit into one 4K page on 32bit arches.
+	fit into one 4K page :-). Struct sfq_sched_data is
+	organized in anti-cache manner: all the data for a bucket
+	are scattered over different locations. This is not good,
+	but it allowed me to put it into 4K.
 
 	It is easy to increase these values, but not in flight.
 */
 
-#define SFQ_DEPTH		128 /* max number of packets per flow */
-#define SFQ_SLOTS		128 /* max number of flows */
-#define SFQ_EMPTY_SLOT		255
+#define SFQ_DEPTH		128
 #define SFQ_HASH_DIVISOR	1024
 
-/* This type should contain at least SFQ_DEPTH + SFQ_SLOTS values */
+/* This type should contain at least SFQ_DEPTH*2 values */
 typedef unsigned char sfq_index;
 
-/*
- * We dont use pointers to save space.
- * Small indexes [0 ... SFQ_SLOTS - 1] are 'pointers' to slots[] array
- * while following values [SFQ_SLOTS ... SFQ_SLOTS + SFQ_DEPTH - 1]
- * are 'pointers' to dep[] array
- */
 struct sfq_head {
 	sfq_index	next;
 	sfq_index	prev;
 };
 
-struct sfq_slot {
-	struct sk_buff	*skblist_next;
-	struct sk_buff	*skblist_prev;
-	sfq_index	qlen; /* number of skbs in skblist */
-	sfq_index	next; /* next slot in sfq chain */
-	struct sfq_head dep; /* anchor in dep[] chains */
-	unsigned short	hash; /* hash value (index in ht[]) */
-	short		allot; /* credit for this slot */
-};
-
 struct sfq_sched_data
 {
 /* Parameters */
@@ -114,24 +99,17 @@ struct sfq_sched_data
 	struct tcf_proto *filter_list;
 	struct timer_list perturb_timer;
 	u32		perturbation;
-	sfq_index	cur_depth;	/* depth of longest slot */
+	sfq_index	tail;		/* Index of current slot in round */
+	sfq_index	max_depth;	/* Maximal depth */
 
-	struct sfq_slot *tail;		/* current slot in round */
 	sfq_index	ht[SFQ_HASH_DIVISOR];	/* Hash table */
-	struct sfq_slot	slots[SFQ_SLOTS];
-	struct sfq_head	dep[SFQ_DEPTH];	/* Linked list of slots, indexed by depth */
+	sfq_index	next[SFQ_DEPTH];	/* Active slots link */
+	short		allot[SFQ_DEPTH];	/* Current allotment per slot */
+	unsigned short	hash[SFQ_DEPTH];	/* Hash value indexed by slots */
+	struct sk_buff_head	qs[SFQ_DEPTH];		/* Slot queue */
+	struct sfq_head	dep[SFQ_DEPTH*2];	/* Linked list of slots, indexed by depth */
 };
 
-/*
- * sfq_head are either in a sfq_slot or in dep[] array
- */
-static inline struct sfq_head *sfq_dep_head(struct sfq_sched_data *q, sfq_index val)
-{
-	if (val < SFQ_SLOTS)
-		return &q->slots[val].dep;
-	return &q->dep[val - SFQ_SLOTS];
-}
-
 static __inline__ unsigned sfq_fold_hash(struct sfq_sched_data *q, u32 h, u32 h1)
 {
 	return jhash_2words(h, h1, q->perturbation) & (SFQ_HASH_DIVISOR - 1);
@@ -222,41 +200,30 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
 	return 0;
 }
 
-/*
- * x : slot number [0 .. SFQ_SLOTS - 1]
- */
 static inline void sfq_link(struct sfq_sched_data *q, sfq_index x)
 {
 	sfq_index p, n;
-	int qlen = q->slots[x].qlen;
-
-	p = qlen + SFQ_SLOTS;
-	n = q->dep[qlen].next;
+	int d = q->qs[x].qlen + SFQ_DEPTH;
 
-	q->slots[x].dep.next = n;
-	q->slots[x].dep.prev = p;
-
-	q->dep[qlen].next = x;	/* sfq_dep_head(q, p)->next = x */
-	sfq_dep_head(q, n)->prev = x;
+	p = d;
+	n = q->dep[d].next;
+	q->dep[x].next = n;
+	q->dep[x].prev = p;
+	q->dep[p].next = q->dep[n].prev = x;
 }
 
-#define sfq_unlink(q, x, n, p)			\
-	n = q->slots[x].dep.next;		\
-	p = q->slots[x].dep.prev;		\
-	sfq_dep_head(q, p)->next = n;		\
-	sfq_dep_head(q, n)->prev = p
-
-
 static inline void sfq_dec(struct sfq_sched_data *q, sfq_index x)
 {
 	sfq_index p, n;
-	int d;
 
-	sfq_unlink(q, x, n, p);
+	n = q->dep[x].next;
+	p = q->dep[x].prev;
+	q->dep[p].next = n;
+	q->dep[n].prev = p;
+
+	if (n == p && q->max_depth == q->qs[x].qlen + 1)
+		q->max_depth--;
 
-	d = q->slots[x].qlen--;
-	if (n == p && q->cur_depth == d)
-		q->cur_depth--;
 	sfq_link(q, x);
 }
 
@@ -265,72 +232,34 @@ static inline void sfq_inc(struct sfq_sched_data *q, sfq_index x)
 	sfq_index p, n;
 	int d;
 
-	sfq_unlink(q, x, n, p);
+	n = q->dep[x].next;
+	p = q->dep[x].prev;
+	q->dep[p].next = n;
+	q->dep[n].prev = p;
+	d = q->qs[x].qlen;
+	if (q->max_depth < d)
+		q->max_depth = d;
 
-	d = ++q->slots[x].qlen;
-	if (q->cur_depth < d)
-		q->cur_depth = d;
 	sfq_link(q, x);
 }
 
-/* helper functions : might be changed when/if skb use a standard list_head */
-
-/* remove one skb from tail of slot queue */
-static inline struct sk_buff *slot_dequeue_tail(struct sfq_slot *slot)
-{
-	struct sk_buff *skb = slot->skblist_prev;
-
-	slot->skblist_prev = skb->prev;
-	skb->next = skb->prev = NULL;
-	return skb;
-}
-
-/* remove one skb from head of slot queue */
-static inline struct sk_buff *slot_dequeue_head(struct sfq_slot *slot)
-{
-	struct sk_buff *skb = slot->skblist_next;
-
-	slot->skblist_next = skb->next;
-	skb->next = skb->prev = NULL;
-	return skb;
-}
-
-static inline void slot_queue_init(struct sfq_slot *slot)
-{
-	slot->skblist_prev = slot->skblist_next = (struct sk_buff *)slot;
-}
-
-/* add skb to slot queue (tail add) */
-static inline void slot_queue_add(struct sfq_slot *slot, struct sk_buff *skb)
-{
-	skb->prev = slot->skblist_prev;
-	skb->next = (struct sk_buff *)slot;
-	slot->skblist_prev->next = skb;
-	slot->skblist_prev = skb;
-}
-
-#define slot_queue_walk(slot, skb)				\
-	for (skb = slot->skblist_next;				\
-	     skb != (struct sk_buff *)slot;			\
-	     skb = skb->next)
-
 static unsigned int sfq_drop(struct Qdisc *sch)
 {
 	struct sfq_sched_data *q = qdisc_priv(sch);
-	sfq_index x, d = q->cur_depth;
+	sfq_index d = q->max_depth;
 	struct sk_buff *skb;
 	unsigned int len;
-	struct sfq_slot *slot;
 
-	/* Queue is full! Find the longest slot and drop tail packet from it */
+	/* Queue is full! Find the longest slot and
+	   drop a packet from it */
+
 	if (d > 1) {
-		x = q->dep[d].next;
-		slot = &q->slots[x];
-drop:
-		skb = slot_dequeue_tail(slot);
+		sfq_index x = q->dep[d + SFQ_DEPTH].next;
+		skb = q->qs[x].prev;
 		len = qdisc_pkt_len(skb);
-		sfq_dec(q, x);
+		__skb_unlink(skb, &q->qs[x]);
 		kfree_skb(skb);
+		sfq_dec(q, x);
 		sch->q.qlen--;
 		sch->qstats.drops++;
 		sch->qstats.backlog -= len;
@@ -339,11 +268,18 @@ static unsigned int sfq_drop(struct Qdisc *sch)
 	if (d == 1) {
 		/* It is difficult to believe, but ALL THE SLOTS HAVE LENGTH 1.
 		 */
-		x = q->tail->next;
-		slot = &q->slots[x];
-		q->tail->next = slot->next;
-		q->ht[slot->hash] = SFQ_EMPTY_SLOT;
-		goto drop;
+		d = q->next[q->tail];
+		q->next[q->tail] = q->next[d];
+		skb = q->qs[d].prev;
+		len = qdisc_pkt_len(skb);
+		__skb_unlink(skb, &q->qs[d]);
+		kfree_skb(skb);
+		sfq_dec(q, d);
+		sch->q.qlen--;
+		q->ht[q->hash[d]] = SFQ_DEPTH;
+		sch->qstats.drops++;
+		sch->qstats.backlog -= len;
+		return len;
 	}
 
 	return 0;
@@ -355,7 +291,6 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	struct sfq_sched_data *q = qdisc_priv(sch);
 	unsigned int hash;
 	sfq_index x;
-	struct sfq_slot *slot;
 	int uninitialized_var(ret);
 
 	hash = sfq_classify(skb, sch, &ret);
@@ -368,33 +303,30 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	hash--;
 
 	x = q->ht[hash];
-	slot = &q->slots[x];
-	if (x == SFQ_EMPTY_SLOT) {
-		x = q->dep[0].next; /* get a free slot */
-		q->ht[hash] = x;
-		slot = &q->slots[x];
-		slot->hash = hash;
-		slot_queue_init(slot);
+	if (x == SFQ_DEPTH) {
+		q->ht[hash] = x = q->dep[SFQ_DEPTH].next;
+		q->hash[x] = hash;
 	}
 
-	/* If selected queue has length q->limit, do simple tail drop,
+	/* If selected queue has length q->limit, this means that
+	 * all another queues are empty and that we do simple tail drop,
 	 * i.e. drop _this_ packet.
 	 */
-	if (slot->qlen >= q->limit)
+	if (q->qs[x].qlen >= q->limit)
 		return qdisc_drop(skb, sch);
 
 	sch->qstats.backlog += qdisc_pkt_len(skb);
-	slot_queue_add(slot, skb);
+	__skb_queue_tail(&q->qs[x], skb);
 	sfq_inc(q, x);
-	if (slot->qlen == 1) {		/* The flow is new */
-		if (q->tail == NULL) {	/* It is the first flow */
-			slot->next = x;
+	if (q->qs[x].qlen == 1) {		/* The flow is new */
+		if (q->tail == SFQ_DEPTH) {	/* It is the first flow */
+			q->next[x] = x;
 		} else {
-			slot->next = q->tail->next;
-			q->tail->next = x;
+			q->next[x] = q->next[q->tail];
+			q->next[q->tail] = x;
 		}
-		q->tail = slot;
-		slot->allot = q->quantum;
+		q->tail = x;
+		q->allot[x] = q->quantum;
 	}
 	if (++sch->q.qlen <= q->limit) {
 		sch->bstats.bytes += qdisc_pkt_len(skb);
@@ -410,12 +342,14 @@ static struct sk_buff *
 sfq_peek(struct Qdisc *sch)
 {
 	struct sfq_sched_data *q = qdisc_priv(sch);
+	sfq_index a;
 
 	/* No active slots */
-	if (q->tail == NULL)
+	if (q->tail == SFQ_DEPTH)
 		return NULL;
 
-	return q->slots[q->tail->next].skblist_next;
+	a = q->next[q->tail];
+	return skb_peek(&q->qs[a]);
 }
 
 static struct sk_buff *
@@ -424,31 +358,31 @@ sfq_dequeue(struct Qdisc *sch)
 	struct sfq_sched_data *q = qdisc_priv(sch);
 	struct sk_buff *skb;
 	sfq_index a, next_a;
-	struct sfq_slot *slot;
 
 	/* No active slots */
-	if (q->tail == NULL)
+	if (q->tail == SFQ_DEPTH)
 		return NULL;
 
-	a = q->tail->next;
-	slot = &q->slots[a];
-	skb = slot_dequeue_head(slot);
+	a = q->next[q->tail];
+
+	/* Grab packet */
+	skb = __skb_dequeue(&q->qs[a]);
 	sfq_dec(q, a);
 	sch->q.qlen--;
 	sch->qstats.backlog -= qdisc_pkt_len(skb);
 
 	/* Is the slot empty? */
-	if (slot->qlen == 0) {
-		q->ht[slot->hash] = SFQ_EMPTY_SLOT;
-		next_a = slot->next;
+	if (q->qs[a].qlen == 0) {
+		q->ht[q->hash[a]] = SFQ_DEPTH;
+		next_a = q->next[a];
 		if (a == next_a) {
-			q->tail = NULL; /* no more active slots */
+			q->tail = SFQ_DEPTH;
 			return skb;
 		}
-		q->tail->next = next_a;
-	} else if ((slot->allot -= qdisc_pkt_len(skb)) <= 0) {
-		q->tail = slot;
-		slot->allot += q->quantum;
+		q->next[q->tail] = next_a;
+	} else if ((q->allot[a] -= qdisc_pkt_len(skb)) <= 0) {
+		q->allot[a] += q->quantum;
+		q->tail = a;
 	}
 	return skb;
 }
@@ -512,16 +446,17 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
 	init_timer_deferrable(&q->perturb_timer);
 
 	for (i = 0; i < SFQ_HASH_DIVISOR; i++)
-		q->ht[i] = SFQ_EMPTY_SLOT;
+		q->ht[i] = SFQ_DEPTH;
 
 	for (i = 0; i < SFQ_DEPTH; i++) {
-		q->dep[i].next = i + SFQ_SLOTS;
-		q->dep[i].prev = i + SFQ_SLOTS;
+		skb_queue_head_init(&q->qs[i]);
+		q->dep[i + SFQ_DEPTH].next = i + SFQ_DEPTH;
+		q->dep[i + SFQ_DEPTH].prev = i + SFQ_DEPTH;
 	}
 
 	q->limit = SFQ_DEPTH - 1;
-	q->cur_depth = 0;
-	q->tail = NULL;
+	q->max_depth = 0;
+	q->tail = SFQ_DEPTH;
 	if (opt == NULL) {
 		q->quantum = psched_mtu(qdisc_dev(sch));
 		q->perturb_period = 0;
@@ -532,7 +467,7 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
 			return err;
 	}
 
-	for (i = 0; i < SFQ_SLOTS; i++)
+	for (i = 0; i < SFQ_DEPTH; i++)
 		sfq_link(q, i);
 	return 0;
 }
@@ -608,12 +543,13 @@ static int sfq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
 				struct gnet_dump *d)
 {
 	struct sfq_sched_data *q = qdisc_priv(sch);
-	const struct sfq_slot *slot = &q->slots[q->ht[cl - 1]];
-	struct gnet_stats_queue qs = { .qlen = slot->qlen };
-	struct tc_sfq_xstats xstats = { .allot = slot->allot };
+	sfq_index idx = q->ht[cl-1];
+	struct sk_buff_head *list = &q->qs[idx];
+	struct gnet_stats_queue qs = { .qlen = list->qlen };
+	struct tc_sfq_xstats xstats = { .allot = q->allot[idx] };
 	struct sk_buff *skb;
 
-	slot_queue_walk(slot, skb)
+	skb_queue_walk(list, skb)
 		qs.backlog += qdisc_pkt_len(skb);
 
 	if (gnet_stats_copy_queue(d, &qs) < 0)
@@ -630,7 +566,7 @@ static void sfq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
 		return;
 
 	for (i = 0; i < SFQ_HASH_DIVISOR; i++) {
-		if (q->ht[i] == SFQ_EMPTY_SLOT ||
+		if (q->ht[i] == SFQ_DEPTH ||
 		    arg->count < arg->skip) {
 			arg->count++;
 			continue;