diff --git a/[refs] b/[refs] index 340c7684d0e0..83935ac37424 100644 --- a/[refs] +++ b/[refs] @@ -1,2 +1,2 @@ --- -refs/heads/master: b07f3bbee12163a6b48991138a37b87a1126462a +refs/heads/master: ae038af12c2bc0859279a1a62b5f8cb0ef00f5f8 diff --git a/trunk/MAINTAINERS b/trunk/MAINTAINERS index 6b4b9cdec370..649600cb8ec9 100644 --- a/trunk/MAINTAINERS +++ b/trunk/MAINTAINERS @@ -6916,6 +6916,13 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/mjg59/platform-drivers-x86. S: Maintained F: drivers/platform/x86 +XEN NETWORK BACKEND DRIVER +M: Ian Campbell +L: xen-devel@lists.xensource.com (moderated for non-subscribers) +L: netdev@vger.kernel.org +S: Supported +F: drivers/net/xen-netback/* + XEN PCI SUBSYSTEM M: Konrad Rzeszutek Wilk L: xen-devel@lists.xensource.com (moderated for non-subscribers) diff --git a/trunk/Makefile b/trunk/Makefile index 8392b64079df..322e7334ccb9 100644 --- a/trunk/Makefile +++ b/trunk/Makefile @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 6 SUBLEVEL = 39 -EXTRAVERSION = -rc2 +EXTRAVERSION = -rc3 NAME = Flesh-Eating Bats with Fangs # *DOCUMENTATION* diff --git a/trunk/drivers/dma/fsldma.c b/trunk/drivers/dma/fsldma.c index 6b396759e7f5..8a781540590c 100644 --- a/trunk/drivers/dma/fsldma.c +++ b/trunk/drivers/dma/fsldma.c @@ -1448,7 +1448,7 @@ static const struct of_device_id fsldma_of_ids[] = { {} }; -static struct of_platform_driver fsldma_of_driver = { +static struct platform_driver fsldma_of_driver = { .driver = { .name = "fsl-elo-dma", .owner = THIS_MODULE, diff --git a/trunk/drivers/gpio/ml_ioh_gpio.c b/trunk/drivers/gpio/ml_ioh_gpio.c index 7f6f01a4b145..0a775f7987c2 100644 --- a/trunk/drivers/gpio/ml_ioh_gpio.c +++ b/trunk/drivers/gpio/ml_ioh_gpio.c @@ -116,6 +116,7 @@ static int ioh_gpio_direction_output(struct gpio_chip *gpio, unsigned nr, reg_val |= (1 << nr); else reg_val &= ~(1 << nr); + iowrite32(reg_val, &chip->reg->regs[chip->ch].po); mutex_unlock(&chip->lock); diff --git a/trunk/drivers/gpio/pca953x.c b/trunk/drivers/gpio/pca953x.c index 583e92592073..7630ab7b9bec 100644 --- a/trunk/drivers/gpio/pca953x.c +++ b/trunk/drivers/gpio/pca953x.c @@ -558,7 +558,7 @@ static int __devinit pca953x_probe(struct i2c_client *client, ret = gpiochip_add(&chip->gpio_chip); if (ret) - goto out_failed; + goto out_failed_irq; if (pdata->setup) { ret = pdata->setup(client, chip->gpio_chip.base, @@ -570,8 +570,9 @@ static int __devinit pca953x_probe(struct i2c_client *client, i2c_set_clientdata(client, chip); return 0; -out_failed: +out_failed_irq: pca953x_irq_teardown(chip); +out_failed: kfree(chip->dyn_pdata); kfree(chip); return ret; diff --git a/trunk/drivers/gpio/pch_gpio.c b/trunk/drivers/gpio/pch_gpio.c index 2c6af8705103..f970a5f3585e 100644 --- a/trunk/drivers/gpio/pch_gpio.c +++ b/trunk/drivers/gpio/pch_gpio.c @@ -105,6 +105,7 @@ static int pch_gpio_direction_output(struct gpio_chip *gpio, unsigned nr, reg_val |= (1 << nr); else reg_val &= ~(1 << nr); + iowrite32(reg_val, &chip->reg->po); mutex_unlock(&chip->lock); diff --git a/trunk/drivers/net/benet/be.h b/trunk/drivers/net/benet/be.h index f803c58b941d..66823eded7a3 100644 --- a/trunk/drivers/net/benet/be.h +++ b/trunk/drivers/net/benet/be.h @@ -154,7 +154,7 @@ struct be_eq_obj { u16 min_eqd; /* in usecs */ u16 max_eqd; /* in usecs */ u16 cur_eqd; /* in usecs */ - u8 msix_vec_idx; + u8 eq_idx; struct napi_struct napi; }; @@ -291,7 +291,7 @@ struct be_adapter { u32 num_rx_qs; u32 big_page_size; /* Compounded page size shared by rx wrbs */ - u8 msix_vec_next_idx; + u8 eq_next_idx; struct be_drv_stats drv_stats; struct vlan_group *vlan_grp; diff --git a/trunk/drivers/net/benet/be_main.c b/trunk/drivers/net/benet/be_main.c index 9a54c8b24ff9..7cb5a114c733 100644 --- a/trunk/drivers/net/benet/be_main.c +++ b/trunk/drivers/net/benet/be_main.c @@ -1497,7 +1497,7 @@ static int be_tx_queues_create(struct be_adapter *adapter) if (be_cmd_eq_create(adapter, eq, adapter->tx_eq.cur_eqd)) goto tx_eq_free; - adapter->tx_eq.msix_vec_idx = adapter->msix_vec_next_idx++; + adapter->tx_eq.eq_idx = adapter->eq_next_idx++; /* Alloc TX eth compl queue */ @@ -1590,7 +1590,7 @@ static int be_rx_queues_create(struct be_adapter *adapter) if (rc) goto err; - rxo->rx_eq.msix_vec_idx = adapter->msix_vec_next_idx++; + rxo->rx_eq.eq_idx = adapter->eq_next_idx++; /* CQ */ cq = &rxo->cq; @@ -1666,11 +1666,11 @@ static irqreturn_t be_intx(int irq, void *dev) if (!isr) return IRQ_NONE; - if ((1 << adapter->tx_eq.msix_vec_idx & isr)) + if ((1 << adapter->tx_eq.eq_idx & isr)) event_handle(adapter, &adapter->tx_eq); for_all_rx_queues(adapter, rxo, i) { - if ((1 << rxo->rx_eq.msix_vec_idx & isr)) + if ((1 << rxo->rx_eq.eq_idx & isr)) event_handle(adapter, &rxo->rx_eq); } } @@ -1951,7 +1951,7 @@ static void be_sriov_disable(struct be_adapter *adapter) static inline int be_msix_vec_get(struct be_adapter *adapter, struct be_eq_obj *eq_obj) { - return adapter->msix_entries[eq_obj->msix_vec_idx].vector; + return adapter->msix_entries[eq_obj->eq_idx].vector; } static int be_request_irq(struct be_adapter *adapter, @@ -2345,6 +2345,7 @@ static int be_clear(struct be_adapter *adapter) be_mcc_queues_destroy(adapter); be_rx_queues_destroy(adapter); be_tx_queues_destroy(adapter); + adapter->eq_next_idx = 0; if (be_physfn(adapter) && adapter->sriov_enabled) for (vf = 0; vf < num_vfs; vf++) @@ -3141,12 +3142,14 @@ static int be_resume(struct pci_dev *pdev) static void be_shutdown(struct pci_dev *pdev) { struct be_adapter *adapter = pci_get_drvdata(pdev); - struct net_device *netdev = adapter->netdev; - if (netif_running(netdev)) + if (!adapter) + return; + + if (netif_running(adapter->netdev)) cancel_delayed_work_sync(&adapter->work); - netif_device_detach(netdev); + netif_device_detach(adapter->netdev); be_cmd_reset_function(adapter); diff --git a/trunk/drivers/net/bna/bfa_ioc.c b/trunk/drivers/net/bna/bfa_ioc.c index 34933cb9569f..e3de0b8625cd 100644 --- a/trunk/drivers/net/bna/bfa_ioc.c +++ b/trunk/drivers/net/bna/bfa_ioc.c @@ -2219,13 +2219,9 @@ bfa_nw_ioc_get_mac(struct bfa_ioc *ioc) static void bfa_ioc_recover(struct bfa_ioc *ioc) { - u16 bdf; - - bdf = (ioc->pcidev.pci_slot << 8 | ioc->pcidev.pci_func << 3 | - ioc->pcidev.device_id); - - pr_crit("Firmware heartbeat failure at %d", bdf); - BUG_ON(1); + pr_crit("Heart Beat of IOC has failed\n"); + bfa_ioc_stats(ioc, ioc_hbfails); + bfa_fsm_send_event(ioc, IOC_E_HBFAIL); } static void diff --git a/trunk/drivers/net/can/mcp251x.c b/trunk/drivers/net/can/mcp251x.c index 7513c4523ac4..330140ee266d 100644 --- a/trunk/drivers/net/can/mcp251x.c +++ b/trunk/drivers/net/can/mcp251x.c @@ -931,7 +931,8 @@ static int mcp251x_open(struct net_device *net) priv->tx_len = 0; ret = request_threaded_irq(spi->irq, NULL, mcp251x_can_ist, - IRQF_TRIGGER_FALLING, DEVICE_NAME, priv); + pdata->irq_flags ? pdata->irq_flags : IRQF_TRIGGER_FALLING, + DEVICE_NAME, priv); if (ret) { dev_err(&spi->dev, "failed to acquire irq %d\n", spi->irq); if (pdata->transceiver_enable) diff --git a/trunk/drivers/net/mlx4/en_rx.c b/trunk/drivers/net/mlx4/en_rx.c index cfd50bc49169..62dd21b06df4 100644 --- a/trunk/drivers/net/mlx4/en_rx.c +++ b/trunk/drivers/net/mlx4/en_rx.c @@ -345,6 +345,8 @@ int mlx4_en_activate_rx_rings(struct mlx4_en_priv *priv) err = mlx4_en_init_allocator(priv, ring); if (err) { en_err(priv, "Failed initializing ring allocator\n"); + if (ring->stride <= TXBB_SIZE) + ring->buf -= TXBB_SIZE; ring_ind--; goto err_allocator; } @@ -369,6 +371,8 @@ int mlx4_en_activate_rx_rings(struct mlx4_en_priv *priv) ring_ind = priv->rx_ring_num - 1; err_allocator: while (ring_ind >= 0) { + if (priv->rx_ring[ring_ind].stride <= TXBB_SIZE) + priv->rx_ring[ring_ind].buf -= TXBB_SIZE; mlx4_en_destroy_allocator(priv, &priv->rx_ring[ring_ind]); ring_ind--; } diff --git a/trunk/drivers/net/mlx4/main.c b/trunk/drivers/net/mlx4/main.c index 62fa7eec5f0c..3814fc9b1145 100644 --- a/trunk/drivers/net/mlx4/main.c +++ b/trunk/drivers/net/mlx4/main.c @@ -944,6 +944,10 @@ static int mlx4_setup_hca(struct mlx4_dev *dev) } for (port = 1; port <= dev->caps.num_ports; port++) { + enum mlx4_port_type port_type = 0; + mlx4_SENSE_PORT(dev, port, &port_type); + if (port_type) + dev->caps.port_type[port] = port_type; ib_port_default_caps = 0; err = mlx4_get_port_ib_caps(dev, port, &ib_port_default_caps); if (err) @@ -958,6 +962,7 @@ static int mlx4_setup_hca(struct mlx4_dev *dev) goto err_mcg_table_free; } } + mlx4_set_port_mask(dev); return 0; diff --git a/trunk/drivers/net/mlx4/mlx4.h b/trunk/drivers/net/mlx4/mlx4.h index c1e0e5f1bcdb..dd7d745fbab4 100644 --- a/trunk/drivers/net/mlx4/mlx4.h +++ b/trunk/drivers/net/mlx4/mlx4.h @@ -431,6 +431,8 @@ void mlx4_srq_event(struct mlx4_dev *dev, u32 srqn, int event_type); void mlx4_handle_catas_err(struct mlx4_dev *dev); +int mlx4_SENSE_PORT(struct mlx4_dev *dev, int port, + enum mlx4_port_type *type); void mlx4_do_sense_ports(struct mlx4_dev *dev, enum mlx4_port_type *stype, enum mlx4_port_type *defaults); diff --git a/trunk/drivers/net/mlx4/sense.c b/trunk/drivers/net/mlx4/sense.c index 015fbe785c13..e2337a7411d9 100644 --- a/trunk/drivers/net/mlx4/sense.c +++ b/trunk/drivers/net/mlx4/sense.c @@ -38,8 +38,8 @@ #include "mlx4.h" -static int mlx4_SENSE_PORT(struct mlx4_dev *dev, int port, - enum mlx4_port_type *type) +int mlx4_SENSE_PORT(struct mlx4_dev *dev, int port, + enum mlx4_port_type *type) { u64 out_param; int err = 0; diff --git a/trunk/drivers/net/pppoe.c b/trunk/drivers/net/pppoe.c index 693aaef4e3ce..718879b35b7d 100644 --- a/trunk/drivers/net/pppoe.c +++ b/trunk/drivers/net/pppoe.c @@ -317,7 +317,7 @@ static void pppoe_flush_dev(struct net_device *dev) lock_sock(sk); if (po->pppoe_dev == dev && - sk->sk_state & (PPPOX_CONNECTED | PPPOX_BOUND)) { + sk->sk_state & (PPPOX_CONNECTED | PPPOX_BOUND | PPPOX_ZOMBIE)) { pppox_unbind_sock(sk); sk->sk_state = PPPOX_ZOMBIE; sk->sk_state_change(sk); diff --git a/trunk/drivers/net/smsc911x.c b/trunk/drivers/net/smsc911x.c index c498b720b532..4b42ecc63dcf 100644 --- a/trunk/drivers/net/smsc911x.c +++ b/trunk/drivers/net/smsc911x.c @@ -1818,6 +1818,7 @@ static int __devinit smsc911x_init(struct net_device *dev) SMSC_TRACE(PROBE, "PHY will be autodetected."); spin_lock_init(&pdata->dev_lock); + spin_lock_init(&pdata->mac_lock); if (pdata->ioaddr == 0) { SMSC_WARNING(PROBE, "pdata->ioaddr: 0x00000000"); @@ -1895,8 +1896,11 @@ static int __devinit smsc911x_init(struct net_device *dev) /* workaround for platforms without an eeprom, where the mac address * is stored elsewhere and set by the bootloader. This saves the * mac address before resetting the device */ - if (pdata->config.flags & SMSC911X_SAVE_MAC_ADDRESS) + if (pdata->config.flags & SMSC911X_SAVE_MAC_ADDRESS) { + spin_lock_irq(&pdata->mac_lock); smsc911x_read_mac_address(dev); + spin_unlock_irq(&pdata->mac_lock); + } /* Reset the LAN911x */ if (smsc911x_soft_reset(pdata)) @@ -2059,8 +2063,6 @@ static int __devinit smsc911x_drv_probe(struct platform_device *pdev) SMSC_TRACE(PROBE, "Network interface: \"%s\"", dev->name); } - spin_lock_init(&pdata->mac_lock); - retval = smsc911x_mii_init(pdev, dev); if (retval) { SMSC_WARNING(PROBE, diff --git a/trunk/drivers/net/usb/smsc95xx.c b/trunk/drivers/net/usb/smsc95xx.c index 727874d9deb6..47a6c870b51f 100644 --- a/trunk/drivers/net/usb/smsc95xx.c +++ b/trunk/drivers/net/usb/smsc95xx.c @@ -1313,6 +1313,21 @@ static const struct usb_device_id products[] = { USB_DEVICE(0x0424, 0x9909), .driver_info = (unsigned long) &smsc95xx_info, }, + { + /* SMSC LAN9530 USB Ethernet Device */ + USB_DEVICE(0x0424, 0x9530), + .driver_info = (unsigned long) &smsc95xx_info, + }, + { + /* SMSC LAN9730 USB Ethernet Device */ + USB_DEVICE(0x0424, 0x9730), + .driver_info = (unsigned long) &smsc95xx_info, + }, + { + /* SMSC LAN89530 USB Ethernet Device */ + USB_DEVICE(0x0424, 0x9E08), + .driver_info = (unsigned long) &smsc95xx_info, + }, { }, /* END */ }; MODULE_DEVICE_TABLE(usb, products); diff --git a/trunk/drivers/net/wireless/ath/ath9k/hw.c b/trunk/drivers/net/wireless/ath/ath9k/hw.c index 338b07502f1a..1ec9bcd6b281 100644 --- a/trunk/drivers/net/wireless/ath/ath9k/hw.c +++ b/trunk/drivers/net/wireless/ath/ath9k/hw.c @@ -2546,6 +2546,7 @@ static struct { { AR_SREV_VERSION_9287, "9287" }, { AR_SREV_VERSION_9271, "9271" }, { AR_SREV_VERSION_9300, "9300" }, + { AR_SREV_VERSION_9485, "9485" }, }; /* For devices with external radios */ diff --git a/trunk/drivers/net/wireless/b43/dma.c b/trunk/drivers/net/wireless/b43/dma.c index 3d5566e7af0a..ff0f5ba14b2c 100644 --- a/trunk/drivers/net/wireless/b43/dma.c +++ b/trunk/drivers/net/wireless/b43/dma.c @@ -1536,7 +1536,7 @@ static void dma_rx(struct b43_dmaring *ring, int *slot) dmaaddr = meta->dmaaddr; goto drop_recycle_buffer; } - if (unlikely(len > ring->rx_buffersize)) { + if (unlikely(len + ring->frameoffset > ring->rx_buffersize)) { /* The data did not fit into one descriptor buffer * and is split over multiple buffers. * This should never happen, as we try to allocate buffers diff --git a/trunk/drivers/net/wireless/b43/dma.h b/trunk/drivers/net/wireless/b43/dma.h index a01c2100f166..e8a80a1251bf 100644 --- a/trunk/drivers/net/wireless/b43/dma.h +++ b/trunk/drivers/net/wireless/b43/dma.h @@ -163,7 +163,7 @@ struct b43_dmadesc_generic { /* DMA engine tuning knobs */ #define B43_TXRING_SLOTS 256 #define B43_RXRING_SLOTS 64 -#define B43_DMA0_RX_BUFFERSIZE IEEE80211_MAX_FRAME_LEN +#define B43_DMA0_RX_BUFFERSIZE (B43_DMA0_RX_FRAMEOFFSET + IEEE80211_MAX_FRAME_LEN) /* Pointer poison */ #define B43_DMA_PTR_POISON ((void *)ERR_PTR(-ENOMEM)) diff --git a/trunk/drivers/net/wireless/iwlwifi/iwl-eeprom.h b/trunk/drivers/net/wireless/iwlwifi/iwl-eeprom.h index 98aa8af01192..20b66469d68f 100644 --- a/trunk/drivers/net/wireless/iwlwifi/iwl-eeprom.h +++ b/trunk/drivers/net/wireless/iwlwifi/iwl-eeprom.h @@ -241,7 +241,7 @@ struct iwl_eeprom_enhanced_txpwr { /* 6x00 Specific */ #define EEPROM_6000_TX_POWER_VERSION (4) -#define EEPROM_6000_EEPROM_VERSION (0x434) +#define EEPROM_6000_EEPROM_VERSION (0x423) /* 6x50 Specific */ #define EEPROM_6050_TX_POWER_VERSION (4) diff --git a/trunk/drivers/net/wireless/p54/p54usb.c b/trunk/drivers/net/wireless/p54/p54usb.c index 9b344a921e74..e18358725b69 100644 --- a/trunk/drivers/net/wireless/p54/p54usb.c +++ b/trunk/drivers/net/wireless/p54/p54usb.c @@ -56,6 +56,7 @@ static struct usb_device_id p54u_table[] __devinitdata = { {USB_DEVICE(0x0846, 0x4210)}, /* Netgear WG121 the second ? */ {USB_DEVICE(0x0846, 0x4220)}, /* Netgear WG111 */ {USB_DEVICE(0x09aa, 0x1000)}, /* Spinnaker Proto board */ + {USB_DEVICE(0x0bf8, 0x1007)}, /* Fujitsu E-5400 USB */ {USB_DEVICE(0x0cde, 0x0006)}, /* Medion 40900, Roper Europe */ {USB_DEVICE(0x0db0, 0x6826)}, /* MSI UB54G (MS-6826) */ {USB_DEVICE(0x107b, 0x55f2)}, /* Gateway WGU-210 (Gemtek) */ @@ -68,6 +69,7 @@ static struct usb_device_id p54u_table[] __devinitdata = { {USB_DEVICE(0x1915, 0x2235)}, /* Linksys WUSB54G Portable OEM */ {USB_DEVICE(0x2001, 0x3701)}, /* DLink DWL-G120 Spinnaker */ {USB_DEVICE(0x2001, 0x3703)}, /* DLink DWL-G122 */ + {USB_DEVICE(0x2001, 0x3762)}, /* Conceptronic C54U */ {USB_DEVICE(0x5041, 0x2234)}, /* Linksys WUSB54G */ {USB_DEVICE(0x5041, 0x2235)}, /* Linksys WUSB54G Portable */ diff --git a/trunk/drivers/net/wireless/rt2x00/rt2x00dev.c b/trunk/drivers/net/wireless/rt2x00/rt2x00dev.c index 9de9dbe94399..84eb6ad36377 100644 --- a/trunk/drivers/net/wireless/rt2x00/rt2x00dev.c +++ b/trunk/drivers/net/wireless/rt2x00/rt2x00dev.c @@ -1062,8 +1062,10 @@ void rt2x00lib_remove_dev(struct rt2x00_dev *rt2x00dev) * Stop all work. */ cancel_work_sync(&rt2x00dev->intf_work); - cancel_work_sync(&rt2x00dev->rxdone_work); - cancel_work_sync(&rt2x00dev->txdone_work); + if (rt2x00_is_usb(rt2x00dev)) { + cancel_work_sync(&rt2x00dev->rxdone_work); + cancel_work_sync(&rt2x00dev->txdone_work); + } destroy_workqueue(rt2x00dev->workqueue); /* diff --git a/trunk/drivers/net/wireless/rtlwifi/efuse.c b/trunk/drivers/net/wireless/rtlwifi/efuse.c index f74a8701c67d..590f14f45a89 100644 --- a/trunk/drivers/net/wireless/rtlwifi/efuse.c +++ b/trunk/drivers/net/wireless/rtlwifi/efuse.c @@ -685,7 +685,7 @@ static int efuse_pg_packet_read(struct ieee80211_hw *hw, u8 offset, u8 *data) u8 efuse_data, word_cnts = 0; u16 efuse_addr = 0; - u8 hworden; + u8 hworden = 0; u8 tmpdata[8]; if (data == NULL) diff --git a/trunk/drivers/net/wireless/rtlwifi/rtl8192c/fw_common.c b/trunk/drivers/net/wireless/rtlwifi/rtl8192c/fw_common.c index 5ef91374b230..28a6ce3bc239 100644 --- a/trunk/drivers/net/wireless/rtlwifi/rtl8192c/fw_common.c +++ b/trunk/drivers/net/wireless/rtlwifi/rtl8192c/fw_common.c @@ -303,7 +303,7 @@ static void _rtl92c_fill_h2c_command(struct ieee80211_hw *hw, u16 box_reg, box_extreg; u8 u1b_tmp; bool isfw_read = false; - u8 buf_index; + u8 buf_index = 0; bool bwrite_sucess = false; u8 wait_h2c_limmit = 100; u8 wait_writeh2c_limmit = 100; diff --git a/trunk/drivers/net/wireless/rtlwifi/usb.c b/trunk/drivers/net/wireless/rtlwifi/usb.c index a4b2613d6a8c..f5d85735d642 100644 --- a/trunk/drivers/net/wireless/rtlwifi/usb.c +++ b/trunk/drivers/net/wireless/rtlwifi/usb.c @@ -246,7 +246,7 @@ static void _rtl_usb_io_handler_init(struct device *dev, static void _rtl_usb_io_handler_release(struct ieee80211_hw *hw) { - struct rtl_priv *rtlpriv = rtl_priv(hw); + struct rtl_priv __maybe_unused *rtlpriv = rtl_priv(hw); mutex_destroy(&rtlpriv->io.bb_mutex); } diff --git a/trunk/drivers/net/wireless/wl12xx/sdio.c b/trunk/drivers/net/wireless/wl12xx/sdio.c index 5b9dbeafec06..b1c7d031c391 100644 --- a/trunk/drivers/net/wireless/wl12xx/sdio.c +++ b/trunk/drivers/net/wireless/wl12xx/sdio.c @@ -340,7 +340,7 @@ module_init(wl1271_init); module_exit(wl1271_exit); MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Luciano Coelho "); +MODULE_AUTHOR("Luciano Coelho "); MODULE_AUTHOR("Juuso Oikarinen "); MODULE_FIRMWARE(WL1271_FW_NAME); MODULE_FIRMWARE(WL1271_AP_FW_NAME); diff --git a/trunk/drivers/net/wireless/wl12xx/spi.c b/trunk/drivers/net/wireless/wl12xx/spi.c index 18cf01719ae0..ffc745b17f4d 100644 --- a/trunk/drivers/net/wireless/wl12xx/spi.c +++ b/trunk/drivers/net/wireless/wl12xx/spi.c @@ -487,7 +487,7 @@ module_init(wl1271_init); module_exit(wl1271_exit); MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Luciano Coelho "); +MODULE_AUTHOR("Luciano Coelho "); MODULE_AUTHOR("Juuso Oikarinen "); MODULE_FIRMWARE(WL1271_FW_NAME); MODULE_FIRMWARE(WL1271_AP_FW_NAME); diff --git a/trunk/drivers/net/wireless/wl12xx/testmode.c b/trunk/drivers/net/wireless/wl12xx/testmode.c index e64403b6896d..6ec06a4a4c6d 100644 --- a/trunk/drivers/net/wireless/wl12xx/testmode.c +++ b/trunk/drivers/net/wireless/wl12xx/testmode.c @@ -204,7 +204,10 @@ static int wl1271_tm_cmd_nvs_push(struct wl1271 *wl, struct nlattr *tb[]) kfree(wl->nvs); - wl->nvs = kzalloc(sizeof(struct wl1271_nvs_file), GFP_KERNEL); + if (len != sizeof(struct wl1271_nvs_file)) + return -EINVAL; + + wl->nvs = kzalloc(len, GFP_KERNEL); if (!wl->nvs) { wl1271_error("could not allocate memory for the nvs file"); ret = -ENOMEM; diff --git a/trunk/drivers/net/wireless/zd1211rw/zd_usb.c b/trunk/drivers/net/wireless/zd1211rw/zd_usb.c index 58236e6d0921..ab607bbd6291 100644 --- a/trunk/drivers/net/wireless/zd1211rw/zd_usb.c +++ b/trunk/drivers/net/wireless/zd1211rw/zd_usb.c @@ -643,7 +643,7 @@ static void rx_urb_complete(struct urb *urb) usb = urb->context; rx = &usb->rx; - zd_usb_reset_rx_idle_timer(usb); + tasklet_schedule(&rx->reset_timer_tasklet); if (length%rx->usb_packet_size > rx->usb_packet_size-4) { /* If there is an old first fragment, we don't care. */ @@ -812,6 +812,7 @@ void zd_usb_disable_rx(struct zd_usb *usb) __zd_usb_disable_rx(usb); mutex_unlock(&rx->setup_mutex); + tasklet_kill(&rx->reset_timer_tasklet); cancel_delayed_work_sync(&rx->idle_work); } @@ -1106,6 +1107,13 @@ static void zd_rx_idle_timer_handler(struct work_struct *work) zd_usb_reset_rx(usb); } +static void zd_usb_reset_rx_idle_timer_tasklet(unsigned long param) +{ + struct zd_usb *usb = (struct zd_usb *)param; + + zd_usb_reset_rx_idle_timer(usb); +} + void zd_usb_reset_rx_idle_timer(struct zd_usb *usb) { struct zd_usb_rx *rx = &usb->rx; @@ -1127,6 +1135,7 @@ static inline void init_usb_interrupt(struct zd_usb *usb) static inline void init_usb_rx(struct zd_usb *usb) { struct zd_usb_rx *rx = &usb->rx; + spin_lock_init(&rx->lock); mutex_init(&rx->setup_mutex); if (interface_to_usbdev(usb->intf)->speed == USB_SPEED_HIGH) { @@ -1136,11 +1145,14 @@ static inline void init_usb_rx(struct zd_usb *usb) } ZD_ASSERT(rx->fragment_length == 0); INIT_DELAYED_WORK(&rx->idle_work, zd_rx_idle_timer_handler); + rx->reset_timer_tasklet.func = zd_usb_reset_rx_idle_timer_tasklet; + rx->reset_timer_tasklet.data = (unsigned long)usb; } static inline void init_usb_tx(struct zd_usb *usb) { struct zd_usb_tx *tx = &usb->tx; + spin_lock_init(&tx->lock); atomic_set(&tx->enabled, 0); tx->stopped = 0; @@ -1671,6 +1683,10 @@ static void iowrite16v_urb_complete(struct urb *urb) if (urb->status && !usb->cmd_error) usb->cmd_error = urb->status; + + if (!usb->cmd_error && + urb->actual_length != urb->transfer_buffer_length) + usb->cmd_error = -EIO; } static int zd_submit_waiting_urb(struct zd_usb *usb, bool last) @@ -1805,7 +1821,7 @@ int zd_usb_iowrite16v_async(struct zd_usb *usb, const struct zd_ioreq16 *ioreqs, usb_fill_int_urb(urb, udev, usb_sndintpipe(udev, EP_REGS_OUT), req, req_len, iowrite16v_urb_complete, usb, ep->desc.bInterval); - urb->transfer_flags |= URB_FREE_BUFFER | URB_SHORT_NOT_OK; + urb->transfer_flags |= URB_FREE_BUFFER; /* Submit previous URB */ r = zd_submit_waiting_urb(usb, false); diff --git a/trunk/drivers/net/wireless/zd1211rw/zd_usb.h b/trunk/drivers/net/wireless/zd1211rw/zd_usb.h index b3df2c8116cc..325d0f989257 100644 --- a/trunk/drivers/net/wireless/zd1211rw/zd_usb.h +++ b/trunk/drivers/net/wireless/zd1211rw/zd_usb.h @@ -183,6 +183,7 @@ struct zd_usb_rx { spinlock_t lock; struct mutex setup_mutex; struct delayed_work idle_work; + struct tasklet_struct reset_timer_tasklet; u8 fragment[2 * USB_MAX_RX_SIZE]; unsigned int fragment_length; unsigned int usb_packet_size; diff --git a/trunk/drivers/pci/setup-bus.c b/trunk/drivers/pci/setup-bus.c index 89d0a6a88df7..ebf51ad1b714 100644 --- a/trunk/drivers/pci/setup-bus.c +++ b/trunk/drivers/pci/setup-bus.c @@ -676,10 +676,10 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask, min_align = align1 >> 1; align += aligns[order]; } - size0 = calculate_memsize(size, min_size, 0, resource_size(b_res), align); + size0 = calculate_memsize(size, min_size, 0, resource_size(b_res), min_align); size1 = !add_size ? size : calculate_memsize(size, min_size+add_size, 0, - resource_size(b_res), align); + resource_size(b_res), min_align); if (!size0 && !size1) { if (b_res->start || b_res->end) dev_info(&bus->self->dev, "disabling bridge window " diff --git a/trunk/drivers/spi/amba-pl022.c b/trunk/drivers/spi/amba-pl022.c index 5825370bad25..08de58e7f59f 100644 --- a/trunk/drivers/spi/amba-pl022.c +++ b/trunk/drivers/spi/amba-pl022.c @@ -1555,7 +1555,7 @@ static int stop_queue(struct pl022 *pl022) * A wait_queue on the pl022->busy could be used, but then the common * execution path (pump_messages) would be required to call wake_up or * friends on every SPI message. Do this instead */ - while (!list_empty(&pl022->queue) && pl022->busy && limit--) { + while ((!list_empty(&pl022->queue) || pl022->busy) && limit--) { spin_unlock_irqrestore(&pl022->queue_lock, flags); msleep(10); spin_lock_irqsave(&pl022->queue_lock, flags); diff --git a/trunk/drivers/spi/dw_spi.c b/trunk/drivers/spi/dw_spi.c index b1a4b9f503ae..871e337c917f 100644 --- a/trunk/drivers/spi/dw_spi.c +++ b/trunk/drivers/spi/dw_spi.c @@ -821,7 +821,7 @@ static int stop_queue(struct dw_spi *dws) spin_lock_irqsave(&dws->lock, flags); dws->run = QUEUE_STOPPED; - while (!list_empty(&dws->queue) && dws->busy && limit--) { + while ((!list_empty(&dws->queue) || dws->busy) && limit--) { spin_unlock_irqrestore(&dws->lock, flags); msleep(10); spin_lock_irqsave(&dws->lock, flags); diff --git a/trunk/drivers/spi/pxa2xx_spi.c b/trunk/drivers/spi/pxa2xx_spi.c index 9c74aad6be93..dc25bee8d33f 100644 --- a/trunk/drivers/spi/pxa2xx_spi.c +++ b/trunk/drivers/spi/pxa2xx_spi.c @@ -1493,7 +1493,7 @@ static int stop_queue(struct driver_data *drv_data) * execution path (pump_messages) would be required to call wake_up or * friends on every SPI message. Do this instead */ drv_data->run = QUEUE_STOPPED; - while (!list_empty(&drv_data->queue) && drv_data->busy && limit--) { + while ((!list_empty(&drv_data->queue) || drv_data->busy) && limit--) { spin_unlock_irqrestore(&drv_data->lock, flags); msleep(10); spin_lock_irqsave(&drv_data->lock, flags); diff --git a/trunk/drivers/spi/spi_bfin5xx.c b/trunk/drivers/spi/spi_bfin5xx.c index bdb7289a1d22..f706dba165cf 100644 --- a/trunk/drivers/spi/spi_bfin5xx.c +++ b/trunk/drivers/spi/spi_bfin5xx.c @@ -1284,7 +1284,7 @@ static inline int bfin_spi_stop_queue(struct bfin_spi_master_data *drv_data) * friends on every SPI message. Do this instead */ drv_data->running = false; - while (!list_empty(&drv_data->queue) && drv_data->busy && limit--) { + while ((!list_empty(&drv_data->queue) || drv_data->busy) && limit--) { spin_unlock_irqrestore(&drv_data->lock, flags); msleep(10); spin_lock_irqsave(&drv_data->lock, flags); diff --git a/trunk/drivers/video/omap2/dss/dsi.c b/trunk/drivers/video/omap2/dss/dsi.c index 0a7f1a47f8e3..86041535f34a 100644 --- a/trunk/drivers/video/omap2/dss/dsi.c +++ b/trunk/drivers/video/omap2/dss/dsi.c @@ -1276,6 +1276,9 @@ int dsi_pll_set_clock_div(struct dsi_clock_info *cinfo) DSSDBGF(); + dsi.current_cinfo.use_sys_clk = cinfo->use_sys_clk; + dsi.current_cinfo.highfreq = cinfo->highfreq; + dsi.current_cinfo.fint = cinfo->fint; dsi.current_cinfo.clkin4ddr = cinfo->clkin4ddr; dsi.current_cinfo.dsi_pll_hsdiv_dispc_clk = diff --git a/trunk/drivers/video/s3c-fb.c b/trunk/drivers/video/s3c-fb.c index 3fa7911ac906..3b6cdcac8f1a 100644 --- a/trunk/drivers/video/s3c-fb.c +++ b/trunk/drivers/video/s3c-fb.c @@ -182,7 +182,6 @@ struct s3c_fb_vsync { /** * struct s3c_fb - overall hardware state of the hardware - * @slock: The spinlock protection for this data sturcture. * @dev: The device that we bound to, for printing, etc. * @regs_res: The resource we claimed for the IO registers. * @bus_clk: The clk (hclk) feeding our interface and possibly pixclk. @@ -196,7 +195,6 @@ struct s3c_fb_vsync { * @vsync_info: VSYNC-related information (count, queues...) */ struct s3c_fb { - spinlock_t slock; struct device *dev; struct resource *regs_res; struct clk *bus_clk; @@ -949,8 +947,6 @@ static irqreturn_t s3c_fb_irq(int irq, void *dev_id) void __iomem *regs = sfb->regs; u32 irq_sts_reg; - spin_lock(&sfb->slock); - irq_sts_reg = readl(regs + VIDINTCON1); if (irq_sts_reg & VIDINTCON1_INT_FRAME) { @@ -967,7 +963,6 @@ static irqreturn_t s3c_fb_irq(int irq, void *dev_id) */ s3c_fb_disable_irq(sfb); - spin_unlock(&sfb->slock); return IRQ_HANDLED; } @@ -1344,8 +1339,6 @@ static int __devinit s3c_fb_probe(struct platform_device *pdev) sfb->pdata = pd; sfb->variant = fbdrv->variant; - spin_lock_init(&sfb->slock); - sfb->bus_clk = clk_get(dev, "lcd"); if (IS_ERR(sfb->bus_clk)) { dev_err(dev, "failed to get bus clock\n"); @@ -1556,7 +1549,7 @@ static int s3c_fb_resume(struct device *dev) return 0; } -static int s3c_fb_runtime_suspend(struct device *dev) +int s3c_fb_runtime_suspend(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); struct s3c_fb *sfb = platform_get_drvdata(pdev); @@ -1576,7 +1569,7 @@ static int s3c_fb_runtime_suspend(struct device *dev) return 0; } -static int s3c_fb_runtime_resume(struct device *dev) +int s3c_fb_runtime_resume(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); struct s3c_fb *sfb = platform_get_drvdata(pdev); diff --git a/trunk/fs/ext4/ext4_jbd2.h b/trunk/fs/ext4/ext4_jbd2.h index e25e99bf7ee1..d0f53538a57f 100644 --- a/trunk/fs/ext4/ext4_jbd2.h +++ b/trunk/fs/ext4/ext4_jbd2.h @@ -86,8 +86,8 @@ #ifdef CONFIG_QUOTA /* Amount of blocks needed for quota update - we know that the structure was - * allocated so we need to update only inode+data */ -#define EXT4_QUOTA_TRANS_BLOCKS(sb) (test_opt(sb, QUOTA) ? 2 : 0) + * allocated so we need to update only data block */ +#define EXT4_QUOTA_TRANS_BLOCKS(sb) (test_opt(sb, QUOTA) ? 1 : 0) /* Amount of blocks needed for quota insert/delete - we do some block writes * but inode, sb and group updates are done only once */ #define EXT4_QUOTA_INIT_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_INIT_ALLOC*\ diff --git a/trunk/fs/ext4/fsync.c b/trunk/fs/ext4/fsync.c index 4673bc05274f..e9473cbe80df 100644 --- a/trunk/fs/ext4/fsync.c +++ b/trunk/fs/ext4/fsync.c @@ -125,9 +125,11 @@ extern int ext4_flush_completed_IO(struct inode *inode) * the parent directory's parent as well, and so on recursively, if * they are also freshly created. */ -static void ext4_sync_parent(struct inode *inode) +static int ext4_sync_parent(struct inode *inode) { + struct writeback_control wbc; struct dentry *dentry = NULL; + int ret = 0; while (inode && ext4_test_inode_state(inode, EXT4_STATE_NEWENTRY)) { ext4_clear_inode_state(inode, EXT4_STATE_NEWENTRY); @@ -136,8 +138,17 @@ static void ext4_sync_parent(struct inode *inode) if (!dentry || !dentry->d_parent || !dentry->d_parent->d_inode) break; inode = dentry->d_parent->d_inode; - sync_mapping_buffers(inode->i_mapping); + ret = sync_mapping_buffers(inode->i_mapping); + if (ret) + break; + memset(&wbc, 0, sizeof(wbc)); + wbc.sync_mode = WB_SYNC_ALL; + wbc.nr_to_write = 0; /* only write out the inode */ + ret = sync_inode(inode, &wbc); + if (ret) + break; } + return ret; } /* @@ -176,7 +187,7 @@ int ext4_sync_file(struct file *file, int datasync) if (!journal) { ret = generic_file_fsync(file, datasync); if (!ret && !list_empty(&inode->i_dentry)) - ext4_sync_parent(inode); + ret = ext4_sync_parent(inode); goto out; } diff --git a/trunk/fs/ext4/inode.c b/trunk/fs/ext4/inode.c index ad8e303c0d29..f2fa5e8a582c 100644 --- a/trunk/fs/ext4/inode.c +++ b/trunk/fs/ext4/inode.c @@ -2502,6 +2502,7 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, * for partial write. */ set_buffer_new(bh); + set_buffer_mapped(bh); } return 0; } @@ -4429,8 +4430,8 @@ void ext4_truncate(struct inode *inode) Indirect chain[4]; Indirect *partial; __le32 nr = 0; - int n; - ext4_lblk_t last_block; + int n = 0; + ext4_lblk_t last_block, max_block; unsigned blocksize = inode->i_sb->s_blocksize; trace_ext4_truncate_enter(inode); @@ -4455,14 +4456,18 @@ void ext4_truncate(struct inode *inode) last_block = (inode->i_size + blocksize-1) >> EXT4_BLOCK_SIZE_BITS(inode->i_sb); + max_block = (EXT4_SB(inode->i_sb)->s_bitmap_maxbytes + blocksize-1) + >> EXT4_BLOCK_SIZE_BITS(inode->i_sb); if (inode->i_size & (blocksize - 1)) if (ext4_block_truncate_page(handle, mapping, inode->i_size)) goto out_stop; - n = ext4_block_to_path(inode, last_block, offsets, NULL); - if (n == 0) - goto out_stop; /* error */ + if (last_block != max_block) { + n = ext4_block_to_path(inode, last_block, offsets, NULL); + if (n == 0) + goto out_stop; /* error */ + } /* * OK. This truncate is going to happen. We add the inode to the @@ -4493,7 +4498,13 @@ void ext4_truncate(struct inode *inode) */ ei->i_disksize = inode->i_size; - if (n == 1) { /* direct blocks */ + if (last_block == max_block) { + /* + * It is unnecessary to free any data blocks if last_block is + * equal to the indirect block limit. + */ + goto out_unlock; + } else if (n == 1) { /* direct blocks */ ext4_free_data(handle, inode, NULL, i_data+offsets[0], i_data + EXT4_NDIR_BLOCKS); goto do_indirects; @@ -4553,6 +4564,7 @@ void ext4_truncate(struct inode *inode) ; } +out_unlock: up_write(&ei->i_data_sem); inode->i_mtime = inode->i_ctime = ext4_current_time(inode); ext4_mark_inode_dirty(handle, inode); @@ -5398,13 +5410,12 @@ static int ext4_indirect_trans_blocks(struct inode *inode, int nrblocks, /* if nrblocks are contiguous */ if (chunk) { /* - * With N contiguous data blocks, it need at most - * N/EXT4_ADDR_PER_BLOCK(inode->i_sb) indirect blocks - * 2 dindirect blocks - * 1 tindirect block + * With N contiguous data blocks, we need at most + * N/EXT4_ADDR_PER_BLOCK(inode->i_sb) + 1 indirect blocks, + * 2 dindirect blocks, and 1 tindirect block */ - indirects = nrblocks / EXT4_ADDR_PER_BLOCK(inode->i_sb); - return indirects + 3; + return DIV_ROUND_UP(nrblocks, + EXT4_ADDR_PER_BLOCK(inode->i_sb)) + 4; } /* * if nrblocks are not contiguous, worse case, each block touch diff --git a/trunk/fs/ext4/super.c b/trunk/fs/ext4/super.c index 056474b7b8e0..8553dfb310af 100644 --- a/trunk/fs/ext4/super.c +++ b/trunk/fs/ext4/super.c @@ -242,27 +242,44 @@ static void ext4_put_nojournal(handle_t *handle) * journal_end calls result in the superblock being marked dirty, so * that sync() will call the filesystem's write_super callback if * appropriate. + * + * To avoid j_barrier hold in userspace when a user calls freeze(), + * ext4 prevents a new handle from being started by s_frozen, which + * is in an upper layer. */ handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks) { journal_t *journal; + handle_t *handle; if (sb->s_flags & MS_RDONLY) return ERR_PTR(-EROFS); - vfs_check_frozen(sb, SB_FREEZE_TRANS); - /* Special case here: if the journal has aborted behind our - * backs (eg. EIO in the commit thread), then we still need to - * take the FS itself readonly cleanly. */ journal = EXT4_SB(sb)->s_journal; - if (journal) { - if (is_journal_aborted(journal)) { - ext4_abort(sb, "Detected aborted journal"); - return ERR_PTR(-EROFS); - } - return jbd2_journal_start(journal, nblocks); + handle = ext4_journal_current_handle(); + + /* + * If a handle has been started, it should be allowed to + * finish, otherwise deadlock could happen between freeze + * and others(e.g. truncate) due to the restart of the + * journal handle if the filesystem is forzen and active + * handles are not stopped. + */ + if (!handle) + vfs_check_frozen(sb, SB_FREEZE_TRANS); + + if (!journal) + return ext4_get_nojournal(); + /* + * Special case here: if the journal has aborted behind our + * backs (eg. EIO in the commit thread), then we still need to + * take the FS itself readonly cleanly. + */ + if (is_journal_aborted(journal)) { + ext4_abort(sb, "Detected aborted journal"); + return ERR_PTR(-EROFS); } - return ext4_get_nojournal(); + return jbd2_journal_start(journal, nblocks); } /* @@ -2975,6 +2992,12 @@ static int ext4_register_li_request(struct super_block *sb, mutex_unlock(&ext4_li_info->li_list_mtx); sbi->s_li_request = elr; + /* + * set elr to NULL here since it has been inserted to + * the request_list and the removal and free of it is + * handled by ext4_clear_request_list from now on. + */ + elr = NULL; if (!(ext4_li_info->li_state & EXT4_LAZYINIT_RUNNING)) { ret = ext4_run_lazyinit_thread(); @@ -3385,6 +3408,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) get_random_bytes(&sbi->s_next_generation, sizeof(u32)); spin_lock_init(&sbi->s_next_gen_lock); + init_timer(&sbi->s_err_report); + sbi->s_err_report.function = print_daily_error_info; + sbi->s_err_report.data = (unsigned long) sb; + err = percpu_counter_init(&sbi->s_freeblocks_counter, ext4_count_free_blocks(sb)); if (!err) { @@ -3646,9 +3673,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) "Opts: %s%s%s", descr, sbi->s_es->s_mount_opts, *sbi->s_es->s_mount_opts ? "; " : "", orig_data); - init_timer(&sbi->s_err_report); - sbi->s_err_report.function = print_daily_error_info; - sbi->s_err_report.data = (unsigned long) sb; if (es->s_error_count) mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */ @@ -3672,6 +3696,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) sbi->s_journal = NULL; } failed_mount3: + del_timer(&sbi->s_err_report); if (sbi->s_flex_groups) { if (is_vmalloc_addr(sbi->s_flex_groups)) vfree(sbi->s_flex_groups); @@ -4138,6 +4163,11 @@ static int ext4_sync_fs(struct super_block *sb, int wait) /* * LVM calls this function before a (read-only) snapshot is created. This * gives us a chance to flush the journal completely and mark the fs clean. + * + * Note that only this function cannot bring a filesystem to be in a clean + * state independently, because ext4 prevents a new handle from being started + * by @sb->s_frozen, which stays in an upper layer. It thus needs help from + * the upper layer. */ static int ext4_freeze(struct super_block *sb) { @@ -4614,11 +4644,24 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id, static int ext4_quota_off(struct super_block *sb, int type) { + struct inode *inode = sb_dqopt(sb)->files[type]; + handle_t *handle; + /* Force all delayed allocation blocks to be allocated. * Caller already holds s_umount sem */ if (test_opt(sb, DELALLOC)) sync_filesystem(sb); + /* Update modification times of quota files when userspace can + * start looking at them */ + handle = ext4_journal_start(inode, 1); + if (IS_ERR(handle)) + goto out; + inode->i_mtime = inode->i_ctime = CURRENT_TIME; + ext4_mark_inode_dirty(handle, inode); + ext4_journal_stop(handle); + +out: return dquot_quota_off(sb, type); } @@ -4714,9 +4757,8 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type, if (inode->i_size < off + len) { i_size_write(inode, off + len); EXT4_I(inode)->i_disksize = inode->i_size; + ext4_mark_inode_dirty(handle, inode); } - inode->i_mtime = inode->i_ctime = CURRENT_TIME; - ext4_mark_inode_dirty(handle, inode); mutex_unlock(&inode->i_mutex); return len; } diff --git a/trunk/fs/jbd2/commit.c b/trunk/fs/jbd2/commit.c index 20af62f4304b..6e28000a4b21 100644 --- a/trunk/fs/jbd2/commit.c +++ b/trunk/fs/jbd2/commit.c @@ -105,6 +105,8 @@ static int journal_submit_commit_record(journal_t *journal, int ret; struct timespec now = current_kernel_time(); + *cbh = NULL; + if (is_journal_aborted(journal)) return 0; @@ -806,7 +808,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) if (err) __jbd2_journal_abort_hard(journal); } - if (!err && !is_journal_aborted(journal)) + if (cbh) err = journal_wait_on_commit_record(journal, cbh); if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT) && diff --git a/trunk/fs/jbd2/journal.c b/trunk/fs/jbd2/journal.c index aba8ebaec25c..e0ec3db1c395 100644 --- a/trunk/fs/jbd2/journal.c +++ b/trunk/fs/jbd2/journal.c @@ -2413,10 +2413,12 @@ const char *jbd2_dev_to_name(dev_t device) new_dev = kmalloc(sizeof(struct devname_cache), GFP_KERNEL); if (!new_dev) return "NODEV-ALLOCFAILURE"; /* Something non-NULL */ + bd = bdget(device); spin_lock(&devname_cache_lock); if (devcache[i]) { if (devcache[i]->device == device) { kfree(new_dev); + bdput(bd); ret = devcache[i]->devname; spin_unlock(&devname_cache_lock); return ret; @@ -2425,7 +2427,6 @@ const char *jbd2_dev_to_name(dev_t device) } devcache[i] = new_dev; devcache[i]->device = device; - bd = bdget(device); if (bd) { bdevname(bd, devcache[i]->devname); bdput(bd); diff --git a/trunk/fs/nfsd/lockd.c b/trunk/fs/nfsd/lockd.c index 0c6d81670137..7c831a2731fa 100644 --- a/trunk/fs/nfsd/lockd.c +++ b/trunk/fs/nfsd/lockd.c @@ -38,7 +38,6 @@ nlm_fopen(struct svc_rqst *rqstp, struct nfs_fh *f, struct file **filp) exp_readlock(); nfserr = nfsd_open(rqstp, &fh, S_IFREG, NFSD_MAY_LOCK, filp); fh_put(&fh); - rqstp->rq_client = NULL; exp_readunlock(); /* We return nlm error codes as nlm doesn't know * about nfsd, but nfsd does know about nlm.. diff --git a/trunk/fs/nfsd/nfs4state.c b/trunk/fs/nfsd/nfs4state.c index 4b36ec3eb8ea..aa309aa93fe8 100644 --- a/trunk/fs/nfsd/nfs4state.c +++ b/trunk/fs/nfsd/nfs4state.c @@ -397,10 +397,13 @@ static void unhash_generic_stateid(struct nfs4_stateid *stp) static void free_generic_stateid(struct nfs4_stateid *stp) { - int oflag = nfs4_access_bmap_to_omode(stp); + int oflag; - nfs4_file_put_access(stp->st_file, oflag); - put_nfs4_file(stp->st_file); + if (stp->st_access_bmap) { + oflag = nfs4_access_bmap_to_omode(stp); + nfs4_file_put_access(stp->st_file, oflag); + put_nfs4_file(stp->st_file); + } kmem_cache_free(stateid_slab, stp); } diff --git a/trunk/fs/xfs/linux-2.6/xfs_buf.c b/trunk/fs/xfs/linux-2.6/xfs_buf.c index 5ea402023ebd..9ef9ed2cfe2e 100644 --- a/trunk/fs/xfs/linux-2.6/xfs_buf.c +++ b/trunk/fs/xfs/linux-2.6/xfs_buf.c @@ -293,7 +293,6 @@ xfs_buf_allocate_memory( size_t nbytes, offset; gfp_t gfp_mask = xb_to_gfp(flags); unsigned short page_count, i; - pgoff_t first; xfs_off_t end; int error; @@ -333,7 +332,6 @@ xfs_buf_allocate_memory( return error; offset = bp->b_offset; - first = bp->b_file_offset >> PAGE_SHIFT; bp->b_flags |= _XBF_PAGES; for (i = 0; i < bp->b_page_count; i++) { @@ -657,8 +655,6 @@ xfs_buf_readahead( xfs_off_t ioff, size_t isize) { - struct backing_dev_info *bdi; - if (bdi_read_congested(target->bt_bdi)) return; @@ -919,8 +915,6 @@ xfs_buf_lock( if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE)) xfs_log_force(bp->b_target->bt_mount, 0); - if (atomic_read(&bp->b_io_remaining)) - blk_flush_plug(current); down(&bp->b_sema); XB_SET_OWNER(bp); @@ -1309,8 +1303,6 @@ xfs_buf_iowait( { trace_xfs_buf_iowait(bp, _RET_IP_); - if (atomic_read(&bp->b_io_remaining)) - blk_flush_plug(current); wait_for_completion(&bp->b_iowait); trace_xfs_buf_iowait_done(bp, _RET_IP_); @@ -1747,8 +1739,8 @@ xfsbufd( do { long age = xfs_buf_age_centisecs * msecs_to_jiffies(10); long tout = xfs_buf_timer_centisecs * msecs_to_jiffies(10); - int count = 0; struct list_head tmp; + struct blk_plug plug; if (unlikely(freezing(current))) { set_bit(XBT_FORCE_SLEEP, &target->bt_flags); @@ -1764,16 +1756,15 @@ xfsbufd( xfs_buf_delwri_split(target, &tmp, age); list_sort(NULL, &tmp, xfs_buf_cmp); + + blk_start_plug(&plug); while (!list_empty(&tmp)) { struct xfs_buf *bp; bp = list_first_entry(&tmp, struct xfs_buf, b_list); list_del_init(&bp->b_list); xfs_bdstrat_cb(bp); - count++; } - if (count) - blk_flush_plug(current); - + blk_finish_plug(&plug); } while (!kthread_should_stop()); return 0; @@ -1793,6 +1784,7 @@ xfs_flush_buftarg( int pincount = 0; LIST_HEAD(tmp_list); LIST_HEAD(wait_list); + struct blk_plug plug; xfs_buf_runall_queues(xfsconvertd_workqueue); xfs_buf_runall_queues(xfsdatad_workqueue); @@ -1807,6 +1799,8 @@ xfs_flush_buftarg( * we do that after issuing all the IO. */ list_sort(NULL, &tmp_list, xfs_buf_cmp); + + blk_start_plug(&plug); while (!list_empty(&tmp_list)) { bp = list_first_entry(&tmp_list, struct xfs_buf, b_list); ASSERT(target == bp->b_target); @@ -1817,10 +1811,10 @@ xfs_flush_buftarg( } xfs_bdstrat_cb(bp); } + blk_finish_plug(&plug); if (wait) { - /* Expedite and wait for IO to complete. */ - blk_flush_plug(current); + /* Wait for IO to complete. */ while (!list_empty(&wait_list)) { bp = list_first_entry(&wait_list, struct xfs_buf, b_list); diff --git a/trunk/fs/xfs/linux-2.6/xfs_message.c b/trunk/fs/xfs/linux-2.6/xfs_message.c index 508e06fd7d1e..3ca795609113 100644 --- a/trunk/fs/xfs/linux-2.6/xfs_message.c +++ b/trunk/fs/xfs/linux-2.6/xfs_message.c @@ -28,53 +28,47 @@ /* * XFS logging functions */ -static int +static void __xfs_printk( const char *level, const struct xfs_mount *mp, struct va_format *vaf) { if (mp && mp->m_fsname) - return printk("%sXFS (%s): %pV\n", level, mp->m_fsname, vaf); - return printk("%sXFS: %pV\n", level, vaf); + printk("%sXFS (%s): %pV\n", level, mp->m_fsname, vaf); + printk("%sXFS: %pV\n", level, vaf); } -int xfs_printk( +void xfs_printk( const char *level, const struct xfs_mount *mp, const char *fmt, ...) { struct va_format vaf; va_list args; - int r; va_start(args, fmt); vaf.fmt = fmt; vaf.va = &args; - r = __xfs_printk(level, mp, &vaf); + __xfs_printk(level, mp, &vaf); va_end(args); - - return r; } #define define_xfs_printk_level(func, kern_level) \ -int func(const struct xfs_mount *mp, const char *fmt, ...) \ +void func(const struct xfs_mount *mp, const char *fmt, ...) \ { \ struct va_format vaf; \ va_list args; \ - int r; \ \ va_start(args, fmt); \ \ vaf.fmt = fmt; \ vaf.va = &args; \ \ - r = __xfs_printk(kern_level, mp, &vaf); \ + __xfs_printk(kern_level, mp, &vaf); \ va_end(args); \ - \ - return r; \ } \ define_xfs_printk_level(xfs_emerg, KERN_EMERG); @@ -88,7 +82,7 @@ define_xfs_printk_level(xfs_info, KERN_INFO); define_xfs_printk_level(xfs_debug, KERN_DEBUG); #endif -int +void xfs_alert_tag( const struct xfs_mount *mp, int panic_tag, @@ -97,7 +91,6 @@ xfs_alert_tag( struct va_format vaf; va_list args; int do_panic = 0; - int r; if (xfs_panic_mask && (xfs_panic_mask & panic_tag)) { xfs_printk(KERN_ALERT, mp, @@ -110,12 +103,10 @@ xfs_alert_tag( vaf.fmt = fmt; vaf.va = &args; - r = __xfs_printk(KERN_ALERT, mp, &vaf); + __xfs_printk(KERN_ALERT, mp, &vaf); va_end(args); BUG_ON(do_panic); - - return r; } void diff --git a/trunk/fs/xfs/linux-2.6/xfs_message.h b/trunk/fs/xfs/linux-2.6/xfs_message.h index e77ffa16745b..f1b3fc1b6c4e 100644 --- a/trunk/fs/xfs/linux-2.6/xfs_message.h +++ b/trunk/fs/xfs/linux-2.6/xfs_message.h @@ -3,32 +3,34 @@ struct xfs_mount; -extern int xfs_printk(const char *level, const struct xfs_mount *mp, +extern void xfs_printk(const char *level, const struct xfs_mount *mp, const char *fmt, ...) __attribute__ ((format (printf, 3, 4))); -extern int xfs_emerg(const struct xfs_mount *mp, const char *fmt, ...) +extern void xfs_emerg(const struct xfs_mount *mp, const char *fmt, ...) __attribute__ ((format (printf, 2, 3))); -extern int xfs_alert(const struct xfs_mount *mp, const char *fmt, ...) +extern void xfs_alert(const struct xfs_mount *mp, const char *fmt, ...) __attribute__ ((format (printf, 2, 3))); -extern int xfs_alert_tag(const struct xfs_mount *mp, int tag, +extern void xfs_alert_tag(const struct xfs_mount *mp, int tag, const char *fmt, ...) __attribute__ ((format (printf, 3, 4))); -extern int xfs_crit(const struct xfs_mount *mp, const char *fmt, ...) +extern void xfs_crit(const struct xfs_mount *mp, const char *fmt, ...) __attribute__ ((format (printf, 2, 3))); -extern int xfs_err(const struct xfs_mount *mp, const char *fmt, ...) +extern void xfs_err(const struct xfs_mount *mp, const char *fmt, ...) __attribute__ ((format (printf, 2, 3))); -extern int xfs_warn(const struct xfs_mount *mp, const char *fmt, ...) +extern void xfs_warn(const struct xfs_mount *mp, const char *fmt, ...) __attribute__ ((format (printf, 2, 3))); -extern int xfs_notice(const struct xfs_mount *mp, const char *fmt, ...) +extern void xfs_notice(const struct xfs_mount *mp, const char *fmt, ...) __attribute__ ((format (printf, 2, 3))); -extern int xfs_info(const struct xfs_mount *mp, const char *fmt, ...) +extern void xfs_info(const struct xfs_mount *mp, const char *fmt, ...) __attribute__ ((format (printf, 2, 3))); #ifdef DEBUG -extern int xfs_debug(const struct xfs_mount *mp, const char *fmt, ...) +extern void xfs_debug(const struct xfs_mount *mp, const char *fmt, ...) __attribute__ ((format (printf, 2, 3))); #else -#define xfs_debug(mp, fmt, ...) (0) +static inline void xfs_debug(const struct xfs_mount *mp, const char *fmt, ...) +{ +} #endif extern void assfail(char *expr, char *f, int l); diff --git a/trunk/fs/xfs/linux-2.6/xfs_super.c b/trunk/fs/xfs/linux-2.6/xfs_super.c index 1ba5c451da36..b38e58d02299 100644 --- a/trunk/fs/xfs/linux-2.6/xfs_super.c +++ b/trunk/fs/xfs/linux-2.6/xfs_super.c @@ -816,75 +816,6 @@ xfs_setup_devices( return 0; } -/* - * XFS AIL push thread support - */ -void -xfsaild_wakeup( - struct xfs_ail *ailp, - xfs_lsn_t threshold_lsn) -{ - /* only ever move the target forwards */ - if (XFS_LSN_CMP(threshold_lsn, ailp->xa_target) > 0) { - ailp->xa_target = threshold_lsn; - wake_up_process(ailp->xa_task); - } -} - -STATIC int -xfsaild( - void *data) -{ - struct xfs_ail *ailp = data; - xfs_lsn_t last_pushed_lsn = 0; - long tout = 0; /* milliseconds */ - - while (!kthread_should_stop()) { - /* - * for short sleeps indicating congestion, don't allow us to - * get woken early. Otherwise all we do is bang on the AIL lock - * without making progress. - */ - if (tout && tout <= 20) - __set_current_state(TASK_KILLABLE); - else - __set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(tout ? - msecs_to_jiffies(tout) : MAX_SCHEDULE_TIMEOUT); - - /* swsusp */ - try_to_freeze(); - - ASSERT(ailp->xa_mount->m_log); - if (XFS_FORCED_SHUTDOWN(ailp->xa_mount)) - continue; - - tout = xfsaild_push(ailp, &last_pushed_lsn); - } - - return 0; -} /* xfsaild */ - -int -xfsaild_start( - struct xfs_ail *ailp) -{ - ailp->xa_target = 0; - ailp->xa_task = kthread_run(xfsaild, ailp, "xfsaild/%s", - ailp->xa_mount->m_fsname); - if (IS_ERR(ailp->xa_task)) - return -PTR_ERR(ailp->xa_task); - return 0; -} - -void -xfsaild_stop( - struct xfs_ail *ailp) -{ - kthread_stop(ailp->xa_task); -} - - /* Catch misguided souls that try to use this interface on XFS */ STATIC struct inode * xfs_fs_alloc_inode( @@ -1191,22 +1122,12 @@ xfs_fs_sync_fs( return -error; if (laptop_mode) { - int prev_sync_seq = mp->m_sync_seq; - /* * The disk must be active because we're syncing. * We schedule xfssyncd now (now that the disk is * active) instead of later (when it might not be). */ - wake_up_process(mp->m_sync_task); - /* - * We have to wait for the sync iteration to complete. - * If we don't, the disk activity caused by the sync - * will come after the sync is completed, and that - * triggers another sync from laptop mode. - */ - wait_event(mp->m_wait_single_sync_task, - mp->m_sync_seq != prev_sync_seq); + flush_delayed_work_sync(&mp->m_sync_work); } return 0; @@ -1490,9 +1411,6 @@ xfs_fs_fill_super( spin_lock_init(&mp->m_sb_lock); mutex_init(&mp->m_growlock); atomic_set(&mp->m_active_trans, 0); - INIT_LIST_HEAD(&mp->m_sync_list); - spin_lock_init(&mp->m_sync_lock); - init_waitqueue_head(&mp->m_wait_single_sync_task); mp->m_super = sb; sb->s_fs_info = mp; @@ -1798,6 +1716,38 @@ xfs_destroy_zones(void) } +STATIC int __init +xfs_init_workqueues(void) +{ + /* + * max_active is set to 8 to give enough concurency to allow + * multiple work operations on each CPU to run. This allows multiple + * filesystems to be running sync work concurrently, and scales with + * the number of CPUs in the system. + */ + xfs_syncd_wq = alloc_workqueue("xfssyncd", WQ_CPU_INTENSIVE, 8); + if (!xfs_syncd_wq) + goto out; + + xfs_ail_wq = alloc_workqueue("xfsail", WQ_CPU_INTENSIVE, 8); + if (!xfs_ail_wq) + goto out_destroy_syncd; + + return 0; + +out_destroy_syncd: + destroy_workqueue(xfs_syncd_wq); +out: + return -ENOMEM; +} + +STATIC void +xfs_destroy_workqueues(void) +{ + destroy_workqueue(xfs_ail_wq); + destroy_workqueue(xfs_syncd_wq); +} + STATIC int __init init_xfs_fs(void) { @@ -1813,10 +1763,14 @@ init_xfs_fs(void) if (error) goto out; - error = xfs_mru_cache_init(); + error = xfs_init_workqueues(); if (error) goto out_destroy_zones; + error = xfs_mru_cache_init(); + if (error) + goto out_destroy_wq; + error = xfs_filestream_init(); if (error) goto out_mru_cache_uninit; @@ -1833,6 +1787,10 @@ init_xfs_fs(void) if (error) goto out_cleanup_procfs; + error = xfs_init_workqueues(); + if (error) + goto out_sysctl_unregister; + vfs_initquota(); error = register_filesystem(&xfs_fs_type); @@ -1850,6 +1808,8 @@ init_xfs_fs(void) xfs_filestream_uninit(); out_mru_cache_uninit: xfs_mru_cache_uninit(); + out_destroy_wq: + xfs_destroy_workqueues(); out_destroy_zones: xfs_destroy_zones(); out: @@ -1866,6 +1826,7 @@ exit_xfs_fs(void) xfs_buf_terminate(); xfs_filestream_uninit(); xfs_mru_cache_uninit(); + xfs_destroy_workqueues(); xfs_destroy_zones(); } diff --git a/trunk/fs/xfs/linux-2.6/xfs_sync.c b/trunk/fs/xfs/linux-2.6/xfs_sync.c index 9cf35a688f53..e4f9c1b0836c 100644 --- a/trunk/fs/xfs/linux-2.6/xfs_sync.c +++ b/trunk/fs/xfs/linux-2.6/xfs_sync.c @@ -22,6 +22,7 @@ #include "xfs_log.h" #include "xfs_inum.h" #include "xfs_trans.h" +#include "xfs_trans_priv.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" @@ -39,6 +40,8 @@ #include #include +struct workqueue_struct *xfs_syncd_wq; /* sync workqueue */ + /* * The inode lookup is done in batches to keep the amount of lock traffic and * radix tree lookups to a minimum. The batch size is a trade off between @@ -431,62 +434,12 @@ xfs_quiesce_attr( xfs_unmountfs_writesb(mp); } -/* - * Enqueue a work item to be picked up by the vfs xfssyncd thread. - * Doing this has two advantages: - * - It saves on stack space, which is tight in certain situations - * - It can be used (with care) as a mechanism to avoid deadlocks. - * Flushing while allocating in a full filesystem requires both. - */ -STATIC void -xfs_syncd_queue_work( - struct xfs_mount *mp, - void *data, - void (*syncer)(struct xfs_mount *, void *), - struct completion *completion) -{ - struct xfs_sync_work *work; - - work = kmem_alloc(sizeof(struct xfs_sync_work), KM_SLEEP); - INIT_LIST_HEAD(&work->w_list); - work->w_syncer = syncer; - work->w_data = data; - work->w_mount = mp; - work->w_completion = completion; - spin_lock(&mp->m_sync_lock); - list_add_tail(&work->w_list, &mp->m_sync_list); - spin_unlock(&mp->m_sync_lock); - wake_up_process(mp->m_sync_task); -} - -/* - * Flush delayed allocate data, attempting to free up reserved space - * from existing allocations. At this point a new allocation attempt - * has failed with ENOSPC and we are in the process of scratching our - * heads, looking about for more room... - */ -STATIC void -xfs_flush_inodes_work( - struct xfs_mount *mp, - void *arg) -{ - struct inode *inode = arg; - xfs_sync_data(mp, SYNC_TRYLOCK); - xfs_sync_data(mp, SYNC_TRYLOCK | SYNC_WAIT); - iput(inode); -} - -void -xfs_flush_inodes( - xfs_inode_t *ip) +static void +xfs_syncd_queue_sync( + struct xfs_mount *mp) { - struct inode *inode = VFS_I(ip); - DECLARE_COMPLETION_ONSTACK(completion); - - igrab(inode); - xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_inodes_work, &completion); - wait_for_completion(&completion); - xfs_log_force(ip->i_mount, XFS_LOG_SYNC); + queue_delayed_work(xfs_syncd_wq, &mp->m_sync_work, + msecs_to_jiffies(xfs_syncd_centisecs * 10)); } /* @@ -496,9 +449,10 @@ xfs_flush_inodes( */ STATIC void xfs_sync_worker( - struct xfs_mount *mp, - void *unused) + struct work_struct *work) { + struct xfs_mount *mp = container_of(to_delayed_work(work), + struct xfs_mount, m_sync_work); int error; if (!(mp->m_flags & XFS_MOUNT_RDONLY)) { @@ -508,73 +462,106 @@ xfs_sync_worker( error = xfs_fs_log_dummy(mp); else xfs_log_force(mp, 0); - xfs_reclaim_inodes(mp, 0); error = xfs_qm_sync(mp, SYNC_TRYLOCK); + + /* start pushing all the metadata that is currently dirty */ + xfs_ail_push_all(mp->m_ail); } - mp->m_sync_seq++; - wake_up(&mp->m_wait_single_sync_task); + + /* queue us up again */ + xfs_syncd_queue_sync(mp); } -STATIC int -xfssyncd( - void *arg) +/* + * Queue a new inode reclaim pass if there are reclaimable inodes and there + * isn't a reclaim pass already in progress. By default it runs every 5s based + * on the xfs syncd work default of 30s. Perhaps this should have it's own + * tunable, but that can be done if this method proves to be ineffective or too + * aggressive. + */ +static void +xfs_syncd_queue_reclaim( + struct xfs_mount *mp) { - struct xfs_mount *mp = arg; - long timeleft; - xfs_sync_work_t *work, *n; - LIST_HEAD (tmp); - - set_freezable(); - timeleft = xfs_syncd_centisecs * msecs_to_jiffies(10); - for (;;) { - if (list_empty(&mp->m_sync_list)) - timeleft = schedule_timeout_interruptible(timeleft); - /* swsusp */ - try_to_freeze(); - if (kthread_should_stop() && list_empty(&mp->m_sync_list)) - break; - spin_lock(&mp->m_sync_lock); - /* - * We can get woken by laptop mode, to do a sync - - * that's the (only!) case where the list would be - * empty with time remaining. - */ - if (!timeleft || list_empty(&mp->m_sync_list)) { - if (!timeleft) - timeleft = xfs_syncd_centisecs * - msecs_to_jiffies(10); - INIT_LIST_HEAD(&mp->m_sync_work.w_list); - list_add_tail(&mp->m_sync_work.w_list, - &mp->m_sync_list); - } - list_splice_init(&mp->m_sync_list, &tmp); - spin_unlock(&mp->m_sync_lock); + /* + * We can have inodes enter reclaim after we've shut down the syncd + * workqueue during unmount, so don't allow reclaim work to be queued + * during unmount. + */ + if (!(mp->m_super->s_flags & MS_ACTIVE)) + return; - list_for_each_entry_safe(work, n, &tmp, w_list) { - (*work->w_syncer)(mp, work->w_data); - list_del(&work->w_list); - if (work == &mp->m_sync_work) - continue; - if (work->w_completion) - complete(work->w_completion); - kmem_free(work); - } + rcu_read_lock(); + if (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_RECLAIM_TAG)) { + queue_delayed_work(xfs_syncd_wq, &mp->m_reclaim_work, + msecs_to_jiffies(xfs_syncd_centisecs / 6 * 10)); } + rcu_read_unlock(); +} - return 0; +/* + * This is a fast pass over the inode cache to try to get reclaim moving on as + * many inodes as possible in a short period of time. It kicks itself every few + * seconds, as well as being kicked by the inode cache shrinker when memory + * goes low. It scans as quickly as possible avoiding locked inodes or those + * already being flushed, and once done schedules a future pass. + */ +STATIC void +xfs_reclaim_worker( + struct work_struct *work) +{ + struct xfs_mount *mp = container_of(to_delayed_work(work), + struct xfs_mount, m_reclaim_work); + + xfs_reclaim_inodes(mp, SYNC_TRYLOCK); + xfs_syncd_queue_reclaim(mp); +} + +/* + * Flush delayed allocate data, attempting to free up reserved space + * from existing allocations. At this point a new allocation attempt + * has failed with ENOSPC and we are in the process of scratching our + * heads, looking about for more room. + * + * Queue a new data flush if there isn't one already in progress and + * wait for completion of the flush. This means that we only ever have one + * inode flush in progress no matter how many ENOSPC events are occurring and + * so will prevent the system from bogging down due to every concurrent + * ENOSPC event scanning all the active inodes in the system for writeback. + */ +void +xfs_flush_inodes( + struct xfs_inode *ip) +{ + struct xfs_mount *mp = ip->i_mount; + + queue_work(xfs_syncd_wq, &mp->m_flush_work); + flush_work_sync(&mp->m_flush_work); +} + +STATIC void +xfs_flush_worker( + struct work_struct *work) +{ + struct xfs_mount *mp = container_of(work, + struct xfs_mount, m_flush_work); + + xfs_sync_data(mp, SYNC_TRYLOCK); + xfs_sync_data(mp, SYNC_TRYLOCK | SYNC_WAIT); } int xfs_syncd_init( struct xfs_mount *mp) { - mp->m_sync_work.w_syncer = xfs_sync_worker; - mp->m_sync_work.w_mount = mp; - mp->m_sync_work.w_completion = NULL; - mp->m_sync_task = kthread_run(xfssyncd, mp, "xfssyncd/%s", mp->m_fsname); - if (IS_ERR(mp->m_sync_task)) - return -PTR_ERR(mp->m_sync_task); + INIT_WORK(&mp->m_flush_work, xfs_flush_worker); + INIT_DELAYED_WORK(&mp->m_sync_work, xfs_sync_worker); + INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker); + + xfs_syncd_queue_sync(mp); + xfs_syncd_queue_reclaim(mp); + return 0; } @@ -582,7 +569,9 @@ void xfs_syncd_stop( struct xfs_mount *mp) { - kthread_stop(mp->m_sync_task); + cancel_delayed_work_sync(&mp->m_sync_work); + cancel_delayed_work_sync(&mp->m_reclaim_work); + cancel_work_sync(&mp->m_flush_work); } void @@ -601,6 +590,10 @@ __xfs_inode_set_reclaim_tag( XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino), XFS_ICI_RECLAIM_TAG); spin_unlock(&ip->i_mount->m_perag_lock); + + /* schedule periodic background inode reclaim */ + xfs_syncd_queue_reclaim(ip->i_mount); + trace_xfs_perag_set_reclaim(ip->i_mount, pag->pag_agno, -1, _RET_IP_); } @@ -1017,7 +1010,13 @@ xfs_reclaim_inodes( } /* - * Shrinker infrastructure. + * Inode cache shrinker. + * + * When called we make sure that there is a background (fast) inode reclaim in + * progress, while we will throttle the speed of reclaim via doiing synchronous + * reclaim of inodes. That means if we come across dirty inodes, we wait for + * them to be cleaned, which we hope will not be very long due to the + * background walker having already kicked the IO off on those dirty inodes. */ static int xfs_reclaim_inode_shrink( @@ -1032,10 +1031,15 @@ xfs_reclaim_inode_shrink( mp = container_of(shrink, struct xfs_mount, m_inode_shrink); if (nr_to_scan) { + /* kick background reclaimer and push the AIL */ + xfs_syncd_queue_reclaim(mp); + xfs_ail_push_all(mp->m_ail); + if (!(gfp_mask & __GFP_FS)) return -1; - xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK, &nr_to_scan); + xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK | SYNC_WAIT, + &nr_to_scan); /* terminate if we don't exhaust the scan */ if (nr_to_scan > 0) return -1; diff --git a/trunk/fs/xfs/linux-2.6/xfs_sync.h b/trunk/fs/xfs/linux-2.6/xfs_sync.h index 32ba6628290c..e3a6ad27415f 100644 --- a/trunk/fs/xfs/linux-2.6/xfs_sync.h +++ b/trunk/fs/xfs/linux-2.6/xfs_sync.h @@ -32,6 +32,8 @@ typedef struct xfs_sync_work { #define SYNC_WAIT 0x0001 /* wait for i/o to complete */ #define SYNC_TRYLOCK 0x0002 /* only try to lock inodes */ +extern struct workqueue_struct *xfs_syncd_wq; /* sync workqueue */ + int xfs_syncd_init(struct xfs_mount *mp); void xfs_syncd_stop(struct xfs_mount *mp); diff --git a/trunk/fs/xfs/quota/xfs_qm.c b/trunk/fs/xfs/quota/xfs_qm.c index 254ee062bd7d..69228aa8605a 100644 --- a/trunk/fs/xfs/quota/xfs_qm.c +++ b/trunk/fs/xfs/quota/xfs_qm.c @@ -461,12 +461,10 @@ xfs_qm_dqflush_all( struct xfs_quotainfo *q = mp->m_quotainfo; int recl; struct xfs_dquot *dqp; - int niters; int error; if (!q) return 0; - niters = 0; again: mutex_lock(&q->qi_dqlist_lock); list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) { @@ -1314,14 +1312,9 @@ xfs_qm_dqiter_bufs( { xfs_buf_t *bp; int error; - int notcommitted; - int incr; int type; ASSERT(blkcnt > 0); - notcommitted = 0; - incr = (blkcnt > XFS_QM_MAX_DQCLUSTER_LOGSZ) ? - XFS_QM_MAX_DQCLUSTER_LOGSZ : blkcnt; type = flags & XFS_QMOPT_UQUOTA ? XFS_DQ_USER : (flags & XFS_QMOPT_PQUOTA ? XFS_DQ_PROJ : XFS_DQ_GROUP); error = 0; diff --git a/trunk/fs/xfs/quota/xfs_qm.h b/trunk/fs/xfs/quota/xfs_qm.h index c9446f1c726d..567b29b9f1b3 100644 --- a/trunk/fs/xfs/quota/xfs_qm.h +++ b/trunk/fs/xfs/quota/xfs_qm.h @@ -65,11 +65,6 @@ extern kmem_zone_t *qm_dqtrxzone; * block in the dquot/xqm code. */ #define XFS_DQUOT_CLUSTER_SIZE_FSB (xfs_filblks_t)1 -/* - * When doing a quotacheck, we log dquot clusters of this many FSBs at most - * in a single transaction. We don't want to ask for too huge a log reservation. - */ -#define XFS_QM_MAX_DQCLUSTER_LOGSZ 3 typedef xfs_dqhash_t xfs_dqlist_t; diff --git a/trunk/fs/xfs/quota/xfs_qm_syscalls.c b/trunk/fs/xfs/quota/xfs_qm_syscalls.c index 0d62a07b7fd8..2dadb15d5ca9 100644 --- a/trunk/fs/xfs/quota/xfs_qm_syscalls.c +++ b/trunk/fs/xfs/quota/xfs_qm_syscalls.c @@ -313,14 +313,12 @@ xfs_qm_scall_quotaon( { int error; uint qf; - uint accflags; __int64_t sbflags; flags &= (XFS_ALL_QUOTA_ACCT | XFS_ALL_QUOTA_ENFD); /* * Switching on quota accounting must be done at mount time. */ - accflags = flags & XFS_ALL_QUOTA_ACCT; flags &= ~(XFS_ALL_QUOTA_ACCT); sbflags = 0; diff --git a/trunk/fs/xfs/xfs_alloc.c b/trunk/fs/xfs/xfs_alloc.c index 4bc3c649aee4..27d64d752eab 100644 --- a/trunk/fs/xfs/xfs_alloc.c +++ b/trunk/fs/xfs/xfs_alloc.c @@ -2395,17 +2395,33 @@ xfs_free_extent( memset(&args, 0, sizeof(xfs_alloc_arg_t)); args.tp = tp; args.mp = tp->t_mountp; + + /* + * validate that the block number is legal - the enables us to detect + * and handle a silent filesystem corruption rather than crashing. + */ args.agno = XFS_FSB_TO_AGNO(args.mp, bno); - ASSERT(args.agno < args.mp->m_sb.sb_agcount); + if (args.agno >= args.mp->m_sb.sb_agcount) + return EFSCORRUPTED; + args.agbno = XFS_FSB_TO_AGBNO(args.mp, bno); + if (args.agbno >= args.mp->m_sb.sb_agblocks) + return EFSCORRUPTED; + args.pag = xfs_perag_get(args.mp, args.agno); - if ((error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING))) + ASSERT(args.pag); + + error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING); + if (error) goto error0; -#ifdef DEBUG - ASSERT(args.agbp != NULL); - ASSERT((args.agbno + len) <= - be32_to_cpu(XFS_BUF_TO_AGF(args.agbp)->agf_length)); -#endif + + /* validate the extent size is legal now we have the agf locked */ + if (args.agbno + len > + be32_to_cpu(XFS_BUF_TO_AGF(args.agbp)->agf_length)) { + error = EFSCORRUPTED; + goto error0; + } + error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0); error0: xfs_perag_put(args.pag); diff --git a/trunk/fs/xfs/xfs_inode_item.c b/trunk/fs/xfs/xfs_inode_item.c index 46cc40131d4a..576fdfe81d60 100644 --- a/trunk/fs/xfs/xfs_inode_item.c +++ b/trunk/fs/xfs/xfs_inode_item.c @@ -197,6 +197,41 @@ xfs_inode_item_size( return nvecs; } +/* + * xfs_inode_item_format_extents - convert in-core extents to on-disk form + * + * For either the data or attr fork in extent format, we need to endian convert + * the in-core extent as we place them into the on-disk inode. In this case, we + * need to do this conversion before we write the extents into the log. Because + * we don't have the disk inode to write into here, we allocate a buffer and + * format the extents into it via xfs_iextents_copy(). We free the buffer in + * the unlock routine after the copy for the log has been made. + * + * In the case of the data fork, the in-core and on-disk fork sizes can be + * different due to delayed allocation extents. We only log on-disk extents + * here, so always use the physical fork size to determine the size of the + * buffer we need to allocate. + */ +STATIC void +xfs_inode_item_format_extents( + struct xfs_inode *ip, + struct xfs_log_iovec *vecp, + int whichfork, + int type) +{ + xfs_bmbt_rec_t *ext_buffer; + + ext_buffer = kmem_alloc(XFS_IFORK_SIZE(ip, whichfork), KM_SLEEP); + if (whichfork == XFS_DATA_FORK) + ip->i_itemp->ili_extents_buf = ext_buffer; + else + ip->i_itemp->ili_aextents_buf = ext_buffer; + + vecp->i_addr = ext_buffer; + vecp->i_len = xfs_iextents_copy(ip, ext_buffer, whichfork); + vecp->i_type = type; +} + /* * This is called to fill in the vector of log iovecs for the * given inode log item. It fills the first item with an inode @@ -213,7 +248,6 @@ xfs_inode_item_format( struct xfs_inode *ip = iip->ili_inode; uint nvecs; size_t data_bytes; - xfs_bmbt_rec_t *ext_buffer; xfs_mount_t *mp; vecp->i_addr = &iip->ili_format; @@ -320,22 +354,8 @@ xfs_inode_item_format( } else #endif { - /* - * There are delayed allocation extents - * in the inode, or we need to convert - * the extents to on disk format. - * Use xfs_iextents_copy() - * to copy only the real extents into - * a separate buffer. We'll free the - * buffer in the unlock routine. - */ - ext_buffer = kmem_alloc(ip->i_df.if_bytes, - KM_SLEEP); - iip->ili_extents_buf = ext_buffer; - vecp->i_addr = ext_buffer; - vecp->i_len = xfs_iextents_copy(ip, ext_buffer, - XFS_DATA_FORK); - vecp->i_type = XLOG_REG_TYPE_IEXT; + xfs_inode_item_format_extents(ip, vecp, + XFS_DATA_FORK, XLOG_REG_TYPE_IEXT); } ASSERT(vecp->i_len <= ip->i_df.if_bytes); iip->ili_format.ilf_dsize = vecp->i_len; @@ -445,19 +465,12 @@ xfs_inode_item_format( */ vecp->i_addr = ip->i_afp->if_u1.if_extents; vecp->i_len = ip->i_afp->if_bytes; + vecp->i_type = XLOG_REG_TYPE_IATTR_EXT; #else ASSERT(iip->ili_aextents_buf == NULL); - /* - * Need to endian flip before logging - */ - ext_buffer = kmem_alloc(ip->i_afp->if_bytes, - KM_SLEEP); - iip->ili_aextents_buf = ext_buffer; - vecp->i_addr = ext_buffer; - vecp->i_len = xfs_iextents_copy(ip, ext_buffer, - XFS_ATTR_FORK); + xfs_inode_item_format_extents(ip, vecp, + XFS_ATTR_FORK, XLOG_REG_TYPE_IATTR_EXT); #endif - vecp->i_type = XLOG_REG_TYPE_IATTR_EXT; iip->ili_format.ilf_asize = vecp->i_len; vecp++; nvecs++; diff --git a/trunk/fs/xfs/xfs_itable.c b/trunk/fs/xfs/xfs_itable.c index dc1882adaf54..751e94fe1f77 100644 --- a/trunk/fs/xfs/xfs_itable.c +++ b/trunk/fs/xfs/xfs_itable.c @@ -204,7 +204,6 @@ xfs_bulkstat( xfs_agi_t *agi; /* agi header data */ xfs_agino_t agino; /* inode # in allocation group */ xfs_agnumber_t agno; /* allocation group number */ - xfs_daddr_t bno; /* inode cluster start daddr */ int chunkidx; /* current index into inode chunk */ int clustidx; /* current index into inode cluster */ xfs_btree_cur_t *cur; /* btree cursor for ialloc btree */ @@ -463,7 +462,6 @@ xfs_bulkstat( mp->m_sb.sb_inopblog); } ino = XFS_AGINO_TO_INO(mp, agno, agino); - bno = XFS_AGB_TO_DADDR(mp, agno, agbno); /* * Skip if this inode is free. */ diff --git a/trunk/fs/xfs/xfs_log.c b/trunk/fs/xfs/xfs_log.c index 25efa9b8a602..b612ce4520ae 100644 --- a/trunk/fs/xfs/xfs_log.c +++ b/trunk/fs/xfs/xfs_log.c @@ -761,7 +761,7 @@ xfs_log_need_covered(xfs_mount_t *mp) break; case XLOG_STATE_COVER_NEED: case XLOG_STATE_COVER_NEED2: - if (!xfs_trans_ail_tail(log->l_ailp) && + if (!xfs_ail_min_lsn(log->l_ailp) && xlog_iclogs_empty(log)) { if (log->l_covered_state == XLOG_STATE_COVER_NEED) log->l_covered_state = XLOG_STATE_COVER_DONE; @@ -801,7 +801,7 @@ xlog_assign_tail_lsn( xfs_lsn_t tail_lsn; struct log *log = mp->m_log; - tail_lsn = xfs_trans_ail_tail(mp->m_ail); + tail_lsn = xfs_ail_min_lsn(mp->m_ail); if (!tail_lsn) tail_lsn = atomic64_read(&log->l_last_sync_lsn); @@ -1239,7 +1239,7 @@ xlog_grant_push_ail( * the filesystem is shutting down. */ if (!XLOG_FORCED_SHUTDOWN(log)) - xfs_trans_ail_push(log->l_ailp, threshold_lsn); + xfs_ail_push(log->l_ailp, threshold_lsn); } /* @@ -3407,6 +3407,17 @@ xlog_verify_dest_ptr( xfs_emerg(log->l_mp, "%s: invalid ptr", __func__); } +/* + * Check to make sure the grant write head didn't just over lap the tail. If + * the cycles are the same, we can't be overlapping. Otherwise, make sure that + * the cycles differ by exactly one and check the byte count. + * + * This check is run unlocked, so can give false positives. Rather than assert + * on failures, use a warn-once flag and a panic tag to allow the admin to + * determine if they want to panic the machine when such an error occurs. For + * debug kernels this will have the same effect as using an assert but, unlinke + * an assert, it can be turned off at runtime. + */ STATIC void xlog_verify_grant_tail( struct log *log) @@ -3414,17 +3425,22 @@ xlog_verify_grant_tail( int tail_cycle, tail_blocks; int cycle, space; - /* - * Check to make sure the grant write head didn't just over lap the - * tail. If the cycles are the same, we can't be overlapping. - * Otherwise, make sure that the cycles differ by exactly one and - * check the byte count. - */ xlog_crack_grant_head(&log->l_grant_write_head, &cycle, &space); xlog_crack_atomic_lsn(&log->l_tail_lsn, &tail_cycle, &tail_blocks); if (tail_cycle != cycle) { - ASSERT(cycle - 1 == tail_cycle); - ASSERT(space <= BBTOB(tail_blocks)); + if (cycle - 1 != tail_cycle && + !(log->l_flags & XLOG_TAIL_WARN)) { + xfs_alert_tag(log->l_mp, XFS_PTAG_LOGRES, + "%s: cycle - 1 != tail_cycle", __func__); + log->l_flags |= XLOG_TAIL_WARN; + } + + if (space > BBTOB(tail_blocks) && + !(log->l_flags & XLOG_TAIL_WARN)) { + xfs_alert_tag(log->l_mp, XFS_PTAG_LOGRES, + "%s: space > BBTOB(tail_blocks)", __func__); + log->l_flags |= XLOG_TAIL_WARN; + } } } diff --git a/trunk/fs/xfs/xfs_log_priv.h b/trunk/fs/xfs/xfs_log_priv.h index ffae692c9832..5864850e9e34 100644 --- a/trunk/fs/xfs/xfs_log_priv.h +++ b/trunk/fs/xfs/xfs_log_priv.h @@ -144,6 +144,7 @@ static inline uint xlog_get_client_id(__be32 i) #define XLOG_RECOVERY_NEEDED 0x4 /* log was recovered */ #define XLOG_IO_ERROR 0x8 /* log hit an I/O error, and being shutdown */ +#define XLOG_TAIL_WARN 0x10 /* log tail verify warning issued */ #ifdef __KERNEL__ /* diff --git a/trunk/fs/xfs/xfs_mount.h b/trunk/fs/xfs/xfs_mount.h index a62e8971539d..19af0ab0d0c6 100644 --- a/trunk/fs/xfs/xfs_mount.h +++ b/trunk/fs/xfs/xfs_mount.h @@ -203,12 +203,9 @@ typedef struct xfs_mount { struct mutex m_icsb_mutex; /* balancer sync lock */ #endif struct xfs_mru_cache *m_filestream; /* per-mount filestream data */ - struct task_struct *m_sync_task; /* generalised sync thread */ - xfs_sync_work_t m_sync_work; /* work item for VFS_SYNC */ - struct list_head m_sync_list; /* sync thread work item list */ - spinlock_t m_sync_lock; /* work item list lock */ - int m_sync_seq; /* sync thread generation no. */ - wait_queue_head_t m_wait_single_sync_task; + struct delayed_work m_sync_work; /* background sync work */ + struct delayed_work m_reclaim_work; /* background inode reclaim */ + struct work_struct m_flush_work; /* background inode flush */ __int64_t m_update_flags; /* sb flags we need to update on the next remount,rw */ struct shrinker m_inode_shrink; /* inode reclaim shrinker */ diff --git a/trunk/fs/xfs/xfs_trans_ail.c b/trunk/fs/xfs/xfs_trans_ail.c index 12aff9584e29..acdb92f14d51 100644 --- a/trunk/fs/xfs/xfs_trans_ail.c +++ b/trunk/fs/xfs/xfs_trans_ail.c @@ -28,74 +28,138 @@ #include "xfs_trans_priv.h" #include "xfs_error.h" -STATIC void xfs_ail_splice(struct xfs_ail *, struct list_head *, xfs_lsn_t); -STATIC void xfs_ail_delete(struct xfs_ail *, xfs_log_item_t *); -STATIC xfs_log_item_t * xfs_ail_min(struct xfs_ail *); -STATIC xfs_log_item_t * xfs_ail_next(struct xfs_ail *, xfs_log_item_t *); +struct workqueue_struct *xfs_ail_wq; /* AIL workqueue */ #ifdef DEBUG -STATIC void xfs_ail_check(struct xfs_ail *, xfs_log_item_t *); -#else +/* + * Check that the list is sorted as it should be. + */ +STATIC void +xfs_ail_check( + struct xfs_ail *ailp, + xfs_log_item_t *lip) +{ + xfs_log_item_t *prev_lip; + + if (list_empty(&ailp->xa_ail)) + return; + + /* + * Check the next and previous entries are valid. + */ + ASSERT((lip->li_flags & XFS_LI_IN_AIL) != 0); + prev_lip = list_entry(lip->li_ail.prev, xfs_log_item_t, li_ail); + if (&prev_lip->li_ail != &ailp->xa_ail) + ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) <= 0); + + prev_lip = list_entry(lip->li_ail.next, xfs_log_item_t, li_ail); + if (&prev_lip->li_ail != &ailp->xa_ail) + ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) >= 0); + + +#ifdef XFS_TRANS_DEBUG + /* + * Walk the list checking lsn ordering, and that every entry has the + * XFS_LI_IN_AIL flag set. This is really expensive, so only do it + * when specifically debugging the transaction subsystem. + */ + prev_lip = list_entry(&ailp->xa_ail, xfs_log_item_t, li_ail); + list_for_each_entry(lip, &ailp->xa_ail, li_ail) { + if (&prev_lip->li_ail != &ailp->xa_ail) + ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) <= 0); + ASSERT((lip->li_flags & XFS_LI_IN_AIL) != 0); + prev_lip = lip; + } +#endif /* XFS_TRANS_DEBUG */ +} +#else /* !DEBUG */ #define xfs_ail_check(a,l) #endif /* DEBUG */ +/* + * Return a pointer to the first item in the AIL. If the AIL is empty, then + * return NULL. + */ +static xfs_log_item_t * +xfs_ail_min( + struct xfs_ail *ailp) +{ + if (list_empty(&ailp->xa_ail)) + return NULL; + + return list_first_entry(&ailp->xa_ail, xfs_log_item_t, li_ail); +} + + /* + * Return a pointer to the last item in the AIL. If the AIL is empty, then + * return NULL. + */ +static xfs_log_item_t * +xfs_ail_max( + struct xfs_ail *ailp) +{ + if (list_empty(&ailp->xa_ail)) + return NULL; + + return list_entry(ailp->xa_ail.prev, xfs_log_item_t, li_ail); +} + +/* + * Return a pointer to the item which follows the given item in the AIL. If + * the given item is the last item in the list, then return NULL. + */ +static xfs_log_item_t * +xfs_ail_next( + struct xfs_ail *ailp, + xfs_log_item_t *lip) +{ + if (lip->li_ail.next == &ailp->xa_ail) + return NULL; + + return list_first_entry(&lip->li_ail, xfs_log_item_t, li_ail); +} /* - * This is called by the log manager code to determine the LSN - * of the tail of the log. This is exactly the LSN of the first - * item in the AIL. If the AIL is empty, then this function - * returns 0. + * This is called by the log manager code to determine the LSN of the tail of + * the log. This is exactly the LSN of the first item in the AIL. If the AIL + * is empty, then this function returns 0. * - * We need the AIL lock in order to get a coherent read of the - * lsn of the last item in the AIL. + * We need the AIL lock in order to get a coherent read of the lsn of the last + * item in the AIL. */ xfs_lsn_t -xfs_trans_ail_tail( +xfs_ail_min_lsn( struct xfs_ail *ailp) { - xfs_lsn_t lsn; + xfs_lsn_t lsn = 0; xfs_log_item_t *lip; spin_lock(&ailp->xa_lock); lip = xfs_ail_min(ailp); - if (lip == NULL) { - lsn = (xfs_lsn_t)0; - } else { + if (lip) lsn = lip->li_lsn; - } spin_unlock(&ailp->xa_lock); return lsn; } /* - * xfs_trans_push_ail - * - * This routine is called to move the tail of the AIL forward. It does this by - * trying to flush items in the AIL whose lsns are below the given - * threshold_lsn. - * - * the push is run asynchronously in a separate thread, so we return the tail - * of the log right now instead of the tail after the push. This means we will - * either continue right away, or we will sleep waiting on the async thread to - * do its work. - * - * We do this unlocked - we only need to know whether there is anything in the - * AIL at the time we are called. We don't need to access the contents of - * any of the objects, so the lock is not needed. + * Return the maximum lsn held in the AIL, or zero if the AIL is empty. */ -void -xfs_trans_ail_push( - struct xfs_ail *ailp, - xfs_lsn_t threshold_lsn) +static xfs_lsn_t +xfs_ail_max_lsn( + struct xfs_ail *ailp) { - xfs_log_item_t *lip; + xfs_lsn_t lsn = 0; + xfs_log_item_t *lip; - lip = xfs_ail_min(ailp); - if (lip && !XFS_FORCED_SHUTDOWN(ailp->xa_mount)) { - if (XFS_LSN_CMP(threshold_lsn, ailp->xa_target) > 0) - xfsaild_wakeup(ailp, threshold_lsn); - } + spin_lock(&ailp->xa_lock); + lip = xfs_ail_max(ailp); + if (lip) + lsn = lip->li_lsn; + spin_unlock(&ailp->xa_lock); + + return lsn; } /* @@ -236,16 +300,57 @@ xfs_trans_ail_cursor_first( } /* - * xfsaild_push does the work of pushing on the AIL. Returning a timeout of - * zero indicates that the caller should sleep until woken. + * splice the log item list into the AIL at the given LSN. */ -long -xfsaild_push( - struct xfs_ail *ailp, - xfs_lsn_t *last_lsn) +static void +xfs_ail_splice( + struct xfs_ail *ailp, + struct list_head *list, + xfs_lsn_t lsn) { - long tout = 0; - xfs_lsn_t last_pushed_lsn = *last_lsn; + xfs_log_item_t *next_lip; + + /* If the list is empty, just insert the item. */ + if (list_empty(&ailp->xa_ail)) { + list_splice(list, &ailp->xa_ail); + return; + } + + list_for_each_entry_reverse(next_lip, &ailp->xa_ail, li_ail) { + if (XFS_LSN_CMP(next_lip->li_lsn, lsn) <= 0) + break; + } + + ASSERT(&next_lip->li_ail == &ailp->xa_ail || + XFS_LSN_CMP(next_lip->li_lsn, lsn) <= 0); + + list_splice_init(list, &next_lip->li_ail); +} + +/* + * Delete the given item from the AIL. Return a pointer to the item. + */ +static void +xfs_ail_delete( + struct xfs_ail *ailp, + xfs_log_item_t *lip) +{ + xfs_ail_check(ailp, lip); + list_del(&lip->li_ail); + xfs_trans_ail_cursor_clear(ailp, lip); +} + +/* + * xfs_ail_worker does the work of pushing on the AIL. It will requeue itself + * to run at a later time if there is more work to do to complete the push. + */ +STATIC void +xfs_ail_worker( + struct work_struct *work) +{ + struct xfs_ail *ailp = container_of(to_delayed_work(work), + struct xfs_ail, xa_work); + long tout; xfs_lsn_t target = ailp->xa_target; xfs_lsn_t lsn; xfs_log_item_t *lip; @@ -256,15 +361,15 @@ xfsaild_push( spin_lock(&ailp->xa_lock); xfs_trans_ail_cursor_init(ailp, cur); - lip = xfs_trans_ail_cursor_first(ailp, cur, *last_lsn); + lip = xfs_trans_ail_cursor_first(ailp, cur, ailp->xa_last_pushed_lsn); if (!lip || XFS_FORCED_SHUTDOWN(mp)) { /* * AIL is empty or our push has reached the end. */ xfs_trans_ail_cursor_done(ailp, cur); spin_unlock(&ailp->xa_lock); - *last_lsn = 0; - return tout; + ailp->xa_last_pushed_lsn = 0; + return; } XFS_STATS_INC(xs_push_ail); @@ -301,13 +406,13 @@ xfsaild_push( case XFS_ITEM_SUCCESS: XFS_STATS_INC(xs_push_ail_success); IOP_PUSH(lip); - last_pushed_lsn = lsn; + ailp->xa_last_pushed_lsn = lsn; break; case XFS_ITEM_PUSHBUF: XFS_STATS_INC(xs_push_ail_pushbuf); IOP_PUSHBUF(lip); - last_pushed_lsn = lsn; + ailp->xa_last_pushed_lsn = lsn; push_xfsbufd = 1; break; @@ -319,7 +424,7 @@ xfsaild_push( case XFS_ITEM_LOCKED: XFS_STATS_INC(xs_push_ail_locked); - last_pushed_lsn = lsn; + ailp->xa_last_pushed_lsn = lsn; stuck++; break; @@ -374,9 +479,23 @@ xfsaild_push( wake_up_process(mp->m_ddev_targp->bt_task); } + /* assume we have more work to do in a short while */ + tout = 10; if (!count) { /* We're past our target or empty, so idle */ - last_pushed_lsn = 0; + ailp->xa_last_pushed_lsn = 0; + + /* + * Check for an updated push target before clearing the + * XFS_AIL_PUSHING_BIT. If the target changed, we've got more + * work to do. Wait a bit longer before starting that work. + */ + smp_rmb(); + if (ailp->xa_target == target) { + clear_bit(XFS_AIL_PUSHING_BIT, &ailp->xa_flags); + return; + } + tout = 50; } else if (XFS_LSN_CMP(lsn, target) >= 0) { /* * We reached the target so wait a bit longer for I/O to @@ -384,7 +503,7 @@ xfsaild_push( * start the next scan from the start of the AIL. */ tout = 50; - last_pushed_lsn = 0; + ailp->xa_last_pushed_lsn = 0; } else if ((stuck * 100) / count > 90) { /* * Either there is a lot of contention on the AIL or we @@ -396,14 +515,61 @@ xfsaild_push( * continuing from where we were. */ tout = 20; - } else { - /* more to do, but wait a short while before continuing */ - tout = 10; } - *last_lsn = last_pushed_lsn; - return tout; + + /* There is more to do, requeue us. */ + queue_delayed_work(xfs_syncd_wq, &ailp->xa_work, + msecs_to_jiffies(tout)); +} + +/* + * This routine is called to move the tail of the AIL forward. It does this by + * trying to flush items in the AIL whose lsns are below the given + * threshold_lsn. + * + * The push is run asynchronously in a workqueue, which means the caller needs + * to handle waiting on the async flush for space to become available. + * We don't want to interrupt any push that is in progress, hence we only queue + * work if we set the pushing bit approriately. + * + * We do this unlocked - we only need to know whether there is anything in the + * AIL at the time we are called. We don't need to access the contents of + * any of the objects, so the lock is not needed. + */ +void +xfs_ail_push( + struct xfs_ail *ailp, + xfs_lsn_t threshold_lsn) +{ + xfs_log_item_t *lip; + + lip = xfs_ail_min(ailp); + if (!lip || XFS_FORCED_SHUTDOWN(ailp->xa_mount) || + XFS_LSN_CMP(threshold_lsn, ailp->xa_target) <= 0) + return; + + /* + * Ensure that the new target is noticed in push code before it clears + * the XFS_AIL_PUSHING_BIT. + */ + smp_wmb(); + ailp->xa_target = threshold_lsn; + if (!test_and_set_bit(XFS_AIL_PUSHING_BIT, &ailp->xa_flags)) + queue_delayed_work(xfs_syncd_wq, &ailp->xa_work, 0); } +/* + * Push out all items in the AIL immediately + */ +void +xfs_ail_push_all( + struct xfs_ail *ailp) +{ + xfs_lsn_t threshold_lsn = xfs_ail_max_lsn(ailp); + + if (threshold_lsn) + xfs_ail_push(ailp, threshold_lsn); +} /* * This is to be called when an item is unlocked that may have @@ -615,7 +781,6 @@ xfs_trans_ail_init( xfs_mount_t *mp) { struct xfs_ail *ailp; - int error; ailp = kmem_zalloc(sizeof(struct xfs_ail), KM_MAYFAIL); if (!ailp) @@ -624,15 +789,9 @@ xfs_trans_ail_init( ailp->xa_mount = mp; INIT_LIST_HEAD(&ailp->xa_ail); spin_lock_init(&ailp->xa_lock); - error = xfsaild_start(ailp); - if (error) - goto out_free_ailp; + INIT_DELAYED_WORK(&ailp->xa_work, xfs_ail_worker); mp->m_ail = ailp; return 0; - -out_free_ailp: - kmem_free(ailp); - return error; } void @@ -641,124 +800,6 @@ xfs_trans_ail_destroy( { struct xfs_ail *ailp = mp->m_ail; - xfsaild_stop(ailp); + cancel_delayed_work_sync(&ailp->xa_work); kmem_free(ailp); } - -/* - * splice the log item list into the AIL at the given LSN. - */ -STATIC void -xfs_ail_splice( - struct xfs_ail *ailp, - struct list_head *list, - xfs_lsn_t lsn) -{ - xfs_log_item_t *next_lip; - - /* - * If the list is empty, just insert the item. - */ - if (list_empty(&ailp->xa_ail)) { - list_splice(list, &ailp->xa_ail); - return; - } - - list_for_each_entry_reverse(next_lip, &ailp->xa_ail, li_ail) { - if (XFS_LSN_CMP(next_lip->li_lsn, lsn) <= 0) - break; - } - - ASSERT((&next_lip->li_ail == &ailp->xa_ail) || - (XFS_LSN_CMP(next_lip->li_lsn, lsn) <= 0)); - - list_splice_init(list, &next_lip->li_ail); - return; -} - -/* - * Delete the given item from the AIL. Return a pointer to the item. - */ -STATIC void -xfs_ail_delete( - struct xfs_ail *ailp, - xfs_log_item_t *lip) -{ - xfs_ail_check(ailp, lip); - list_del(&lip->li_ail); - xfs_trans_ail_cursor_clear(ailp, lip); -} - -/* - * Return a pointer to the first item in the AIL. - * If the AIL is empty, then return NULL. - */ -STATIC xfs_log_item_t * -xfs_ail_min( - struct xfs_ail *ailp) -{ - if (list_empty(&ailp->xa_ail)) - return NULL; - - return list_first_entry(&ailp->xa_ail, xfs_log_item_t, li_ail); -} - -/* - * Return a pointer to the item which follows - * the given item in the AIL. If the given item - * is the last item in the list, then return NULL. - */ -STATIC xfs_log_item_t * -xfs_ail_next( - struct xfs_ail *ailp, - xfs_log_item_t *lip) -{ - if (lip->li_ail.next == &ailp->xa_ail) - return NULL; - - return list_first_entry(&lip->li_ail, xfs_log_item_t, li_ail); -} - -#ifdef DEBUG -/* - * Check that the list is sorted as it should be. - */ -STATIC void -xfs_ail_check( - struct xfs_ail *ailp, - xfs_log_item_t *lip) -{ - xfs_log_item_t *prev_lip; - - if (list_empty(&ailp->xa_ail)) - return; - - /* - * Check the next and previous entries are valid. - */ - ASSERT((lip->li_flags & XFS_LI_IN_AIL) != 0); - prev_lip = list_entry(lip->li_ail.prev, xfs_log_item_t, li_ail); - if (&prev_lip->li_ail != &ailp->xa_ail) - ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) <= 0); - - prev_lip = list_entry(lip->li_ail.next, xfs_log_item_t, li_ail); - if (&prev_lip->li_ail != &ailp->xa_ail) - ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) >= 0); - - -#ifdef XFS_TRANS_DEBUG - /* - * Walk the list checking lsn ordering, and that every entry has the - * XFS_LI_IN_AIL flag set. This is really expensive, so only do it - * when specifically debugging the transaction subsystem. - */ - prev_lip = list_entry(&ailp->xa_ail, xfs_log_item_t, li_ail); - list_for_each_entry(lip, &ailp->xa_ail, li_ail) { - if (&prev_lip->li_ail != &ailp->xa_ail) - ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) <= 0); - ASSERT((lip->li_flags & XFS_LI_IN_AIL) != 0); - prev_lip = lip; - } -#endif /* XFS_TRANS_DEBUG */ -} -#endif /* DEBUG */ diff --git a/trunk/fs/xfs/xfs_trans_priv.h b/trunk/fs/xfs/xfs_trans_priv.h index 35162c238fa3..6b164e9e9a1f 100644 --- a/trunk/fs/xfs/xfs_trans_priv.h +++ b/trunk/fs/xfs/xfs_trans_priv.h @@ -65,16 +65,22 @@ struct xfs_ail_cursor { struct xfs_ail { struct xfs_mount *xa_mount; struct list_head xa_ail; - uint xa_gen; - struct task_struct *xa_task; xfs_lsn_t xa_target; struct xfs_ail_cursor xa_cursors; spinlock_t xa_lock; + struct delayed_work xa_work; + xfs_lsn_t xa_last_pushed_lsn; + unsigned long xa_flags; }; +#define XFS_AIL_PUSHING_BIT 0 + /* * From xfs_trans_ail.c */ + +extern struct workqueue_struct *xfs_ail_wq; /* AIL workqueue */ + void xfs_trans_ail_update_bulk(struct xfs_ail *ailp, struct xfs_log_item **log_items, int nr_items, xfs_lsn_t lsn) __releases(ailp->xa_lock); @@ -98,12 +104,13 @@ xfs_trans_ail_delete( xfs_trans_ail_delete_bulk(ailp, &lip, 1); } -void xfs_trans_ail_push(struct xfs_ail *, xfs_lsn_t); +void xfs_ail_push(struct xfs_ail *, xfs_lsn_t); +void xfs_ail_push_all(struct xfs_ail *); +xfs_lsn_t xfs_ail_min_lsn(struct xfs_ail *ailp); + void xfs_trans_unlocked_item(struct xfs_ail *, xfs_log_item_t *); -xfs_lsn_t xfs_trans_ail_tail(struct xfs_ail *ailp); - struct xfs_log_item *xfs_trans_ail_cursor_first(struct xfs_ail *ailp, struct xfs_ail_cursor *cur, xfs_lsn_t lsn); @@ -112,11 +119,6 @@ struct xfs_log_item *xfs_trans_ail_cursor_next(struct xfs_ail *ailp, void xfs_trans_ail_cursor_done(struct xfs_ail *ailp, struct xfs_ail_cursor *cur); -long xfsaild_push(struct xfs_ail *, xfs_lsn_t *); -void xfsaild_wakeup(struct xfs_ail *, xfs_lsn_t); -int xfsaild_start(struct xfs_ail *); -void xfsaild_stop(struct xfs_ail *); - #if BITS_PER_LONG != 64 static inline void xfs_trans_ail_copy_lsn( diff --git a/trunk/include/linux/can/platform/mcp251x.h b/trunk/include/linux/can/platform/mcp251x.h index 8e20540043f5..089fe43211a4 100644 --- a/trunk/include/linux/can/platform/mcp251x.h +++ b/trunk/include/linux/can/platform/mcp251x.h @@ -12,6 +12,7 @@ /** * struct mcp251x_platform_data - MCP251X SPI CAN controller platform data * @oscillator_frequency: - oscillator frequency in Hz + * @irq_flags: - IRQF configuration flags * @board_specific_setup: - called before probing the chip (power,reset) * @transceiver_enable: - called to power on/off the transceiver * @power_enable: - called to power on/off the mcp *and* the @@ -24,6 +25,7 @@ struct mcp251x_platform_data { unsigned long oscillator_frequency; + unsigned long irq_flags; int (*board_specific_setup)(struct spi_device *spi); int (*transceiver_enable)(int enable); int (*power_enable) (int enable); diff --git a/trunk/include/linux/netfilter.h b/trunk/include/linux/netfilter.h index eeec00abb664..7fa95df60146 100644 --- a/trunk/include/linux/netfilter.h +++ b/trunk/include/linux/netfilter.h @@ -270,7 +270,8 @@ struct nf_afinfo { unsigned int dataoff, unsigned int len, u_int8_t protocol); - int (*route)(struct dst_entry **dst, struct flowi *fl); + int (*route)(struct net *net, struct dst_entry **dst, + struct flowi *fl, bool strict); void (*saveroute)(const struct sk_buff *skb, struct nf_queue_entry *entry); int (*reroute)(struct sk_buff *skb, diff --git a/trunk/include/linux/netfilter/ipset/ip_set.h b/trunk/include/linux/netfilter/ipset/ip_set.h index ec333d83f3b4..5a262e3ae715 100644 --- a/trunk/include/linux/netfilter/ipset/ip_set.h +++ b/trunk/include/linux/netfilter/ipset/ip_set.h @@ -293,7 +293,7 @@ struct ip_set { /* Lock protecting the set data */ rwlock_t lock; /* References to the set */ - atomic_t ref; + u32 ref; /* The core set type */ struct ip_set_type *type; /* The type variant doing the real job */ diff --git a/trunk/include/linux/netfilter/ipset/ip_set_ahash.h b/trunk/include/linux/netfilter/ipset/ip_set_ahash.h index ec9d9bea1e37..a0196ac79051 100644 --- a/trunk/include/linux/netfilter/ipset/ip_set_ahash.h +++ b/trunk/include/linux/netfilter/ipset/ip_set_ahash.h @@ -515,8 +515,7 @@ type_pf_head(struct ip_set *set, struct sk_buff *skb) if (h->netmask != HOST_MASK) NLA_PUT_U8(skb, IPSET_ATTR_NETMASK, h->netmask); #endif - NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES, - htonl(atomic_read(&set->ref) - 1)); + NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)); NLA_PUT_NET32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)); if (with_timeout(h->timeout)) NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, htonl(h->timeout)); diff --git a/trunk/include/net/ip_vs.h b/trunk/include/net/ip_vs.h index 814b434db749..d516f00c8e0f 100644 --- a/trunk/include/net/ip_vs.h +++ b/trunk/include/net/ip_vs.h @@ -52,7 +52,7 @@ static inline struct net *skb_net(const struct sk_buff *skb) */ if (likely(skb->dev && skb->dev->nd_net)) return dev_net(skb->dev); - if (skb_dst(skb)->dev) + if (skb_dst(skb) && skb_dst(skb)->dev) return dev_net(skb_dst(skb)->dev); WARN(skb->sk, "Maybe skb_sknet should be used in %s() at line:%d\n", __func__, __LINE__); diff --git a/trunk/include/net/mac80211.h b/trunk/include/net/mac80211.h index cb13239fe8e3..025d4cc7bbf8 100644 --- a/trunk/include/net/mac80211.h +++ b/trunk/include/net/mac80211.h @@ -1753,8 +1753,19 @@ enum ieee80211_ampdu_mlme_action { * that TX/RX_STOP can pass NULL for this parameter. * The @buf_size parameter is only valid when the action is set to * %IEEE80211_AMPDU_TX_OPERATIONAL and indicates the peer's reorder - * buffer size (number of subframes) for this session -- aggregates - * containing more subframes than this may not be transmitted to the peer. + * buffer size (number of subframes) for this session -- the driver + * may neither send aggregates containing more subframes than this + * nor send aggregates in a way that lost frames would exceed the + * buffer size. If just limiting the aggregate size, this would be + * possible with a buf_size of 8: + * - TX: 1.....7 + * - RX: 2....7 (lost frame #1) + * - TX: 8..1... + * which is invalid since #1 was now re-transmitted well past the + * buffer size of 8. Correct ways to retransmit #1 would be: + * - TX: 1 or 18 or 81 + * Even "189" would be wrong since 1 could be lost again. + * * Returns a negative error code on failure. * The callback can sleep. * diff --git a/trunk/include/net/route.h b/trunk/include/net/route.h index f88429cad52a..8fce0621cad1 100644 --- a/trunk/include/net/route.h +++ b/trunk/include/net/route.h @@ -64,6 +64,7 @@ struct rtable { __be32 rt_dst; /* Path destination */ __be32 rt_src; /* Path source */ + int rt_route_iif; int rt_iif; int rt_oif; __u32 rt_mark; @@ -80,12 +81,12 @@ struct rtable { static inline bool rt_is_input_route(struct rtable *rt) { - return rt->rt_iif != 0; + return rt->rt_route_iif != 0; } static inline bool rt_is_output_route(struct rtable *rt) { - return rt->rt_iif == 0; + return rt->rt_route_iif == 0; } struct ip_rt_acct { diff --git a/trunk/net/dsa/mv88e6131.c b/trunk/net/dsa/mv88e6131.c index d951f93644bf..3da418894efc 100644 --- a/trunk/net/dsa/mv88e6131.c +++ b/trunk/net/dsa/mv88e6131.c @@ -14,6 +14,13 @@ #include "dsa_priv.h" #include "mv88e6xxx.h" +/* + * Switch product IDs + */ +#define ID_6085 0x04a0 +#define ID_6095 0x0950 +#define ID_6131 0x1060 + static char *mv88e6131_probe(struct mii_bus *bus, int sw_addr) { int ret; @@ -21,9 +28,11 @@ static char *mv88e6131_probe(struct mii_bus *bus, int sw_addr) ret = __mv88e6xxx_reg_read(bus, sw_addr, REG_PORT(0), 0x03); if (ret >= 0) { ret &= 0xfff0; - if (ret == 0x0950) + if (ret == ID_6085) + return "Marvell 88E6085"; + if (ret == ID_6095) return "Marvell 88E6095/88E6095F"; - if (ret == 0x1060) + if (ret == ID_6131) return "Marvell 88E6131"; } @@ -164,6 +173,7 @@ static int mv88e6131_setup_global(struct dsa_switch *ds) static int mv88e6131_setup_port(struct dsa_switch *ds, int p) { + struct mv88e6xxx_priv_state *ps = (void *)(ds + 1); int addr = REG_PORT(p); u16 val; @@ -171,10 +181,13 @@ static int mv88e6131_setup_port(struct dsa_switch *ds, int p) * MAC Forcing register: don't force link, speed, duplex * or flow control state to any particular values on physical * ports, but force the CPU port and all DSA ports to 1000 Mb/s - * full duplex. + * (100 Mb/s on 6085) full duplex. */ if (dsa_is_cpu_port(ds, p) || ds->dsa_port_mask & (1 << p)) - REG_WRITE(addr, 0x01, 0x003e); + if (ps->id == ID_6085) + REG_WRITE(addr, 0x01, 0x003d); /* 100 Mb/s */ + else + REG_WRITE(addr, 0x01, 0x003e); /* 1000 Mb/s */ else REG_WRITE(addr, 0x01, 0x0003); @@ -286,6 +299,8 @@ static int mv88e6131_setup(struct dsa_switch *ds) mv88e6xxx_ppu_state_init(ds); mutex_init(&ps->stats_mutex); + ps->id = REG_READ(REG_PORT(0), 0x03) & 0xfff0; + ret = mv88e6131_switch_reset(ds); if (ret < 0) return ret; diff --git a/trunk/net/dsa/mv88e6xxx.h b/trunk/net/dsa/mv88e6xxx.h index eb0e0aaa9f1b..61156ca26a0d 100644 --- a/trunk/net/dsa/mv88e6xxx.h +++ b/trunk/net/dsa/mv88e6xxx.h @@ -39,6 +39,8 @@ struct mv88e6xxx_priv_state { * Hold this mutex over snapshot + dump sequences. */ struct mutex stats_mutex; + + int id; /* switch product id */ }; struct mv88e6xxx_hw_stat { diff --git a/trunk/net/ipv4/netfilter.c b/trunk/net/ipv4/netfilter.c index f3c0b549b8e1..4614babdc45f 100644 --- a/trunk/net/ipv4/netfilter.c +++ b/trunk/net/ipv4/netfilter.c @@ -221,9 +221,10 @@ static __sum16 nf_ip_checksum_partial(struct sk_buff *skb, unsigned int hook, return csum; } -static int nf_ip_route(struct dst_entry **dst, struct flowi *fl) +static int nf_ip_route(struct net *net, struct dst_entry **dst, + struct flowi *fl, bool strict __always_unused) { - struct rtable *rt = ip_route_output_key(&init_net, &fl->u.ip4); + struct rtable *rt = ip_route_output_key(net, &fl->u.ip4); if (IS_ERR(rt)) return PTR_ERR(rt); *dst = &rt->dst; diff --git a/trunk/net/ipv4/route.c b/trunk/net/ipv4/route.c index ea107515c53e..c1acf69858fd 100644 --- a/trunk/net/ipv4/route.c +++ b/trunk/net/ipv4/route.c @@ -1891,6 +1891,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, #ifdef CONFIG_IP_ROUTE_CLASSID rth->dst.tclassid = itag; #endif + rth->rt_route_iif = dev->ifindex; rth->rt_iif = dev->ifindex; rth->dst.dev = init_net.loopback_dev; dev_hold(rth->dst.dev); @@ -2026,6 +2027,7 @@ static int __mkroute_input(struct sk_buff *skb, rth->rt_key_src = saddr; rth->rt_src = saddr; rth->rt_gateway = daddr; + rth->rt_route_iif = in_dev->dev->ifindex; rth->rt_iif = in_dev->dev->ifindex; rth->dst.dev = (out_dev)->dev; dev_hold(rth->dst.dev); @@ -2202,6 +2204,7 @@ out: return err; #ifdef CONFIG_IP_ROUTE_CLASSID rth->dst.tclassid = itag; #endif + rth->rt_route_iif = dev->ifindex; rth->rt_iif = dev->ifindex; rth->dst.dev = net->loopback_dev; dev_hold(rth->dst.dev); @@ -2401,7 +2404,8 @@ static struct rtable *__mkroute_output(const struct fib_result *res, rth->rt_mark = oldflp4->flowi4_mark; rth->rt_dst = fl4->daddr; rth->rt_src = fl4->saddr; - rth->rt_iif = 0; + rth->rt_route_iif = 0; + rth->rt_iif = oldflp4->flowi4_oif ? : dev_out->ifindex; /* get references to the devices that are to be hold by the routing cache entry */ rth->dst.dev = dev_out; @@ -2716,6 +2720,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or rt->rt_key_dst = ort->rt_key_dst; rt->rt_key_src = ort->rt_key_src; rt->rt_tos = ort->rt_tos; + rt->rt_route_iif = ort->rt_route_iif; rt->rt_iif = ort->rt_iif; rt->rt_oif = ort->rt_oif; rt->rt_mark = ort->rt_mark; @@ -2725,7 +2730,6 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or rt->rt_type = ort->rt_type; rt->rt_dst = ort->rt_dst; rt->rt_src = ort->rt_src; - rt->rt_iif = ort->rt_iif; rt->rt_gateway = ort->rt_gateway; rt->rt_spec_dst = ort->rt_spec_dst; rt->peer = ort->peer; diff --git a/trunk/net/ipv4/xfrm4_policy.c b/trunk/net/ipv4/xfrm4_policy.c index 13e0e7f659ff..d20a05e970d8 100644 --- a/trunk/net/ipv4/xfrm4_policy.c +++ b/trunk/net/ipv4/xfrm4_policy.c @@ -74,6 +74,7 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, rt->rt_key_dst = fl4->daddr; rt->rt_key_src = fl4->saddr; rt->rt_tos = fl4->flowi4_tos; + rt->rt_route_iif = fl4->flowi4_iif; rt->rt_iif = fl4->flowi4_iif; rt->rt_oif = fl4->flowi4_oif; rt->rt_mark = fl4->flowi4_mark; diff --git a/trunk/net/ipv6/netfilter.c b/trunk/net/ipv6/netfilter.c index 39aaca2b4fd2..28bc1f644b7b 100644 --- a/trunk/net/ipv6/netfilter.c +++ b/trunk/net/ipv6/netfilter.c @@ -90,9 +90,18 @@ static int nf_ip6_reroute(struct sk_buff *skb, return 0; } -static int nf_ip6_route(struct dst_entry **dst, struct flowi *fl) +static int nf_ip6_route(struct net *net, struct dst_entry **dst, + struct flowi *fl, bool strict) { - *dst = ip6_route_output(&init_net, NULL, &fl->u.ip6); + static const struct ipv6_pinfo fake_pinfo; + static const struct inet_sock fake_sk = { + /* makes ip6_route_output set RT6_LOOKUP_F_IFACE: */ + .sk.sk_bound_dev_if = 1, + .pinet6 = (struct ipv6_pinfo *) &fake_pinfo, + }; + const void *sk = strict ? &fake_sk : NULL; + + *dst = ip6_route_output(net, sk, &fl->u.ip6); return (*dst)->error; } diff --git a/trunk/net/ipv6/tcp_ipv6.c b/trunk/net/ipv6/tcp_ipv6.c index 56fa12538d45..4f49e5dd41bb 100644 --- a/trunk/net/ipv6/tcp_ipv6.c +++ b/trunk/net/ipv6/tcp_ipv6.c @@ -1622,6 +1622,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) opt_skb = skb_clone(skb, GFP_ATOMIC); if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ + sock_rps_save_rxhash(sk, skb->rxhash); if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) goto reset; if (opt_skb) @@ -1649,7 +1650,8 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) __kfree_skb(opt_skb); return 0; } - } + } else + sock_rps_save_rxhash(sk, skb->rxhash); if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) goto reset; diff --git a/trunk/net/ipv6/udp.c b/trunk/net/ipv6/udp.c index d7037c006e13..15c37746845e 100644 --- a/trunk/net/ipv6/udp.c +++ b/trunk/net/ipv6/udp.c @@ -505,6 +505,9 @@ int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) int rc; int is_udplite = IS_UDPLITE(sk); + if (!ipv6_addr_any(&inet6_sk(sk)->daddr)) + sock_rps_save_rxhash(sk, skb->rxhash); + if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) goto drop; diff --git a/trunk/net/mac80211/rx.c b/trunk/net/mac80211/rx.c index 9d192d665ff5..c5d4530d8284 100644 --- a/trunk/net/mac80211/rx.c +++ b/trunk/net/mac80211/rx.c @@ -2541,7 +2541,6 @@ static void ieee80211_rx_handlers(struct ieee80211_rx_data *rx) * same TID from the same station */ rx->skb = skb; - rx->flags = 0; CALL_RXH(ieee80211_rx_h_decrypt) CALL_RXH(ieee80211_rx_h_check_more_data) @@ -2612,6 +2611,7 @@ void ieee80211_release_reorder_timeout(struct sta_info *sta, int tid) .sdata = sta->sdata, .local = sta->local, .queue = tid, + .flags = 0, }; struct tid_ampdu_rx *tid_agg_rx; diff --git a/trunk/net/netfilter/Kconfig b/trunk/net/netfilter/Kconfig index c3f988aa1152..32bff6d86cb2 100644 --- a/trunk/net/netfilter/Kconfig +++ b/trunk/net/netfilter/Kconfig @@ -652,7 +652,6 @@ comment "Xtables matches" config NETFILTER_XT_MATCH_ADDRTYPE tristate '"addrtype" address type match support' depends on NETFILTER_ADVANCED - depends on (IPV6 || IPV6=n) ---help--- This option allows you to match what routing thinks of an address, eg. UNICAST, LOCAL, BROADCAST, ... diff --git a/trunk/net/netfilter/ipset/ip_set_bitmap_ip.c b/trunk/net/netfilter/ipset/ip_set_bitmap_ip.c index bca96990218d..a113ff066928 100644 --- a/trunk/net/netfilter/ipset/ip_set_bitmap_ip.c +++ b/trunk/net/netfilter/ipset/ip_set_bitmap_ip.c @@ -338,8 +338,7 @@ bitmap_ip_head(struct ip_set *set, struct sk_buff *skb) NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP_TO, htonl(map->last_ip)); if (map->netmask != 32) NLA_PUT_U8(skb, IPSET_ATTR_NETMASK, map->netmask); - NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES, - htonl(atomic_read(&set->ref) - 1)); + NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)); NLA_PUT_NET32(skb, IPSET_ATTR_MEMSIZE, htonl(sizeof(*map) + map->memsize)); if (with_timeout(map->timeout)) diff --git a/trunk/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/trunk/net/netfilter/ipset/ip_set_bitmap_ipmac.c index 5e790172deff..00a33242e90c 100644 --- a/trunk/net/netfilter/ipset/ip_set_bitmap_ipmac.c +++ b/trunk/net/netfilter/ipset/ip_set_bitmap_ipmac.c @@ -434,8 +434,7 @@ bitmap_ipmac_head(struct ip_set *set, struct sk_buff *skb) goto nla_put_failure; NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, htonl(map->first_ip)); NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP_TO, htonl(map->last_ip)); - NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES, - htonl(atomic_read(&set->ref) - 1)); + NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)); NLA_PUT_NET32(skb, IPSET_ATTR_MEMSIZE, htonl(sizeof(*map) + (map->last_ip - map->first_ip + 1) * map->dsize)); diff --git a/trunk/net/netfilter/ipset/ip_set_bitmap_port.c b/trunk/net/netfilter/ipset/ip_set_bitmap_port.c index 165f09b1a9cb..6b38eb8f6ed8 100644 --- a/trunk/net/netfilter/ipset/ip_set_bitmap_port.c +++ b/trunk/net/netfilter/ipset/ip_set_bitmap_port.c @@ -320,8 +320,7 @@ bitmap_port_head(struct ip_set *set, struct sk_buff *skb) goto nla_put_failure; NLA_PUT_NET16(skb, IPSET_ATTR_PORT, htons(map->first_port)); NLA_PUT_NET16(skb, IPSET_ATTR_PORT_TO, htons(map->last_port)); - NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES, - htonl(atomic_read(&set->ref) - 1)); + NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)); NLA_PUT_NET32(skb, IPSET_ATTR_MEMSIZE, htonl(sizeof(*map) + map->memsize)); if (with_timeout(map->timeout)) diff --git a/trunk/net/netfilter/ipset/ip_set_core.c b/trunk/net/netfilter/ipset/ip_set_core.c index 253326e8d990..9152e69a162d 100644 --- a/trunk/net/netfilter/ipset/ip_set_core.c +++ b/trunk/net/netfilter/ipset/ip_set_core.c @@ -26,6 +26,7 @@ static LIST_HEAD(ip_set_type_list); /* all registered set types */ static DEFINE_MUTEX(ip_set_type_mutex); /* protects ip_set_type_list */ +static DEFINE_RWLOCK(ip_set_ref_lock); /* protects the set refs */ static struct ip_set **ip_set_list; /* all individual sets */ static ip_set_id_t ip_set_max = CONFIG_IP_SET_MAX; /* max number of sets */ @@ -301,13 +302,18 @@ EXPORT_SYMBOL_GPL(ip_set_get_ipaddr6); static inline void __ip_set_get(ip_set_id_t index) { - atomic_inc(&ip_set_list[index]->ref); + write_lock_bh(&ip_set_ref_lock); + ip_set_list[index]->ref++; + write_unlock_bh(&ip_set_ref_lock); } static inline void __ip_set_put(ip_set_id_t index) { - atomic_dec(&ip_set_list[index]->ref); + write_lock_bh(&ip_set_ref_lock); + BUG_ON(ip_set_list[index]->ref == 0); + ip_set_list[index]->ref--; + write_unlock_bh(&ip_set_ref_lock); } /* @@ -324,7 +330,7 @@ ip_set_test(ip_set_id_t index, const struct sk_buff *skb, struct ip_set *set = ip_set_list[index]; int ret = 0; - BUG_ON(set == NULL || atomic_read(&set->ref) == 0); + BUG_ON(set == NULL); pr_debug("set %s, index %u\n", set->name, index); if (dim < set->type->dimension || @@ -356,7 +362,7 @@ ip_set_add(ip_set_id_t index, const struct sk_buff *skb, struct ip_set *set = ip_set_list[index]; int ret; - BUG_ON(set == NULL || atomic_read(&set->ref) == 0); + BUG_ON(set == NULL); pr_debug("set %s, index %u\n", set->name, index); if (dim < set->type->dimension || @@ -378,7 +384,7 @@ ip_set_del(ip_set_id_t index, const struct sk_buff *skb, struct ip_set *set = ip_set_list[index]; int ret = 0; - BUG_ON(set == NULL || atomic_read(&set->ref) == 0); + BUG_ON(set == NULL); pr_debug("set %s, index %u\n", set->name, index); if (dim < set->type->dimension || @@ -397,7 +403,6 @@ EXPORT_SYMBOL_GPL(ip_set_del); * Find set by name, reference it once. The reference makes sure the * thing pointed to, does not go away under our feet. * - * The nfnl mutex must already be activated. */ ip_set_id_t ip_set_get_byname(const char *name, struct ip_set **set) @@ -423,15 +428,12 @@ EXPORT_SYMBOL_GPL(ip_set_get_byname); * reference count by 1. The caller shall not assume the index * to be valid, after calling this function. * - * The nfnl mutex must already be activated. */ void ip_set_put_byindex(ip_set_id_t index) { - if (ip_set_list[index] != NULL) { - BUG_ON(atomic_read(&ip_set_list[index]->ref) == 0); + if (ip_set_list[index] != NULL) __ip_set_put(index); - } } EXPORT_SYMBOL_GPL(ip_set_put_byindex); @@ -441,7 +443,6 @@ EXPORT_SYMBOL_GPL(ip_set_put_byindex); * can't be destroyed. The set cannot be renamed due to * the referencing either. * - * The nfnl mutex must already be activated. */ const char * ip_set_name_byindex(ip_set_id_t index) @@ -449,7 +450,7 @@ ip_set_name_byindex(ip_set_id_t index) const struct ip_set *set = ip_set_list[index]; BUG_ON(set == NULL); - BUG_ON(atomic_read(&set->ref) == 0); + BUG_ON(set->ref == 0); /* Referenced, so it's safe */ return set->name; @@ -515,10 +516,7 @@ void ip_set_nfnl_put(ip_set_id_t index) { nfnl_lock(); - if (ip_set_list[index] != NULL) { - BUG_ON(atomic_read(&ip_set_list[index]->ref) == 0); - __ip_set_put(index); - } + ip_set_put_byindex(index); nfnl_unlock(); } EXPORT_SYMBOL_GPL(ip_set_nfnl_put); @@ -526,7 +524,7 @@ EXPORT_SYMBOL_GPL(ip_set_nfnl_put); /* * Communication protocol with userspace over netlink. * - * We already locked by nfnl_lock. + * The commands are serialized by the nfnl mutex. */ static inline bool @@ -657,7 +655,6 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb, return -ENOMEM; rwlock_init(&set->lock); strlcpy(set->name, name, IPSET_MAXNAMELEN); - atomic_set(&set->ref, 0); set->family = family; /* @@ -690,8 +687,8 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb, /* * Here, we have a valid, constructed set and we are protected - * by nfnl_lock. Find the first free index in ip_set_list and - * check clashing. + * by the nfnl mutex. Find the first free index in ip_set_list + * and check clashing. */ if ((ret = find_free_id(set->name, &index, &clash)) != 0) { /* If this is the same set and requested, ignore error */ @@ -751,31 +748,51 @@ ip_set_destroy(struct sock *ctnl, struct sk_buff *skb, const struct nlattr * const attr[]) { ip_set_id_t i; + int ret = 0; if (unlikely(protocol_failed(attr))) return -IPSET_ERR_PROTOCOL; - /* References are protected by the nfnl mutex */ + /* Commands are serialized and references are + * protected by the ip_set_ref_lock. + * External systems (i.e. xt_set) must call + * ip_set_put|get_nfnl_* functions, that way we + * can safely check references here. + * + * list:set timer can only decrement the reference + * counter, so if it's already zero, we can proceed + * without holding the lock. + */ + read_lock_bh(&ip_set_ref_lock); if (!attr[IPSET_ATTR_SETNAME]) { for (i = 0; i < ip_set_max; i++) { - if (ip_set_list[i] != NULL && - (atomic_read(&ip_set_list[i]->ref))) - return -IPSET_ERR_BUSY; + if (ip_set_list[i] != NULL && ip_set_list[i]->ref) { + ret = IPSET_ERR_BUSY; + goto out; + } } + read_unlock_bh(&ip_set_ref_lock); for (i = 0; i < ip_set_max; i++) { if (ip_set_list[i] != NULL) ip_set_destroy_set(i); } } else { i = find_set_id(nla_data(attr[IPSET_ATTR_SETNAME])); - if (i == IPSET_INVALID_ID) - return -ENOENT; - else if (atomic_read(&ip_set_list[i]->ref)) - return -IPSET_ERR_BUSY; + if (i == IPSET_INVALID_ID) { + ret = -ENOENT; + goto out; + } else if (ip_set_list[i]->ref) { + ret = -IPSET_ERR_BUSY; + goto out; + } + read_unlock_bh(&ip_set_ref_lock); ip_set_destroy_set(i); } return 0; +out: + read_unlock_bh(&ip_set_ref_lock); + return ret; } /* Flush sets */ @@ -834,6 +851,7 @@ ip_set_rename(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set; const char *name2; ip_set_id_t i; + int ret = 0; if (unlikely(protocol_failed(attr) || attr[IPSET_ATTR_SETNAME] == NULL || @@ -843,25 +861,33 @@ ip_set_rename(struct sock *ctnl, struct sk_buff *skb, set = find_set(nla_data(attr[IPSET_ATTR_SETNAME])); if (set == NULL) return -ENOENT; - if (atomic_read(&set->ref) != 0) - return -IPSET_ERR_REFERENCED; + + read_lock_bh(&ip_set_ref_lock); + if (set->ref != 0) { + ret = -IPSET_ERR_REFERENCED; + goto out; + } name2 = nla_data(attr[IPSET_ATTR_SETNAME2]); for (i = 0; i < ip_set_max; i++) { if (ip_set_list[i] != NULL && - STREQ(ip_set_list[i]->name, name2)) - return -IPSET_ERR_EXIST_SETNAME2; + STREQ(ip_set_list[i]->name, name2)) { + ret = -IPSET_ERR_EXIST_SETNAME2; + goto out; + } } strncpy(set->name, name2, IPSET_MAXNAMELEN); - return 0; +out: + read_unlock_bh(&ip_set_ref_lock); + return ret; } /* Swap two sets so that name/index points to the other. * References and set names are also swapped. * - * We are protected by the nfnl mutex and references are - * manipulated only by holding the mutex. The kernel interfaces + * The commands are serialized by the nfnl mutex and references are + * protected by the ip_set_ref_lock. The kernel interfaces * do not hold the mutex but the pointer settings are atomic * so the ip_set_list always contains valid pointers to the sets. */ @@ -874,7 +900,6 @@ ip_set_swap(struct sock *ctnl, struct sk_buff *skb, struct ip_set *from, *to; ip_set_id_t from_id, to_id; char from_name[IPSET_MAXNAMELEN]; - u32 from_ref; if (unlikely(protocol_failed(attr) || attr[IPSET_ATTR_SETNAME] == NULL || @@ -899,17 +924,15 @@ ip_set_swap(struct sock *ctnl, struct sk_buff *skb, from->type->family == to->type->family)) return -IPSET_ERR_TYPE_MISMATCH; - /* No magic here: ref munging protected by the nfnl_lock */ strncpy(from_name, from->name, IPSET_MAXNAMELEN); - from_ref = atomic_read(&from->ref); - strncpy(from->name, to->name, IPSET_MAXNAMELEN); - atomic_set(&from->ref, atomic_read(&to->ref)); strncpy(to->name, from_name, IPSET_MAXNAMELEN); - atomic_set(&to->ref, from_ref); + write_lock_bh(&ip_set_ref_lock); + swap(from->ref, to->ref); ip_set_list[from_id] = to; ip_set_list[to_id] = from; + write_unlock_bh(&ip_set_ref_lock); return 0; } @@ -926,7 +949,7 @@ ip_set_dump_done(struct netlink_callback *cb) { if (cb->args[2]) { pr_debug("release set %s\n", ip_set_list[cb->args[1]]->name); - __ip_set_put((ip_set_id_t) cb->args[1]); + ip_set_put_byindex((ip_set_id_t) cb->args[1]); } return 0; } @@ -1068,7 +1091,7 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb) /* If there was an error or set is done, release set */ if (ret || !cb->args[2]) { pr_debug("release set %s\n", ip_set_list[index]->name); - __ip_set_put(index); + ip_set_put_byindex(index); } /* If we dump all sets, continue with dumping last ones */ diff --git a/trunk/net/netfilter/ipset/ip_set_list_set.c b/trunk/net/netfilter/ipset/ip_set_list_set.c index a47c32982f06..e9159e99fc4b 100644 --- a/trunk/net/netfilter/ipset/ip_set_list_set.c +++ b/trunk/net/netfilter/ipset/ip_set_list_set.c @@ -43,14 +43,19 @@ struct list_set { static inline struct set_elem * list_set_elem(const struct list_set *map, u32 id) { - return (struct set_elem *)((char *)map->members + id * map->dsize); + return (struct set_elem *)((void *)map->members + id * map->dsize); +} + +static inline struct set_telem * +list_set_telem(const struct list_set *map, u32 id) +{ + return (struct set_telem *)((void *)map->members + id * map->dsize); } static inline bool list_set_timeout(const struct list_set *map, u32 id) { - const struct set_telem *elem = - (const struct set_telem *) list_set_elem(map, id); + const struct set_telem *elem = list_set_telem(map, id); return ip_set_timeout_test(elem->timeout); } @@ -58,19 +63,11 @@ list_set_timeout(const struct list_set *map, u32 id) static inline bool list_set_expired(const struct list_set *map, u32 id) { - const struct set_telem *elem = - (const struct set_telem *) list_set_elem(map, id); + const struct set_telem *elem = list_set_telem(map, id); return ip_set_timeout_expired(elem->timeout); } -static inline int -list_set_exist(const struct set_telem *elem) -{ - return elem->id != IPSET_INVALID_ID && - !ip_set_timeout_expired(elem->timeout); -} - /* Set list without and with timeout */ static int @@ -146,11 +143,11 @@ list_elem_tadd(struct list_set *map, u32 i, ip_set_id_t id, struct set_telem *e; for (; i < map->size; i++) { - e = (struct set_telem *)list_set_elem(map, i); + e = list_set_telem(map, i); swap(e->id, id); + swap(e->timeout, timeout); if (e->id == IPSET_INVALID_ID) break; - swap(e->timeout, timeout); } } @@ -164,7 +161,7 @@ list_set_add(struct list_set *map, u32 i, ip_set_id_t id, /* Last element replaced: e.g. add new,before,last */ ip_set_put_byindex(e->id); if (with_timeout(map->timeout)) - list_elem_tadd(map, i, id, timeout); + list_elem_tadd(map, i, id, ip_set_timeout_set(timeout)); else list_elem_add(map, i, id); @@ -172,11 +169,11 @@ list_set_add(struct list_set *map, u32 i, ip_set_id_t id, } static int -list_set_del(struct list_set *map, ip_set_id_t id, u32 i) +list_set_del(struct list_set *map, u32 i) { struct set_elem *a = list_set_elem(map, i), *b; - ip_set_put_byindex(id); + ip_set_put_byindex(a->id); for (; i < map->size - 1; i++) { b = list_set_elem(map, i + 1); @@ -308,11 +305,11 @@ list_set_uadt(struct ip_set *set, struct nlattr *tb[], (before == 0 || (before > 0 && next_id_eq(map, i, refid)))) - ret = list_set_del(map, id, i); + ret = list_set_del(map, i); else if (before < 0 && elem->id == refid && next_id_eq(map, i, id)) - ret = list_set_del(map, id, i + 1); + ret = list_set_del(map, i + 1); } break; default: @@ -369,8 +366,7 @@ list_set_head(struct ip_set *set, struct sk_buff *skb) NLA_PUT_NET32(skb, IPSET_ATTR_SIZE, htonl(map->size)); if (with_timeout(map->timeout)) NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, htonl(map->timeout)); - NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES, - htonl(atomic_read(&set->ref) - 1)); + NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)); NLA_PUT_NET32(skb, IPSET_ATTR_MEMSIZE, htonl(sizeof(*map) + map->size * map->dsize)); ipset_nest_end(skb, nested); @@ -461,16 +457,13 @@ list_set_gc(unsigned long ul_set) struct set_telem *e; u32 i; - /* We run parallel with other readers (test element) - * but adding/deleting new entries is locked out */ - read_lock_bh(&set->lock); - for (i = map->size - 1; i >= 0; i--) { - e = (struct set_telem *) list_set_elem(map, i); - if (e->id != IPSET_INVALID_ID && - list_set_expired(map, i)) - list_set_del(map, e->id, i); + write_lock_bh(&set->lock); + for (i = 0; i < map->size; i++) { + e = list_set_telem(map, i); + if (e->id != IPSET_INVALID_ID && list_set_expired(map, i)) + list_set_del(map, i); } - read_unlock_bh(&set->lock); + write_unlock_bh(&set->lock); map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ; add_timer(&map->gc); diff --git a/trunk/net/netfilter/ipvs/ip_vs_ctl.c b/trunk/net/netfilter/ipvs/ip_vs_ctl.c index 33733c8872e7..ae47090bf45f 100644 --- a/trunk/net/netfilter/ipvs/ip_vs_ctl.c +++ b/trunk/net/netfilter/ipvs/ip_vs_ctl.c @@ -3120,7 +3120,7 @@ static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state, static int ip_vs_genl_dump_daemons(struct sk_buff *skb, struct netlink_callback *cb) { - struct net *net = skb_net(skb); + struct net *net = skb_sknet(skb); struct netns_ipvs *ipvs = net_ipvs(net); mutex_lock(&__ip_vs_mutex); diff --git a/trunk/net/netfilter/nf_conntrack_h323_asn1.c b/trunk/net/netfilter/nf_conntrack_h323_asn1.c index 867882313e49..bcd5ed6b7130 100644 --- a/trunk/net/netfilter/nf_conntrack_h323_asn1.c +++ b/trunk/net/netfilter/nf_conntrack_h323_asn1.c @@ -631,7 +631,7 @@ static int decode_seqof(bitstr_t *bs, const struct field_t *f, CHECK_BOUND(bs, 2); count = *bs->cur++; count <<= 8; - count = *bs->cur++; + count += *bs->cur++; break; case SEMI: BYTE_ALIGN(bs); diff --git a/trunk/net/netfilter/nf_conntrack_h323_main.c b/trunk/net/netfilter/nf_conntrack_h323_main.c index 533a183e6661..18b2ce5c8ced 100644 --- a/trunk/net/netfilter/nf_conntrack_h323_main.c +++ b/trunk/net/netfilter/nf_conntrack_h323_main.c @@ -731,10 +731,10 @@ static int callforward_do_filter(const union nf_inet_addr *src, memset(&fl2, 0, sizeof(fl2)); fl2.daddr = dst->ip; - if (!afinfo->route((struct dst_entry **)&rt1, - flowi4_to_flowi(&fl1))) { - if (!afinfo->route((struct dst_entry **)&rt2, - flowi4_to_flowi(&fl2))) { + if (!afinfo->route(&init_net, (struct dst_entry **)&rt1, + flowi4_to_flowi(&fl1), false)) { + if (!afinfo->route(&init_net, (struct dst_entry **)&rt2, + flowi4_to_flowi(&fl2), false)) { if (rt1->rt_gateway == rt2->rt_gateway && rt1->dst.dev == rt2->dst.dev) ret = 1; @@ -755,10 +755,10 @@ static int callforward_do_filter(const union nf_inet_addr *src, memset(&fl2, 0, sizeof(fl2)); ipv6_addr_copy(&fl2.daddr, &dst->in6); - if (!afinfo->route((struct dst_entry **)&rt1, - flowi6_to_flowi(&fl1))) { - if (!afinfo->route((struct dst_entry **)&rt2, - flowi6_to_flowi(&fl2))) { + if (!afinfo->route(&init_net, (struct dst_entry **)&rt1, + flowi6_to_flowi(&fl1), false)) { + if (!afinfo->route(&init_net, (struct dst_entry **)&rt2, + flowi6_to_flowi(&fl2), false)) { if (!memcmp(&rt1->rt6i_gateway, &rt2->rt6i_gateway, sizeof(rt1->rt6i_gateway)) && rt1->dst.dev == rt2->dst.dev) diff --git a/trunk/net/netfilter/xt_TCPMSS.c b/trunk/net/netfilter/xt_TCPMSS.c index 6e6b46cb1db9..9e63b43faeed 100644 --- a/trunk/net/netfilter/xt_TCPMSS.c +++ b/trunk/net/netfilter/xt_TCPMSS.c @@ -166,7 +166,7 @@ static u_int32_t tcpmss_reverse_mtu(const struct sk_buff *skb, rcu_read_lock(); ai = nf_get_afinfo(family); if (ai != NULL) - ai->route((struct dst_entry **)&rt, &fl); + ai->route(&init_net, (struct dst_entry **)&rt, &fl, false); rcu_read_unlock(); if (rt != NULL) { diff --git a/trunk/net/netfilter/xt_addrtype.c b/trunk/net/netfilter/xt_addrtype.c index 2220b85e9519..b77d383cec78 100644 --- a/trunk/net/netfilter/xt_addrtype.c +++ b/trunk/net/netfilter/xt_addrtype.c @@ -32,11 +32,32 @@ MODULE_ALIAS("ipt_addrtype"); MODULE_ALIAS("ip6t_addrtype"); #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) -static u32 xt_addrtype_rt6_to_type(const struct rt6_info *rt) +static u32 match_lookup_rt6(struct net *net, const struct net_device *dev, + const struct in6_addr *addr) { + const struct nf_afinfo *afinfo; + struct flowi6 flow; + struct rt6_info *rt; u32 ret; + int route_err; - if (!rt) + memset(&flow, 0, sizeof(flow)); + ipv6_addr_copy(&flow.daddr, addr); + if (dev) + flow.flowi6_oif = dev->ifindex; + + rcu_read_lock(); + + afinfo = nf_get_afinfo(NFPROTO_IPV6); + if (afinfo != NULL) + route_err = afinfo->route(net, (struct dst_entry **)&rt, + flowi6_to_flowi(&flow), !!dev); + else + route_err = 1; + + rcu_read_unlock(); + + if (route_err) return XT_ADDRTYPE_UNREACHABLE; if (rt->rt6i_flags & RTF_REJECT) @@ -48,6 +69,9 @@ static u32 xt_addrtype_rt6_to_type(const struct rt6_info *rt) ret |= XT_ADDRTYPE_LOCAL; if (rt->rt6i_flags & RTF_ANYCAST) ret |= XT_ADDRTYPE_ANYCAST; + + + dst_release(&rt->dst); return ret; } @@ -65,18 +89,8 @@ static bool match_type6(struct net *net, const struct net_device *dev, return false; if ((XT_ADDRTYPE_LOCAL | XT_ADDRTYPE_ANYCAST | - XT_ADDRTYPE_UNREACHABLE) & mask) { - struct rt6_info *rt; - u32 type; - int ifindex = dev ? dev->ifindex : 0; - - rt = rt6_lookup(net, addr, NULL, ifindex, !!dev); - - type = xt_addrtype_rt6_to_type(rt); - - dst_release(&rt->dst); - return !!(mask & type); - } + XT_ADDRTYPE_UNREACHABLE) & mask) + return !!(mask & match_lookup_rt6(net, dev, addr)); return true; } diff --git a/trunk/net/netfilter/xt_conntrack.c b/trunk/net/netfilter/xt_conntrack.c index 2c0086a4751e..481a86fdc409 100644 --- a/trunk/net/netfilter/xt_conntrack.c +++ b/trunk/net/netfilter/xt_conntrack.c @@ -195,7 +195,7 @@ conntrack_mt(const struct sk_buff *skb, struct xt_action_param *par, return info->match_flags & XT_CONNTRACK_STATE; if ((info->match_flags & XT_CONNTRACK_DIRECTION) && (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) ^ - !!(info->invert_flags & XT_CONNTRACK_DIRECTION)) + !(info->invert_flags & XT_CONNTRACK_DIRECTION)) return false; if (info->match_flags & XT_CONNTRACK_ORIGSRC)