diff --git a/[refs] b/[refs] index 54c45bfdab16..d3066ae57804 100644 --- a/[refs] +++ b/[refs] @@ -1,2 +1,2 @@ --- -refs/heads/master: 0b0c9d3a5872e8a02a071c6f0775ee6bf00a1206 +refs/heads/master: fc0900cbda9243957d812cd6b4cc87965f9fe75f diff --git a/trunk/Documentation/DocBook/kgdb.tmpl b/trunk/Documentation/DocBook/kgdb.tmpl index 4ee4ba3509fc..d71b57fcf116 100644 --- a/trunk/Documentation/DocBook/kgdb.tmpl +++ b/trunk/Documentation/DocBook/kgdb.tmpl @@ -361,23 +361,6 @@ It is possible to use this option with kgdboc on a tty that is not a system console. - - - Run time parameter: kgdbreboot - The kgdbreboot feature allows you to change how the debugger - deals with the reboot notification. You have 3 choices for the - behavior. The default behavior is always set to 0. - - echo -1 > /sys/module/debug_core/parameters/kgdbreboot - Ignore the reboot notification entirely. - - echo 0 > /sys/module/debug_core/parameters/kgdbreboot - Send the detach message to any attached debugger client. - - echo 1 > /sys/module/debug_core/parameters/kgdbreboot - Enter the debugger on reboot notify. - - diff --git a/trunk/Documentation/devicetree/bindings/i2c/sirf-i2c.txt b/trunk/Documentation/devicetree/bindings/i2c/sirf-i2c.txt deleted file mode 100644 index 7baf9e133fa8..000000000000 --- a/trunk/Documentation/devicetree/bindings/i2c/sirf-i2c.txt +++ /dev/null @@ -1,19 +0,0 @@ -I2C for SiRFprimaII platforms - -Required properties : -- compatible : Must be "sirf,prima2-i2c" -- reg: physical base address of the controller and length of memory mapped - region. -- interrupts: interrupt number to the cpu. - -Optional properties: -- clock-frequency : Constains desired I2C/HS-I2C bus clock frequency in Hz. - The absence of the propoerty indicates the default frequency 100 kHz. 
- -Examples : - -i2c0: i2c@b00e0000 { - compatible = "sirf,prima2-i2c"; - reg = <0xb00e0000 0x10000>; - interrupts = <24>; -}; diff --git a/trunk/Documentation/filesystems/nfs/idmapper.txt b/trunk/Documentation/filesystems/nfs/idmapper.txt index fe03d10bb79a..120fd3cf7fd9 100644 --- a/trunk/Documentation/filesystems/nfs/idmapper.txt +++ b/trunk/Documentation/filesystems/nfs/idmapper.txt @@ -4,21 +4,13 @@ ID Mapper ========= Id mapper is used by NFS to translate user and group ids into names, and to translate user and group names into ids. Part of this translation involves -performing an upcall to userspace to request the information. There are two -ways NFS could obtain this information: placing a call to /sbin/request-key -or by placing a call to the rpc.idmap daemon. - -NFS will attempt to call /sbin/request-key first. If this succeeds, the -result will be cached using the generic request-key cache. This call should -only fail if /etc/request-key.conf is not configured for the id_resolver key -type, see the "Configuring" section below if you wish to use the request-key -method. - -If the call to /sbin/request-key fails (if /etc/request-key.conf is not -configured with the id_resolver key type), then the idmapper will ask the -legacy rpc.idmap daemon for the id mapping. This result will be stored -in a custom NFS idmap cache. +performing an upcall to userspace to request the information. Id mapper will +use request-key to perform this upcall and cache the result. The program +/usr/sbin/nfs.idmap should be called by request-key, and will perform the +translation and initialize a key with the resulting information. + NFS_USE_NEW_IDMAPPER must be selected when configuring the kernel to use this + feature. 
=========== Configuring diff --git a/trunk/Documentation/filesystems/nfs/pnfs.txt b/trunk/Documentation/filesystems/nfs/pnfs.txt index c7919c6e3bea..983e14abe7e9 100644 --- a/trunk/Documentation/filesystems/nfs/pnfs.txt +++ b/trunk/Documentation/filesystems/nfs/pnfs.txt @@ -53,57 +53,3 @@ lseg maintains an extra reference corresponding to the NFS_LSEG_VALID bit which holds it in the pnfs_layout_hdr's list. When the final lseg is removed from the pnfs_layout_hdr's list, the NFS_LAYOUT_DESTROYED bit is set, preventing any new lsegs from being added. - -layout drivers --------------- - -PNFS utilizes what is called layout drivers. The STD defines 3 basic -layout types: "files" "objects" and "blocks". For each of these types -there is a layout-driver with a common function-vectors table which -are called by the nfs-client pnfs-core to implement the different layout -types. - -Files-layout-driver code is in: fs/nfs/nfs4filelayout.c && nfs4filelayoutdev.c -Objects-layout-deriver code is in: fs/nfs/objlayout/.. directory -Blocks-layout-deriver code is in: fs/nfs/blocklayout/.. directory - -objects-layout setup --------------------- - -As part of the full STD implementation the objlayoutdriver.ko needs, at times, -to automatically login to yet undiscovered iscsi/osd devices. For this the -driver makes up-calles to a user-mode script called *osd_login* - -The path_name of the script to use is by default: - /sbin/osd_login. -This name can be overridden by the Kernel module parameter: - objlayoutdriver.osd_login_prog - -If Kernel does not find the osd_login_prog path it will zero it out -and will not attempt farther logins. An admin can then write new value -to the objlayoutdriver.osd_login_prog Kernel parameter to re-enable it. - -The /sbin/osd_login is part of the nfs-utils package, and should usually -be installed on distributions that support this Kernel version. - -The API to the login script is as follows: - Usage: $0 -u -o -s - Options: - -u target uri e.g. 
iscsi://: - (allways exists) - (More protocols can be defined in the future. - The client does not interpret this string it is - passed unchanged as recieved from the Server) - -o osdname of the requested target OSD - (Might be empty) - (A string which denotes the OSD name, there is a - limit of 64 chars on this string) - -s systemid of the requested target OSD - (Might be empty) - (This string, if not empty is always an hex - representation of the 20 bytes osd_system_id) - -blocks-layout setup -------------------- - -TODO: Document the setup needs of the blocks layout driver diff --git a/trunk/Documentation/kernel-parameters.txt b/trunk/Documentation/kernel-parameters.txt index 1c9a348548dc..247dcfd62034 100644 --- a/trunk/Documentation/kernel-parameters.txt +++ b/trunk/Documentation/kernel-parameters.txt @@ -1672,14 +1672,6 @@ bytes respectively. Such letter suffixes can also be entirely omitted. of returning the full 64-bit number. The default is to return 64-bit inode numbers. - nfs.max_session_slots= - [NFSv4.1] Sets the maximum number of session slots - the client will attempt to negotiate with the server. - This limits the number of simultaneous RPC requests - that the client can send to the NFSv4.1 server. - Note that there is little point in setting this - value higher than the max_tcp_slot_table_limit. - nfs.nfs4_disable_idmapping= [NFSv4] When set to the default of '1', this option ensures that both the RPC level authentication @@ -1693,21 +1685,6 @@ bytes respectively. Such letter suffixes can also be entirely omitted. back to using the idmapper. To turn off this behaviour, set the value to '0'. - nfs.send_implementation_id = - [NFSv4.1] Send client implementation identification - information in exchange_id requests. - If zero, no implementation identification information - will be sent. - The default is to send the implementation identification - information. 
- - - objlayoutdriver.osd_login_prog= - [NFS] [OBJLAYOUT] sets the pathname to the program which - is used to automatically discover and login into new - osd-targets. Please see: - Documentation/filesystems/pnfs.txt for more explanations - nmi_debug= [KNL,AVR32,SH] Specify one or more actions to take when a NMI is triggered. Format: [state][,regs][,debounce][,die] @@ -2147,14 +2124,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted. the default. off: Turn ECRC off on: Turn ECRC on. - realloc= Enable/disable reallocating PCI bridge resources - if allocations done by BIOS are too small to - accommodate resources required by all child - devices. - off: Turn realloc off - on: Turn realloc on - realloc same as realloc=on - noari do not use PCIe ARI. + realloc reallocate PCI resources if allocations done by BIOS + are erroneous. pcie_aspm= [PCIE] Forcibly enable or disable PCIe Active State Power Management. @@ -2162,10 +2133,6 @@ bytes respectively. Such letter suffixes can also be entirely omitted. force Enable ASPM even on devices that claim not to support it. WARNING: Forcing ASPM on may cause system lockups. - pcie_hp= [PCIE] PCI Express Hotplug driver options: - nomsi Do not use MSI for PCI Express Native Hotplug (this - makes all PCIe ports use INTx for hotplug services). - pcie_ports= [PCIE] PCIe ports handling: auto Ask the BIOS whether or not to use native PCIe services associated with PCIe ports (PME, hot-plug, AER). 
Use diff --git a/trunk/MAINTAINERS b/trunk/MAINTAINERS index ec9bcb17c572..95eba3135018 100644 --- a/trunk/MAINTAINERS +++ b/trunk/MAINTAINERS @@ -503,7 +503,7 @@ F: arch/x86/include/asm/geode.h AMD IOMMU (AMD-VI) M: Joerg Roedel L: iommu@lists.linux-foundation.org -T: git git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/joro/linux-2.6-iommu.git S: Supported F: drivers/iommu/amd_iommu*.[ch] F: include/linux/amd-iommu.h @@ -5120,7 +5120,7 @@ F: Documentation/PCI/pci-error-recovery.txt F: Documentation/powerpc/eeh-pci-error-recovery.txt PCI SUBSYSTEM -M: Bjorn Helgaas +M: Jesse Barnes L: linux-pci@vger.kernel.org Q: http://patchwork.kernel.org/project/linux-pci/list/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/jbarnes/pci-2.6.git @@ -5130,7 +5130,7 @@ F: drivers/pci/ F: include/linux/pci* PCI HOTPLUG -M: Bjorn Helgaas +M: Jesse Barnes L: linux-pci@vger.kernel.org S: Supported F: drivers/pci/hotplug diff --git a/trunk/arch/alpha/include/asm/pci.h b/trunk/arch/alpha/include/asm/pci.h index d01afb78919c..28d0497fd3c7 100644 --- a/trunk/arch/alpha/include/asm/pci.h +++ b/trunk/arch/alpha/include/asm/pci.h @@ -7,7 +7,6 @@ #include #include #include -#include /* * The following structure is used to manage multiple PCI busses. @@ -100,6 +99,12 @@ static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel) return channel ? 
15 : 14; } +extern void pcibios_resource_to_bus(struct pci_dev *, struct pci_bus_region *, + struct resource *); + +extern void pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res, + struct pci_bus_region *region); + #define pci_domain_nr(bus) ((struct pci_controller *)(bus)->sysdata)->index static inline int pci_proc_domain(struct pci_bus *bus) diff --git a/trunk/arch/alpha/kernel/pci.c b/trunk/arch/alpha/kernel/pci.c index 1a629636cc16..8c723c1b086a 100644 --- a/trunk/arch/alpha/kernel/pci.c +++ b/trunk/arch/alpha/kernel/pci.c @@ -43,10 +43,12 @@ const char *const pci_mem_names[] = { const char pci_hae0_name[] = "HAE0"; +/* Indicate whether we respect the PCI setup left by console. */ /* - * If PCI_PROBE_ONLY in pci_flags is set, we don't change any PCI resource - * assignments. + * Make this long-lived so that we know when shutting down + * whether we probed only or not. */ +int pci_probe_only; /* * The PCI controller list. @@ -213,7 +215,7 @@ pdev_save_srm_config(struct pci_dev *dev) struct pdev_srm_saved_conf *tmp; static int printed = 0; - if (!alpha_using_srm || pci_has_flag(PCI_PROBE_ONLY)) + if (!alpha_using_srm || pci_probe_only) return; if (!printed) { @@ -240,7 +242,7 @@ pci_restore_srm_config(void) struct pdev_srm_saved_conf *tmp; /* No need to restore if probed only. */ - if (pci_has_flag(PCI_PROBE_ONLY)) + if (pci_probe_only) return; /* Restore SRM config. */ @@ -250,18 +252,47 @@ pci_restore_srm_config(void) } #endif +void __devinit +pcibios_fixup_resource(struct resource *res, struct resource *root) +{ + res->start += root->start; + res->end += root->start; +} + +void __devinit +pcibios_fixup_device_resources(struct pci_dev *dev, struct pci_bus *bus) +{ + /* Update device resources. 
*/ + struct pci_controller *hose = (struct pci_controller *)bus->sysdata; + int i; + + for (i = 0; i < PCI_NUM_RESOURCES; i++) { + if (!dev->resource[i].start) + continue; + if (dev->resource[i].flags & IORESOURCE_IO) + pcibios_fixup_resource(&dev->resource[i], + hose->io_space); + else if (dev->resource[i].flags & IORESOURCE_MEM) + pcibios_fixup_resource(&dev->resource[i], + hose->mem_space); + } +} + void __devinit pcibios_fixup_bus(struct pci_bus *bus) { struct pci_dev *dev = bus->self; - if (pci_has_flag(PCI_PROBE_ONLY) && dev && + if (pci_probe_only && dev && (dev->class >> 8) == PCI_CLASS_BRIDGE_PCI) { pci_read_bridge_bases(bus); + pcibios_fixup_device_resources(dev, bus); } list_for_each_entry(dev, &bus->devices, bus_list) { pdev_save_srm_config(dev); + if ((dev->class >> 8) != PCI_CLASS_BRIDGE_PCI) + pcibios_fixup_device_resources(dev, bus); } } @@ -271,6 +302,42 @@ pcibios_update_irq(struct pci_dev *dev, int irq) pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq); } +void +pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region, + struct resource *res) +{ + struct pci_controller *hose = (struct pci_controller *)dev->sysdata; + unsigned long offset = 0; + + if (res->flags & IORESOURCE_IO) + offset = hose->io_space->start; + else if (res->flags & IORESOURCE_MEM) + offset = hose->mem_space->start; + + region->start = res->start - offset; + region->end = res->end - offset; +} + +void pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res, + struct pci_bus_region *region) +{ + struct pci_controller *hose = (struct pci_controller *)dev->sysdata; + unsigned long offset = 0; + + if (res->flags & IORESOURCE_IO) + offset = hose->io_space->start; + else if (res->flags & IORESOURCE_MEM) + offset = hose->mem_space->start; + + res->start = region->start + offset; + res->end = region->end + offset; +} + +#ifdef CONFIG_HOTPLUG +EXPORT_SYMBOL(pcibios_resource_to_bus); +EXPORT_SYMBOL(pcibios_bus_to_resource); +#endif + int 
pcibios_enable_device(struct pci_dev *dev, int mask) { @@ -307,8 +374,7 @@ pcibios_claim_one_bus(struct pci_bus *b) if (r->parent || !r->start || !r->flags) continue; - if (pci_has_flag(PCI_PROBE_ONLY) || - (r->flags & IORESOURCE_PCI_FIXED)) + if (pci_probe_only || (r->flags & IORESOURCE_PCI_FIXED)) pci_claim_resource(dev, i); } } @@ -350,10 +416,8 @@ common_init_pci(void) hose->mem_space->end = end; INIT_LIST_HEAD(&resources); - pci_add_resource_offset(&resources, hose->io_space, - hose->io_space->start); - pci_add_resource_offset(&resources, hose->mem_space, - hose->mem_space->start); + pci_add_resource(&resources, hose->io_space); + pci_add_resource(&resources, hose->mem_space); bus = pci_scan_root_bus(NULL, next_busno, alpha_mv.pci_ops, hose, &resources); diff --git a/trunk/arch/alpha/kernel/pci_impl.h b/trunk/arch/alpha/kernel/pci_impl.h index 2b0ac429f5eb..85457b2d4516 100644 --- a/trunk/arch/alpha/kernel/pci_impl.h +++ b/trunk/arch/alpha/kernel/pci_impl.h @@ -173,6 +173,9 @@ extern void pci_restore_srm_config(void); extern struct pci_controller *hose_head, **hose_tail; extern struct pci_controller *pci_isa_hose; +/* Indicate that we trust the console to configure things properly. 
*/ +extern int pci_probe_only; + extern unsigned long alpha_agpgart_size; extern void common_init_pci(void); diff --git a/trunk/arch/alpha/kernel/sys_marvel.c b/trunk/arch/alpha/kernel/sys_marvel.c index fc8b12508611..95cfc83ece8f 100644 --- a/trunk/arch/alpha/kernel/sys_marvel.c +++ b/trunk/arch/alpha/kernel/sys_marvel.c @@ -384,8 +384,7 @@ marvel_init_pci(void) marvel_register_error_handlers(); - /* Indicate that we trust the console to configure things properly */ - pci_set_flags(PCI_PROBE_ONLY); + pci_probe_only = 1; common_init_pci(); locate_and_init_vga(NULL); diff --git a/trunk/arch/alpha/kernel/sys_titan.c b/trunk/arch/alpha/kernel/sys_titan.c index b8eafa053539..f47b30a2a117 100644 --- a/trunk/arch/alpha/kernel/sys_titan.c +++ b/trunk/arch/alpha/kernel/sys_titan.c @@ -331,8 +331,7 @@ titan_init_pci(void) */ titan_late_init(); - /* Indicate that we trust the console to configure things properly */ - pci_set_flags(PCI_PROBE_ONLY); + pci_probe_only = 1; common_init_pci(); SMC669_Init(0); locate_and_init_vga(NULL); diff --git a/trunk/arch/arm/common/it8152.c b/trunk/arch/arm/common/it8152.c index dcb13494ca0d..fb1f1cfce60c 100644 --- a/trunk/arch/arm/common/it8152.c +++ b/trunk/arch/arm/common/it8152.c @@ -299,8 +299,8 @@ int __init it8152_pci_setup(int nr, struct pci_sys_data *sys) goto err1; } - pci_add_resource_offset(&sys->resources, &it8152_io, sys->io_offset); - pci_add_resource_offset(&sys->resources, &it8152_mem, sys->mem_offset); + pci_add_resource(&sys->resources, &it8152_io); + pci_add_resource(&sys->resources, &it8152_mem); if (platform_notify || platform_notify_remove) { printk(KERN_ERR "PCI: Can't use platform_notify\n"); diff --git a/trunk/arch/arm/include/asm/pci.h b/trunk/arch/arm/include/asm/pci.h index a98a2e112fae..da337ba57ffd 100644 --- a/trunk/arch/arm/include/asm/pci.h +++ b/trunk/arch/arm/include/asm/pci.h @@ -57,6 +57,14 @@ static inline void pci_dma_burst_advice(struct pci_dev *pdev, extern int pci_mmap_page_range(struct pci_dev 
*dev, struct vm_area_struct *vma, enum pci_mmap_state mmap_state, int write_combine); +extern void +pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region, + struct resource *res); + +extern void +pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res, + struct pci_bus_region *region); + /* * Dummy implementation; always return 0. */ diff --git a/trunk/arch/arm/kernel/bios32.c b/trunk/arch/arm/kernel/bios32.c index 632df9a66f8c..f58ba3589908 100644 --- a/trunk/arch/arm/kernel/bios32.c +++ b/trunk/arch/arm/kernel/bios32.c @@ -16,6 +16,7 @@ #include static int debug_pci; +static int use_firmware; /* * We can't use pci_find_device() here since we are @@ -293,6 +294,28 @@ static inline int pdev_bad_for_parity(struct pci_dev *dev) } +/* + * Adjust the device resources from bus-centric to Linux-centric. + */ +static void __devinit +pdev_fixup_device_resources(struct pci_sys_data *root, struct pci_dev *dev) +{ + resource_size_t offset; + int i; + + for (i = 0; i < PCI_NUM_RESOURCES; i++) { + if (dev->resource[i].start == 0) + continue; + if (dev->resource[i].flags & IORESOURCE_MEM) + offset = root->mem_offset; + else + offset = root->io_offset; + + dev->resource[i].start += offset; + dev->resource[i].end += offset; + } +} + /* * pcibios_fixup_bus - Called after each bus is probed, * but before its children are examined. @@ -310,6 +333,8 @@ void pcibios_fixup_bus(struct pci_bus *bus) list_for_each_entry(dev, &bus->devices, bus_list) { u16 status; + pdev_fixup_device_resources(root, dev); + pci_read_config_word(dev, PCI_STATUS, &status); /* @@ -374,6 +399,43 @@ void pcibios_fixup_bus(struct pci_bus *bus) EXPORT_SYMBOL(pcibios_fixup_bus); #endif +/* + * Convert from Linux-centric to bus-centric addresses for bridge devices. 
+ */ +void +pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region, + struct resource *res) +{ + struct pci_sys_data *root = dev->sysdata; + unsigned long offset = 0; + + if (res->flags & IORESOURCE_IO) + offset = root->io_offset; + if (res->flags & IORESOURCE_MEM) + offset = root->mem_offset; + + region->start = res->start - offset; + region->end = res->end - offset; +} +EXPORT_SYMBOL(pcibios_resource_to_bus); + +void __devinit +pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res, + struct pci_bus_region *region) +{ + struct pci_sys_data *root = dev->sysdata; + unsigned long offset = 0; + + if (res->flags & IORESOURCE_IO) + offset = root->io_offset; + if (res->flags & IORESOURCE_MEM) + offset = root->mem_offset; + + res->start = region->start + offset; + res->end = region->end + offset; +} +EXPORT_SYMBOL(pcibios_bus_to_resource); + /* * Swizzle the device pin each time we cross a bridge. * This might update pin and returns the slot number. @@ -435,10 +497,10 @@ static void __init pcibios_init_hw(struct hw_pci *hw) if (ret > 0) { if (list_empty(&sys->resources)) { - pci_add_resource_offset(&sys->resources, - &ioport_resource, sys->io_offset); - pci_add_resource_offset(&sys->resources, - &iomem_resource, sys->mem_offset); + pci_add_resource(&sys->resources, + &ioport_resource); + pci_add_resource(&sys->resources, + &iomem_resource); } sys->bus = hw->scan(nr, sys); @@ -463,7 +525,6 @@ void __init pci_common_init(struct hw_pci *hw) INIT_LIST_HEAD(&hw->buses); - pci_add_flags(PCI_REASSIGN_ALL_RSRC); if (hw->preinit) hw->preinit(); pcibios_init_hw(hw); @@ -475,7 +536,7 @@ void __init pci_common_init(struct hw_pci *hw) list_for_each_entry(sys, &hw->buses, node) { struct pci_bus *bus = sys->bus; - if (!pci_has_flag(PCI_PROBE_ONLY)) { + if (!use_firmware) { /* * Size the bridge windows. 
*/ @@ -512,7 +573,7 @@ char * __init pcibios_setup(char *str) debug_pci = 1; return NULL; } else if (!strcmp(str, "firmware")) { - pci_add_flags(PCI_PROBE_ONLY); + use_firmware = 1; return NULL; } return str; diff --git a/trunk/arch/arm/mach-cns3xxx/pcie.c b/trunk/arch/arm/mach-cns3xxx/pcie.c index 79d001f831e0..e159d69967c9 100644 --- a/trunk/arch/arm/mach-cns3xxx/pcie.c +++ b/trunk/arch/arm/mach-cns3xxx/pcie.c @@ -155,8 +155,8 @@ static int cns3xxx_pci_setup(int nr, struct pci_sys_data *sys) BUG_ON(request_resource(&iomem_resource, res_io) || request_resource(&iomem_resource, res_mem)); - pci_add_resource_offset(&sys->resources, res_io, sys->io_offset); - pci_add_resource_offset(&sys->resources, res_mem, sys->mem_offset); + pci_add_resource(&sys->resources, res_io); + pci_add_resource(&sys->resources, res_mem); return 1; } diff --git a/trunk/arch/arm/mach-dove/pcie.c b/trunk/arch/arm/mach-dove/pcie.c index 48a032005ea3..52e96d397ba8 100644 --- a/trunk/arch/arm/mach-dove/pcie.c +++ b/trunk/arch/arm/mach-dove/pcie.c @@ -69,7 +69,7 @@ static int __init dove_pcie_setup(int nr, struct pci_sys_data *sys) pp->res[0].flags = IORESOURCE_IO; if (request_resource(&ioport_resource, &pp->res[0])) panic("Request PCIe IO resource failed\n"); - pci_add_resource_offset(&sys->resources, &pp->res[0], sys->io_offset); + pci_add_resource(&sys->resources, &pp->res[0]); /* * IORESOURCE_MEM @@ -88,7 +88,7 @@ static int __init dove_pcie_setup(int nr, struct pci_sys_data *sys) pp->res[1].flags = IORESOURCE_MEM; if (request_resource(&iomem_resource, &pp->res[1])) panic("Request PCIe Memory resource failed\n"); - pci_add_resource_offset(&sys->resources, &pp->res[1], sys->mem_offset); + pci_add_resource(&sys->resources, &pp->res[1]); return 1; } diff --git a/trunk/arch/arm/mach-footbridge/dc21285.c b/trunk/arch/arm/mach-footbridge/dc21285.c index 3194d3f73503..f685650c25d7 100644 --- a/trunk/arch/arm/mach-footbridge/dc21285.c +++ b/trunk/arch/arm/mach-footbridge/dc21285.c @@ -275,13 +275,11 
@@ int __init dc21285_setup(int nr, struct pci_sys_data *sys) allocate_resource(&iomem_resource, &res[0], 0x40000000, 0x80000000, 0xffffffff, 0x40000000, NULL, NULL); + pci_add_resource(&sys->resources, &ioport_resource); + pci_add_resource(&sys->resources, &res[0]); + pci_add_resource(&sys->resources, &res[1]); sys->mem_offset = DC21285_PCI_MEM; - pci_add_resource_offset(&sys->resources, - &ioport_resource, sys->io_offset); - pci_add_resource_offset(&sys->resources, &res[0], sys->mem_offset); - pci_add_resource_offset(&sys->resources, &res[1], sys->mem_offset); - return 1; } diff --git a/trunk/arch/arm/mach-integrator/pci_v3.c b/trunk/arch/arm/mach-integrator/pci_v3.c index 015be770c1d8..3c82566acece 100644 --- a/trunk/arch/arm/mach-integrator/pci_v3.c +++ b/trunk/arch/arm/mach-integrator/pci_v3.c @@ -378,10 +378,9 @@ static int __init pci_v3_setup_resources(struct pci_sys_data *sys) * the mem resource for this bus * the prefetch mem resource for this bus */ - pci_add_resource_offset(&sys->resources, - &ioport_resource, sys->io_offset); - pci_add_resource_offset(&sys->resources, &non_mem, sys->mem_offset); - pci_add_resource_offset(&sys->resources, &pre_mem, sys->mem_offset); + pci_add_resource(&sys->resources, &ioport_resource); + pci_add_resource(&sys->resources, &non_mem); + pci_add_resource(&sys->resources, &pre_mem); return 1; } diff --git a/trunk/arch/arm/mach-iop13xx/pci.c b/trunk/arch/arm/mach-iop13xx/pci.c index 861cb12ef436..b8f5a8736511 100644 --- a/trunk/arch/arm/mach-iop13xx/pci.c +++ b/trunk/arch/arm/mach-iop13xx/pci.c @@ -1084,8 +1084,8 @@ int iop13xx_pci_setup(int nr, struct pci_sys_data *sys) request_resource(&ioport_resource, &res[0]); request_resource(&iomem_resource, &res[1]); - pci_add_resource_offset(&sys->resources, &res[0], sys->io_offset); - pci_add_resource_offset(&sys->resources, &res[1], sys->mem_offset); + pci_add_resource(&sys->resources, &res[0]); + pci_add_resource(&sys->resources, &res[1]); return 1; } diff --git 
a/trunk/arch/arm/mach-ixp2000/ixdp2400.c b/trunk/arch/arm/mach-ixp2000/ixdp2400.c index d519944653ad..f53e911ec94a 100644 --- a/trunk/arch/arm/mach-ixp2000/ixdp2400.c +++ b/trunk/arch/arm/mach-ixp2000/ixdp2400.c @@ -134,11 +134,11 @@ static void ixdp2400_pci_postinit(void) if (ixdp2x00_master_npu()) { dev = pci_get_bus_and_slot(1, IXDP2400_SLAVE_ENET_DEVFN); - pci_stop_and_remove_bus_device(dev); + pci_remove_bus_device(dev); pci_dev_put(dev); } else { dev = pci_get_bus_and_slot(1, IXDP2400_MASTER_ENET_DEVFN); - pci_stop_and_remove_bus_device(dev); + pci_remove_bus_device(dev); pci_dev_put(dev); ixdp2x00_slave_pci_postinit(); diff --git a/trunk/arch/arm/mach-ixp2000/ixdp2800.c b/trunk/arch/arm/mach-ixp2000/ixdp2800.c index b415febd2025..a2e7c393e74f 100644 --- a/trunk/arch/arm/mach-ixp2000/ixdp2800.c +++ b/trunk/arch/arm/mach-ixp2000/ixdp2800.c @@ -262,14 +262,14 @@ int __init ixdp2800_pci_init(void) pci_common_init(&ixdp2800_pci); if (ixdp2x00_master_npu()) { dev = pci_get_bus_and_slot(1, IXDP2800_SLAVE_ENET_DEVFN); - pci_stop_and_remove_bus_device(dev); + pci_remove_bus_device(dev); pci_dev_put(dev); ixdp2800_master_enable_slave(); ixdp2800_master_wait_for_slave_bus_scan(); } else { dev = pci_get_bus_and_slot(1, IXDP2800_MASTER_ENET_DEVFN); - pci_stop_and_remove_bus_device(dev); + pci_remove_bus_device(dev); pci_dev_put(dev); } } diff --git a/trunk/arch/arm/mach-ixp2000/ixdp2x00.c b/trunk/arch/arm/mach-ixp2000/ixdp2x00.c index dd9838299068..634b6c852f68 100644 --- a/trunk/arch/arm/mach-ixp2000/ixdp2x00.c +++ b/trunk/arch/arm/mach-ixp2000/ixdp2x00.c @@ -239,12 +239,12 @@ void ixdp2x00_slave_pci_postinit(void) * Remove PMC device is there is one */ if((dev = pci_get_bus_and_slot(1, IXDP2X00_PMC_DEVFN))) { - pci_stop_and_remove_bus_device(dev); + pci_remove_bus_device(dev); pci_dev_put(dev); } dev = pci_get_bus_and_slot(0, IXDP2X00_21555_DEVFN); - pci_stop_and_remove_bus_device(dev); + pci_remove_bus_device(dev); pci_dev_put(dev); } diff --git 
a/trunk/arch/arm/mach-ixp2000/pci.c b/trunk/arch/arm/mach-ixp2000/pci.c index 49c36f3cd602..626fda435aa9 100644 --- a/trunk/arch/arm/mach-ixp2000/pci.c +++ b/trunk/arch/arm/mach-ixp2000/pci.c @@ -243,10 +243,8 @@ int ixp2000_pci_setup(int nr, struct pci_sys_data *sys) if (nr >= 1) return 0; - pci_add_resource_offset(&sys->resources, - &ixp2000_pci_io_space, sys->io_offset); - pci_add_resource_offset(&sys->resources, - &ixp2000_pci_mem_space, sys->mem_offset); + pci_add_resource(&sys->resources, &ixp2000_pci_io_space); + pci_add_resource(&sys->resources, &ixp2000_pci_mem_space); return 1; } diff --git a/trunk/arch/arm/mach-ixp23xx/pci.c b/trunk/arch/arm/mach-ixp23xx/pci.c index 3cbbd3208fa8..25b5c462cea2 100644 --- a/trunk/arch/arm/mach-ixp23xx/pci.c +++ b/trunk/arch/arm/mach-ixp23xx/pci.c @@ -281,10 +281,8 @@ int ixp23xx_pci_setup(int nr, struct pci_sys_data *sys) if (nr >= 1) return 0; - pci_add_resource_offset(&sys->resources, - &ixp23xx_pci_io_space, sys->io_offset); - pci_add_resource_offset(&sys->resources, - &ixp23xx_pci_mem_space, sys->mem_offset); + pci_add_resource(&sys->resources, &ixp23xx_pci_io_space); + pci_add_resource(&sys->resources, &ixp23xx_pci_mem_space); return 1; } diff --git a/trunk/arch/arm/mach-ixp4xx/common-pci.c b/trunk/arch/arm/mach-ixp4xx/common-pci.c index 8508882b13f0..5eff15f24bc2 100644 --- a/trunk/arch/arm/mach-ixp4xx/common-pci.c +++ b/trunk/arch/arm/mach-ixp4xx/common-pci.c @@ -472,8 +472,8 @@ int ixp4xx_setup(int nr, struct pci_sys_data *sys) request_resource(&ioport_resource, &res[0]); request_resource(&iomem_resource, &res[1]); - pci_add_resource_offset(&sys->resources, &res[0], sys->io_offset); - pci_add_resource_offset(&sys->resources, &res[1], sys->mem_offset); + pci_add_resource(&sys->resources, &res[0]); + pci_add_resource(&sys->resources, &res[1]); platform_notify = ixp4xx_pci_platform_notify; platform_notify_remove = ixp4xx_pci_platform_notify_remove; diff --git a/trunk/arch/arm/mach-kirkwood/pcie.c 
b/trunk/arch/arm/mach-kirkwood/pcie.c index f56a0118c1bb..a066a6d8d9d2 100644 --- a/trunk/arch/arm/mach-kirkwood/pcie.c +++ b/trunk/arch/arm/mach-kirkwood/pcie.c @@ -198,9 +198,9 @@ static int __init kirkwood_pcie_setup(int nr, struct pci_sys_data *sys) if (request_resource(&iomem_resource, &pp->res[1])) panic("Request PCIe%d Memory resource failed\n", index); + pci_add_resource(&sys->resources, &pp->res[0]); + pci_add_resource(&sys->resources, &pp->res[1]); sys->io_offset = 0; - pci_add_resource_offset(&sys->resources, &pp->res[0], sys->io_offset); - pci_add_resource_offset(&sys->resources, &pp->res[1], sys->mem_offset); /* * Generic PCIe unit setup. diff --git a/trunk/arch/arm/mach-ks8695/pci.c b/trunk/arch/arm/mach-ks8695/pci.c index acc701435817..b26f992071df 100644 --- a/trunk/arch/arm/mach-ks8695/pci.c +++ b/trunk/arch/arm/mach-ks8695/pci.c @@ -169,8 +169,8 @@ static int __init ks8695_pci_setup(int nr, struct pci_sys_data *sys) request_resource(&iomem_resource, &pci_mem); request_resource(&ioport_resource, &pci_io); - pci_add_resource_offset(&sys->resources, &pci_io, sys->io_offset); - pci_add_resource_offset(&sys->resources, &pci_mem, sys->mem_offset); + pci_add_resource(&sys->resources, &pci_io); + pci_add_resource(&sys->resources, &pci_mem); /* Assign and enable processor bridge */ ks8695_local_writeconfig(PCI_BASE_ADDRESS_0, KS8695_PCIMEM_PA); diff --git a/trunk/arch/arm/mach-mv78xx0/pcie.c b/trunk/arch/arm/mach-mv78xx0/pcie.c index df3e38055a24..8459f6d7d8ca 100644 --- a/trunk/arch/arm/mach-mv78xx0/pcie.c +++ b/trunk/arch/arm/mach-mv78xx0/pcie.c @@ -155,8 +155,8 @@ static int __init mv78xx0_pcie_setup(int nr, struct pci_sys_data *sys) orion_pcie_set_local_bus_nr(pp->base, sys->busnr); orion_pcie_setup(pp->base); - pci_add_resource_offset(&sys->resources, &pp->res[0], sys->io_offset); - pci_add_resource_offset(&sys->resources, &pp->res[1], sys->mem_offset); + pci_add_resource(&sys->resources, &pp->res[0]); + pci_add_resource(&sys->resources, &pp->res[1]); 
return 1; } diff --git a/trunk/arch/arm/mach-orion5x/pci.c b/trunk/arch/arm/mach-orion5x/pci.c index d6a91948e4dc..09a045f0c406 100644 --- a/trunk/arch/arm/mach-orion5x/pci.c +++ b/trunk/arch/arm/mach-orion5x/pci.c @@ -171,14 +171,13 @@ static int __init pcie_setup(struct pci_sys_data *sys) /* * IORESOURCE_IO */ - sys->io_offset = 0; res[0].name = "PCIe I/O Space"; res[0].flags = IORESOURCE_IO; res[0].start = ORION5X_PCIE_IO_BUS_BASE; res[0].end = res[0].start + ORION5X_PCIE_IO_SIZE - 1; if (request_resource(&ioport_resource, &res[0])) panic("Request PCIe IO resource failed\n"); - pci_add_resource_offset(&sys->resources, &res[0], sys->io_offset); + pci_add_resource(&sys->resources, &res[0]); /* * IORESOURCE_MEM @@ -189,7 +188,9 @@ static int __init pcie_setup(struct pci_sys_data *sys) res[1].end = res[1].start + ORION5X_PCIE_MEM_SIZE - 1; if (request_resource(&iomem_resource, &res[1])) panic("Request PCIe Memory resource failed\n"); - pci_add_resource_offset(&sys->resources, &res[1], sys->mem_offset); + pci_add_resource(&sys->resources, &res[1]); + + sys->io_offset = 0; return 1; } @@ -498,14 +499,13 @@ static int __init pci_setup(struct pci_sys_data *sys) /* * IORESOURCE_IO */ - sys->io_offset = 0; res[0].name = "PCI I/O Space"; res[0].flags = IORESOURCE_IO; res[0].start = ORION5X_PCI_IO_BUS_BASE; res[0].end = res[0].start + ORION5X_PCI_IO_SIZE - 1; if (request_resource(&ioport_resource, &res[0])) panic("Request PCI IO resource failed\n"); - pci_add_resource_offset(&sys->resources, &res[0], sys->io_offset); + pci_add_resource(&sys->resources, &res[0]); /* * IORESOURCE_MEM @@ -516,7 +516,9 @@ static int __init pci_setup(struct pci_sys_data *sys) res[1].end = res[1].start + ORION5X_PCI_MEM_SIZE - 1; if (request_resource(&iomem_resource, &res[1])) panic("Request PCI Memory resource failed\n"); - pci_add_resource_offset(&sys->resources, &res[1], sys->mem_offset); + pci_add_resource(&sys->resources, &res[1]); + + sys->io_offset = 0; return 1; } diff --git 
a/trunk/arch/arm/mach-sa1100/pci-nanoengine.c b/trunk/arch/arm/mach-sa1100/pci-nanoengine.c index b466bca9c651..0d01ca788922 100644 --- a/trunk/arch/arm/mach-sa1100/pci-nanoengine.c +++ b/trunk/arch/arm/mach-sa1100/pci-nanoengine.c @@ -244,11 +244,9 @@ static int __init pci_nanoengine_setup_resources(struct pci_sys_data *sys) printk(KERN_ERR "PCI: unable to allocate prefetchable\n"); return -EBUSY; } - pci_add_resource_offset(&sys->resources, &pci_io_ports, sys->io_offset); - pci_add_resource_offset(&sys->resources, - &pci_non_prefetchable_memory, sys->mem_offset); - pci_add_resource_offset(&sys->resources, - &pci_prefetchable_memory, sys->mem_offset); + pci_add_resource(&sys->resources, &pci_io_ports); + pci_add_resource(&sys->resources, &pci_non_prefetchable_memory); + pci_add_resource(&sys->resources, &pci_prefetchable_memory); return 1; } diff --git a/trunk/arch/arm/mach-tegra/include/mach/smmu.h b/trunk/arch/arm/mach-tegra/include/mach/smmu.h deleted file mode 100644 index dad403a9cf00..000000000000 --- a/trunk/arch/arm/mach-tegra/include/mach/smmu.h +++ /dev/null @@ -1,63 +0,0 @@ -/* - * IOMMU API for SMMU in Tegra30 - * - * Copyright (c) 2012, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. 
- */ - -#ifndef MACH_SMMU_H -#define MACH_SMMU_H - -enum smmu_hwgrp { - HWGRP_AFI, - HWGRP_AVPC, - HWGRP_DC, - HWGRP_DCB, - HWGRP_EPP, - HWGRP_G2, - HWGRP_HC, - HWGRP_HDA, - HWGRP_ISP, - HWGRP_MPE, - HWGRP_NV, - HWGRP_NV2, - HWGRP_PPCS, - HWGRP_SATA, - HWGRP_VDE, - HWGRP_VI, - - HWGRP_COUNT, - - HWGRP_END = ~0, -}; - -#define HWG_AFI (1 << HWGRP_AFI) -#define HWG_AVPC (1 << HWGRP_AVPC) -#define HWG_DC (1 << HWGRP_DC) -#define HWG_DCB (1 << HWGRP_DCB) -#define HWG_EPP (1 << HWGRP_EPP) -#define HWG_G2 (1 << HWGRP_G2) -#define HWG_HC (1 << HWGRP_HC) -#define HWG_HDA (1 << HWGRP_HDA) -#define HWG_ISP (1 << HWGRP_ISP) -#define HWG_MPE (1 << HWGRP_MPE) -#define HWG_NV (1 << HWGRP_NV) -#define HWG_NV2 (1 << HWGRP_NV2) -#define HWG_PPCS (1 << HWGRP_PPCS) -#define HWG_SATA (1 << HWGRP_SATA) -#define HWG_VDE (1 << HWGRP_VDE) -#define HWG_VI (1 << HWGRP_VI) - -#endif /* MACH_SMMU_H */ diff --git a/trunk/arch/arm/mach-tegra/pcie.c b/trunk/arch/arm/mach-tegra/pcie.c index 14b29ab5d8f0..af8b63435727 100644 --- a/trunk/arch/arm/mach-tegra/pcie.c +++ b/trunk/arch/arm/mach-tegra/pcie.c @@ -408,7 +408,7 @@ static int tegra_pcie_setup(int nr, struct pci_sys_data *sys) pp->res[0].flags = IORESOURCE_IO; if (request_resource(&ioport_resource, &pp->res[0])) panic("Request PCIe IO resource failed\n"); - pci_add_resource_offset(&sys->resources, &pp->res[0], sys->io_offset); + pci_add_resource(&sys->resources, &pp->res[0]); /* * IORESOURCE_MEM @@ -427,7 +427,7 @@ static int tegra_pcie_setup(int nr, struct pci_sys_data *sys) pp->res[1].flags = IORESOURCE_MEM; if (request_resource(&iomem_resource, &pp->res[1])) panic("Request PCIe Memory resource failed\n"); - pci_add_resource_offset(&sys->resources, &pp->res[1], sys->mem_offset); + pci_add_resource(&sys->resources, &pp->res[1]); /* * IORESOURCE_MEM | IORESOURCE_PREFETCH @@ -446,7 +446,7 @@ static int tegra_pcie_setup(int nr, struct pci_sys_data *sys) pp->res[2].flags = IORESOURCE_MEM | IORESOURCE_PREFETCH; if 
(request_resource(&iomem_resource, &pp->res[2])) panic("Request PCIe Prefetch Memory resource failed\n"); - pci_add_resource_offset(&sys->resources, &pp->res[2], sys->mem_offset); + pci_add_resource(&sys->resources, &pp->res[2]); return 1; } diff --git a/trunk/arch/arm/mach-versatile/pci.c b/trunk/arch/arm/mach-versatile/pci.c index 51733b022d04..90069bce23bc 100644 --- a/trunk/arch/arm/mach-versatile/pci.c +++ b/trunk/arch/arm/mach-versatile/pci.c @@ -219,9 +219,9 @@ static int __init pci_versatile_setup_resources(struct list_head *resources) * the mem resource for this bus * the prefetch mem resource for this bus */ - pci_add_resource_offset(resources, &io_mem, sys->io_offset); - pci_add_resource_offset(resources, &non_mem, sys->mem_offset); - pci_add_resource_offset(resources, &pre_mem, sys->mem_offset); + pci_add_resource(resources, &io_mem); + pci_add_resource(resources, &non_mem); + pci_add_resource(resources, &pre_mem); goto out; diff --git a/trunk/arch/arm/mm/iomap.c b/trunk/arch/arm/mm/iomap.c index 4614208369f1..e62956e12030 100644 --- a/trunk/arch/arm/mm/iomap.c +++ b/trunk/arch/arm/mm/iomap.c @@ -32,6 +32,9 @@ EXPORT_SYMBOL(pcibios_min_io); unsigned long pcibios_min_mem = 0x01000000; EXPORT_SYMBOL(pcibios_min_mem); +unsigned int pci_flags = PCI_REASSIGN_ALL_RSRC; +EXPORT_SYMBOL(pci_flags); + void pci_iounmap(struct pci_dev *dev, void __iomem *addr) { if ((unsigned long)addr >= VMALLOC_START && diff --git a/trunk/arch/arm/plat-iop/pci.c b/trunk/arch/arm/plat-iop/pci.c index 72768356447a..f4d40a27111e 100644 --- a/trunk/arch/arm/plat-iop/pci.c +++ b/trunk/arch/arm/plat-iop/pci.c @@ -215,8 +215,8 @@ int iop3xx_pci_setup(int nr, struct pci_sys_data *sys) sys->mem_offset = IOP3XX_PCI_LOWER_MEM_PA - *IOP3XX_OMWTVR0; sys->io_offset = IOP3XX_PCI_LOWER_IO_PA - *IOP3XX_OIOWTVR; - pci_add_resource_offset(&sys->resources, &res[0], sys->io_offset); - pci_add_resource_offset(&sys->resources, &res[1], sys->mem_offset); + pci_add_resource(&sys->resources, &res[0]); + 
pci_add_resource(&sys->resources, &res[1]); return 1; } diff --git a/trunk/arch/ia64/include/asm/pci.h b/trunk/arch/ia64/include/asm/pci.h index b22e5f5fa593..279b38ae74aa 100644 --- a/trunk/arch/ia64/include/asm/pci.h +++ b/trunk/arch/ia64/include/asm/pci.h @@ -108,6 +108,12 @@ static inline int pci_proc_domain(struct pci_bus *bus) return (pci_domain_nr(bus) != 0); } +extern void pcibios_resource_to_bus(struct pci_dev *dev, + struct pci_bus_region *region, struct resource *res); + +extern void pcibios_bus_to_resource(struct pci_dev *dev, + struct resource *res, struct pci_bus_region *region); + static inline struct resource * pcibios_select_root(struct pci_dev *pdev, struct resource *res) { diff --git a/trunk/arch/ia64/pci/pci.c b/trunk/arch/ia64/pci/pci.c index d1ce3200147c..f82f5d4b65fd 100644 --- a/trunk/arch/ia64/pci/pci.c +++ b/trunk/arch/ia64/pci/pci.c @@ -320,8 +320,7 @@ static __devinit acpi_status add_window(struct acpi_resource *res, void *data) * Ignore these tiny memory ranges */ if (!((window->resource.flags & IORESOURCE_MEM) && (window->resource.end - window->resource.start < 16))) - pci_add_resource_offset(&info->resources, &window->resource, - window->offset); + pci_add_resource(&info->resources, &window->resource); return AE_OK; } @@ -396,6 +395,54 @@ pci_acpi_scan_root(struct acpi_pci_root *root) return NULL; } +void pcibios_resource_to_bus(struct pci_dev *dev, + struct pci_bus_region *region, struct resource *res) +{ + struct pci_controller *controller = PCI_CONTROLLER(dev); + unsigned long offset = 0; + int i; + + for (i = 0; i < controller->windows; i++) { + struct pci_window *window = &controller->window[i]; + if (!(window->resource.flags & res->flags)) + continue; + if (window->resource.start > res->start) + continue; + if (window->resource.end < res->end) + continue; + offset = window->offset; + break; + } + + region->start = res->start - offset; + region->end = res->end - offset; +} +EXPORT_SYMBOL(pcibios_resource_to_bus); + +void 
pcibios_bus_to_resource(struct pci_dev *dev, + struct resource *res, struct pci_bus_region *region) +{ + struct pci_controller *controller = PCI_CONTROLLER(dev); + unsigned long offset = 0; + int i; + + for (i = 0; i < controller->windows; i++) { + struct pci_window *window = &controller->window[i]; + if (!(window->resource.flags & res->flags)) + continue; + if (window->resource.start - window->offset > region->start) + continue; + if (window->resource.end - window->offset < region->end) + continue; + offset = window->offset; + break; + } + + res->start = region->start + offset; + res->end = region->end + offset; +} +EXPORT_SYMBOL(pcibios_bus_to_resource); + static int __devinit is_valid_resource(struct pci_dev *dev, int idx) { unsigned int i, type_mask = IORESOURCE_IO | IORESOURCE_MEM; @@ -417,11 +464,15 @@ static int __devinit is_valid_resource(struct pci_dev *dev, int idx) static void __devinit pcibios_fixup_resources(struct pci_dev *dev, int start, int limit) { + struct pci_bus_region region; int i; for (i = start; i < limit; i++) { if (!dev->resource[i].flags) continue; + region.start = dev->resource[i].start; + region.end = dev->resource[i].end; + pcibios_bus_to_resource(dev, &dev->resource[i], &region); if ((is_valid_resource(dev, i))) pci_claim_resource(dev, i); } diff --git a/trunk/arch/ia64/sn/kernel/io_init.c b/trunk/arch/ia64/sn/kernel/io_init.c index 238e2c511d94..0a36f082eaf1 100644 --- a/trunk/arch/ia64/sn/kernel/io_init.c +++ b/trunk/arch/ia64/sn/kernel/io_init.c @@ -297,8 +297,7 @@ sn_pci_controller_fixup(int segment, int busnum, struct pci_bus *bus) s64 status = 0; struct pci_controller *controller; struct pcibus_bussoft *prom_bussoft_ptr; - LIST_HEAD(resources); - int i; + status = sal_get_pcibus_info((u64) segment, (u64) busnum, (u64) ia64_tpa(&prom_bussoft_ptr)); @@ -316,15 +315,7 @@ sn_pci_controller_fixup(int segment, int busnum, struct pci_bus *bus) */ controller->platform_data = prom_bussoft_ptr; - sn_legacy_pci_window_fixup(controller, - 
prom_bussoft_ptr->bs_legacy_io, - prom_bussoft_ptr->bs_legacy_mem); - for (i = 0; i < controller->windows; i++) - pci_add_resource_offset(&resources, - &controller->window[i].resource, - controller->window[i].offset); - bus = pci_scan_root_bus(NULL, busnum, &pci_root_ops, controller, - &resources); + bus = pci_scan_bus(busnum, &pci_root_ops, controller); if (bus == NULL) goto error_return; /* error, or bus already scanned */ @@ -357,6 +348,9 @@ sn_bus_fixup(struct pci_bus *bus) return; } sn_common_bus_fixup(bus, prom_bussoft_ptr); + sn_legacy_pci_window_fixup(PCI_CONTROLLER(bus), + prom_bussoft_ptr->bs_legacy_io, + prom_bussoft_ptr->bs_legacy_mem); } list_for_each_entry(pci_dev, &bus->devices, bus_list) { sn_io_slot_fixup(pci_dev); diff --git a/trunk/arch/microblaze/include/asm/pci-bridge.h b/trunk/arch/microblaze/include/asm/pci-bridge.h index cb5d39794800..e9834b2991d0 100644 --- a/trunk/arch/microblaze/include/asm/pci-bridge.h +++ b/trunk/arch/microblaze/include/asm/pci-bridge.h @@ -10,6 +10,7 @@ #include #include #include +#include struct device_node; diff --git a/trunk/arch/microblaze/include/asm/pci.h b/trunk/arch/microblaze/include/asm/pci.h index a0da88bf70c5..033137628e8a 100644 --- a/trunk/arch/microblaze/include/asm/pci.h +++ b/trunk/arch/microblaze/include/asm/pci.h @@ -94,6 +94,14 @@ extern int pci_mmap_legacy_page_range(struct pci_bus *bus, */ #define PCI_DMA_BUS_IS_PHYS (1) +extern void pcibios_resource_to_bus(struct pci_dev *dev, + struct pci_bus_region *region, + struct resource *res); + +extern void pcibios_bus_to_resource(struct pci_dev *dev, + struct resource *res, + struct pci_bus_region *region); + static inline struct resource *pcibios_select_root(struct pci_dev *pdev, struct resource *res) { diff --git a/trunk/arch/microblaze/pci/pci-common.c b/trunk/arch/microblaze/pci/pci-common.c index d10403dadd2b..85f2ac1230a8 100644 --- a/trunk/arch/microblaze/pci/pci-common.c +++ b/trunk/arch/microblaze/pci/pci-common.c @@ -46,6 +46,9 @@ static int 
global_phb_number; /* Global phb counter */ /* ISA Memory physical address */ resource_size_t isa_mem_base; +/* Default PCI flags is 0 on ppc32, modified at boot on ppc64 */ +unsigned int pci_flags; + static struct dma_map_ops *pci_dma_ops = &dma_direct_ops; unsigned long isa_io_base; @@ -830,7 +833,64 @@ int pci_proc_domain(struct pci_bus *bus) { struct pci_controller *hose = pci_bus_to_host(bus); - return 0; + if (!(pci_flags & PCI_ENABLE_PROC_DOMAINS)) + return 0; + if (pci_flags & PCI_COMPAT_DOMAIN_0) + return hose->global_number != 0; + return 1; +} + +void pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region, + struct resource *res) +{ + resource_size_t offset = 0, mask = (resource_size_t)-1; + struct pci_controller *hose = pci_bus_to_host(dev->bus); + + if (!hose) + return; + if (res->flags & IORESOURCE_IO) { + offset = (unsigned long)hose->io_base_virt - _IO_BASE; + mask = 0xffffffffu; + } else if (res->flags & IORESOURCE_MEM) + offset = hose->pci_mem_offset; + + region->start = (res->start - offset) & mask; + region->end = (res->end - offset) & mask; +} +EXPORT_SYMBOL(pcibios_resource_to_bus); + +void pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res, + struct pci_bus_region *region) +{ + resource_size_t offset = 0, mask = (resource_size_t)-1; + struct pci_controller *hose = pci_bus_to_host(dev->bus); + + if (!hose) + return; + if (res->flags & IORESOURCE_IO) { + offset = (unsigned long)hose->io_base_virt - _IO_BASE; + mask = 0xffffffffu; + } else if (res->flags & IORESOURCE_MEM) + offset = hose->pci_mem_offset; + res->start = (region->start + offset) & mask; + res->end = (region->end + offset) & mask; +} +EXPORT_SYMBOL(pcibios_bus_to_resource); + +/* Fixup a bus resource into a linux resource */ +static void __devinit fixup_resource(struct resource *res, struct pci_dev *dev) +{ + struct pci_controller *hose = pci_bus_to_host(dev->bus); + resource_size_t offset = 0, mask = (resource_size_t)-1; + + if (res->flags & 
IORESOURCE_IO) { + offset = (unsigned long)hose->io_base_virt - _IO_BASE; + mask = 0xffffffffu; + } else if (res->flags & IORESOURCE_MEM) + offset = hose->pci_mem_offset; + + res->start = (res->start + offset) & mask; + res->end = (res->end + offset) & mask; } /* This header fixup will do the resource fixup for all devices as they are @@ -850,7 +910,13 @@ static void __devinit pcibios_fixup_resources(struct pci_dev *dev) struct resource *res = dev->resource + i; if (!res->flags) continue; - if (res->start == 0) { + /* On platforms that have PCI_PROBE_ONLY set, we don't + * consider 0 as an unassigned BAR value. It's technically + * a valid value, but linux doesn't like it... so when we can + * re-assign things, we do so, but if we can't, we keep it + * around and hope for the best... + */ + if (res->start == 0 && !(pci_flags & PCI_PROBE_ONLY)) { pr_debug("PCI:%s Resource %d %016llx-%016llx [%x]" \ "is unassigned\n", pci_name(dev), i, @@ -863,11 +929,18 @@ static void __devinit pcibios_fixup_resources(struct pci_dev *dev) continue; } - pr_debug("PCI:%s Resource %d %016llx-%016llx [%x]\n", + pr_debug("PCI:%s Resource %d %016llx-%016llx [%x] fixup...\n", pci_name(dev), i, (unsigned long long)res->start,\ (unsigned long long)res->end, (unsigned int)res->flags); + + fixup_resource(res, dev); + + pr_debug("PCI:%s %016llx-%016llx\n", + pci_name(dev), + (unsigned long long)res->start, + (unsigned long long)res->end); } } DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pcibios_fixup_resources); @@ -886,6 +959,10 @@ static int __devinit pcibios_uninitialized_bridge_resource(struct pci_bus *bus, u16 command; int i; + /* We don't do anything if PCI_PROBE_ONLY is set */ + if (pci_flags & PCI_PROBE_ONLY) + return 0; + /* Job is a bit different between memory and IO */ if (res->flags & IORESOURCE_MEM) { /* If the BAR is non-0 (res != pci_mem_offset) then it's @@ -960,6 +1037,9 @@ static void __devinit pcibios_fixup_bridge(struct pci_bus *bus) (unsigned long long)res->end, 
(unsigned int)res->flags); + /* Perform fixup */ + fixup_resource(res, dev); + /* Try to detect uninitialized P2P bridge resources, * and clear them out so they get re-assigned later */ @@ -1027,6 +1107,9 @@ EXPORT_SYMBOL(pcibios_fixup_bus); static int skip_isa_ioresource_align(struct pci_dev *dev) { + if ((pci_flags & PCI_CAN_SKIP_ISA_ALIGN) && + !(dev->bus->bridge_ctl & PCI_BRIDGE_CTL_ISA)) + return 1; return 0; } @@ -1153,6 +1236,8 @@ void pcibios_allocate_bus_resources(struct pci_bus *bus) * and as such ensure proper re-allocation * later. */ + if (pci_flags & PCI_REASSIGN_ALL_RSRC) + goto clear_resource; pr = pci_find_parent_resource(bus->self, res); if (pr == res) { /* this happens when the generic PCI @@ -1337,19 +1422,27 @@ void __init pcibios_resource_survey(void) list_for_each_entry(b, &pci_root_buses, node) pcibios_allocate_bus_resources(b); - pcibios_allocate_resources(0); - pcibios_allocate_resources(1); + if (!(pci_flags & PCI_REASSIGN_ALL_RSRC)) { + pcibios_allocate_resources(0); + pcibios_allocate_resources(1); + } /* Before we start assigning unassigned resource, we try to reserve * the low IO area and the VGA memory area if they intersect the * bus available resources to avoid allocating things on top of them */ - list_for_each_entry(b, &pci_root_buses, node) - pcibios_reserve_legacy_regions(b); + if (!(pci_flags & PCI_PROBE_ONLY)) { + list_for_each_entry(b, &pci_root_buses, node) + pcibios_reserve_legacy_regions(b); + } - /* Now proceed to assigning things that were left unassigned */ - pr_debug("PCI: Assigning unassigned resources...\n"); - pci_assign_unassigned_resources(); + /* Now, if the platform didn't decide to blindly trust the firmware, + * we proceed to assigning things that were left unassigned + */ + if (!(pci_flags & PCI_PROBE_ONLY)) { + pr_debug("PCI: Assigning unassigned resources...\n"); + pci_assign_unassigned_resources(); + } } #ifdef CONFIG_HOTPLUG @@ -1442,7 +1535,7 @@ static void __devinit pcibios_setup_phb_resources(struct 
pci_controller *hose, s res->end = res->start + IO_SPACE_LIMIT; res->flags = IORESOURCE_IO; } - pci_add_resource_offset(resources, res, hose->io_base_virt - _IO_BASE); + pci_add_resource(resources, res); pr_debug("PCI: PHB IO resource = %016llx-%016llx [%lx]\n", (unsigned long long)res->start, @@ -1465,7 +1558,7 @@ static void __devinit pcibios_setup_phb_resources(struct pci_controller *hose, s res->flags = IORESOURCE_MEM; } - pci_add_resource_offset(resources, res, hose->pci_mem_offset); + pci_add_resource(resources, res); pr_debug("PCI: PHB MEM resource %d = %016llx-%016llx [%lx]\n", i, (unsigned long long)res->start, diff --git a/trunk/arch/mips/include/asm/pci.h b/trunk/arch/mips/include/asm/pci.h index fcd4060f6421..576397c69920 100644 --- a/trunk/arch/mips/include/asm/pci.h +++ b/trunk/arch/mips/include/asm/pci.h @@ -92,7 +92,6 @@ extern int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, #include #include #include -#include struct pci_dev; @@ -113,6 +112,12 @@ static inline void pci_dma_burst_advice(struct pci_dev *pdev, } #endif +extern void pcibios_resource_to_bus(struct pci_dev *dev, + struct pci_bus_region *region, struct resource *res); + +extern void pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res, + struct pci_bus_region *region); + #define pci_domain_nr(bus) ((struct pci_controller *)(bus)->sysdata)->index static inline int pci_proc_domain(struct pci_bus *bus) @@ -140,6 +145,8 @@ static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel) #define arch_setup_msi_irqs arch_setup_msi_irqs #endif +extern int pci_probe_only; + extern char * (*pcibios_plat_setup)(char *str); #endif /* _ASM_PCI_H */ diff --git a/trunk/arch/mips/pci/fixup-cobalt.c b/trunk/arch/mips/pci/fixup-cobalt.c index 9553b14002dd..acacd1407c63 100644 --- a/trunk/arch/mips/pci/fixup-cobalt.c +++ b/trunk/arch/mips/pci/fixup-cobalt.c @@ -51,6 +51,67 @@ static void qube_raq_galileo_early_fixup(struct pci_dev *dev) 
DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_MARVELL, PCI_DEVICE_ID_MARVELL_GT64111, qube_raq_galileo_early_fixup); +static void __devinit cobalt_legacy_ide_resource_fixup(struct pci_dev *dev, + struct resource *res) +{ + struct pci_controller *hose = (struct pci_controller *)dev->sysdata; + unsigned long offset = hose->io_offset; + struct resource orig = *res; + + if (!(res->flags & IORESOURCE_IO) || + !(res->flags & IORESOURCE_PCI_FIXED)) + return; + + res->start -= offset; + res->end -= offset; + dev_printk(KERN_DEBUG, &dev->dev, "converted legacy %pR to bus %pR\n", + &orig, res); +} + +static void __devinit cobalt_legacy_ide_fixup(struct pci_dev *dev) +{ + u32 class; + u8 progif; + + /* + * If the IDE controller is in legacy mode, pci_setup_device() fills in + * the resources with the legacy addresses that normally appear on the + * PCI bus, just as if we had read them from a BAR. + * + * However, with the GT-64111, those legacy addresses, e.g., 0x1f0, + * will never appear on the PCI bus because it converts memory accesses + * in the PCI I/O region (which is never at address zero) into I/O port + * accesses with no address translation. + * + * For example, if GT_DEF_PCI0_IO_BASE is 0x10000000, a load or store + * to physical address 0x100001f0 will become a PCI access to I/O port + * 0x100001f0. There's no way to generate an access to I/O port 0x1f0, + * but the VT82C586 IDE controller does respond at 0x100001f0 because + * it only decodes the low 24 bits of the address. + * + * When this quirk runs, the pci_dev resources should contain bus + * addresses, not Linux I/O port numbers, so convert legacy addresses + * like 0x1f0 to bus addresses like 0x100001f0. Later, we'll convert + * them back with pcibios_fixup_bus() or pcibios_bus_to_resource(). 
+ */ + class = dev->class >> 8; + if (class != PCI_CLASS_STORAGE_IDE) + return; + + pci_read_config_byte(dev, PCI_CLASS_PROG, &progif); + if ((progif & 1) == 0) { + cobalt_legacy_ide_resource_fixup(dev, &dev->resource[0]); + cobalt_legacy_ide_resource_fixup(dev, &dev->resource[1]); + } + if ((progif & 4) == 0) { + cobalt_legacy_ide_resource_fixup(dev, &dev->resource[2]); + cobalt_legacy_ide_resource_fixup(dev, &dev->resource[3]); + } +} + +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C586_1, + cobalt_legacy_ide_fixup); + static void qube_raq_via_bmIDE_fixup(struct pci_dev *dev) { unsigned short cfgword; diff --git a/trunk/arch/mips/pci/pci-bcm1480.c b/trunk/arch/mips/pci/pci-bcm1480.c index 37b52dc3d27e..af8c31996965 100644 --- a/trunk/arch/mips/pci/pci-bcm1480.c +++ b/trunk/arch/mips/pci/pci-bcm1480.c @@ -204,7 +204,7 @@ static int __init bcm1480_pcibios_init(void) uint64_t reg; /* CFE will assign PCI resources */ - pci_set_flags(PCI_PROBE_ONLY); + pci_probe_only = 1; /* Avoid ISA compat ranges. 
*/ PCIBIOS_MIN_IO = 0x00008000UL; diff --git a/trunk/arch/mips/pci/pci-ip27.c b/trunk/arch/mips/pci/pci-ip27.c index 0fbe4c0c170a..193e9494f98e 100644 --- a/trunk/arch/mips/pci/pci-ip27.c +++ b/trunk/arch/mips/pci/pci-ip27.c @@ -50,7 +50,7 @@ int __cpuinit bridge_probe(nasid_t nasid, int widget_id, int masterwid) bridge_t *bridge; int slot; - pci_set_flags(PCI_PROBE_ONLY); + pci_probe_only = 1; printk("a bridge\n"); diff --git a/trunk/arch/mips/pci/pci-lantiq.c b/trunk/arch/mips/pci/pci-lantiq.c index 030c77e7926e..be1e1afe12c3 100644 --- a/trunk/arch/mips/pci/pci-lantiq.c +++ b/trunk/arch/mips/pci/pci-lantiq.c @@ -270,8 +270,7 @@ static int __devinit ltq_pci_probe(struct platform_device *pdev) { struct ltq_pci_data *ltq_pci_data = (struct ltq_pci_data *) pdev->dev.platform_data; - - pci_clear_flags(PCI_PROBE_ONLY); + pci_probe_only = 0; ltq_pci_irq_map = ltq_pci_data->irq; ltq_pci_membase = ioremap_nocache(PCI_CR_BASE_ADDR, PCI_CR_SIZE); ltq_pci_mapped_cfg = diff --git a/trunk/arch/mips/pci/pci-sb1250.c b/trunk/arch/mips/pci/pci-sb1250.c index dd97f3a83baa..1711e8e101bc 100644 --- a/trunk/arch/mips/pci/pci-sb1250.c +++ b/trunk/arch/mips/pci/pci-sb1250.c @@ -213,7 +213,7 @@ static int __init sb1250_pcibios_init(void) uint64_t reg; /* CFE will assign PCI resources */ - pci_set_flags(PCI_PROBE_ONLY); + pci_probe_only = 1; /* Avoid ISA compat ranges. 
*/ PCIBIOS_MIN_IO = 0x00008000UL; diff --git a/trunk/arch/mips/pci/pci-xlr.c b/trunk/arch/mips/pci/pci-xlr.c index 1644805a6730..3d701a962ef4 100644 --- a/trunk/arch/mips/pci/pci-xlr.c +++ b/trunk/arch/mips/pci/pci-xlr.c @@ -292,7 +292,7 @@ int pcibios_plat_dev_init(struct pci_dev *dev) static int __init pcibios_init(void) { /* PSB assigns PCI resources */ - pci_set_flags(PCI_PROBE_ONLY); + pci_probe_only = 1; pci_config_base = ioremap(DEFAULT_PCI_CONFIG_BASE, 16 << 20); /* Extend IO port for memory mapped io */ diff --git a/trunk/arch/mips/pci/pci.c b/trunk/arch/mips/pci/pci.c index 0514866fa925..15521505ebe8 100644 --- a/trunk/arch/mips/pci/pci.c +++ b/trunk/arch/mips/pci/pci.c @@ -20,9 +20,16 @@ #include /* - * If PCI_PROBE_ONLY in pci_flags is set, we don't change any PCI resource - * assignments. + * Indicate whether we respect the PCI setup left by the firmware. + * + * Make this long-lived so that we know when shutting down + * whether we probed only or not. */ +int pci_probe_only; + +#define PCI_ASSIGN_ALL_BUSSES 1 + +unsigned int pci_probe = PCI_ASSIGN_ALL_BUSSES; /* * The PCI controller list. 
@@ -85,12 +92,11 @@ static void __devinit pcibios_scanbus(struct pci_controller *hose) if (!hose->iommu) PCI_DMA_BUS_IS_PHYS = 1; - if (hose->get_busno && pci_has_flag(PCI_PROBE_ONLY)) + if (hose->get_busno && pci_probe_only) next_busno = (*hose->get_busno)(); - pci_add_resource_offset(&resources, - hose->mem_resource, hose->mem_offset); - pci_add_resource_offset(&resources, hose->io_resource, hose->io_offset); + pci_add_resource(&resources, hose->mem_resource); + pci_add_resource(&resources, hose->io_resource); bus = pci_scan_root_bus(NULL, next_busno, hose->pci_ops, hose, &resources); if (!bus) @@ -109,7 +115,7 @@ static void __devinit pcibios_scanbus(struct pci_controller *hose) need_domain_info = 1; } - if (!pci_has_flag(PCI_PROBE_ONLY)) { + if (!pci_probe_only) { pci_bus_size_bridges(bus); pci_bus_assign_resources(bus); pci_enable_bridges(bus); @@ -235,7 +241,7 @@ static int pcibios_enable_resources(struct pci_dev *dev, int mask) unsigned int pcibios_assign_all_busses(void) { - return 1; + return (pci_probe & PCI_ASSIGN_ALL_BUSSES) ? 1 : 0; } int pcibios_enable_device(struct pci_dev *dev, int mask) @@ -248,13 +254,42 @@ int pcibios_enable_device(struct pci_dev *dev, int mask) return pcibios_plat_dev_init(dev); } +static void pcibios_fixup_device_resources(struct pci_dev *dev, + struct pci_bus *bus) +{ + /* Update device resources. */ + struct pci_controller *hose = (struct pci_controller *)bus->sysdata; + unsigned long offset = 0; + int i; + + for (i = 0; i < PCI_NUM_RESOURCES; i++) { + if (!dev->resource[i].start) + continue; + if (dev->resource[i].flags & IORESOURCE_IO) + offset = hose->io_offset; + else if (dev->resource[i].flags & IORESOURCE_MEM) + offset = hose->mem_offset; + + dev->resource[i].start += offset; + dev->resource[i].end += offset; + } +} + void __devinit pcibios_fixup_bus(struct pci_bus *bus) { + /* Propagate hose info into the subordinate devices. 
*/ + struct pci_dev *dev = bus->self; - if (pci_has_flag(PCI_PROBE_ONLY) && dev && + if (pci_probe_only && dev && (dev->class >> 8) == PCI_CLASS_BRIDGE_PCI) { pci_read_bridge_bases(bus); + pcibios_fixup_device_resources(dev, bus); + } + + list_for_each_entry(dev, &bus->devices, bus_list) { + if ((dev->class >> 8) != PCI_CLASS_BRIDGE_PCI) + pcibios_fixup_device_resources(dev, bus); } } @@ -264,7 +299,40 @@ pcibios_update_irq(struct pci_dev *dev, int irq) pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq); } +void pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region, + struct resource *res) +{ + struct pci_controller *hose = (struct pci_controller *)dev->sysdata; + unsigned long offset = 0; + + if (res->flags & IORESOURCE_IO) + offset = hose->io_offset; + else if (res->flags & IORESOURCE_MEM) + offset = hose->mem_offset; + + region->start = res->start - offset; + region->end = res->end - offset; +} + +void __devinit +pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res, + struct pci_bus_region *region) +{ + struct pci_controller *hose = (struct pci_controller *)dev->sysdata; + unsigned long offset = 0; + + if (res->flags & IORESOURCE_IO) + offset = hose->io_offset; + else if (res->flags & IORESOURCE_MEM) + offset = hose->mem_offset; + + res->start = region->start + offset; + res->end = region->end + offset; +} + #ifdef CONFIG_HOTPLUG +EXPORT_SYMBOL(pcibios_resource_to_bus); +EXPORT_SYMBOL(pcibios_bus_to_resource); EXPORT_SYMBOL(PCIBIOS_MIN_IO); EXPORT_SYMBOL(PCIBIOS_MIN_MEM); #endif diff --git a/trunk/arch/mn10300/include/asm/pci.h b/trunk/arch/mn10300/include/asm/pci.h index 8137c25c4e15..6095a28561dd 100644 --- a/trunk/arch/mn10300/include/asm/pci.h +++ b/trunk/arch/mn10300/include/asm/pci.h @@ -85,6 +85,22 @@ extern int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, /* implement the pci_ DMA API in terms of the generic device dma_ one */ #include +/** + * pcibios_resource_to_bus - convert resource to PCI 
bus address + * @dev: device which owns this resource + * @region: converted bus-centric region (start,end) + * @res: resource to convert + * + * Convert a resource to a PCI device bus address or bus window. + */ +extern void pcibios_resource_to_bus(struct pci_dev *dev, + struct pci_bus_region *region, + struct resource *res); + +extern void pcibios_bus_to_resource(struct pci_dev *dev, + struct resource *res, + struct pci_bus_region *region); + static inline struct resource * pcibios_select_root(struct pci_dev *pdev, struct resource *res) { diff --git a/trunk/arch/mn10300/unit-asb2305/pci.c b/trunk/arch/mn10300/unit-asb2305/pci.c index 6dce9fc2cf3c..a7c5f08ca9f5 100644 --- a/trunk/arch/mn10300/unit-asb2305/pci.c +++ b/trunk/arch/mn10300/unit-asb2305/pci.c @@ -32,7 +32,8 @@ struct pci_ops *pci_root_ops; * insert specific PCI bus resources instead of using the platform-level bus * resources directly for the PCI root bus. * - * These are configured and inserted by pcibios_init(). + * These are configured and inserted by pcibios_init() and are attached to the + * root bus by pcibios_fixup_bus(). 
*/ static struct resource pci_ioport_resource = { .name = "PCI IO", @@ -76,6 +77,52 @@ static inline int __query(const struct pci_bus *bus, unsigned int devfn) return 1; } +/* + * translate Linuxcentric addresses to PCI bus addresses + */ +void pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region, + struct resource *res) +{ + if (res->flags & IORESOURCE_IO) { + region->start = (res->start & 0x00ffffff); + region->end = (res->end & 0x00ffffff); + } + + if (res->flags & IORESOURCE_MEM) { + region->start = (res->start & 0x03ffffff) | MEM_PAGING_REG; + region->end = (res->end & 0x03ffffff) | MEM_PAGING_REG; + } + +#if 0 + printk(KERN_DEBUG "RES->BUS: %lx-%lx => %lx-%lx\n", + res->start, res->end, region->start, region->end); +#endif +} +EXPORT_SYMBOL(pcibios_resource_to_bus); + +/* + * translate PCI bus addresses to Linuxcentric addresses + */ +void pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res, + struct pci_bus_region *region) +{ + if (res->flags & IORESOURCE_IO) { + res->start = (region->start & 0x00ffffff) | 0xbe000000; + res->end = (region->end & 0x00ffffff) | 0xbe000000; + } + + if (res->flags & IORESOURCE_MEM) { + res->start = (region->start & 0x03ffffff) | 0xb8000000; + res->end = (region->end & 0x03ffffff) | 0xb8000000; + } + +#if 0 + printk(KERN_INFO "BUS->RES: %lx-%lx => %lx-%lx\n", + region->start, region->end, res->start, res->end); +#endif +} +EXPORT_SYMBOL(pcibios_bus_to_resource); + /* * */ @@ -317,6 +364,9 @@ static void __devinit pcibios_fixup_device_resources(struct pci_dev *dev) if (!dev->resource[i].flags) continue; + region.start = dev->resource[i].start; + region.end = dev->resource[i].end; + pcibios_bus_to_resource(dev, &dev->resource[i], &region); if (is_valid_resource(dev, i)) pci_claim_resource(dev, i); } @@ -347,7 +397,6 @@ void __devinit pcibios_fixup_bus(struct pci_bus *bus) */ static int __init pcibios_init(void) { - resource_size_t io_offset, mem_offset; LIST_HEAD(resources); ioport_resource.start = 
0xA0000000; @@ -371,13 +420,8 @@ static int __init pcibios_init(void) printk(KERN_INFO "PCI: Probing PCI hardware [mempage %08x]\n", MEM_PAGING_REG); - io_offset = pci_ioport_resource.start - - (pci_ioport_resource.start & 0x00ffffff); - mem_offset = pci_iomem_resource.start - - ((pci_iomem_resource.start & 0x03ffffff) | MEM_PAGING_REG); - - pci_add_resource_offset(&resources, &pci_ioport_resource, io_offset); - pci_add_resource_offset(&resources, &pci_iomem_resource, mem_offset); + pci_add_resource(&resources, &pci_ioport_resource); + pci_add_resource(&resources, &pci_iomem_resource); pci_root_bus = pci_scan_root_bus(NULL, 0, &pci_direct_ampci, NULL, &resources); diff --git a/trunk/arch/parisc/include/asm/pci.h b/trunk/arch/parisc/include/asm/pci.h index 3234f492d575..2242a5c636c2 100644 --- a/trunk/arch/parisc/include/asm/pci.h +++ b/trunk/arch/parisc/include/asm/pci.h @@ -82,8 +82,38 @@ struct pci_hba_data { #ifdef CONFIG_64BIT #define PCI_F_EXTEND 0xffffffff00000000UL +#define PCI_IS_LMMIO(hba,a) pci_is_lmmio(hba,a) + +/* We need to know if an address is LMMMIO or GMMIO. + * LMMIO requires mangling and GMMIO we must use as-is. + */ +static __inline__ int pci_is_lmmio(struct pci_hba_data *hba, unsigned long a) +{ + return(((a) & PCI_F_EXTEND) == PCI_F_EXTEND); +} + +/* +** Convert between PCI (IO_VIEW) addresses and processor (PA_VIEW) addresses. +** See pci.c for more conversions used by Generic PCI code. +** +** Platform characteristics/firmware guarantee that +** (1) PA_VIEW - IO_VIEW = lmmio_offset for both LMMIO and ELMMIO +** (2) PA_VIEW == IO_VIEW for GMMIO +*/ +#define PCI_BUS_ADDR(hba,a) (PCI_IS_LMMIO(hba,a) \ + ? ((a) - hba->lmmio_space_offset) /* mangle LMMIO */ \ + : (a)) /* GMMIO */ +#define PCI_HOST_ADDR(hba,a) (((a) & PCI_F_EXTEND) == 0 \ + ? 
(a) + hba->lmmio_space_offset \ + : (a)) + #else /* !CONFIG_64BIT */ + +#define PCI_BUS_ADDR(hba,a) (a) +#define PCI_HOST_ADDR(hba,a) (a) #define PCI_F_EXTEND 0UL +#define PCI_IS_LMMIO(hba,a) (1) /* 32-bit doesn't support GMMIO */ + #endif /* !CONFIG_64BIT */ /* @@ -215,6 +245,14 @@ static inline void pci_dma_burst_advice(struct pci_dev *pdev, } #endif +extern void +pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region, + struct resource *res); + +extern void +pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res, + struct pci_bus_region *region); + static inline void pcibios_penalize_isa_irq(int irq, int active) { /* We don't need to penalize isa irq's */ diff --git a/trunk/arch/parisc/kernel/pci.c b/trunk/arch/parisc/kernel/pci.c index 74d544b1cd22..9efd97405317 100644 --- a/trunk/arch/parisc/kernel/pci.c +++ b/trunk/arch/parisc/kernel/pci.c @@ -195,6 +195,58 @@ void __init pcibios_init_bus(struct pci_bus *bus) pci_write_config_word(dev, PCI_BRIDGE_CONTROL, bridge_ctl); } +/* called by drivers/pci/setup-bus.c:pci_setup_bridge(). */ +void __devinit pcibios_resource_to_bus(struct pci_dev *dev, + struct pci_bus_region *region, struct resource *res) +{ +#ifdef CONFIG_64BIT + struct pci_hba_data *hba = HBA_DATA(dev->bus->bridge->platform_data); +#endif + + if (res->flags & IORESOURCE_IO) { + /* + ** I/O space may see busnumbers here. Something + ** in the form of 0xbbxxxx where bb is the bus num + ** and xxxx is the I/O port space address. + ** Remaining address translation are done in the + ** PCI Host adapter specific code - ie dino_out8. 
+ */ + region->start = PCI_PORT_ADDR(res->start); + region->end = PCI_PORT_ADDR(res->end); + } else if (res->flags & IORESOURCE_MEM) { + /* Convert MMIO addr to PCI addr (undo global virtualization) */ + region->start = PCI_BUS_ADDR(hba, res->start); + region->end = PCI_BUS_ADDR(hba, res->end); + } + + DBG_RES("pcibios_resource_to_bus(%02x %s [%lx,%lx])\n", + dev->bus->number, res->flags & IORESOURCE_IO ? "IO" : "MEM", + region->start, region->end); +} + +void pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res, + struct pci_bus_region *region) +{ +#ifdef CONFIG_64BIT + struct pci_hba_data *hba = HBA_DATA(dev->bus->bridge->platform_data); +#endif + + if (res->flags & IORESOURCE_MEM) { + res->start = PCI_HOST_ADDR(hba, region->start); + res->end = PCI_HOST_ADDR(hba, region->end); + } + + if (res->flags & IORESOURCE_IO) { + res->start = region->start; + res->end = region->end; + } +} + +#ifdef CONFIG_HOTPLUG +EXPORT_SYMBOL(pcibios_resource_to_bus); +EXPORT_SYMBOL(pcibios_bus_to_resource); +#endif + /* * pcibios align resources() is called every time generic PCI code * wants to generate a new address. 
The process of looking for diff --git a/trunk/arch/powerpc/include/asm/pci.h b/trunk/arch/powerpc/include/asm/pci.h index 6653f2743c4e..f54b3d26ce9d 100644 --- a/trunk/arch/powerpc/include/asm/pci.h +++ b/trunk/arch/powerpc/include/asm/pci.h @@ -154,6 +154,14 @@ extern int pci_mmap_legacy_page_range(struct pci_bus *bus, #endif /* CONFIG_PPC64 */ +extern void pcibios_resource_to_bus(struct pci_dev *dev, + struct pci_bus_region *region, + struct resource *res); + +extern void pcibios_bus_to_resource(struct pci_dev *dev, + struct resource *res, + struct pci_bus_region *region); + extern void pcibios_claim_one_bus(struct pci_bus *b); extern void pcibios_finish_adding_to_bus(struct pci_bus *bus); @@ -182,7 +190,6 @@ extern void pci_resource_to_user(const struct pci_dev *dev, int bar, const struct resource *rsrc, resource_size_t *start, resource_size_t *end); -extern resource_size_t pcibios_io_space_offset(struct pci_controller *hose); extern void pcibios_setup_bus_devices(struct pci_bus *bus); extern void pcibios_setup_bus_self(struct pci_bus *bus); extern void pcibios_setup_phb_io_space(struct pci_controller *hose); diff --git a/trunk/arch/powerpc/include/asm/ppc-pci.h b/trunk/arch/powerpc/include/asm/ppc-pci.h index 80fa704d410f..e660b37aa7d0 100644 --- a/trunk/arch/powerpc/include/asm/ppc-pci.h +++ b/trunk/arch/powerpc/include/asm/ppc-pci.h @@ -45,6 +45,8 @@ extern void init_pci_config_tokens (void); extern unsigned long get_phb_buid (struct device_node *); extern int rtas_setup_phb(struct pci_controller *phb); +extern unsigned long pci_probe_only; + #ifdef CONFIG_EEH void pci_addr_cache_build(void); diff --git a/trunk/arch/powerpc/kernel/pci-common.c b/trunk/arch/powerpc/kernel/pci-common.c index 8e78e93c8185..d0373bcb7c9d 100644 --- a/trunk/arch/powerpc/kernel/pci-common.c +++ b/trunk/arch/powerpc/kernel/pci-common.c @@ -49,6 +49,9 @@ static int global_phb_number; /* Global phb counter */ /* ISA Memory physical address */ resource_size_t isa_mem_base; +/* Default 
PCI flags is 0 on ppc32, modified at boot on ppc64 */ +unsigned int pci_flags = 0; + static struct dma_map_ops *pci_dma_ops = &dma_direct_ops; @@ -831,6 +834,60 @@ int pci_proc_domain(struct pci_bus *bus) return 1; } +void pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region, + struct resource *res) +{ + resource_size_t offset = 0, mask = (resource_size_t)-1; + struct pci_controller *hose = pci_bus_to_host(dev->bus); + + if (!hose) + return; + if (res->flags & IORESOURCE_IO) { + offset = (unsigned long)hose->io_base_virt - _IO_BASE; + mask = 0xffffffffu; + } else if (res->flags & IORESOURCE_MEM) + offset = hose->pci_mem_offset; + + region->start = (res->start - offset) & mask; + region->end = (res->end - offset) & mask; +} +EXPORT_SYMBOL(pcibios_resource_to_bus); + +void pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res, + struct pci_bus_region *region) +{ + resource_size_t offset = 0, mask = (resource_size_t)-1; + struct pci_controller *hose = pci_bus_to_host(dev->bus); + + if (!hose) + return; + if (res->flags & IORESOURCE_IO) { + offset = (unsigned long)hose->io_base_virt - _IO_BASE; + mask = 0xffffffffu; + } else if (res->flags & IORESOURCE_MEM) + offset = hose->pci_mem_offset; + res->start = (region->start + offset) & mask; + res->end = (region->end + offset) & mask; +} +EXPORT_SYMBOL(pcibios_bus_to_resource); + +/* Fixup a bus resource into a linux resource */ +static void __devinit fixup_resource(struct resource *res, struct pci_dev *dev) +{ + struct pci_controller *hose = pci_bus_to_host(dev->bus); + resource_size_t offset = 0, mask = (resource_size_t)-1; + + if (res->flags & IORESOURCE_IO) { + offset = (unsigned long)hose->io_base_virt - _IO_BASE; + mask = 0xffffffffu; + } else if (res->flags & IORESOURCE_MEM) + offset = hose->pci_mem_offset; + + res->start = (res->start + offset) & mask; + res->end = (res->end + offset) & mask; +} + + /* This header fixup will do the resource fixup for all devices as they are * 
probed, but not for bridge ranges */ @@ -870,11 +927,18 @@ static void __devinit pcibios_fixup_resources(struct pci_dev *dev) continue; } - pr_debug("PCI:%s Resource %d %016llx-%016llx [%x]\n", + pr_debug("PCI:%s Resource %d %016llx-%016llx [%x] fixup...\n", pci_name(dev), i, (unsigned long long)res->start,\ (unsigned long long)res->end, (unsigned int)res->flags); + + fixup_resource(res, dev); + + pr_debug("PCI:%s %016llx-%016llx\n", + pci_name(dev), + (unsigned long long)res->start, + (unsigned long long)res->end); } /* Call machine specific resource fixup */ @@ -976,18 +1040,27 @@ static void __devinit pcibios_fixup_bridge(struct pci_bus *bus) continue; } - pr_debug("PCI:%s Bus rsrc %d %016llx-%016llx [%x]\n", + pr_debug("PCI:%s Bus rsrc %d %016llx-%016llx [%x] fixup...\n", pci_name(dev), i, (unsigned long long)res->start,\ (unsigned long long)res->end, (unsigned int)res->flags); + /* Perform fixup */ + fixup_resource(res, dev); + /* Try to detect uninitialized P2P bridge resources, * and clear them out so they get re-assigned later */ if (pcibios_uninitialized_bridge_resource(bus, res)) { res->flags = 0; pr_debug("PCI:%s (unassigned)\n", pci_name(dev)); + } else { + + pr_debug("PCI:%s %016llx-%016llx\n", + pci_name(dev), + (unsigned long long)res->start, + (unsigned long long)res->end); } } } @@ -1477,11 +1550,6 @@ int pcibios_enable_device(struct pci_dev *dev, int mask) return pci_enable_resources(dev, mask); } -resource_size_t pcibios_io_space_offset(struct pci_controller *hose) -{ - return (unsigned long) hose->io_base_virt - _IO_BASE; -} - static void __devinit pcibios_setup_phb_resources(struct pci_controller *hose, struct list_head *resources) { struct resource *res; @@ -1506,7 +1574,7 @@ static void __devinit pcibios_setup_phb_resources(struct pci_controller *hose, s (unsigned long long)res->start, (unsigned long long)res->end, (unsigned long)res->flags); - pci_add_resource_offset(resources, res, pcibios_io_space_offset(hose)); + 
pci_add_resource(resources, res); /* Hookup PHB Memory resources */ for (i = 0; i < 3; ++i) { @@ -1529,7 +1597,7 @@ static void __devinit pcibios_setup_phb_resources(struct pci_controller *hose, s (unsigned long long)res->start, (unsigned long long)res->end, (unsigned long)res->flags); - pci_add_resource_offset(resources, res, hose->pci_mem_offset); + pci_add_resource(resources, res); } pr_debug("PCI: PHB MEM offset = %016llx\n", diff --git a/trunk/arch/powerpc/kernel/pci_32.c b/trunk/arch/powerpc/kernel/pci_32.c index 4b06ec5a502e..fdd1a3d951dc 100644 --- a/trunk/arch/powerpc/kernel/pci_32.c +++ b/trunk/arch/powerpc/kernel/pci_32.c @@ -219,9 +219,9 @@ void __devinit pcibios_setup_phb_io_space(struct pci_controller *hose) struct resource *res = &hose->io_resource; /* Fixup IO space offset */ - io_offset = pcibios_io_space_offset(hose); - res->start += io_offset; - res->end += io_offset; + io_offset = (unsigned long)hose->io_base_virt - isa_io_base; + res->start = (res->start + io_offset) & 0xffffffffu; + res->end = (res->end + io_offset) & 0xffffffffu; } static int __init pcibios_init(void) diff --git a/trunk/arch/powerpc/kernel/pci_64.c b/trunk/arch/powerpc/kernel/pci_64.c index 94a54f61d341..3318d39b7d4c 100644 --- a/trunk/arch/powerpc/kernel/pci_64.c +++ b/trunk/arch/powerpc/kernel/pci_64.c @@ -33,6 +33,8 @@ #include #include +unsigned long pci_probe_only = 1; + /* pci_io_base -- the base address from which io bars are offsets. 
* This is the lowest I/O base address (so bar values are always positive), * and it *must* be the start of ISA space if an ISA bus exists because @@ -53,6 +55,9 @@ static int __init pcibios_init(void) */ ppc_md.phys_mem_access_prot = pci_phys_mem_access_prot; + if (pci_probe_only) + pci_add_flags(PCI_PROBE_ONLY); + /* On ppc64, we always enable PCI domains and we keep domain 0 * backward compatible in /proc for video cards */ @@ -168,7 +173,7 @@ static int __devinit pcibios_map_phb_io_space(struct pci_controller *hose) return -ENOMEM; /* Fixup hose IO resource */ - io_virt_offset = pcibios_io_space_offset(hose); + io_virt_offset = (unsigned long)hose->io_base_virt - _IO_BASE; hose->io_resource.start += io_virt_offset; hose->io_resource.end += io_virt_offset; diff --git a/trunk/arch/powerpc/kernel/pci_of_scan.c b/trunk/arch/powerpc/kernel/pci_of_scan.c index 89dde171a6fa..b37d0b5a796e 100644 --- a/trunk/arch/powerpc/kernel/pci_of_scan.c +++ b/trunk/arch/powerpc/kernel/pci_of_scan.c @@ -75,7 +75,6 @@ static void of_pci_parse_addrs(struct device_node *node, struct pci_dev *dev) { u64 base, size; unsigned int flags; - struct pci_bus_region region; struct resource *res; const u32 *addrs; u32 i; @@ -107,11 +106,10 @@ static void of_pci_parse_addrs(struct device_node *node, struct pci_dev *dev) printk(KERN_ERR "PCI: bad cfg reg num 0x%x\n", i); continue; } + res->start = base; + res->end = base + size - 1; res->flags = flags; res->name = pci_name(dev); - region.start = base; - region.end = base + size - 1; - pcibios_bus_to_resource(dev, res, ®ion); } } @@ -211,7 +209,6 @@ void __devinit of_scan_pci_bridge(struct pci_dev *dev) struct pci_bus *bus; const u32 *busrange, *ranges; int len, i, mode; - struct pci_bus_region region; struct resource *res; unsigned int flags; u64 size; @@ -273,10 +270,9 @@ void __devinit of_scan_pci_bridge(struct pci_dev *dev) res = bus->resource[i]; ++i; } + res->start = of_read_number(&ranges[1], 2); + res->end = res->start + size - 1; res->flags 
= flags; - region.start = of_read_number(&ranges[1], 2); - region.end = region.start + size - 1; - pcibios_bus_to_resource(dev, res, ®ion); } sprintf(bus->name, "PCI Bus %04x:%02x", pci_domain_nr(bus), bus->number); diff --git a/trunk/arch/powerpc/kernel/rtas_pci.c b/trunk/arch/powerpc/kernel/rtas_pci.c index 179af906dcda..517bd86bc3f0 100644 --- a/trunk/arch/powerpc/kernel/rtas_pci.c +++ b/trunk/arch/powerpc/kernel/rtas_pci.c @@ -279,7 +279,7 @@ void __init find_and_init_phbs(void) eeh_dev_phb_init(); /* - * PCI_PROBE_ONLY and PCI_REASSIGN_ALL_BUS can be set via properties + * pci_probe_only and pci_assign_all_buses can be set via properties * in chosen. */ if (of_chosen) { @@ -287,12 +287,8 @@ void __init find_and_init_phbs(void) prop = of_get_property(of_chosen, "linux,pci-probe-only", NULL); - if (prop) { - if (*prop) - pci_add_flags(PCI_PROBE_ONLY); - else - pci_clear_flags(PCI_PROBE_ONLY); - } + if (prop) + pci_probe_only = *prop; #ifdef CONFIG_PPC32 /* Will be made generic soon */ prop = of_get_property(of_chosen, diff --git a/trunk/arch/powerpc/platforms/maple/pci.c b/trunk/arch/powerpc/platforms/maple/pci.c index 465ee8f5c086..401e3f3f74c8 100644 --- a/trunk/arch/powerpc/platforms/maple/pci.c +++ b/trunk/arch/powerpc/platforms/maple/pci.c @@ -620,7 +620,7 @@ void __init maple_pci_init(void) } /* Tell pci.c to not change any resource allocations. 
*/ - pci_add_flags(PCI_PROBE_ONLY); + pci_probe_only = 1; } int maple_pci_get_legacy_ide_irq(struct pci_dev *pdev, int channel) diff --git a/trunk/arch/powerpc/platforms/pasemi/pci.c b/trunk/arch/powerpc/platforms/pasemi/pci.c index aa862713258c..b6a0ec45c695 100644 --- a/trunk/arch/powerpc/platforms/pasemi/pci.c +++ b/trunk/arch/powerpc/platforms/pasemi/pci.c @@ -229,6 +229,9 @@ void __init pas_pci_init(void) /* Setup the linkage between OF nodes and PHBs */ pci_devs_phb_init(); + + /* Use the common resource allocation mechanism */ + pci_probe_only = 1; } void __iomem *pasemi_pci_getcfgaddr(struct pci_dev *dev, int offset) diff --git a/trunk/arch/powerpc/platforms/powermac/pci.c b/trunk/arch/powerpc/platforms/powermac/pci.c index 43bbe1bda939..31a7d3a7ce25 100644 --- a/trunk/arch/powerpc/platforms/powermac/pci.c +++ b/trunk/arch/powerpc/platforms/powermac/pci.c @@ -1059,6 +1059,9 @@ void __init pmac_pci_init(void) } /* pmac_check_ht_link(); */ + /* We can allocate missing resources if any */ + pci_probe_only = 0; + #else /* CONFIG_PPC64 */ init_p2pbridge(); init_second_ohare(); diff --git a/trunk/arch/powerpc/platforms/powernv/pci-ioda.c b/trunk/arch/powerpc/platforms/powernv/pci-ioda.c index fbdd74dac3ac..5e155dfc4320 100644 --- a/trunk/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/trunk/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1299,14 +1299,15 @@ void __init pnv_pci_init_ioda1_phb(struct device_node *np) /* Setup MSI support */ pnv_pci_init_ioda_msis(phb); - /* We set both PCI_PROBE_ONLY and PCI_REASSIGN_ALL_RSRC. This is an + /* We set both probe_only and PCI_REASSIGN_ALL_RSRC. 
This is an * odd combination which essentially means that we skip all resource * fixups and assignments in the generic code, and do it all * ourselves here */ + pci_probe_only = 1; ppc_md.pcibios_fixup_phb = pnv_pci_ioda_fixup_phb; ppc_md.pcibios_enable_device_hook = pnv_pci_enable_device_hook; - pci_add_flags(PCI_PROBE_ONLY | PCI_REASSIGN_ALL_RSRC); + pci_add_flags(PCI_REASSIGN_ALL_RSRC); /* Reset IODA tables to a clean state */ rc = opal_pci_reset(phb_id, OPAL_PCI_IODA_TABLE_RESET, OPAL_ASSERT_RESET); diff --git a/trunk/arch/powerpc/platforms/powernv/pci.c b/trunk/arch/powerpc/platforms/powernv/pci.c index be3cfc5ceabb..214478d781ae 100644 --- a/trunk/arch/powerpc/platforms/powernv/pci.c +++ b/trunk/arch/powerpc/platforms/powernv/pci.c @@ -562,7 +562,10 @@ void __init pnv_pci_init(void) { struct device_node *np; - pci_add_flags(PCI_CAN_SKIP_ISA_ALIGN); + pci_set_flags(PCI_CAN_SKIP_ISA_ALIGN); + + /* We do not want to just probe */ + pci_probe_only = 0; /* OPAL absent, try POPAL first then RTAS detection of PHBs */ if (!firmware_has_feature(FW_FEATURE_OPAL)) { diff --git a/trunk/arch/powerpc/platforms/pseries/pci_dlpar.c b/trunk/arch/powerpc/platforms/pseries/pci_dlpar.c index 8b7bafa489c2..fbb21fc3080b 100644 --- a/trunk/arch/powerpc/platforms/pseries/pci_dlpar.c +++ b/trunk/arch/powerpc/platforms/pseries/pci_dlpar.c @@ -84,7 +84,7 @@ void pcibios_remove_pci_devices(struct pci_bus *bus) list_for_each_entry_safe(dev, tmp, &bus->devices, bus_list) { pr_debug(" * Removing %s...\n", pci_name(dev)); eeh_remove_bus_device(dev); - pci_stop_and_remove_bus_device(dev); + pci_remove_bus_device(dev); } } EXPORT_SYMBOL_GPL(pcibios_remove_pci_devices); diff --git a/trunk/arch/powerpc/platforms/pseries/setup.c b/trunk/arch/powerpc/platforms/pseries/setup.c index 51ecac920dd8..8f137af616af 100644 --- a/trunk/arch/powerpc/platforms/pseries/setup.c +++ b/trunk/arch/powerpc/platforms/pseries/setup.c @@ -383,9 +383,6 @@ static void __init pSeries_setup_arch(void) fwnmi_init(); - /* 
By default, only probe PCI (can be overriden by rtas_pci) */ - pci_add_flags(PCI_PROBE_ONLY); - /* Find and initialize PCI host bridges */ init_pci_config_tokens(); eeh_pseries_init(); diff --git a/trunk/arch/powerpc/platforms/wsp/wsp_pci.c b/trunk/arch/powerpc/platforms/wsp/wsp_pci.c index 763014cd1e62..d24b3acf858e 100644 --- a/trunk/arch/powerpc/platforms/wsp/wsp_pci.c +++ b/trunk/arch/powerpc/platforms/wsp/wsp_pci.c @@ -682,6 +682,7 @@ static int __init wsp_setup_one_phb(struct device_node *np) /* XXX Force re-assigning of everything for now */ pci_add_flags(PCI_REASSIGN_ALL_BUS | PCI_REASSIGN_ALL_RSRC | PCI_ENABLE_PROC_DOMAINS); + pci_probe_only = 0; /* Calculate how the TCE space is divided */ phb->dma32_base = 0; diff --git a/trunk/arch/sh/drivers/pci/pci.c b/trunk/arch/sh/drivers/pci/pci.c index 9d10a3cb8797..1e7b0e2e764d 100644 --- a/trunk/arch/sh/drivers/pci/pci.c +++ b/trunk/arch/sh/drivers/pci/pci.c @@ -37,20 +37,11 @@ static void __devinit pcibios_scanbus(struct pci_channel *hose) static int next_busno; static int need_domain_info; LIST_HEAD(resources); - struct resource *res; - resource_size_t offset; int i; struct pci_bus *bus; - for (i = 0; i < hose->nr_resources; i++) { - res = hose->resources + i; - offset = 0; - if (res->flags & IORESOURCE_IO) - offset = hose->io_offset; - else if (res->flags & IORESOURCE_MEM) - offset = hose->mem_offset; - pci_add_resource_offset(&resources, res, offset); - } + for (i = 0; i < hose->nr_resources; i++) + pci_add_resource(&resources, hose->resources + i); bus = pci_scan_root_bus(NULL, next_busno, hose->pci_ops, hose, &resources); @@ -152,12 +143,42 @@ static int __init pcibios_init(void) } subsys_initcall(pcibios_init); +static void pcibios_fixup_device_resources(struct pci_dev *dev, + struct pci_bus *bus) +{ + /* Update device resources. 
*/ + struct pci_channel *hose = bus->sysdata; + unsigned long offset = 0; + int i; + + for (i = 0; i < PCI_NUM_RESOURCES; i++) { + if (!dev->resource[i].start) + continue; + if (dev->resource[i].flags & IORESOURCE_IO) + offset = hose->io_offset; + else if (dev->resource[i].flags & IORESOURCE_MEM) + offset = hose->mem_offset; + + dev->resource[i].start += offset; + dev->resource[i].end += offset; + } +} + /* * Called after each bus is probed, but before its children * are examined. */ void __devinit pcibios_fixup_bus(struct pci_bus *bus) { + struct pci_dev *dev; + struct list_head *ln; + + for (ln = bus->devices.next; ln != &bus->devices; ln = ln->next) { + dev = pci_dev_b(ln); + + if ((dev->class >> 8) != PCI_CLASS_BRIDGE_PCI) + pcibios_fixup_device_resources(dev, bus); + } } /* @@ -187,6 +208,36 @@ resource_size_t pcibios_align_resource(void *data, const struct resource *res, return start; } +void pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region, + struct resource *res) +{ + struct pci_channel *hose = dev->sysdata; + unsigned long offset = 0; + + if (res->flags & IORESOURCE_IO) + offset = hose->io_offset; + else if (res->flags & IORESOURCE_MEM) + offset = hose->mem_offset; + + region->start = res->start - offset; + region->end = res->end - offset; +} + +void pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res, + struct pci_bus_region *region) +{ + struct pci_channel *hose = dev->sysdata; + unsigned long offset = 0; + + if (res->flags & IORESOURCE_IO) + offset = hose->io_offset; + else if (res->flags & IORESOURCE_MEM) + offset = hose->mem_offset; + + res->start = region->start + offset; + res->end = region->end + offset; +} + int pcibios_enable_device(struct pci_dev *dev, int mask) { return pci_enable_resources(dev, mask); @@ -330,6 +381,8 @@ EXPORT_SYMBOL(pci_iounmap); #endif /* CONFIG_GENERIC_IOMAP */ #ifdef CONFIG_HOTPLUG +EXPORT_SYMBOL(pcibios_resource_to_bus); +EXPORT_SYMBOL(pcibios_bus_to_resource); 
EXPORT_SYMBOL(PCIBIOS_MIN_IO); EXPORT_SYMBOL(PCIBIOS_MIN_MEM); #endif diff --git a/trunk/arch/sh/include/asm/pci.h b/trunk/arch/sh/include/asm/pci.h index bff96c2e7d25..cb21e2399dc1 100644 --- a/trunk/arch/sh/include/asm/pci.h +++ b/trunk/arch/sh/include/asm/pci.h @@ -114,6 +114,12 @@ static inline void pci_dma_burst_advice(struct pci_dev *pdev, /* Board-specific fixup routines. */ int pcibios_map_platform_irq(const struct pci_dev *dev, u8 slot, u8 pin); +extern void pcibios_resource_to_bus(struct pci_dev *dev, + struct pci_bus_region *region, struct resource *res); + +extern void pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res, + struct pci_bus_region *region); + #define pci_domain_nr(bus) ((struct pci_channel *)(bus)->sysdata)->index static inline int pci_proc_domain(struct pci_bus *bus) diff --git a/trunk/arch/sparc/include/asm/pci_32.h b/trunk/arch/sparc/include/asm/pci_32.h index dc503297481f..6de7f7bf956a 100644 --- a/trunk/arch/sparc/include/asm/pci_32.h +++ b/trunk/arch/sparc/include/asm/pci_32.h @@ -52,6 +52,14 @@ static inline void pci_dma_burst_advice(struct pci_dev *pdev, * 64Kbytes by the Host controller. 
*/ +extern void +pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region, + struct resource *res); + +extern void +pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res, + struct pci_bus_region *region); + static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel) { return PCI_IRQ_NONE; diff --git a/trunk/arch/sparc/include/asm/pci_64.h b/trunk/arch/sparc/include/asm/pci_64.h index 1633b718d3bc..755a4bb6bcd3 100644 --- a/trunk/arch/sparc/include/asm/pci_64.h +++ b/trunk/arch/sparc/include/asm/pci_64.h @@ -73,6 +73,14 @@ extern int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, enum pci_mmap_state mmap_state, int write_combine); +extern void +pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region, + struct resource *res); + +extern void +pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res, + struct pci_bus_region *region); + static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel) { return PCI_IRQ_NONE; diff --git a/trunk/arch/sparc/kernel/leon_pci.c b/trunk/arch/sparc/kernel/leon_pci.c index aba6b958b2a5..c7bec25fdb1c 100644 --- a/trunk/arch/sparc/kernel/leon_pci.c +++ b/trunk/arch/sparc/kernel/leon_pci.c @@ -15,19 +15,14 @@ /* The LEON architecture does not rely on a BIOS or bootloader to setup * PCI for us. The Linux generic routines are used to setup resources, - * reset values of configuration-space register settings are preserved. - * - * PCI Memory and Prefetchable Memory is direct-mapped. However I/O Space is - * accessed through a Window which is translated to low 64KB in PCI space, the - * first 4KB is not used so 60KB is available. + * reset values of confuration-space registers settings ae preseved. 
*/ void leon_pci_init(struct platform_device *ofdev, struct leon_pci_info *info) { LIST_HEAD(resources); struct pci_bus *root_bus; - pci_add_resource_offset(&resources, &info->io_space, - info->io_space.start - 0x1000); + pci_add_resource(&resources, &info->io_space); pci_add_resource(&resources, &info->mem_space); root_bus = pci_scan_root_bus(&ofdev->dev, 0, info->ops, info, @@ -43,6 +38,44 @@ void leon_pci_init(struct platform_device *ofdev, struct leon_pci_info *info) } } +/* PCI Memory and Prefetchable Memory is direct-mapped. However I/O Space is + * accessed through a Window which is translated to low 64KB in PCI space, the + * first 4KB is not used so 60KB is available. + * + * This function is used by generic code to translate resource addresses into + * PCI addresses. + */ +void pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region, + struct resource *res) +{ + struct leon_pci_info *info = dev->bus->sysdata; + + region->start = res->start; + region->end = res->end; + + if (res->flags & IORESOURCE_IO) { + region->start -= (info->io_space.start - 0x1000); + region->end -= (info->io_space.start - 0x1000); + } +} +EXPORT_SYMBOL(pcibios_resource_to_bus); + +/* see pcibios_resource_to_bus() comment */ +void pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res, + struct pci_bus_region *region) +{ + struct leon_pci_info *info = dev->bus->sysdata; + + res->start = region->start; + res->end = region->end; + + if (res->flags & IORESOURCE_IO) { + res->start += (info->io_space.start - 0x1000); + res->end += (info->io_space.start - 0x1000); + } +} +EXPORT_SYMBOL(pcibios_bus_to_resource); + void __devinit pcibios_fixup_bus(struct pci_bus *pbus) { struct leon_pci_info *info = pbus->sysdata; diff --git a/trunk/arch/sparc/kernel/pci.c b/trunk/arch/sparc/kernel/pci.c index fdaf21811670..bb8bc2e519ac 100644 --- a/trunk/arch/sparc/kernel/pci.c +++ b/trunk/arch/sparc/kernel/pci.c @@ -375,6 +375,13 @@ static void __devinit apb_calc_first_last(u8 
map, u32 *first_p, u32 *last_p) *last_p = last; } +static void pci_resource_adjust(struct resource *res, + struct resource *root) +{ + res->start += root->start; + res->end += root->start; +} + /* For PCI bus devices which lack a 'ranges' property we interrogate * the config space values to set the resources, just like the generic * Linux PCI probing code does. @@ -383,8 +390,7 @@ static void __devinit pci_cfg_fake_ranges(struct pci_dev *dev, struct pci_bus *bus, struct pci_pbm_info *pbm) { - struct pci_bus_region region; - struct resource *res, res2; + struct resource *res; u8 io_base_lo, io_limit_lo; u16 mem_base_lo, mem_limit_lo; unsigned long base, limit; @@ -406,14 +412,11 @@ static void __devinit pci_cfg_fake_ranges(struct pci_dev *dev, res = bus->resource[0]; if (base <= limit) { res->flags = (io_base_lo & PCI_IO_RANGE_TYPE_MASK) | IORESOURCE_IO; - res2.flags = res->flags; - region.start = base; - region.end = limit + 0xfff; - pcibios_bus_to_resource(dev, &res2, ®ion); if (!res->start) - res->start = res2.start; + res->start = base; if (!res->end) - res->end = res2.end; + res->end = limit + 0xfff; + pci_resource_adjust(res, &pbm->io_space); } pci_read_config_word(dev, PCI_MEMORY_BASE, &mem_base_lo); @@ -425,9 +428,9 @@ static void __devinit pci_cfg_fake_ranges(struct pci_dev *dev, if (base <= limit) { res->flags = ((mem_base_lo & PCI_MEMORY_RANGE_TYPE_MASK) | IORESOURCE_MEM); - region.start = base; - region.end = limit + 0xfffff; - pcibios_bus_to_resource(dev, res, ®ion); + res->start = base; + res->end = limit + 0xfffff; + pci_resource_adjust(res, &pbm->mem_space); } pci_read_config_word(dev, PCI_PREF_MEMORY_BASE, &mem_base_lo); @@ -456,9 +459,9 @@ static void __devinit pci_cfg_fake_ranges(struct pci_dev *dev, if (base <= limit) { res->flags = ((mem_base_lo & PCI_MEMORY_RANGE_TYPE_MASK) | IORESOURCE_MEM | IORESOURCE_PREFETCH); - region.start = base; - region.end = limit + 0xfffff; - pcibios_bus_to_resource(dev, res, ®ion); + res->start = base; + res->end = 
limit + 0xfffff; + pci_resource_adjust(res, &pbm->mem_space); } } @@ -469,7 +472,6 @@ static void __devinit apb_fake_ranges(struct pci_dev *dev, struct pci_bus *bus, struct pci_pbm_info *pbm) { - struct pci_bus_region region; struct resource *res; u32 first, last; u8 map; @@ -477,18 +479,18 @@ static void __devinit apb_fake_ranges(struct pci_dev *dev, pci_read_config_byte(dev, APB_IO_ADDRESS_MAP, &map); apb_calc_first_last(map, &first, &last); res = bus->resource[0]; + res->start = (first << 21); + res->end = (last << 21) + ((1 << 21) - 1); res->flags = IORESOURCE_IO; - region.start = (first << 21); - region.end = (last << 21) + ((1 << 21) - 1); - pcibios_bus_to_resource(dev, res, ®ion); + pci_resource_adjust(res, &pbm->io_space); pci_read_config_byte(dev, APB_MEM_ADDRESS_MAP, &map); apb_calc_first_last(map, &first, &last); res = bus->resource[1]; + res->start = (first << 21); + res->end = (last << 21) + ((1 << 21) - 1); res->flags = IORESOURCE_MEM; - region.start = (first << 21); - region.end = (last << 21) + ((1 << 21) - 1); - pcibios_bus_to_resource(dev, res, ®ion); + pci_resource_adjust(res, &pbm->mem_space); } static void __devinit pci_of_scan_bus(struct pci_pbm_info *pbm, @@ -504,7 +506,6 @@ static void __devinit of_scan_pci_bridge(struct pci_pbm_info *pbm, struct pci_bus *bus; const u32 *busrange, *ranges; int len, i, simba; - struct pci_bus_region region; struct resource *res; unsigned int flags; u64 size; @@ -555,6 +556,8 @@ static void __devinit of_scan_pci_bridge(struct pci_pbm_info *pbm, } i = 1; for (; len >= 32; len -= 32, ranges += 8) { + struct resource *root; + flags = pci_parse_of_flags(ranges[0]); size = GET_64BIT(ranges, 6); if (flags == 0 || size == 0) @@ -566,6 +569,7 @@ static void __devinit of_scan_pci_bridge(struct pci_pbm_info *pbm, " for bridge %s\n", node->full_name); continue; } + root = &pbm->io_space; } else { if (i >= PCI_NUM_RESOURCES - PCI_BRIDGE_RESOURCES) { printk(KERN_ERR "PCI: too many memory ranges" @@ -574,12 +578,18 @@ 
static void __devinit of_scan_pci_bridge(struct pci_pbm_info *pbm, } res = bus->resource[i]; ++i; + root = &pbm->mem_space; } + res->start = GET_64BIT(ranges, 1); + res->end = res->start + size - 1; res->flags = flags; - region.start = GET_64BIT(ranges, 1); - region.end = region.start + size - 1; - pcibios_bus_to_resource(dev, res, ®ion); + + /* Another way to implement this would be to add an of_device + * layer routine that can calculate a resource for a given + * range property value in a PCI device. + */ + pci_resource_adjust(res, root); } after_ranges: sprintf(bus->name, "PCI Bus %04x:%02x", pci_domain_nr(bus), @@ -681,10 +691,8 @@ struct pci_bus * __devinit pci_scan_one_pbm(struct pci_pbm_info *pbm, printk("PCI: Scanning PBM %s\n", node->full_name); - pci_add_resource_offset(&resources, &pbm->io_space, - pbm->io_space.start); - pci_add_resource_offset(&resources, &pbm->mem_space, - pbm->mem_space.start); + pci_add_resource(&resources, &pbm->io_space); + pci_add_resource(&resources, &pbm->mem_space); bus = pci_create_root_bus(parent, pbm->pci_first_busno, pbm->pci_ops, pbm, &resources); if (!bus) { @@ -747,6 +755,46 @@ int pcibios_enable_device(struct pci_dev *dev, int mask) return 0; } +void pcibios_resource_to_bus(struct pci_dev *pdev, struct pci_bus_region *region, + struct resource *res) +{ + struct pci_pbm_info *pbm = pdev->bus->sysdata; + struct resource zero_res, *root; + + zero_res.start = 0; + zero_res.end = 0; + zero_res.flags = res->flags; + + if (res->flags & IORESOURCE_IO) + root = &pbm->io_space; + else + root = &pbm->mem_space; + + pci_resource_adjust(&zero_res, root); + + region->start = res->start - zero_res.start; + region->end = res->end - zero_res.start; +} +EXPORT_SYMBOL(pcibios_resource_to_bus); + +void pcibios_bus_to_resource(struct pci_dev *pdev, struct resource *res, + struct pci_bus_region *region) +{ + struct pci_pbm_info *pbm = pdev->bus->sysdata; + struct resource *root; + + res->start = region->start; + res->end = region->end; + + 
if (res->flags & IORESOURCE_IO) + root = &pbm->io_space; + else + root = &pbm->mem_space; + + pci_resource_adjust(res, root); +} +EXPORT_SYMBOL(pcibios_bus_to_resource); + char * __devinit pcibios_setup(char *str) { return str; diff --git a/trunk/arch/unicore32/include/asm/pci.h b/trunk/arch/unicore32/include/asm/pci.h index f5e108f4a151..dd3867727c35 100644 --- a/trunk/arch/unicore32/include/asm/pci.h +++ b/trunk/arch/unicore32/include/asm/pci.h @@ -14,7 +14,6 @@ #ifdef __KERNEL__ #include -#include #include #include /* for PCIBIOS_MIN_* */ diff --git a/trunk/arch/unicore32/kernel/pci.c b/trunk/arch/unicore32/kernel/pci.c index 2fc2b1ba825e..a8f07fe10cad 100644 --- a/trunk/arch/unicore32/kernel/pci.c +++ b/trunk/arch/unicore32/kernel/pci.c @@ -21,6 +21,7 @@ #include static int debug_pci; +static int use_firmware; #define CONFIG_CMD(bus, devfn, where) \ (0x80000000 | (bus->number << 16) | (devfn << 8) | (where & ~3)) @@ -275,7 +276,7 @@ static int __init pci_common_init(void) pci_fixup_irqs(pci_common_swizzle, pci_puv3_map_irq); - if (!pci_has_flag(PCI_PROBE_ONLY)) { + if (!use_firmware) { /* * Size the bridge windows. */ @@ -302,7 +303,7 @@ char * __devinit pcibios_setup(char *str) debug_pci = 1; return NULL; } else if (!strcmp(str, "firmware")) { - pci_add_flags(PCI_PROBE_ONLY); + use_firmware = 1; return NULL; } return str; diff --git a/trunk/arch/x86/include/asm/kgdb.h b/trunk/arch/x86/include/asm/kgdb.h index 332f98c9111f..77e95f54570a 100644 --- a/trunk/arch/x86/include/asm/kgdb.h +++ b/trunk/arch/x86/include/asm/kgdb.h @@ -64,15 +64,11 @@ enum regnames { GDB_PS, /* 17 */ GDB_CS, /* 18 */ GDB_SS, /* 19 */ - GDB_DS, /* 20 */ - GDB_ES, /* 21 */ - GDB_FS, /* 22 */ - GDB_GS, /* 23 */ }; #define GDB_ORIG_AX 57 -#define DBG_MAX_REG_NUM 24 -/* 17 64 bit regs and 5 32 bit regs */ -#define NUMREGBYTES ((17 * 8) + (5 * 4)) +#define DBG_MAX_REG_NUM 20 +/* 17 64 bit regs and 3 32 bit regs */ +#define NUMREGBYTES ((17 * 8) + (3 * 4)) #endif /* ! 
CONFIG_X86_32 */ static inline void arch_kgdb_breakpoint(void) diff --git a/trunk/arch/x86/kernel/kgdb.c b/trunk/arch/x86/kernel/kgdb.c index fdc37b3d0ce3..faba5771acad 100644 --- a/trunk/arch/x86/kernel/kgdb.c +++ b/trunk/arch/x86/kernel/kgdb.c @@ -67,6 +67,8 @@ struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = { "ss", 4, offsetof(struct pt_regs, ss) }, { "ds", 4, offsetof(struct pt_regs, ds) }, { "es", 4, offsetof(struct pt_regs, es) }, + { "fs", 4, -1 }, + { "gs", 4, -1 }, #else { "ax", 8, offsetof(struct pt_regs, ax) }, { "bx", 8, offsetof(struct pt_regs, bx) }, @@ -88,11 +90,7 @@ struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = { "flags", 4, offsetof(struct pt_regs, flags) }, { "cs", 4, offsetof(struct pt_regs, cs) }, { "ss", 4, offsetof(struct pt_regs, ss) }, - { "ds", 4, -1 }, - { "es", 4, -1 }, #endif - { "fs", 4, -1 }, - { "gs", 4, -1 }, }; int dbg_set_reg(int regno, void *mem, struct pt_regs *regs) diff --git a/trunk/arch/x86/kernel/pci-dma.c b/trunk/arch/x86/kernel/pci-dma.c index 28e5e06fcba4..1c4d769e21ea 100644 --- a/trunk/arch/x86/kernel/pci-dma.c +++ b/trunk/arch/x86/kernel/pci-dma.c @@ -262,11 +262,10 @@ rootfs_initcall(pci_iommu_init); static __devinit void via_no_dac(struct pci_dev *dev) { - if (forbid_dac == 0) { + if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && forbid_dac == 0) { dev_info(&dev->dev, "disabling DAC on VIA PCI bridge\n"); forbid_dac = 1; } } -DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_VENDOR_ID_VIA, PCI_ANY_ID, - PCI_CLASS_BRIDGE_PCI, 8, via_no_dac); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_ANY_ID, via_no_dac); #endif diff --git a/trunk/arch/x86/pci/acpi.c b/trunk/arch/x86/pci/acpi.c index ed2835e148b5..49a5cb55429b 100644 --- a/trunk/arch/x86/pci/acpi.c +++ b/trunk/arch/x86/pci/acpi.c @@ -416,12 +416,7 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_pci_root *root) kfree(sd); } else { get_current_resources(device, busnum, domain, &resources); - - /* - * _CRS with no apertures is normal, so only fall back to - * 
defaults or native bridge info if we're ignoring _CRS. - */ - if (!pci_use_crs) + if (list_empty(&resources)) x86_pci_root_bus_resources(busnum, &resources); bus = pci_create_root_bus(NULL, busnum, &pci_root_ops, sd, &resources); diff --git a/trunk/arch/x86/pci/fixup.c b/trunk/arch/x86/pci/fixup.c index d0e6e403b4f6..6dd89555fbfa 100644 --- a/trunk/arch/x86/pci/fixup.c +++ b/trunk/arch/x86/pci/fixup.c @@ -164,11 +164,11 @@ DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8367_0, pci_fixup_ */ static void __devinit pci_fixup_transparent_bridge(struct pci_dev *dev) { - if ((dev->device & 0xff00) == 0x2400) + if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && + (dev->device & 0xff00) == 0x2400) dev->transparent = 1; } -DECLARE_PCI_FIXUP_CLASS_HEADER(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, - PCI_CLASS_BRIDGE_PCI, 8, pci_fixup_transparent_bridge); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, pci_fixup_transparent_bridge); /* * Fixup for C1 Halt Disconnect problem on nForce2 systems. @@ -322,6 +322,9 @@ static void __devinit pci_fixup_video(struct pci_dev *pdev) struct pci_bus *bus; u16 config; + if ((pdev->class >> 8) != PCI_CLASS_DISPLAY_VGA) + return; + /* Is VGA routed to us? */ bus = pdev->bus; while (bus) { @@ -350,8 +353,7 @@ static void __devinit pci_fixup_video(struct pci_dev *pdev) dev_printk(KERN_DEBUG, &pdev->dev, "Boot video device\n"); } } -DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_ANY_ID, PCI_ANY_ID, - PCI_CLASS_DISPLAY_VGA, 8, pci_fixup_video); +DECLARE_PCI_FIXUP_FINAL(PCI_ANY_ID, PCI_ANY_ID, pci_fixup_video); static const struct dmi_system_id __devinitconst msi_k8t_dmi_table[] = { diff --git a/trunk/arch/x86/pci/i386.c b/trunk/arch/x86/pci/i386.c index 831971e731f7..91821a1a0c3a 100644 --- a/trunk/arch/x86/pci/i386.c +++ b/trunk/arch/x86/pci/i386.c @@ -39,87 +39,6 @@ #include -/* - * This list of dynamic mappings is for temporarily maintaining - * original BIOS BAR addresses for possible reinstatement. 
- */ -struct pcibios_fwaddrmap { - struct list_head list; - struct pci_dev *dev; - resource_size_t fw_addr[DEVICE_COUNT_RESOURCE]; -}; - -static LIST_HEAD(pcibios_fwaddrmappings); -static DEFINE_SPINLOCK(pcibios_fwaddrmap_lock); - -/* Must be called with 'pcibios_fwaddrmap_lock' lock held. */ -static struct pcibios_fwaddrmap *pcibios_fwaddrmap_lookup(struct pci_dev *dev) -{ - struct pcibios_fwaddrmap *map; - - WARN_ON(!spin_is_locked(&pcibios_fwaddrmap_lock)); - - list_for_each_entry(map, &pcibios_fwaddrmappings, list) - if (map->dev == dev) - return map; - - return NULL; -} - -static void -pcibios_save_fw_addr(struct pci_dev *dev, int idx, resource_size_t fw_addr) -{ - unsigned long flags; - struct pcibios_fwaddrmap *map; - - spin_lock_irqsave(&pcibios_fwaddrmap_lock, flags); - map = pcibios_fwaddrmap_lookup(dev); - if (!map) { - spin_unlock_irqrestore(&pcibios_fwaddrmap_lock, flags); - map = kzalloc(sizeof(*map), GFP_KERNEL); - if (!map) - return; - - map->dev = pci_dev_get(dev); - map->fw_addr[idx] = fw_addr; - INIT_LIST_HEAD(&map->list); - - spin_lock_irqsave(&pcibios_fwaddrmap_lock, flags); - list_add_tail(&map->list, &pcibios_fwaddrmappings); - } else - map->fw_addr[idx] = fw_addr; - spin_unlock_irqrestore(&pcibios_fwaddrmap_lock, flags); -} - -resource_size_t pcibios_retrieve_fw_addr(struct pci_dev *dev, int idx) -{ - unsigned long flags; - struct pcibios_fwaddrmap *map; - resource_size_t fw_addr = 0; - - spin_lock_irqsave(&pcibios_fwaddrmap_lock, flags); - map = pcibios_fwaddrmap_lookup(dev); - if (map) - fw_addr = map->fw_addr[idx]; - spin_unlock_irqrestore(&pcibios_fwaddrmap_lock, flags); - - return fw_addr; -} - -static void pcibios_fw_addr_list_del(void) -{ - unsigned long flags; - struct pcibios_fwaddrmap *entry, *next; - - spin_lock_irqsave(&pcibios_fwaddrmap_lock, flags); - list_for_each_entry_safe(entry, next, &pcibios_fwaddrmappings, list) { - list_del(&entry->list); - pci_dev_put(entry->dev); - kfree(entry); - } - 
spin_unlock_irqrestore(&pcibios_fwaddrmap_lock, flags); -} - static int skip_isa_ioresource_align(struct pci_dev *dev) { @@ -263,8 +182,7 @@ static void __init pcibios_allocate_resources(int pass) idx, r, disabled, pass); if (pci_claim_resource(dev, idx) < 0) { /* We'll assign a new address later */ - pcibios_save_fw_addr(dev, - idx, r->start); + dev->fw_addr[idx] = r->start; r->end -= r->start; r->start = 0; } @@ -310,7 +228,6 @@ static int __init pcibios_assign_resources(void) } pci_assign_unassigned_resources(); - pcibios_fw_addr_list_del(); return 0; } diff --git a/trunk/arch/x86/pci/mrst.c b/trunk/arch/x86/pci/mrst.c index 140942f66b31..cb29191cee58 100644 --- a/trunk/arch/x86/pci/mrst.c +++ b/trunk/arch/x86/pci/mrst.c @@ -43,8 +43,6 @@ #define PCI_FIXED_BAR_4_SIZE 0x14 #define PCI_FIXED_BAR_5_SIZE 0x1c -static int pci_soc_mode = 0; - /** * fixed_bar_cap - return the offset of the fixed BAR cap if found * @bus: PCI bus @@ -150,9 +148,7 @@ static bool type1_access_ok(unsigned int bus, unsigned int devfn, int reg) */ if (reg >= 0x100 || reg == PCI_STATUS || reg == PCI_HEADER_TYPE) return 0; - if (bus == 0 && (devfn == PCI_DEVFN(2, 0) - || devfn == PCI_DEVFN(0, 0) - || devfn == PCI_DEVFN(3, 0))) + if (bus == 0 && (devfn == PCI_DEVFN(2, 0) || devfn == PCI_DEVFN(0, 0))) return 1; return 0; /* langwell on others */ } @@ -235,43 +231,14 @@ struct pci_ops pci_mrst_ops = { */ int __init pci_mrst_init(void) { - printk(KERN_INFO "Intel MID platform detected, using MID PCI ops\n"); + printk(KERN_INFO "Moorestown platform detected, using MRST PCI ops\n"); pci_mmcfg_late_init(); pcibios_enable_irq = mrst_pci_irq_enable; pci_root_ops = pci_mrst_ops; - pci_soc_mode = 1; /* Continue with standard init */ return 1; } -/* Langwell devices are not true pci devices, they are not subject to 10 ms - * d3 to d0 delay required by pci spec. - */ -static void __devinit pci_d3delay_fixup(struct pci_dev *dev) -{ - /* PCI fixups are effectively decided compile time. 
If we have a dual - SoC/non-SoC kernel we don't want to mangle d3 on non SoC devices */ - if (!pci_soc_mode) - return; - /* true pci devices in lincroft should allow type 1 access, the rest - * are langwell fake pci devices. - */ - if (type1_access_ok(dev->bus->number, dev->devfn, PCI_DEVICE_ID)) - return; - dev->d3_delay = 0; -} -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, pci_d3delay_fixup); - -static void __devinit mrst_power_off_unused_dev(struct pci_dev *dev) -{ - pci_set_power_state(dev, PCI_D3cold); -} -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0801, mrst_power_off_unused_dev); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0809, mrst_power_off_unused_dev); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x080C, mrst_power_off_unused_dev); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0812, mrst_power_off_unused_dev); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0815, mrst_power_off_unused_dev); - /* * Langwell devices reside at fixed offsets, don't try to move them. 
*/ @@ -281,9 +248,6 @@ static void __devinit pci_fixed_bar_fixup(struct pci_dev *dev) u32 size; int i; - if (!pci_soc_mode) - return; - /* Must have extended configuration space */ if (dev->cfg_size < PCIE_CAP_OFFSET + 4) return; diff --git a/trunk/arch/xtensa/kernel/pci.c b/trunk/arch/xtensa/kernel/pci.c index eb30e356f5be..61045c192e88 100644 --- a/trunk/arch/xtensa/kernel/pci.c +++ b/trunk/arch/xtensa/kernel/pci.c @@ -153,7 +153,7 @@ static void __init pci_controller_apertures(struct pci_controller *pci_ctrl, } res->start += io_offset; res->end += io_offset; - pci_add_resource_offset(resources, res, io_offset); + pci_add_resource(resources, res); for (i = 0; i < 3; i++) { res = &pci_ctrl->mem_resources[i]; @@ -200,9 +200,24 @@ subsys_initcall(pcibios_init); void __init pcibios_fixup_bus(struct pci_bus *bus) { + struct pci_controller *pci_ctrl = bus->sysdata; + struct resource *res; + unsigned long io_offset; + int i; + + io_offset = (unsigned long)pci_ctrl->io_space.base; if (bus->parent) { /* This is a subordinate bridge */ pci_read_bridge_bases(bus); + + for (i = 0; i < 4; i++) { + if ((res = bus->resource[i]) == NULL || !res->flags) + continue; + if (io_offset && (res->flags & IORESOURCE_IO)) { + res->start += io_offset; + res->end += io_offset; + } + } } } diff --git a/trunk/drivers/hwmon/fam15h_power.c b/trunk/drivers/hwmon/fam15h_power.c index 523f8fb9e7d9..930370de5201 100644 --- a/trunk/drivers/hwmon/fam15h_power.c +++ b/trunk/drivers/hwmon/fam15h_power.c @@ -60,7 +60,7 @@ static ssize_t show_power(struct device *dev, pci_bus_read_config_dword(f4->bus, PCI_DEVFN(PCI_SLOT(f4->devfn), 5), REG_TDP_RUNNING_AVERAGE, &val); running_avg_capture = (val >> 4) & 0x3fffff; - running_avg_capture = sign_extend32(running_avg_capture, 22); + running_avg_capture = sign_extend32(running_avg_capture, 21); running_avg_range = val & 0xf; pci_bus_read_config_dword(f4->bus, PCI_DEVFN(PCI_SLOT(f4->devfn), 5), diff --git a/trunk/drivers/i2c/busses/Kconfig 
b/trunk/drivers/i2c/busses/Kconfig index 71c1b0a7535c..3101dd59e379 100644 --- a/trunk/drivers/i2c/busses/Kconfig +++ b/trunk/drivers/i2c/busses/Kconfig @@ -369,21 +369,6 @@ config I2C_DESIGNWARE_PCI This driver can also be built as a module. If so, the module will be called i2c-designware-pci. -config I2C_EG20T - tristate "Intel EG20T PCH/LAPIS Semicon IOH(ML7213/ML7223/ML7831) I2C" - depends on PCI - help - This driver is for PCH(Platform controller Hub) I2C of EG20T which - is an IOH(Input/Output Hub) for x86 embedded processor. - This driver can access PCH I2C bus device. - - This driver also can be used for LAPIS Semiconductor IOH(Input/ - Output Hub), ML7213, ML7223 and ML7831. - ML7213 IOH is for IVI(In-Vehicle Infotainment) use, ML7223 IOH is - for MP(Media Phone) use and ML7831 IOH is for general purpose use. - ML7213/ML7223/ML7831 is companion chip for Intel Atom E6xx series. - ML7213/ML7223/ML7831 is completely compatible for Intel EG20T PCH. - config I2C_GPIO tristate "GPIO-based bitbanging I2C" depends on GENERIC_GPIO @@ -645,16 +630,6 @@ config I2C_SIMTEC This driver can also be built as a module. If so, the module will be called i2c-simtec. -config I2C_SIRF - tristate "CSR SiRFprimaII I2C interface" - depends on ARCH_PRIMA2 - help - If you say yes to this option, support will be included for the - CSR SiRFprimaII I2C interface. - - This driver can also be built as a module. If so, the module - will be called i2c-sirf. - config I2C_STU300 tristate "ST Microelectronics DDC I2C interface" depends on MACH_U300 @@ -706,15 +681,20 @@ config I2C_XILINX This driver can also be built as a module. If so, the module will be called xilinx_i2c. -config I2C_XLR - tristate "XLR I2C support" - depends on CPU_XLR +config I2C_EG20T + tristate "Intel EG20T PCH/LAPIS Semicon IOH(ML7213/ML7223/ML7831) I2C" + depends on PCI help - This driver enables support for the on-chip I2C interface of - the Netlogic XLR/XLS MIPS processors. 
+ This driver is for PCH(Platform controller Hub) I2C of EG20T which + is an IOH(Input/Output Hub) for x86 embedded processor. + This driver can access PCH I2C bus device. - This driver can also be built as a module. If so, the module - will be called i2c-xlr. + This driver also can be used for LAPIS Semiconductor IOH(Input/ + Output Hub), ML7213, ML7223 and ML7831. + ML7213 IOH is for IVI(In-Vehicle Infotainment) use, ML7223 IOH is + for MP(Media Phone) use and ML7831 IOH is for general purpose use. + ML7213/ML7223/ML7831 is companion chip for Intel Atom E6xx series. + ML7213/ML7223/ML7831 is completely compatible for Intel EG20T PCH. comment "External I2C/SMBus adapter drivers" diff --git a/trunk/drivers/i2c/busses/Makefile b/trunk/drivers/i2c/busses/Makefile index 569567b0d027..fba6da60aa0e 100644 --- a/trunk/drivers/i2c/busses/Makefile +++ b/trunk/drivers/i2c/busses/Makefile @@ -37,7 +37,6 @@ obj-$(CONFIG_I2C_DESIGNWARE_PLATFORM) += i2c-designware-platform.o i2c-designware-platform-objs := i2c-designware-platdrv.o i2c-designware-core.o obj-$(CONFIG_I2C_DESIGNWARE_PCI) += i2c-designware-pci.o i2c-designware-pci-objs := i2c-designware-pcidrv.o i2c-designware-core.o -obj-$(CONFIG_I2C_EG20T) += i2c-eg20t.o obj-$(CONFIG_I2C_GPIO) += i2c-gpio.o obj-$(CONFIG_I2C_HIGHLANDER) += i2c-highlander.o obj-$(CONFIG_I2C_IBM_IIC) += i2c-ibm_iic.o @@ -64,13 +63,12 @@ obj-$(CONFIG_I2C_S6000) += i2c-s6000.o obj-$(CONFIG_I2C_SH7760) += i2c-sh7760.o obj-$(CONFIG_I2C_SH_MOBILE) += i2c-sh_mobile.o obj-$(CONFIG_I2C_SIMTEC) += i2c-simtec.o -obj-$(CONFIG_I2C_SIRF) += i2c-sirf.o obj-$(CONFIG_I2C_STU300) += i2c-stu300.o obj-$(CONFIG_I2C_TEGRA) += i2c-tegra.o obj-$(CONFIG_I2C_VERSATILE) += i2c-versatile.o obj-$(CONFIG_I2C_OCTEON) += i2c-octeon.o obj-$(CONFIG_I2C_XILINX) += i2c-xiic.o -obj-$(CONFIG_I2C_XLR) += i2c-xlr.o +obj-$(CONFIG_I2C_EG20T) += i2c-eg20t.o # External I2C/SMBus adapter drivers obj-$(CONFIG_I2C_DIOLAN_U2C) += i2c-diolan-u2c.o diff --git 
a/trunk/drivers/i2c/busses/i2c-designware-platdrv.c b/trunk/drivers/i2c/busses/i2c-designware-platdrv.c index 4ba589ab8614..5244c4724df7 100644 --- a/trunk/drivers/i2c/busses/i2c-designware-platdrv.c +++ b/trunk/drivers/i2c/busses/i2c-designware-platdrv.c @@ -214,7 +214,7 @@ static int __init dw_i2c_init_driver(void) { return platform_driver_probe(&dw_i2c_driver, dw_i2c_probe); } -subsys_initcall(dw_i2c_init_driver); +module_init(dw_i2c_init_driver); static void __exit dw_i2c_exit_driver(void) { diff --git a/trunk/drivers/i2c/busses/i2c-eg20t.c b/trunk/drivers/i2c/busses/i2c-eg20t.c index f086131cb1c7..ca8877641040 100644 --- a/trunk/drivers/i2c/busses/i2c-eg20t.c +++ b/trunk/drivers/i2c/busses/i2c-eg20t.c @@ -271,36 +271,30 @@ static inline bool ktime_lt(const ktime_t cmp1, const ktime_t cmp2) /** * pch_i2c_wait_for_bus_idle() - check the status of bus. * @adap: Pointer to struct i2c_algo_pch_data. - * @timeout: waiting time counter (ms). + * @timeout: waiting time counter (us). */ static s32 pch_i2c_wait_for_bus_idle(struct i2c_algo_pch_data *adap, s32 timeout) { void __iomem *p = adap->pch_base_address; - int schedule = 0; - unsigned long end = jiffies + msecs_to_jiffies(timeout); - - while (ioread32(p + PCH_I2CSR) & I2CMBB_BIT) { - if (time_after(jiffies, end)) { - pch_dbg(adap, "I2CSR = %x\n", ioread32(p + PCH_I2CSR)); - pch_err(adap, "%s: Timeout Error.return%d\n", - __func__, -ETIME); - pch_i2c_init(adap); + ktime_t ns_val; - return -ETIME; - } + if ((ioread32(p + PCH_I2CSR) & I2CMBB_BIT) == 0) + return 0; - if (!schedule) - /* Retry after some usecs */ - udelay(5); - else - /* Wait a bit more without consuming CPU */ - usleep_range(20, 1000); + /* MAX timeout value is timeout*1000*1000nsec */ + ns_val = ktime_add_ns(ktime_get(), timeout*1000*1000); + do { + msleep(20); + if ((ioread32(p + PCH_I2CSR) & I2CMBB_BIT) == 0) + return 0; + } while (ktime_lt(ktime_get(), ns_val)); - schedule = 1; - } + pch_dbg(adap, "I2CSR = %x\n", ioread32(p + PCH_I2CSR)); + 
pch_err(adap, "%s: Timeout Error.return%d\n", __func__, -ETIME); + pch_i2c_init(adap); - return 0; + return -ETIME; } /** @@ -784,6 +778,8 @@ static s32 pch_i2c_xfer(struct i2c_adapter *i2c_adap, struct i2c_msg *pmsg; u32 i = 0; u32 status; + u32 msglen; + u32 subaddrlen; s32 ret; struct i2c_algo_pch_data *adap = i2c_adap->algo_data; @@ -808,6 +804,12 @@ static s32 pch_i2c_xfer(struct i2c_adapter *i2c_adap, status = pmsg->flags; pch_dbg(adap, "After invoking I2C_MODE_SEL :flag= 0x%x\n", status); + /* calculate sub address length and message length */ + /* these are applicable only for buffer mode */ + subaddrlen = pmsg->buf[0]; + /* calculate actual message length excluding + * the sub address fields */ + msglen = (pmsg->len) - (subaddrlen + 1); if ((status & (I2C_M_RD)) != false) { ret = pch_i2c_readbytes(i2c_adap, pmsg, (i + 1 == num), diff --git a/trunk/drivers/i2c/busses/i2c-imx.c b/trunk/drivers/i2c/busses/i2c-imx.c index 124d9c594f40..58832e578fff 100644 --- a/trunk/drivers/i2c/busses/i2c-imx.c +++ b/trunk/drivers/i2c/busses/i2c-imx.c @@ -149,6 +149,11 @@ static int i2c_imx_bus_busy(struct imx_i2c_struct *i2c_imx, int for_busy) break; if (!for_busy && !(temp & I2SR_IBB)) break; + if (signal_pending(current)) { + dev_dbg(&i2c_imx->adapter.dev, + "<%s> I2C Interrupted\n", __func__); + return -EINTR; + } if (time_after(jiffies, orig_jiffies + msecs_to_jiffies(500))) { dev_dbg(&i2c_imx->adapter.dev, "<%s> I2C bus is busy\n", __func__); diff --git a/trunk/drivers/i2c/busses/i2c-mpc.c b/trunk/drivers/i2c/busses/i2c-mpc.c index 206caacd30d7..a8ebb84e23f9 100644 --- a/trunk/drivers/i2c/busses/i2c-mpc.c +++ b/trunk/drivers/i2c/busses/i2c-mpc.c @@ -454,7 +454,7 @@ static int mpc_write(struct mpc_i2c *i2c, int target, } static int mpc_read(struct mpc_i2c *i2c, int target, - u8 *data, int length, int restart, bool recv_len) + u8 *data, int length, int restart) { unsigned timeout = i2c->adap.timeout; int i, result; @@ -470,7 +470,7 @@ static int mpc_read(struct mpc_i2c 
*i2c, int target, return result; if (length) { - if (length == 1 && !recv_len) + if (length == 1) writeccr(i2c, CCR_MIEN | CCR_MEN | CCR_MSTA | CCR_TXAK); else writeccr(i2c, CCR_MIEN | CCR_MEN | CCR_MSTA); @@ -479,46 +479,17 @@ static int mpc_read(struct mpc_i2c *i2c, int target, } for (i = 0; i < length; i++) { - u8 byte; - result = i2c_wait(i2c, timeout, 0); if (result < 0) return result; - /* - * For block reads, we have to know the total length (1st byte) - * before we can determine if we are done. - */ - if (i || !recv_len) { - /* Generate txack on next to last byte */ - if (i == length - 2) - writeccr(i2c, CCR_MIEN | CCR_MEN | CCR_MSTA - | CCR_TXAK); - /* Do not generate stop on last byte */ - if (i == length - 1) - writeccr(i2c, CCR_MIEN | CCR_MEN | CCR_MSTA - | CCR_MTX); - } - - byte = readb(i2c->base + MPC_I2C_DR); - - /* - * Adjust length if first received byte is length. - * The length is 1 length byte plus actually data length - */ - if (i == 0 && recv_len) { - if (byte == 0 || byte > I2C_SMBUS_BLOCK_MAX) - return -EPROTO; - length += byte; - /* - * For block reads, generate txack here if data length - * is 1 byte (total length is 2 bytes). - */ - if (length == 2) - writeccr(i2c, CCR_MIEN | CCR_MEN | CCR_MSTA - | CCR_TXAK); - } - data[i] = byte; + /* Generate txack on next to last byte */ + if (i == length - 2) + writeccr(i2c, CCR_MIEN | CCR_MEN | CCR_MSTA | CCR_TXAK); + /* Do not generate stop on last byte */ + if (i == length - 1) + writeccr(i2c, CCR_MIEN | CCR_MEN | CCR_MSTA | CCR_MTX); + data[i] = readb(i2c->base + MPC_I2C_DR); } return length; @@ -561,17 +532,12 @@ static int mpc_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num) "Doing %s %d bytes to 0x%02x - %d of %d messages\n", pmsg->flags & I2C_M_RD ? 
"read" : "write", pmsg->len, pmsg->addr, i + 1, num); - if (pmsg->flags & I2C_M_RD) { - bool recv_len = pmsg->flags & I2C_M_RECV_LEN; - - ret = mpc_read(i2c, pmsg->addr, pmsg->buf, pmsg->len, i, - recv_len); - if (recv_len && ret > 0) - pmsg->len = ret; - } else { + if (pmsg->flags & I2C_M_RD) + ret = + mpc_read(i2c, pmsg->addr, pmsg->buf, pmsg->len, i); + else ret = mpc_write(i2c, pmsg->addr, pmsg->buf, pmsg->len, i); - } } mpc_i2c_stop(i2c); return (ret < 0) ? ret : num; @@ -579,8 +545,7 @@ static int mpc_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num) static u32 mpc_functionality(struct i2c_adapter *adap) { - return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL - | I2C_FUNC_SMBUS_READ_BLOCK_DATA | I2C_FUNC_SMBUS_BLOCK_PROC_CALL; + return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL; } static const struct i2c_algorithm mpc_algo = { diff --git a/trunk/drivers/i2c/busses/i2c-s3c2410.c b/trunk/drivers/i2c/busses/i2c-s3c2410.c index 737f7218a32c..4c1718081685 100644 --- a/trunk/drivers/i2c/busses/i2c-s3c2410.c +++ b/trunk/drivers/i2c/busses/i2c-s3c2410.c @@ -31,7 +31,6 @@ #include #include #include -#include #include #include #include @@ -565,7 +564,6 @@ static int s3c24xx_i2c_xfer(struct i2c_adapter *adap, int retry; int ret; - pm_runtime_get_sync(&adap->dev); clk_enable(i2c->clk); for (retry = 0; retry < adap->retries; retry++) { @@ -574,7 +572,6 @@ static int s3c24xx_i2c_xfer(struct i2c_adapter *adap, if (ret != -EAGAIN) { clk_disable(i2c->clk); - pm_runtime_put_sync(&adap->dev); return ret; } @@ -584,7 +581,6 @@ static int s3c24xx_i2c_xfer(struct i2c_adapter *adap, } clk_disable(i2c->clk); - pm_runtime_put_sync(&adap->dev); return -EREMOTEIO; } @@ -894,7 +890,7 @@ static int s3c24xx_i2c_probe(struct platform_device *pdev) } } - i2c = devm_kzalloc(&pdev->dev, sizeof(struct s3c24xx_i2c), GFP_KERNEL); + i2c = kzalloc(sizeof(struct s3c24xx_i2c), GFP_KERNEL); if (!i2c) { dev_err(&pdev->dev, "no memory for state\n"); return -ENOMEM; @@ -1017,9 +1013,6 @@ static int 
s3c24xx_i2c_probe(struct platform_device *pdev) of_i2c_register_devices(&i2c->adap); platform_set_drvdata(pdev, i2c); - pm_runtime_enable(&pdev->dev); - pm_runtime_enable(&i2c->adap.dev); - dev_info(&pdev->dev, "%s: S3C I2C adapter\n", dev_name(&i2c->adap.dev)); clk_disable(i2c->clk); return 0; @@ -1042,6 +1035,7 @@ static int s3c24xx_i2c_probe(struct platform_device *pdev) clk_put(i2c->clk); err_noclk: + kfree(i2c); return ret; } @@ -1054,9 +1048,6 @@ static int s3c24xx_i2c_remove(struct platform_device *pdev) { struct s3c24xx_i2c *i2c = platform_get_drvdata(pdev); - pm_runtime_disable(&i2c->adap.dev); - pm_runtime_disable(&pdev->dev); - s3c24xx_i2c_deregister_cpufreq(i2c); i2c_del_adapter(&i2c->adap); @@ -1070,6 +1061,7 @@ static int s3c24xx_i2c_remove(struct platform_device *pdev) release_resource(i2c->ioarea); s3c24xx_i2c_dt_gpio_free(i2c); kfree(i2c->ioarea); + kfree(i2c); return 0; } diff --git a/trunk/drivers/i2c/busses/i2c-sirf.c b/trunk/drivers/i2c/busses/i2c-sirf.c deleted file mode 100644 index 5574a47792fb..000000000000 --- a/trunk/drivers/i2c/busses/i2c-sirf.c +++ /dev/null @@ -1,459 +0,0 @@ -/* - * I2C bus driver for CSR SiRFprimaII - * - * Copyright (c) 2011 Cambridge Silicon Radio Limited, a CSR plc group company. - * - * Licensed under GPLv2 or later. 
- */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define SIRFSOC_I2C_CLK_CTRL 0x00 -#define SIRFSOC_I2C_STATUS 0x0C -#define SIRFSOC_I2C_CTRL 0x10 -#define SIRFSOC_I2C_IO_CTRL 0x14 -#define SIRFSOC_I2C_SDA_DELAY 0x18 -#define SIRFSOC_I2C_CMD_START 0x1C -#define SIRFSOC_I2C_CMD_BUF 0x30 -#define SIRFSOC_I2C_DATA_BUF 0x80 - -#define SIRFSOC_I2C_CMD_BUF_MAX 16 -#define SIRFSOC_I2C_DATA_BUF_MAX 16 - -#define SIRFSOC_I2C_CMD(x) (SIRFSOC_I2C_CMD_BUF + (x)*0x04) -#define SIRFSOC_I2C_DATA_MASK(x) (0xFF<<(((x)&3)*8)) -#define SIRFSOC_I2C_DATA_SHIFT(x) (((x)&3)*8) - -#define SIRFSOC_I2C_DIV_MASK (0xFFFF) - -/* I2C status flags */ -#define SIRFSOC_I2C_STAT_BUSY BIT(0) -#define SIRFSOC_I2C_STAT_TIP BIT(1) -#define SIRFSOC_I2C_STAT_NACK BIT(2) -#define SIRFSOC_I2C_STAT_TR_INT BIT(4) -#define SIRFSOC_I2C_STAT_STOP BIT(6) -#define SIRFSOC_I2C_STAT_CMD_DONE BIT(8) -#define SIRFSOC_I2C_STAT_ERR BIT(9) -#define SIRFSOC_I2C_CMD_INDEX (0x1F<<16) - -/* I2C control flags */ -#define SIRFSOC_I2C_RESET BIT(0) -#define SIRFSOC_I2C_CORE_EN BIT(1) -#define SIRFSOC_I2C_MASTER_MODE BIT(2) -#define SIRFSOC_I2C_CMD_DONE_EN BIT(11) -#define SIRFSOC_I2C_ERR_INT_EN BIT(12) - -#define SIRFSOC_I2C_SDA_DELAY_MASK (0xFF) -#define SIRFSOC_I2C_SCLF_FILTER (3<<8) - -#define SIRFSOC_I2C_START_CMD BIT(0) - -#define SIRFSOC_I2C_CMD_RP(x) ((x)&0x7) -#define SIRFSOC_I2C_NACK BIT(3) -#define SIRFSOC_I2C_WRITE BIT(4) -#define SIRFSOC_I2C_READ BIT(5) -#define SIRFSOC_I2C_STOP BIT(6) -#define SIRFSOC_I2C_START BIT(7) - -#define SIRFSOC_I2C_DEFAULT_SPEED 100000 - -struct sirfsoc_i2c { - void __iomem *base; - struct clk *clk; - u32 cmd_ptr; /* Current position in CMD buffer */ - u8 *buf; /* Buffer passed by user */ - u32 msg_len; /* Message length */ - u32 finished_len; /* number of bytes read/written */ - u32 read_cmd_len; /* number of read cmd sent */ - int msg_read; /* 1 indicates a read message */ - int err_status; /* 1 indicates an error on bus */ - - u32 
sda_delay; /* For suspend/resume */ - u32 clk_div; - int last; /* Last message in transfer, STOP cmd can be sent */ - - struct completion done; /* indicates completion of message transfer */ - struct i2c_adapter adapter; -}; - -static void i2c_sirfsoc_read_data(struct sirfsoc_i2c *siic) -{ - u32 data = 0; - int i; - - for (i = 0; i < siic->read_cmd_len; i++) { - if (!(i & 0x3)) - data = readl(siic->base + SIRFSOC_I2C_DATA_BUF + i); - siic->buf[siic->finished_len++] = - (u8)((data & SIRFSOC_I2C_DATA_MASK(i)) >> - SIRFSOC_I2C_DATA_SHIFT(i)); - } -} - -static void i2c_sirfsoc_queue_cmd(struct sirfsoc_i2c *siic) -{ - u32 regval; - int i = 0; - - if (siic->msg_read) { - while (((siic->finished_len + i) < siic->msg_len) - && (siic->cmd_ptr < SIRFSOC_I2C_CMD_BUF_MAX)) { - regval = SIRFSOC_I2C_READ | SIRFSOC_I2C_CMD_RP(0); - if (((siic->finished_len + i) == - (siic->msg_len - 1)) && siic->last) - regval |= SIRFSOC_I2C_STOP | SIRFSOC_I2C_NACK; - writel(regval, - siic->base + SIRFSOC_I2C_CMD(siic->cmd_ptr++)); - i++; - } - - siic->read_cmd_len = i; - } else { - while ((siic->cmd_ptr < SIRFSOC_I2C_CMD_BUF_MAX - 1) - && (siic->finished_len < siic->msg_len)) { - regval = SIRFSOC_I2C_WRITE | SIRFSOC_I2C_CMD_RP(0); - if ((siic->finished_len == (siic->msg_len - 1)) - && siic->last) - regval |= SIRFSOC_I2C_STOP; - writel(regval, - siic->base + SIRFSOC_I2C_CMD(siic->cmd_ptr++)); - writel(siic->buf[siic->finished_len++], - siic->base + SIRFSOC_I2C_CMD(siic->cmd_ptr++)); - } - } - siic->cmd_ptr = 0; - - /* Trigger the transfer */ - writel(SIRFSOC_I2C_START_CMD, siic->base + SIRFSOC_I2C_CMD_START); -} - -static irqreturn_t i2c_sirfsoc_irq(int irq, void *dev_id) -{ - struct sirfsoc_i2c *siic = (struct sirfsoc_i2c *)dev_id; - u32 i2c_stat = readl(siic->base + SIRFSOC_I2C_STATUS); - - if (i2c_stat & SIRFSOC_I2C_STAT_ERR) { - /* Error conditions */ - siic->err_status = 1; - writel(SIRFSOC_I2C_STAT_ERR, siic->base + SIRFSOC_I2C_STATUS); - - if (i2c_stat & SIRFSOC_I2C_STAT_NACK) - 
dev_err(&siic->adapter.dev, "ACK not received\n"); - else - dev_err(&siic->adapter.dev, "I2C error\n"); - - complete(&siic->done); - } else if (i2c_stat & SIRFSOC_I2C_STAT_CMD_DONE) { - /* CMD buffer execution complete */ - if (siic->msg_read) - i2c_sirfsoc_read_data(siic); - if (siic->finished_len == siic->msg_len) - complete(&siic->done); - else /* Fill a new CMD buffer for left data */ - i2c_sirfsoc_queue_cmd(siic); - - writel(SIRFSOC_I2C_STAT_CMD_DONE, siic->base + SIRFSOC_I2C_STATUS); - } - - return IRQ_HANDLED; -} - -static void i2c_sirfsoc_set_address(struct sirfsoc_i2c *siic, - struct i2c_msg *msg) -{ - unsigned char addr; - u32 regval = SIRFSOC_I2C_START | SIRFSOC_I2C_CMD_RP(0) | SIRFSOC_I2C_WRITE; - - /* no data and last message -> add STOP */ - if (siic->last && (msg->len == 0)) - regval |= SIRFSOC_I2C_STOP; - - writel(regval, siic->base + SIRFSOC_I2C_CMD(siic->cmd_ptr++)); - - addr = msg->addr << 1; /* Generate address */ - if (msg->flags & I2C_M_RD) - addr |= 1; - - writel(addr, siic->base + SIRFSOC_I2C_CMD(siic->cmd_ptr++)); -} - -static int i2c_sirfsoc_xfer_msg(struct sirfsoc_i2c *siic, struct i2c_msg *msg) -{ - u32 regval = readl(siic->base + SIRFSOC_I2C_CTRL); - /* timeout waiting for the xfer to finish or fail */ - int timeout = msecs_to_jiffies((msg->len + 1) * 50); - int ret = 0; - - i2c_sirfsoc_set_address(siic, msg); - - writel(regval | SIRFSOC_I2C_CMD_DONE_EN | SIRFSOC_I2C_ERR_INT_EN, - siic->base + SIRFSOC_I2C_CTRL); - i2c_sirfsoc_queue_cmd(siic); - - if (wait_for_completion_timeout(&siic->done, timeout) == 0) { - siic->err_status = 1; - dev_err(&siic->adapter.dev, "Transfer timeout\n"); - } - - writel(regval & ~(SIRFSOC_I2C_CMD_DONE_EN | SIRFSOC_I2C_ERR_INT_EN), - siic->base + SIRFSOC_I2C_CTRL); - writel(0, siic->base + SIRFSOC_I2C_CMD_START); - - if (siic->err_status) { - writel(readl(siic->base + SIRFSOC_I2C_CTRL) | SIRFSOC_I2C_RESET, - siic->base + SIRFSOC_I2C_CTRL); - while (readl(siic->base + SIRFSOC_I2C_CTRL) & SIRFSOC_I2C_RESET) - 
cpu_relax(); - - ret = -EIO; - } - - return ret; -} - -static u32 i2c_sirfsoc_func(struct i2c_adapter *adap) -{ - return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL; -} - -static int i2c_sirfsoc_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs, - int num) -{ - struct sirfsoc_i2c *siic = adap->algo_data; - int i, ret; - - clk_enable(siic->clk); - - for (i = 0; i < num; i++) { - siic->buf = msgs[i].buf; - siic->msg_len = msgs[i].len; - siic->msg_read = !!(msgs[i].flags & I2C_M_RD); - siic->err_status = 0; - siic->cmd_ptr = 0; - siic->finished_len = 0; - siic->last = (i == (num - 1)); - - ret = i2c_sirfsoc_xfer_msg(siic, &msgs[i]); - if (ret) { - clk_disable(siic->clk); - return ret; - } - } - - clk_disable(siic->clk); - return num; -} - -/* I2C algorithms associated with this master controller driver */ -static const struct i2c_algorithm i2c_sirfsoc_algo = { - .master_xfer = i2c_sirfsoc_xfer, - .functionality = i2c_sirfsoc_func, -}; - -static int __devinit i2c_sirfsoc_probe(struct platform_device *pdev) -{ - struct sirfsoc_i2c *siic; - struct i2c_adapter *adap; - struct resource *mem_res; - struct clk *clk; - int bitrate; - int ctrl_speed; - int irq; - - int err; - u32 regval; - - clk = clk_get(&pdev->dev, NULL); - if (IS_ERR(clk)) { - err = PTR_ERR(clk); - dev_err(&pdev->dev, "Clock get failed\n"); - goto err_get_clk; - } - - err = clk_prepare(clk); - if (err) { - dev_err(&pdev->dev, "Clock prepare failed\n"); - goto err_clk_prep; - } - - err = clk_enable(clk); - if (err) { - dev_err(&pdev->dev, "Clock enable failed\n"); - goto err_clk_en; - } - - ctrl_speed = clk_get_rate(clk); - - siic = devm_kzalloc(&pdev->dev, sizeof(*siic), GFP_KERNEL); - if (!siic) { - dev_err(&pdev->dev, "Can't allocate driver data\n"); - err = -ENOMEM; - goto out; - } - adap = &siic->adapter; - adap->class = I2C_CLASS_HWMON; - - mem_res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (mem_res == NULL) { - dev_err(&pdev->dev, "Unable to get MEM resource\n"); - err = -EINVAL; - goto out; - } 
- - siic->base = devm_request_and_ioremap(&pdev->dev, mem_res); - if (siic->base == NULL) { - dev_err(&pdev->dev, "IO remap failed!\n"); - err = -ENOMEM; - goto out; - } - - irq = platform_get_irq(pdev, 0); - if (irq < 0) { - err = irq; - goto out; - } - err = devm_request_irq(&pdev->dev, irq, i2c_sirfsoc_irq, 0, - dev_name(&pdev->dev), siic); - if (err) - goto out; - - adap->algo = &i2c_sirfsoc_algo; - adap->algo_data = siic; - - adap->dev.parent = &pdev->dev; - adap->nr = pdev->id; - - strlcpy(adap->name, "sirfsoc-i2c", sizeof(adap->name)); - - platform_set_drvdata(pdev, adap); - init_completion(&siic->done); - - /* Controller Initalisation */ - - writel(SIRFSOC_I2C_RESET, siic->base + SIRFSOC_I2C_CTRL); - while (readl(siic->base + SIRFSOC_I2C_CTRL) & SIRFSOC_I2C_RESET) - cpu_relax(); - writel(SIRFSOC_I2C_CORE_EN | SIRFSOC_I2C_MASTER_MODE, - siic->base + SIRFSOC_I2C_CTRL); - - siic->clk = clk; - - err = of_property_read_u32(pdev->dev.of_node, - "clock-frequency", &bitrate); - if (err < 0) - bitrate = SIRFSOC_I2C_DEFAULT_SPEED; - - if (bitrate < 100000) - regval = - (2 * ctrl_speed) / (2 * bitrate * 11); - else - regval = ctrl_speed / (bitrate * 5); - - writel(regval, siic->base + SIRFSOC_I2C_CLK_CTRL); - if (regval > 0xFF) - writel(0xFF, siic->base + SIRFSOC_I2C_SDA_DELAY); - else - writel(regval, siic->base + SIRFSOC_I2C_SDA_DELAY); - - err = i2c_add_numbered_adapter(adap); - if (err < 0) { - dev_err(&pdev->dev, "Can't add new i2c adapter\n"); - goto out; - } - - clk_disable(clk); - - dev_info(&pdev->dev, " I2C adapter ready to operate\n"); - - return 0; - -out: - clk_disable(clk); -err_clk_en: - clk_unprepare(clk); -err_clk_prep: - clk_put(clk); -err_get_clk: - return err; -} - -static int __devexit i2c_sirfsoc_remove(struct platform_device *pdev) -{ - struct i2c_adapter *adapter = platform_get_drvdata(pdev); - struct sirfsoc_i2c *siic = adapter->algo_data; - - writel(SIRFSOC_I2C_RESET, siic->base + SIRFSOC_I2C_CTRL); - i2c_del_adapter(adapter); - 
clk_unprepare(siic->clk); - clk_put(siic->clk); - return 0; -} - -#ifdef CONFIG_PM -static int i2c_sirfsoc_suspend(struct device *dev) -{ - struct platform_device *pdev = to_platform_device(dev); - struct i2c_adapter *adapter = platform_get_drvdata(pdev); - struct sirfsoc_i2c *siic = adapter->algo_data; - - clk_enable(siic->clk); - siic->sda_delay = readl(siic->base + SIRFSOC_I2C_SDA_DELAY); - siic->clk_div = readl(siic->base + SIRFSOC_I2C_CLK_CTRL); - clk_disable(siic->clk); - return 0; -} - -static int i2c_sirfsoc_resume(struct device *dev) -{ - struct platform_device *pdev = to_platform_device(dev); - struct i2c_adapter *adapter = platform_get_drvdata(pdev); - struct sirfsoc_i2c *siic = adapter->algo_data; - - clk_enable(siic->clk); - writel(SIRFSOC_I2C_RESET, siic->base + SIRFSOC_I2C_CTRL); - writel(SIRFSOC_I2C_CORE_EN | SIRFSOC_I2C_MASTER_MODE, - siic->base + SIRFSOC_I2C_CTRL); - writel(siic->clk_div, siic->base + SIRFSOC_I2C_CLK_CTRL); - writel(siic->sda_delay, siic->base + SIRFSOC_I2C_SDA_DELAY); - clk_disable(siic->clk); - return 0; -} - -static const struct dev_pm_ops i2c_sirfsoc_pm_ops = { - .suspend = i2c_sirfsoc_suspend, - .resume = i2c_sirfsoc_resume, -}; -#endif - -static const struct of_device_id sirfsoc_i2c_of_match[] __devinitconst = { - { .compatible = "sirf,prima2-i2c", }, - {}, -}; -MODULE_DEVICE_TABLE(of, sirfsoc_i2c_of_match); - -static struct platform_driver i2c_sirfsoc_driver = { - .driver = { - .name = "sirfsoc_i2c", - .owner = THIS_MODULE, -#ifdef CONFIG_PM - .pm = &i2c_sirfsoc_pm_ops, -#endif - .of_match_table = sirfsoc_i2c_of_match, - }, - .probe = i2c_sirfsoc_probe, - .remove = __devexit_p(i2c_sirfsoc_remove), -}; -module_platform_driver(i2c_sirfsoc_driver); - -MODULE_DESCRIPTION("SiRF SoC I2C master controller driver"); -MODULE_AUTHOR("Zhiwu Song , " - "Xiangzhen Ye "); -MODULE_LICENSE("GPL v2"); diff --git a/trunk/drivers/i2c/busses/i2c-tegra.c b/trunk/drivers/i2c/busses/i2c-tegra.c index e978635e60f0..0ab4a9548745 100644 --- 
a/trunk/drivers/i2c/busses/i2c-tegra.c +++ b/trunk/drivers/i2c/busses/i2c-tegra.c @@ -457,6 +457,7 @@ static int tegra_i2c_xfer_msg(struct tegra_i2c_dev *i2c_dev, int ret; tegra_i2c_flush_fifos(i2c_dev); + i2c_writel(i2c_dev, 0xFF, I2C_INT_STATUS); if (msg->len == 0) return -EINVAL; diff --git a/trunk/drivers/i2c/busses/i2c-versatile.c b/trunk/drivers/i2c/busses/i2c-versatile.c index f585aead50cc..60556012312f 100644 --- a/trunk/drivers/i2c/busses/i2c-versatile.c +++ b/trunk/drivers/i2c/busses/i2c-versatile.c @@ -16,7 +16,6 @@ #include #include #include -#include #define I2C_CONTROL 0x00 #define I2C_CONTROLS 0x00 @@ -100,7 +99,6 @@ static int i2c_versatile_probe(struct platform_device *dev) strlcpy(i2c->adap.name, "Versatile I2C adapter", sizeof(i2c->adap.name)); i2c->adap.algo_data = &i2c->algo; i2c->adap.dev.parent = &dev->dev; - i2c->adap.dev.of_node = dev->dev.of_node; i2c->algo = i2c_versatile_algo; i2c->algo.data = i2c; @@ -113,7 +111,6 @@ static int i2c_versatile_probe(struct platform_device *dev) ret = i2c_bit_add_bus(&i2c->adap); if (ret >= 0) { platform_set_drvdata(dev, i2c); - of_i2c_register_devices(&i2c->adap); return 0; } @@ -136,19 +133,12 @@ static int i2c_versatile_remove(struct platform_device *dev) return 0; } -static const struct of_device_id i2c_versatile_match[] = { - { .compatible = "arm,versatile-i2c", }, - {}, -}; -MODULE_DEVICE_TABLE(of, i2c_versatile_match); - static struct platform_driver i2c_versatile_driver = { .probe = i2c_versatile_probe, .remove = i2c_versatile_remove, .driver = { .name = "versatile-i2c", .owner = THIS_MODULE, - .of_match_table = i2c_versatile_match, }, }; diff --git a/trunk/drivers/i2c/busses/i2c-xlr.c b/trunk/drivers/i2c/busses/i2c-xlr.c deleted file mode 100644 index 96d3fabd8883..000000000000 --- a/trunk/drivers/i2c/busses/i2c-xlr.c +++ /dev/null @@ -1,278 +0,0 @@ -/* - * Copyright 2011, Netlogic Microsystems Inc. 
- * Copyright 2004, Matt Porter - * - * This file is licensed under the terms of the GNU General Public - * License version 2. This program is licensed "as is" without any - * warranty of any kind, whether express or implied. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* XLR I2C REGISTERS */ -#define XLR_I2C_CFG 0x00 -#define XLR_I2C_CLKDIV 0x01 -#define XLR_I2C_DEVADDR 0x02 -#define XLR_I2C_ADDR 0x03 -#define XLR_I2C_DATAOUT 0x04 -#define XLR_I2C_DATAIN 0x05 -#define XLR_I2C_STATUS 0x06 -#define XLR_I2C_STARTXFR 0x07 -#define XLR_I2C_BYTECNT 0x08 -#define XLR_I2C_HDSTATIM 0x09 - -/* XLR I2C REGISTERS FLAGS */ -#define XLR_I2C_BUS_BUSY 0x01 -#define XLR_I2C_SDOEMPTY 0x02 -#define XLR_I2C_RXRDY 0x04 -#define XLR_I2C_ACK_ERR 0x08 -#define XLR_I2C_ARB_STARTERR 0x30 - -/* Register Values */ -#define XLR_I2C_CFG_ADDR 0xF8 -#define XLR_I2C_CFG_NOADDR 0xFA -#define XLR_I2C_STARTXFR_ND 0x02 /* No Data */ -#define XLR_I2C_STARTXFR_RD 0x01 /* Read */ -#define XLR_I2C_STARTXFR_WR 0x00 /* Write */ - -#define XLR_I2C_TIMEOUT 10 /* timeout per byte in msec */ - -/* - * On XLR/XLS, we need to use __raw_ IO to read the I2C registers - * because they are in the big-endian MMIO area on the SoC. - * - * The readl/writel implementation on XLR/XLS byteswaps, because - * those are for its little-endian PCI space (see arch/mips/Kconfig). 
- */ -static inline void xlr_i2c_wreg(u32 __iomem *base, unsigned int reg, u32 val) -{ - __raw_writel(val, base + reg); -} - -static inline u32 xlr_i2c_rdreg(u32 __iomem *base, unsigned int reg) -{ - return __raw_readl(base + reg); -} - -struct xlr_i2c_private { - struct i2c_adapter adap; - u32 __iomem *iobase; -}; - -static int xlr_i2c_tx(struct xlr_i2c_private *priv, u16 len, - u8 *buf, u16 addr) -{ - struct i2c_adapter *adap = &priv->adap; - unsigned long timeout, stoptime, checktime; - u32 i2c_status; - int pos, timedout; - u8 offset, byte; - - offset = buf[0]; - xlr_i2c_wreg(priv->iobase, XLR_I2C_ADDR, offset); - xlr_i2c_wreg(priv->iobase, XLR_I2C_DEVADDR, addr); - xlr_i2c_wreg(priv->iobase, XLR_I2C_CFG, XLR_I2C_CFG_ADDR); - xlr_i2c_wreg(priv->iobase, XLR_I2C_BYTECNT, len - 1); - - timeout = msecs_to_jiffies(XLR_I2C_TIMEOUT); - stoptime = jiffies + timeout; - timedout = 0; - pos = 1; -retry: - if (len == 1) { - xlr_i2c_wreg(priv->iobase, XLR_I2C_STARTXFR, - XLR_I2C_STARTXFR_ND); - } else { - xlr_i2c_wreg(priv->iobase, XLR_I2C_DATAOUT, buf[pos]); - xlr_i2c_wreg(priv->iobase, XLR_I2C_STARTXFR, - XLR_I2C_STARTXFR_WR); - } - - while (!timedout) { - checktime = jiffies; - i2c_status = xlr_i2c_rdreg(priv->iobase, XLR_I2C_STATUS); - - if (i2c_status & XLR_I2C_SDOEMPTY) { - pos++; - /* need to do a empty dataout after the last byte */ - byte = (pos < len) ? 
buf[pos] : 0; - xlr_i2c_wreg(priv->iobase, XLR_I2C_DATAOUT, byte); - - /* reset timeout on successful xmit */ - stoptime = jiffies + timeout; - } - timedout = time_after(checktime, stoptime); - - if (i2c_status & XLR_I2C_ARB_STARTERR) { - if (timedout) - break; - goto retry; - } - - if (i2c_status & XLR_I2C_ACK_ERR) - return -EIO; - - if ((i2c_status & XLR_I2C_BUS_BUSY) == 0 && pos >= len) - return 0; - } - dev_err(&adap->dev, "I2C transmit timeout\n"); - return -ETIMEDOUT; -} - -static int xlr_i2c_rx(struct xlr_i2c_private *priv, u16 len, u8 *buf, u16 addr) -{ - struct i2c_adapter *adap = &priv->adap; - u32 i2c_status; - unsigned long timeout, stoptime, checktime; - int nbytes, timedout; - u8 byte; - - xlr_i2c_wreg(priv->iobase, XLR_I2C_CFG, XLR_I2C_CFG_NOADDR); - xlr_i2c_wreg(priv->iobase, XLR_I2C_BYTECNT, len); - xlr_i2c_wreg(priv->iobase, XLR_I2C_DEVADDR, addr); - - timeout = msecs_to_jiffies(XLR_I2C_TIMEOUT); - stoptime = jiffies + timeout; - timedout = 0; - nbytes = 0; -retry: - xlr_i2c_wreg(priv->iobase, XLR_I2C_STARTXFR, XLR_I2C_STARTXFR_RD); - - while (!timedout) { - checktime = jiffies; - i2c_status = xlr_i2c_rdreg(priv->iobase, XLR_I2C_STATUS); - if (i2c_status & XLR_I2C_RXRDY) { - if (nbytes > len) - return -EIO; /* should not happen */ - - /* we need to do a dummy datain when nbytes == len */ - byte = xlr_i2c_rdreg(priv->iobase, XLR_I2C_DATAIN); - if (nbytes < len) - buf[nbytes] = byte; - nbytes++; - - /* reset timeout on successful read */ - stoptime = jiffies + timeout; - } - - timedout = time_after(checktime, stoptime); - if (i2c_status & XLR_I2C_ARB_STARTERR) { - if (timedout) - break; - goto retry; - } - - if (i2c_status & XLR_I2C_ACK_ERR) - return -EIO; - - if ((i2c_status & XLR_I2C_BUS_BUSY) == 0) - return 0; - } - - dev_err(&adap->dev, "I2C receive timeout\n"); - return -ETIMEDOUT; -} - -static int xlr_i2c_xfer(struct i2c_adapter *adap, - struct i2c_msg *msgs, int num) -{ - struct i2c_msg *msg; - int i; - int ret = 0; - struct xlr_i2c_private 
*priv = i2c_get_adapdata(adap); - - for (i = 0; ret == 0 && i < num; i++) { - msg = &msgs[i]; - if (msg->flags & I2C_M_RD) - ret = xlr_i2c_rx(priv, msg->len, &msg->buf[0], - msg->addr); - else - ret = xlr_i2c_tx(priv, msg->len, &msg->buf[0], - msg->addr); - } - - return (ret != 0) ? ret : num; -} - -static u32 xlr_func(struct i2c_adapter *adap) -{ - /* Emulate SMBUS over I2C */ - return I2C_FUNC_SMBUS_EMUL | I2C_FUNC_I2C; -} - -static struct i2c_algorithm xlr_i2c_algo = { - .master_xfer = xlr_i2c_xfer, - .functionality = xlr_func, -}; - -static int __devinit xlr_i2c_probe(struct platform_device *pdev) -{ - struct xlr_i2c_private *priv; - struct resource *res; - int ret; - - priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL); - if (!priv) - return -ENOMEM; - - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - priv->iobase = devm_request_and_ioremap(&pdev->dev, res); - if (!priv->iobase) { - dev_err(&pdev->dev, "devm_request_and_ioremap failed\n"); - return -EBUSY; - } - - priv->adap.dev.parent = &pdev->dev; - priv->adap.owner = THIS_MODULE; - priv->adap.algo_data = priv; - priv->adap.algo = &xlr_i2c_algo; - priv->adap.nr = pdev->id; - priv->adap.class = I2C_CLASS_HWMON; - snprintf(priv->adap.name, sizeof(priv->adap.name), "xlr-i2c"); - - i2c_set_adapdata(&priv->adap, priv); - ret = i2c_add_numbered_adapter(&priv->adap); - if (ret < 0) { - dev_err(&priv->adap.dev, "Failed to add i2c bus.\n"); - return ret; - } - - platform_set_drvdata(pdev, priv); - dev_info(&priv->adap.dev, "Added I2C Bus.\n"); - return 0; -} - -static int __devexit xlr_i2c_remove(struct platform_device *pdev) -{ - struct xlr_i2c_private *priv; - - priv = platform_get_drvdata(pdev); - i2c_del_adapter(&priv->adap); - platform_set_drvdata(pdev, NULL); - return 0; -} - -static struct platform_driver xlr_i2c_driver = { - .probe = xlr_i2c_probe, - .remove = __devexit_p(xlr_i2c_remove), - .driver = { - .name = "xlr-i2cbus", - .owner = THIS_MODULE, - }, -}; - 
-module_platform_driver(xlr_i2c_driver); - -MODULE_AUTHOR("Ganesan Ramalingam "); -MODULE_DESCRIPTION("XLR/XLS SoC I2C Controller driver"); -MODULE_LICENSE("GPL v2"); -MODULE_ALIAS("platform:xlr-i2cbus"); diff --git a/trunk/drivers/iommu/Kconfig b/trunk/drivers/iommu/Kconfig index 3bd9fff5c589..6bea6962f8ee 100644 --- a/trunk/drivers/iommu/Kconfig +++ b/trunk/drivers/iommu/Kconfig @@ -142,24 +142,4 @@ config OMAP_IOMMU_DEBUG Say N unless you know you need this. -config TEGRA_IOMMU_GART - bool "Tegra GART IOMMU Support" - depends on ARCH_TEGRA_2x_SOC - select IOMMU_API - help - Enables support for remapping discontiguous physical memory - shared with the operating system into contiguous I/O virtual - space through the GART (Graphics Address Relocation Table) - hardware included on Tegra SoCs. - -config TEGRA_IOMMU_SMMU - bool "Tegra SMMU IOMMU Support" - depends on ARCH_TEGRA_3x_SOC - select IOMMU_API - help - Enables support for remapping discontiguous physical memory - shared with the operating system into contiguous I/O virtual - space through the SMMU (System Memory Management Unit) - hardware included on Tegra SoCs. 
- endif # IOMMU_SUPPORT diff --git a/trunk/drivers/iommu/Makefile b/trunk/drivers/iommu/Makefile index 7ad7a3bc1242..0e36b4934aff 100644 --- a/trunk/drivers/iommu/Makefile +++ b/trunk/drivers/iommu/Makefile @@ -8,5 +8,3 @@ obj-$(CONFIG_IRQ_REMAP) += intr_remapping.o obj-$(CONFIG_OMAP_IOMMU) += omap-iommu.o obj-$(CONFIG_OMAP_IOVMM) += omap-iovmm.o obj-$(CONFIG_OMAP_IOMMU_DEBUG) += omap-iommu-debug.o -obj-$(CONFIG_TEGRA_IOMMU_GART) += tegra-gart.o -obj-$(CONFIG_TEGRA_IOMMU_SMMU) += tegra-smmu.o diff --git a/trunk/drivers/iommu/amd_iommu_init.c b/trunk/drivers/iommu/amd_iommu_init.c index c56790375e0f..a35e98ad9725 100644 --- a/trunk/drivers/iommu/amd_iommu_init.c +++ b/trunk/drivers/iommu/amd_iommu_init.c @@ -196,8 +196,6 @@ static u32 rlookup_table_size; /* size if the rlookup table */ */ extern void iommu_flush_all_caches(struct amd_iommu *iommu); -static int amd_iommu_enable_interrupts(void); - static inline void update_last_devid(u16 devid) { if (devid > amd_iommu_last_bdf) @@ -360,6 +358,8 @@ static void iommu_disable(struct amd_iommu *iommu) */ static u8 * __init iommu_map_mmio_space(u64 address) { + u8 *ret; + if (!request_mem_region(address, MMIO_REGION_LENGTH, "amd_iommu")) { pr_err("AMD-Vi: Can not reserve memory region %llx for mmio\n", address); @@ -367,7 +367,13 @@ static u8 * __init iommu_map_mmio_space(u64 address) return NULL; } - return ioremap_nocache(address, MMIO_REGION_LENGTH); + ret = ioremap_nocache(address, MMIO_REGION_LENGTH); + if (ret != NULL) + return ret; + + release_mem_region(address, MMIO_REGION_LENGTH); + + return NULL; } static void __init iommu_unmap_mmio_space(struct amd_iommu *iommu) @@ -1125,9 +1131,8 @@ static int iommu_setup_msi(struct amd_iommu *iommu) { int r; - r = pci_enable_msi(iommu->dev); - if (r) - return r; + if (pci_enable_msi(iommu->dev)) + return 1; r = request_threaded_irq(iommu->dev->irq, amd_iommu_int_handler, @@ -1137,36 +1142,27 @@ static int iommu_setup_msi(struct amd_iommu *iommu) if (r) { 
pci_disable_msi(iommu->dev); - return r; + return 1; } iommu->int_enabled = true; + iommu_feature_enable(iommu, CONTROL_EVT_INT_EN); + + if (iommu->ppr_log != NULL) + iommu_feature_enable(iommu, CONTROL_PPFINT_EN); return 0; } static int iommu_init_msi(struct amd_iommu *iommu) { - int ret; - if (iommu->int_enabled) - goto enable_faults; + return 0; if (pci_find_capability(iommu->dev, PCI_CAP_ID_MSI)) - ret = iommu_setup_msi(iommu); - else - ret = -ENODEV; - - if (ret) - return ret; + return iommu_setup_msi(iommu); -enable_faults: - iommu_feature_enable(iommu, CONTROL_EVT_INT_EN); - - if (iommu->ppr_log != NULL) - iommu_feature_enable(iommu, CONTROL_PPFINT_EN); - - return 0; + return 1; } /**************************************************************************** @@ -1385,6 +1381,7 @@ static void enable_iommus(void) iommu_enable_ppr_log(iommu); iommu_enable_gt(iommu); iommu_set_exclusion_range(iommu); + iommu_init_msi(iommu); iommu_enable(iommu); iommu_flush_all_caches(iommu); } @@ -1412,8 +1409,6 @@ static void amd_iommu_resume(void) /* re-load the hardware */ enable_iommus(); - - amd_iommu_enable_interrupts(); } static int amd_iommu_suspend(void) @@ -1429,40 +1424,10 @@ static struct syscore_ops amd_iommu_syscore_ops = { .resume = amd_iommu_resume, }; -static void __init free_on_init_error(void) -{ - amd_iommu_uninit_devices(); - - free_pages((unsigned long)amd_iommu_pd_alloc_bitmap, - get_order(MAX_DOMAIN_ID/8)); - - free_pages((unsigned long)amd_iommu_rlookup_table, - get_order(rlookup_table_size)); - - free_pages((unsigned long)amd_iommu_alias_table, - get_order(alias_table_size)); - - free_pages((unsigned long)amd_iommu_dev_table, - get_order(dev_table_size)); - - free_iommu_all(); - - free_unity_maps(); - -#ifdef CONFIG_GART_IOMMU - /* - * We failed to initialize the AMD IOMMU - try fallback to GART - * if possible. - */ - gart_iommu_init(); - -#endif -} - /* - * This is the hardware init function for AMD IOMMU in the system. 
- * This function is called either from amd_iommu_init or from the interrupt - * remapping setup code. + * This is the core init function for AMD IOMMU hardware in the system. + * This function is called from the generic x86 DMA layer initialization + * code. * * This function basically parses the ACPI table for AMD IOMMU (IVRS) * three times: @@ -1481,21 +1446,16 @@ static void __init free_on_init_error(void) * remapping requirements parsed out of the ACPI table in * this last pass. * - * After everything is set up the IOMMUs are enabled and the necessary - * hotplug and suspend notifiers are registered. + * After that the hardware is initialized and ready to go. In the last + * step we do some Linux specific things like registering the driver in + * the dma_ops interface and initializing the suspend/resume support + * functions. Finally it prints some information about AMD IOMMUs and + * the driver state and enables the hardware. */ -int __init amd_iommu_init_hardware(void) +static int __init amd_iommu_init(void) { int i, ret = 0; - if (!amd_iommu_detected) - return -ENODEV; - - if (amd_iommu_dev_table != NULL) { - /* Hardware already initialized */ - return 0; - } - /* * First parse ACPI tables to find the largest Bus/Dev/Func * we need to handle. 
Upon this information the shared data @@ -1512,8 +1472,9 @@ int __init amd_iommu_init_hardware(void) alias_table_size = tbl_size(ALIAS_TABLE_ENTRY_SIZE); rlookup_table_size = tbl_size(RLOOKUP_TABLE_ENTRY_SIZE); - /* Device table - directly used by all IOMMUs */ ret = -ENOMEM; + + /* Device table - directly used by all IOMMUs */ amd_iommu_dev_table = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, get_order(dev_table_size)); if (amd_iommu_dev_table == NULL) @@ -1585,65 +1546,20 @@ int __init amd_iommu_init_hardware(void) enable_iommus(); - amd_iommu_init_notifier(); - - register_syscore_ops(&amd_iommu_syscore_ops); - -out: - return ret; - -free: - free_on_init_error(); - - return ret; -} - -static int amd_iommu_enable_interrupts(void) -{ - struct amd_iommu *iommu; - int ret = 0; - - for_each_iommu(iommu) { - ret = iommu_init_msi(iommu); - if (ret) - goto out; - } - -out: - return ret; -} - -/* - * This is the core init function for AMD IOMMU hardware in the system. - * This function is called from the generic x86 DMA layer initialization - * code. - * - * The function calls amd_iommu_init_hardware() to setup and enable the - * IOMMU hardware if this has not happened yet. After that the driver - * registers for the DMA-API and for the IOMMU-API as necessary. 
- */ -static int __init amd_iommu_init(void) -{ - int ret = 0; - - ret = amd_iommu_init_hardware(); - if (ret) - goto out; - - ret = amd_iommu_enable_interrupts(); - if (ret) - goto free; - if (iommu_pass_through) ret = amd_iommu_init_passthrough(); else ret = amd_iommu_init_dma_ops(); if (ret) - goto free; + goto free_disable; amd_iommu_init_api(); + amd_iommu_init_notifier(); + + register_syscore_ops(&amd_iommu_syscore_ops); + if (iommu_pass_through) goto out; @@ -1653,14 +1569,39 @@ static int __init amd_iommu_init(void) printk(KERN_INFO "AMD-Vi: Lazy IO/TLB flushing enabled\n"); x86_platform.iommu_shutdown = disable_iommus; - out: return ret; -free: +free_disable: disable_iommus(); - free_on_init_error(); +free: + amd_iommu_uninit_devices(); + + free_pages((unsigned long)amd_iommu_pd_alloc_bitmap, + get_order(MAX_DOMAIN_ID/8)); + + free_pages((unsigned long)amd_iommu_rlookup_table, + get_order(rlookup_table_size)); + + free_pages((unsigned long)amd_iommu_alias_table, + get_order(alias_table_size)); + + free_pages((unsigned long)amd_iommu_dev_table, + get_order(dev_table_size)); + + free_iommu_all(); + + free_unity_maps(); + +#ifdef CONFIG_GART_IOMMU + /* + * We failed to initialize the AMD IOMMU - try fallback to GART + * if possible. + */ + gart_iommu_init(); + +#endif goto out; } diff --git a/trunk/drivers/iommu/amd_iommu_v2.c b/trunk/drivers/iommu/amd_iommu_v2.c index 036fe9bf157e..8add9f125d3e 100644 --- a/trunk/drivers/iommu/amd_iommu_v2.c +++ b/trunk/drivers/iommu/amd_iommu_v2.c @@ -921,16 +921,7 @@ static int __init amd_iommu_v2_init(void) size_t state_table_size; int ret; - pr_info("AMD IOMMUv2 driver by Joerg Roedel \n"); - - if (!amd_iommu_v2_supported()) { - pr_info("AMD IOMMUv2 functionality not available on this sytem\n"); - /* - * Load anyway to provide the symbols to other modules - * which may use AMD IOMMUv2 optionally. 
- */ - return 0; - } + pr_info("AMD IOMMUv2 driver by Joerg Roedel "); spin_lock_init(&state_lock); @@ -970,9 +961,6 @@ static void __exit amd_iommu_v2_exit(void) size_t state_table_size; int i; - if (!amd_iommu_v2_supported()) - return; - profile_event_unregister(PROFILE_TASK_EXIT, &profile_nb); amd_iommu_unregister_ppr_notifier(&ppr_nb); diff --git a/trunk/drivers/iommu/tegra-gart.c b/trunk/drivers/iommu/tegra-gart.c deleted file mode 100644 index 779306ee7b16..000000000000 --- a/trunk/drivers/iommu/tegra-gart.c +++ /dev/null @@ -1,451 +0,0 @@ -/* - * IOMMU API for GART in Tegra20 - * - * Copyright (c) 2010-2012, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. 
- */ - -#define pr_fmt(fmt) "%s(): " fmt, __func__ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -/* bitmap of the page sizes currently supported */ -#define GART_IOMMU_PGSIZES (SZ_4K) - -#define GART_CONFIG 0x24 -#define GART_ENTRY_ADDR 0x28 -#define GART_ENTRY_DATA 0x2c -#define GART_ENTRY_PHYS_ADDR_VALID (1 << 31) - -#define GART_PAGE_SHIFT 12 -#define GART_PAGE_SIZE (1 << GART_PAGE_SHIFT) -#define GART_PAGE_MASK \ - (~(GART_PAGE_SIZE - 1) & ~GART_ENTRY_PHYS_ADDR_VALID) - -struct gart_client { - struct device *dev; - struct list_head list; -}; - -struct gart_device { - void __iomem *regs; - u32 *savedata; - u32 page_count; /* total remappable size */ - dma_addr_t iovmm_base; /* offset to vmm_area */ - spinlock_t pte_lock; /* for pagetable */ - struct list_head client; - spinlock_t client_lock; /* for client list */ - struct device *dev; -}; - -static struct gart_device *gart_handle; /* unique for a system */ - -#define GART_PTE(_pfn) \ - (GART_ENTRY_PHYS_ADDR_VALID | ((_pfn) << PAGE_SHIFT)) - -/* - * Any interaction between any block on PPSB and a block on APB or AHB - * must have these read-back to ensure the APB/AHB bus transaction is - * complete before initiating activity on the PPSB block. - */ -#define FLUSH_GART_REGS(gart) ((void)readl((gart)->regs + GART_CONFIG)) - -#define for_each_gart_pte(gart, iova) \ - for (iova = gart->iovmm_base; \ - iova < gart->iovmm_base + GART_PAGE_SIZE * gart->page_count; \ - iova += GART_PAGE_SIZE) - -static inline void gart_set_pte(struct gart_device *gart, - unsigned long offs, u32 pte) -{ - writel(offs, gart->regs + GART_ENTRY_ADDR); - writel(pte, gart->regs + GART_ENTRY_DATA); - - dev_dbg(gart->dev, "%s %08lx:%08x\n", - pte ? 
"map" : "unmap", offs, pte & GART_PAGE_MASK); -} - -static inline unsigned long gart_read_pte(struct gart_device *gart, - unsigned long offs) -{ - unsigned long pte; - - writel(offs, gart->regs + GART_ENTRY_ADDR); - pte = readl(gart->regs + GART_ENTRY_DATA); - - return pte; -} - -static void do_gart_setup(struct gart_device *gart, const u32 *data) -{ - unsigned long iova; - - for_each_gart_pte(gart, iova) - gart_set_pte(gart, iova, data ? *(data++) : 0); - - writel(1, gart->regs + GART_CONFIG); - FLUSH_GART_REGS(gart); -} - -#ifdef DEBUG -static void gart_dump_table(struct gart_device *gart) -{ - unsigned long iova; - unsigned long flags; - - spin_lock_irqsave(&gart->pte_lock, flags); - for_each_gart_pte(gart, iova) { - unsigned long pte; - - pte = gart_read_pte(gart, iova); - - dev_dbg(gart->dev, "%s %08lx:%08lx\n", - (GART_ENTRY_PHYS_ADDR_VALID & pte) ? "v" : " ", - iova, pte & GART_PAGE_MASK); - } - spin_unlock_irqrestore(&gart->pte_lock, flags); -} -#else -static inline void gart_dump_table(struct gart_device *gart) -{ -} -#endif - -static inline bool gart_iova_range_valid(struct gart_device *gart, - unsigned long iova, size_t bytes) -{ - unsigned long iova_start, iova_end, gart_start, gart_end; - - iova_start = iova; - iova_end = iova_start + bytes - 1; - gart_start = gart->iovmm_base; - gart_end = gart_start + gart->page_count * GART_PAGE_SIZE - 1; - - if (iova_start < gart_start) - return false; - if (iova_end > gart_end) - return false; - return true; -} - -static int gart_iommu_attach_dev(struct iommu_domain *domain, - struct device *dev) -{ - struct gart_device *gart; - struct gart_client *client, *c; - int err = 0; - - gart = dev_get_drvdata(dev->parent); - if (!gart) - return -EINVAL; - domain->priv = gart; - - client = devm_kzalloc(gart->dev, sizeof(*c), GFP_KERNEL); - if (!client) - return -ENOMEM; - client->dev = dev; - - spin_lock(&gart->client_lock); - list_for_each_entry(c, &gart->client, list) { - if (c->dev == dev) { - dev_err(gart->dev, - "%s 
is already attached\n", dev_name(dev)); - err = -EINVAL; - goto fail; - } - } - list_add(&client->list, &gart->client); - spin_unlock(&gart->client_lock); - dev_dbg(gart->dev, "Attached %s\n", dev_name(dev)); - return 0; - -fail: - devm_kfree(gart->dev, client); - spin_unlock(&gart->client_lock); - return err; -} - -static void gart_iommu_detach_dev(struct iommu_domain *domain, - struct device *dev) -{ - struct gart_device *gart = domain->priv; - struct gart_client *c; - - spin_lock(&gart->client_lock); - - list_for_each_entry(c, &gart->client, list) { - if (c->dev == dev) { - list_del(&c->list); - devm_kfree(gart->dev, c); - dev_dbg(gart->dev, "Detached %s\n", dev_name(dev)); - goto out; - } - } - dev_err(gart->dev, "Couldn't find\n"); -out: - spin_unlock(&gart->client_lock); -} - -static int gart_iommu_domain_init(struct iommu_domain *domain) -{ - return 0; -} - -static void gart_iommu_domain_destroy(struct iommu_domain *domain) -{ - struct gart_device *gart = domain->priv; - - if (!gart) - return; - - spin_lock(&gart->client_lock); - if (!list_empty(&gart->client)) { - struct gart_client *c; - - list_for_each_entry(c, &gart->client, list) - gart_iommu_detach_dev(domain, c->dev); - } - spin_unlock(&gart->client_lock); - domain->priv = NULL; -} - -static int gart_iommu_map(struct iommu_domain *domain, unsigned long iova, - phys_addr_t pa, size_t bytes, int prot) -{ - struct gart_device *gart = domain->priv; - unsigned long flags; - unsigned long pfn; - - if (!gart_iova_range_valid(gart, iova, bytes)) - return -EINVAL; - - spin_lock_irqsave(&gart->pte_lock, flags); - pfn = __phys_to_pfn(pa); - if (!pfn_valid(pfn)) { - dev_err(gart->dev, "Invalid page: %08x\n", pa); - spin_unlock_irqrestore(&gart->pte_lock, flags); - return -EINVAL; - } - gart_set_pte(gart, iova, GART_PTE(pfn)); - FLUSH_GART_REGS(gart); - spin_unlock_irqrestore(&gart->pte_lock, flags); - return 0; -} - -static size_t gart_iommu_unmap(struct iommu_domain *domain, unsigned long iova, - size_t bytes) 
-{ - struct gart_device *gart = domain->priv; - unsigned long flags; - - if (!gart_iova_range_valid(gart, iova, bytes)) - return 0; - - spin_lock_irqsave(&gart->pte_lock, flags); - gart_set_pte(gart, iova, 0); - FLUSH_GART_REGS(gart); - spin_unlock_irqrestore(&gart->pte_lock, flags); - return 0; -} - -static phys_addr_t gart_iommu_iova_to_phys(struct iommu_domain *domain, - unsigned long iova) -{ - struct gart_device *gart = domain->priv; - unsigned long pte; - phys_addr_t pa; - unsigned long flags; - - if (!gart_iova_range_valid(gart, iova, 0)) - return -EINVAL; - - spin_lock_irqsave(&gart->pte_lock, flags); - pte = gart_read_pte(gart, iova); - spin_unlock_irqrestore(&gart->pte_lock, flags); - - pa = (pte & GART_PAGE_MASK); - if (!pfn_valid(__phys_to_pfn(pa))) { - dev_err(gart->dev, "No entry for %08lx:%08x\n", iova, pa); - gart_dump_table(gart); - return -EINVAL; - } - return pa; -} - -static int gart_iommu_domain_has_cap(struct iommu_domain *domain, - unsigned long cap) -{ - return 0; -} - -static struct iommu_ops gart_iommu_ops = { - .domain_init = gart_iommu_domain_init, - .domain_destroy = gart_iommu_domain_destroy, - .attach_dev = gart_iommu_attach_dev, - .detach_dev = gart_iommu_detach_dev, - .map = gart_iommu_map, - .unmap = gart_iommu_unmap, - .iova_to_phys = gart_iommu_iova_to_phys, - .domain_has_cap = gart_iommu_domain_has_cap, - .pgsize_bitmap = GART_IOMMU_PGSIZES, -}; - -static int tegra_gart_suspend(struct device *dev) -{ - struct gart_device *gart = dev_get_drvdata(dev); - unsigned long iova; - u32 *data = gart->savedata; - unsigned long flags; - - spin_lock_irqsave(&gart->pte_lock, flags); - for_each_gart_pte(gart, iova) - *(data++) = gart_read_pte(gart, iova); - spin_unlock_irqrestore(&gart->pte_lock, flags); - return 0; -} - -static int tegra_gart_resume(struct device *dev) -{ - struct gart_device *gart = dev_get_drvdata(dev); - unsigned long flags; - - spin_lock_irqsave(&gart->pte_lock, flags); - do_gart_setup(gart, gart->savedata); - 
spin_unlock_irqrestore(&gart->pte_lock, flags); - return 0; -} - -static int tegra_gart_probe(struct platform_device *pdev) -{ - struct gart_device *gart; - struct resource *res, *res_remap; - void __iomem *gart_regs; - int err; - struct device *dev = &pdev->dev; - - if (gart_handle) - return -EIO; - - BUILD_BUG_ON(PAGE_SHIFT != GART_PAGE_SHIFT); - - /* the GART memory aperture is required */ - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - res_remap = platform_get_resource(pdev, IORESOURCE_MEM, 1); - if (!res || !res_remap) { - dev_err(dev, "GART memory aperture expected\n"); - return -ENXIO; - } - - gart = devm_kzalloc(dev, sizeof(*gart), GFP_KERNEL); - if (!gart) { - dev_err(dev, "failed to allocate gart_device\n"); - return -ENOMEM; - } - - gart_regs = devm_ioremap(dev, res->start, resource_size(res)); - if (!gart_regs) { - dev_err(dev, "failed to remap GART registers\n"); - err = -ENXIO; - goto fail; - } - - gart->dev = &pdev->dev; - spin_lock_init(&gart->pte_lock); - spin_lock_init(&gart->client_lock); - INIT_LIST_HEAD(&gart->client); - gart->regs = gart_regs; - gart->iovmm_base = (dma_addr_t)res_remap->start; - gart->page_count = (resource_size(res_remap) >> GART_PAGE_SHIFT); - - gart->savedata = vmalloc(sizeof(u32) * gart->page_count); - if (!gart->savedata) { - dev_err(dev, "failed to allocate context save area\n"); - err = -ENOMEM; - goto fail; - } - - platform_set_drvdata(pdev, gart); - do_gart_setup(gart, NULL); - - gart_handle = gart; - return 0; - -fail: - if (gart_regs) - devm_iounmap(dev, gart_regs); - if (gart && gart->savedata) - vfree(gart->savedata); - devm_kfree(dev, gart); - return err; -} - -static int tegra_gart_remove(struct platform_device *pdev) -{ - struct gart_device *gart = platform_get_drvdata(pdev); - struct device *dev = gart->dev; - - writel(0, gart->regs + GART_CONFIG); - if (gart->savedata) - vfree(gart->savedata); - if (gart->regs) - devm_iounmap(dev, gart->regs); - devm_kfree(dev, gart); - gart_handle = NULL; - return 
0; -} - -const struct dev_pm_ops tegra_gart_pm_ops = { - .suspend = tegra_gart_suspend, - .resume = tegra_gart_resume, -}; - -static struct platform_driver tegra_gart_driver = { - .probe = tegra_gart_probe, - .remove = tegra_gart_remove, - .driver = { - .owner = THIS_MODULE, - .name = "tegra-gart", - .pm = &tegra_gart_pm_ops, - }, -}; - -static int __devinit tegra_gart_init(void) -{ - bus_set_iommu(&platform_bus_type, &gart_iommu_ops); - return platform_driver_register(&tegra_gart_driver); -} - -static void __exit tegra_gart_exit(void) -{ - platform_driver_unregister(&tegra_gart_driver); -} - -subsys_initcall(tegra_gart_init); -module_exit(tegra_gart_exit); - -MODULE_DESCRIPTION("IOMMU API for GART in Tegra20"); -MODULE_AUTHOR("Hiroshi DOYU "); -MODULE_LICENSE("GPL v2"); diff --git a/trunk/drivers/iommu/tegra-smmu.c b/trunk/drivers/iommu/tegra-smmu.c deleted file mode 100644 index eb93c821f592..000000000000 --- a/trunk/drivers/iommu/tegra-smmu.c +++ /dev/null @@ -1,1034 +0,0 @@ -/* - * IOMMU API for SMMU in Tegra30 - * - * Copyright (c) 2011-2012, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. 
- */ - -#define pr_fmt(fmt) "%s(): " fmt, __func__ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include -#include - -/* bitmap of the page sizes currently supported */ -#define SMMU_IOMMU_PGSIZES (SZ_4K) - -#define SMMU_CONFIG 0x10 -#define SMMU_CONFIG_DISABLE 0 -#define SMMU_CONFIG_ENABLE 1 - -#define SMMU_TLB_CONFIG 0x14 -#define SMMU_TLB_CONFIG_STATS__MASK (1 << 31) -#define SMMU_TLB_CONFIG_STATS__ENABLE (1 << 31) -#define SMMU_TLB_CONFIG_HIT_UNDER_MISS__ENABLE (1 << 29) -#define SMMU_TLB_CONFIG_ACTIVE_LINES__VALUE 0x10 -#define SMMU_TLB_CONFIG_RESET_VAL 0x20000010 - -#define SMMU_PTC_CONFIG 0x18 -#define SMMU_PTC_CONFIG_STATS__MASK (1 << 31) -#define SMMU_PTC_CONFIG_STATS__ENABLE (1 << 31) -#define SMMU_PTC_CONFIG_CACHE__ENABLE (1 << 29) -#define SMMU_PTC_CONFIG_INDEX_MAP__PATTERN 0x3f -#define SMMU_PTC_CONFIG_RESET_VAL 0x2000003f - -#define SMMU_PTB_ASID 0x1c -#define SMMU_PTB_ASID_CURRENT_SHIFT 0 - -#define SMMU_PTB_DATA 0x20 -#define SMMU_PTB_DATA_RESET_VAL 0 -#define SMMU_PTB_DATA_ASID_NONSECURE_SHIFT 29 -#define SMMU_PTB_DATA_ASID_WRITABLE_SHIFT 30 -#define SMMU_PTB_DATA_ASID_READABLE_SHIFT 31 - -#define SMMU_TLB_FLUSH 0x30 -#define SMMU_TLB_FLUSH_VA_MATCH_ALL 0 -#define SMMU_TLB_FLUSH_VA_MATCH_SECTION 2 -#define SMMU_TLB_FLUSH_VA_MATCH_GROUP 3 -#define SMMU_TLB_FLUSH_ASID_SHIFT 29 -#define SMMU_TLB_FLUSH_ASID_MATCH_DISABLE 0 -#define SMMU_TLB_FLUSH_ASID_MATCH_ENABLE 1 -#define SMMU_TLB_FLUSH_ASID_MATCH_SHIFT 31 - -#define SMMU_PTC_FLUSH 0x34 -#define SMMU_PTC_FLUSH_TYPE_ALL 0 -#define SMMU_PTC_FLUSH_TYPE_ADR 1 -#define SMMU_PTC_FLUSH_ADR_SHIFT 4 - -#define SMMU_ASID_SECURITY 0x38 - -#define SMMU_STATS_TLB_HIT_COUNT 0x1f0 -#define SMMU_STATS_TLB_MISS_COUNT 0x1f4 -#define SMMU_STATS_PTC_HIT_COUNT 0x1f8 -#define SMMU_STATS_PTC_MISS_COUNT 0x1fc - -#define SMMU_TRANSLATION_ENABLE_0 0x228 -#define SMMU_TRANSLATION_ENABLE_1 0x22c -#define SMMU_TRANSLATION_ENABLE_2 0x230 
- -#define SMMU_AFI_ASID 0x238 /* PCIE */ -#define SMMU_AVPC_ASID 0x23c /* AVP */ -#define SMMU_DC_ASID 0x240 /* Display controller */ -#define SMMU_DCB_ASID 0x244 /* Display controller B */ -#define SMMU_EPP_ASID 0x248 /* Encoder pre-processor */ -#define SMMU_G2_ASID 0x24c /* 2D engine */ -#define SMMU_HC_ASID 0x250 /* Host1x */ -#define SMMU_HDA_ASID 0x254 /* High-def audio */ -#define SMMU_ISP_ASID 0x258 /* Image signal processor */ -#define SMMU_MPE_ASID 0x264 /* MPEG encoder */ -#define SMMU_NV_ASID 0x268 /* (3D) */ -#define SMMU_NV2_ASID 0x26c /* (3D) */ -#define SMMU_PPCS_ASID 0x270 /* AHB */ -#define SMMU_SATA_ASID 0x278 /* SATA */ -#define SMMU_VDE_ASID 0x27c /* Video decoder */ -#define SMMU_VI_ASID 0x280 /* Video input */ - -#define SMMU_PDE_NEXT_SHIFT 28 - -/* AHB Arbiter Registers */ -#define AHB_XBAR_CTRL 0xe0 -#define AHB_XBAR_CTRL_SMMU_INIT_DONE_DONE 1 -#define AHB_XBAR_CTRL_SMMU_INIT_DONE_SHIFT 17 - -#define SMMU_NUM_ASIDS 4 -#define SMMU_TLB_FLUSH_VA_SECTION__MASK 0xffc00000 -#define SMMU_TLB_FLUSH_VA_SECTION__SHIFT 12 /* right shift */ -#define SMMU_TLB_FLUSH_VA_GROUP__MASK 0xffffc000 -#define SMMU_TLB_FLUSH_VA_GROUP__SHIFT 12 /* right shift */ -#define SMMU_TLB_FLUSH_VA(iova, which) \ - ((((iova) & SMMU_TLB_FLUSH_VA_##which##__MASK) >> \ - SMMU_TLB_FLUSH_VA_##which##__SHIFT) | \ - SMMU_TLB_FLUSH_VA_MATCH_##which) -#define SMMU_PTB_ASID_CUR(n) \ - ((n) << SMMU_PTB_ASID_CURRENT_SHIFT) -#define SMMU_TLB_FLUSH_ASID_MATCH_disable \ - (SMMU_TLB_FLUSH_ASID_MATCH_DISABLE << \ - SMMU_TLB_FLUSH_ASID_MATCH_SHIFT) -#define SMMU_TLB_FLUSH_ASID_MATCH__ENABLE \ - (SMMU_TLB_FLUSH_ASID_MATCH_ENABLE << \ - SMMU_TLB_FLUSH_ASID_MATCH_SHIFT) - -#define SMMU_PAGE_SHIFT 12 -#define SMMU_PAGE_SIZE (1 << SMMU_PAGE_SHIFT) - -#define SMMU_PDIR_COUNT 1024 -#define SMMU_PDIR_SIZE (sizeof(unsigned long) * SMMU_PDIR_COUNT) -#define SMMU_PTBL_COUNT 1024 -#define SMMU_PTBL_SIZE (sizeof(unsigned long) * SMMU_PTBL_COUNT) -#define SMMU_PDIR_SHIFT 12 -#define SMMU_PDE_SHIFT 12 
-#define SMMU_PTE_SHIFT 12 -#define SMMU_PFN_MASK 0x000fffff - -#define SMMU_ADDR_TO_PFN(addr) ((addr) >> 12) -#define SMMU_ADDR_TO_PDN(addr) ((addr) >> 22) -#define SMMU_PDN_TO_ADDR(addr) ((pdn) << 22) - -#define _READABLE (1 << SMMU_PTB_DATA_ASID_READABLE_SHIFT) -#define _WRITABLE (1 << SMMU_PTB_DATA_ASID_WRITABLE_SHIFT) -#define _NONSECURE (1 << SMMU_PTB_DATA_ASID_NONSECURE_SHIFT) -#define _PDE_NEXT (1 << SMMU_PDE_NEXT_SHIFT) -#define _MASK_ATTR (_READABLE | _WRITABLE | _NONSECURE) - -#define _PDIR_ATTR (_READABLE | _WRITABLE | _NONSECURE) - -#define _PDE_ATTR (_READABLE | _WRITABLE | _NONSECURE) -#define _PDE_ATTR_N (_PDE_ATTR | _PDE_NEXT) -#define _PDE_VACANT(pdn) (((pdn) << 10) | _PDE_ATTR) - -#define _PTE_ATTR (_READABLE | _WRITABLE | _NONSECURE) -#define _PTE_VACANT(addr) (((addr) >> SMMU_PAGE_SHIFT) | _PTE_ATTR) - -#define SMMU_MK_PDIR(page, attr) \ - ((page_to_phys(page) >> SMMU_PDIR_SHIFT) | (attr)) -#define SMMU_MK_PDE(page, attr) \ - (unsigned long)((page_to_phys(page) >> SMMU_PDE_SHIFT) | (attr)) -#define SMMU_EX_PTBL_PAGE(pde) \ - pfn_to_page((unsigned long)(pde) & SMMU_PFN_MASK) -#define SMMU_PFN_TO_PTE(pfn, attr) (unsigned long)((pfn) | (attr)) - -#define SMMU_ASID_ENABLE(asid) ((asid) | (1 << 31)) -#define SMMU_ASID_DISABLE 0 -#define SMMU_ASID_ASID(n) ((n) & ~SMMU_ASID_ENABLE(0)) - -#define smmu_client_enable_hwgrp(c, m) smmu_client_set_hwgrp(c, m, 1) -#define smmu_client_disable_hwgrp(c) smmu_client_set_hwgrp(c, 0, 0) -#define __smmu_client_enable_hwgrp(c, m) __smmu_client_set_hwgrp(c, m, 1) -#define __smmu_client_disable_hwgrp(c) __smmu_client_set_hwgrp(c, 0, 0) - -#define HWGRP_INIT(client) [HWGRP_##client] = SMMU_##client##_ASID - -static const u32 smmu_hwgrp_asid_reg[] = { - HWGRP_INIT(AFI), - HWGRP_INIT(AVPC), - HWGRP_INIT(DC), - HWGRP_INIT(DCB), - HWGRP_INIT(EPP), - HWGRP_INIT(G2), - HWGRP_INIT(HC), - HWGRP_INIT(HDA), - HWGRP_INIT(ISP), - HWGRP_INIT(MPE), - HWGRP_INIT(NV), - HWGRP_INIT(NV2), - HWGRP_INIT(PPCS), - HWGRP_INIT(SATA), - 
HWGRP_INIT(VDE), - HWGRP_INIT(VI), -}; -#define HWGRP_ASID_REG(x) (smmu_hwgrp_asid_reg[x]) - -/* - * Per client for address space - */ -struct smmu_client { - struct device *dev; - struct list_head list; - struct smmu_as *as; - u32 hwgrp; -}; - -/* - * Per address space - */ -struct smmu_as { - struct smmu_device *smmu; /* back pointer to container */ - unsigned int asid; - spinlock_t lock; /* for pagetable */ - struct page *pdir_page; - unsigned long pdir_attr; - unsigned long pde_attr; - unsigned long pte_attr; - unsigned int *pte_count; - - struct list_head client; - spinlock_t client_lock; /* for client list */ -}; - -/* - * Per SMMU device - IOMMU device - */ -struct smmu_device { - void __iomem *regs, *regs_ahbarb; - unsigned long iovmm_base; /* remappable base address */ - unsigned long page_count; /* total remappable size */ - spinlock_t lock; - char *name; - struct device *dev; - int num_as; - struct smmu_as *as; /* Run-time allocated array */ - struct page *avp_vector_page; /* dummy page shared by all AS's */ - - /* - * Register image savers for suspend/resume - */ - unsigned long translation_enable_0; - unsigned long translation_enable_1; - unsigned long translation_enable_2; - unsigned long asid_security; -}; - -static struct smmu_device *smmu_handle; /* unique for a system */ - -/* - * SMMU/AHB register accessors - */ -static inline u32 smmu_read(struct smmu_device *smmu, size_t offs) -{ - return readl(smmu->regs + offs); -} -static inline void smmu_write(struct smmu_device *smmu, u32 val, size_t offs) -{ - writel(val, smmu->regs + offs); -} - -static inline u32 ahb_read(struct smmu_device *smmu, size_t offs) -{ - return readl(smmu->regs_ahbarb + offs); -} -static inline void ahb_write(struct smmu_device *smmu, u32 val, size_t offs) -{ - writel(val, smmu->regs_ahbarb + offs); -} - -#define VA_PAGE_TO_PA(va, page) \ - (page_to_phys(page) + ((unsigned long)(va) & ~PAGE_MASK)) - -#define FLUSH_CPU_DCACHE(va, page, size) \ - do { \ - unsigned long _pa_ = 
VA_PAGE_TO_PA(va, page); \ - __cpuc_flush_dcache_area((void *)(va), (size_t)(size)); \ - outer_flush_range(_pa_, _pa_+(size_t)(size)); \ - } while (0) - -/* - * Any interaction between any block on PPSB and a block on APB or AHB - * must have these read-back barriers to ensure the APB/AHB bus - * transaction is complete before initiating activity on the PPSB - * block. - */ -#define FLUSH_SMMU_REGS(smmu) smmu_read(smmu, SMMU_CONFIG) - -#define smmu_client_hwgrp(c) (u32)((c)->dev->platform_data) - -static int __smmu_client_set_hwgrp(struct smmu_client *c, - unsigned long map, int on) -{ - int i; - struct smmu_as *as = c->as; - u32 val, offs, mask = SMMU_ASID_ENABLE(as->asid); - struct smmu_device *smmu = as->smmu; - - WARN_ON(!on && map); - if (on && !map) - return -EINVAL; - if (!on) - map = smmu_client_hwgrp(c); - - for_each_set_bit(i, &map, HWGRP_COUNT) { - offs = HWGRP_ASID_REG(i); - val = smmu_read(smmu, offs); - if (on) { - if (WARN_ON(val & mask)) - goto err_hw_busy; - val |= mask; - } else { - WARN_ON((val & mask) == mask); - val &= ~mask; - } - smmu_write(smmu, val, offs); - } - FLUSH_SMMU_REGS(smmu); - c->hwgrp = map; - return 0; - -err_hw_busy: - for_each_set_bit(i, &map, HWGRP_COUNT) { - offs = HWGRP_ASID_REG(i); - val = smmu_read(smmu, offs); - val &= ~mask; - smmu_write(smmu, val, offs); - } - return -EBUSY; -} - -static int smmu_client_set_hwgrp(struct smmu_client *c, u32 map, int on) -{ - u32 val; - unsigned long flags; - struct smmu_as *as = c->as; - struct smmu_device *smmu = as->smmu; - - spin_lock_irqsave(&smmu->lock, flags); - val = __smmu_client_set_hwgrp(c, map, on); - spin_unlock_irqrestore(&smmu->lock, flags); - return val; -} - -/* - * Flush all TLB entries and all PTC entries - * Caller must lock smmu - */ -static void smmu_flush_regs(struct smmu_device *smmu, int enable) -{ - u32 val; - - smmu_write(smmu, SMMU_PTC_FLUSH_TYPE_ALL, SMMU_PTC_FLUSH); - FLUSH_SMMU_REGS(smmu); - val = SMMU_TLB_FLUSH_VA_MATCH_ALL | - 
SMMU_TLB_FLUSH_ASID_MATCH_disable; - smmu_write(smmu, val, SMMU_TLB_FLUSH); - - if (enable) - smmu_write(smmu, SMMU_CONFIG_ENABLE, SMMU_CONFIG); - FLUSH_SMMU_REGS(smmu); -} - -static void smmu_setup_regs(struct smmu_device *smmu) -{ - int i; - u32 val; - - for (i = 0; i < smmu->num_as; i++) { - struct smmu_as *as = &smmu->as[i]; - struct smmu_client *c; - - smmu_write(smmu, SMMU_PTB_ASID_CUR(as->asid), SMMU_PTB_ASID); - val = as->pdir_page ? - SMMU_MK_PDIR(as->pdir_page, as->pdir_attr) : - SMMU_PTB_DATA_RESET_VAL; - smmu_write(smmu, val, SMMU_PTB_DATA); - - list_for_each_entry(c, &as->client, list) - __smmu_client_set_hwgrp(c, c->hwgrp, 1); - } - - smmu_write(smmu, smmu->translation_enable_0, SMMU_TRANSLATION_ENABLE_0); - smmu_write(smmu, smmu->translation_enable_1, SMMU_TRANSLATION_ENABLE_1); - smmu_write(smmu, smmu->translation_enable_2, SMMU_TRANSLATION_ENABLE_2); - smmu_write(smmu, smmu->asid_security, SMMU_ASID_SECURITY); - smmu_write(smmu, SMMU_TLB_CONFIG_RESET_VAL, SMMU_TLB_CONFIG); - smmu_write(smmu, SMMU_PTC_CONFIG_RESET_VAL, SMMU_PTC_CONFIG); - - smmu_flush_regs(smmu, 1); - - val = ahb_read(smmu, AHB_XBAR_CTRL); - val |= AHB_XBAR_CTRL_SMMU_INIT_DONE_DONE << - AHB_XBAR_CTRL_SMMU_INIT_DONE_SHIFT; - ahb_write(smmu, val, AHB_XBAR_CTRL); -} - -static void flush_ptc_and_tlb(struct smmu_device *smmu, - struct smmu_as *as, dma_addr_t iova, - unsigned long *pte, struct page *page, int is_pde) -{ - u32 val; - unsigned long tlb_flush_va = is_pde - ? 
SMMU_TLB_FLUSH_VA(iova, SECTION) - : SMMU_TLB_FLUSH_VA(iova, GROUP); - - val = SMMU_PTC_FLUSH_TYPE_ADR | VA_PAGE_TO_PA(pte, page); - smmu_write(smmu, val, SMMU_PTC_FLUSH); - FLUSH_SMMU_REGS(smmu); - val = tlb_flush_va | - SMMU_TLB_FLUSH_ASID_MATCH__ENABLE | - (as->asid << SMMU_TLB_FLUSH_ASID_SHIFT); - smmu_write(smmu, val, SMMU_TLB_FLUSH); - FLUSH_SMMU_REGS(smmu); -} - -static void free_ptbl(struct smmu_as *as, dma_addr_t iova) -{ - unsigned long pdn = SMMU_ADDR_TO_PDN(iova); - unsigned long *pdir = (unsigned long *)page_address(as->pdir_page); - - if (pdir[pdn] != _PDE_VACANT(pdn)) { - dev_dbg(as->smmu->dev, "pdn: %lx\n", pdn); - - ClearPageReserved(SMMU_EX_PTBL_PAGE(pdir[pdn])); - __free_page(SMMU_EX_PTBL_PAGE(pdir[pdn])); - pdir[pdn] = _PDE_VACANT(pdn); - FLUSH_CPU_DCACHE(&pdir[pdn], as->pdir_page, sizeof pdir[pdn]); - flush_ptc_and_tlb(as->smmu, as, iova, &pdir[pdn], - as->pdir_page, 1); - } -} - -static void free_pdir(struct smmu_as *as) -{ - unsigned addr; - int count; - struct device *dev = as->smmu->dev; - - if (!as->pdir_page) - return; - - addr = as->smmu->iovmm_base; - count = as->smmu->page_count; - while (count-- > 0) { - free_ptbl(as, addr); - addr += SMMU_PAGE_SIZE * SMMU_PTBL_COUNT; - } - ClearPageReserved(as->pdir_page); - __free_page(as->pdir_page); - as->pdir_page = NULL; - devm_kfree(dev, as->pte_count); - as->pte_count = NULL; -} - -/* - * Maps PTBL for given iova and returns the PTE address - * Caller must unmap the mapped PTBL returned in *ptbl_page_p - */ -static unsigned long *locate_pte(struct smmu_as *as, - dma_addr_t iova, bool allocate, - struct page **ptbl_page_p, - unsigned int **count) -{ - unsigned long ptn = SMMU_ADDR_TO_PFN(iova); - unsigned long pdn = SMMU_ADDR_TO_PDN(iova); - unsigned long *pdir = page_address(as->pdir_page); - unsigned long *ptbl; - - if (pdir[pdn] != _PDE_VACANT(pdn)) { - /* Mapped entry table already exists */ - *ptbl_page_p = SMMU_EX_PTBL_PAGE(pdir[pdn]); - ptbl = page_address(*ptbl_page_p); - } else if 
(!allocate) { - return NULL; - } else { - int pn; - unsigned long addr = SMMU_PDN_TO_ADDR(pdn); - - /* Vacant - allocate a new page table */ - dev_dbg(as->smmu->dev, "New PTBL pdn: %lx\n", pdn); - - *ptbl_page_p = alloc_page(GFP_ATOMIC); - if (!*ptbl_page_p) { - dev_err(as->smmu->dev, - "failed to allocate smmu_device page table\n"); - return NULL; - } - SetPageReserved(*ptbl_page_p); - ptbl = (unsigned long *)page_address(*ptbl_page_p); - for (pn = 0; pn < SMMU_PTBL_COUNT; - pn++, addr += SMMU_PAGE_SIZE) { - ptbl[pn] = _PTE_VACANT(addr); - } - FLUSH_CPU_DCACHE(ptbl, *ptbl_page_p, SMMU_PTBL_SIZE); - pdir[pdn] = SMMU_MK_PDE(*ptbl_page_p, - as->pde_attr | _PDE_NEXT); - FLUSH_CPU_DCACHE(&pdir[pdn], as->pdir_page, sizeof pdir[pdn]); - flush_ptc_and_tlb(as->smmu, as, iova, &pdir[pdn], - as->pdir_page, 1); - } - *count = &as->pte_count[pdn]; - - return &ptbl[ptn % SMMU_PTBL_COUNT]; -} - -#ifdef CONFIG_SMMU_SIG_DEBUG -static void put_signature(struct smmu_as *as, - dma_addr_t iova, unsigned long pfn) -{ - struct page *page; - unsigned long *vaddr; - - page = pfn_to_page(pfn); - vaddr = page_address(page); - if (!vaddr) - return; - - vaddr[0] = iova; - vaddr[1] = pfn << PAGE_SHIFT; - FLUSH_CPU_DCACHE(vaddr, page, sizeof(vaddr[0]) * 2); -} -#else -static inline void put_signature(struct smmu_as *as, - unsigned long addr, unsigned long pfn) -{ -} -#endif - -/* - * Caller must lock/unlock as - */ -static int alloc_pdir(struct smmu_as *as) -{ - unsigned long *pdir; - int pdn; - u32 val; - struct smmu_device *smmu = as->smmu; - - if (as->pdir_page) - return 0; - - as->pte_count = devm_kzalloc(smmu->dev, - sizeof(as->pte_count[0]) * SMMU_PDIR_COUNT, GFP_KERNEL); - if (!as->pte_count) { - dev_err(smmu->dev, - "failed to allocate smmu_device PTE cunters\n"); - return -ENOMEM; - } - as->pdir_page = alloc_page(GFP_KERNEL | __GFP_DMA); - if (!as->pdir_page) { - dev_err(smmu->dev, - "failed to allocate smmu_device page directory\n"); - devm_kfree(smmu->dev, as->pte_count); - 
as->pte_count = NULL; - return -ENOMEM; - } - SetPageReserved(as->pdir_page); - pdir = page_address(as->pdir_page); - - for (pdn = 0; pdn < SMMU_PDIR_COUNT; pdn++) - pdir[pdn] = _PDE_VACANT(pdn); - FLUSH_CPU_DCACHE(pdir, as->pdir_page, SMMU_PDIR_SIZE); - val = SMMU_PTC_FLUSH_TYPE_ADR | VA_PAGE_TO_PA(pdir, as->pdir_page); - smmu_write(smmu, val, SMMU_PTC_FLUSH); - FLUSH_SMMU_REGS(as->smmu); - val = SMMU_TLB_FLUSH_VA_MATCH_ALL | - SMMU_TLB_FLUSH_ASID_MATCH__ENABLE | - (as->asid << SMMU_TLB_FLUSH_ASID_SHIFT); - smmu_write(smmu, val, SMMU_TLB_FLUSH); - FLUSH_SMMU_REGS(as->smmu); - - return 0; -} - -static void __smmu_iommu_unmap(struct smmu_as *as, dma_addr_t iova) -{ - unsigned long *pte; - struct page *page; - unsigned int *count; - - pte = locate_pte(as, iova, false, &page, &count); - if (WARN_ON(!pte)) - return; - - if (WARN_ON(*pte == _PTE_VACANT(iova))) - return; - - *pte = _PTE_VACANT(iova); - FLUSH_CPU_DCACHE(pte, page, sizeof(*pte)); - flush_ptc_and_tlb(as->smmu, as, iova, pte, page, 0); - if (!--(*count)) { - free_ptbl(as, iova); - smmu_flush_regs(as->smmu, 0); - } -} - -static void __smmu_iommu_map_pfn(struct smmu_as *as, dma_addr_t iova, - unsigned long pfn) -{ - struct smmu_device *smmu = as->smmu; - unsigned long *pte; - unsigned int *count; - struct page *page; - - pte = locate_pte(as, iova, true, &page, &count); - if (WARN_ON(!pte)) - return; - - if (*pte == _PTE_VACANT(iova)) - (*count)++; - *pte = SMMU_PFN_TO_PTE(pfn, as->pte_attr); - if (unlikely((*pte == _PTE_VACANT(iova)))) - (*count)--; - FLUSH_CPU_DCACHE(pte, page, sizeof(*pte)); - flush_ptc_and_tlb(smmu, as, iova, pte, page, 0); - put_signature(as, iova, pfn); -} - -static int smmu_iommu_map(struct iommu_domain *domain, unsigned long iova, - phys_addr_t pa, size_t bytes, int prot) -{ - struct smmu_as *as = domain->priv; - unsigned long pfn = __phys_to_pfn(pa); - unsigned long flags; - - dev_dbg(as->smmu->dev, "[%d] %08lx:%08x\n", as->asid, iova, pa); - - if (!pfn_valid(pfn)) - return -ENOMEM; - 
- spin_lock_irqsave(&as->lock, flags); - __smmu_iommu_map_pfn(as, iova, pfn); - spin_unlock_irqrestore(&as->lock, flags); - return 0; -} - -static size_t smmu_iommu_unmap(struct iommu_domain *domain, unsigned long iova, - size_t bytes) -{ - struct smmu_as *as = domain->priv; - unsigned long flags; - - dev_dbg(as->smmu->dev, "[%d] %08lx\n", as->asid, iova); - - spin_lock_irqsave(&as->lock, flags); - __smmu_iommu_unmap(as, iova); - spin_unlock_irqrestore(&as->lock, flags); - return SMMU_PAGE_SIZE; -} - -static phys_addr_t smmu_iommu_iova_to_phys(struct iommu_domain *domain, - unsigned long iova) -{ - struct smmu_as *as = domain->priv; - unsigned long *pte; - unsigned int *count; - struct page *page; - unsigned long pfn; - unsigned long flags; - - spin_lock_irqsave(&as->lock, flags); - - pte = locate_pte(as, iova, true, &page, &count); - pfn = *pte & SMMU_PFN_MASK; - WARN_ON(!pfn_valid(pfn)); - dev_dbg(as->smmu->dev, - "iova:%08lx pfn:%08lx asid:%d\n", iova, pfn, as->asid); - - spin_unlock_irqrestore(&as->lock, flags); - return PFN_PHYS(pfn); -} - -static int smmu_iommu_domain_has_cap(struct iommu_domain *domain, - unsigned long cap) -{ - return 0; -} - -static int smmu_iommu_attach_dev(struct iommu_domain *domain, - struct device *dev) -{ - struct smmu_as *as = domain->priv; - struct smmu_device *smmu = as->smmu; - struct smmu_client *client, *c; - u32 map; - int err; - - client = devm_kzalloc(smmu->dev, sizeof(*c), GFP_KERNEL); - if (!client) - return -ENOMEM; - client->dev = dev; - client->as = as; - map = (unsigned long)dev->platform_data; - if (!map) - return -EINVAL; - - err = smmu_client_enable_hwgrp(client, map); - if (err) - goto err_hwgrp; - - spin_lock(&as->client_lock); - list_for_each_entry(c, &as->client, list) { - if (c->dev == dev) { - dev_err(smmu->dev, - "%s is already attached\n", dev_name(c->dev)); - err = -EINVAL; - goto err_client; - } - } - list_add(&client->list, &as->client); - spin_unlock(&as->client_lock); - - /* - * Reserve "page zero" for 
AVP vectors using a common dummy - * page. - */ - if (map & HWG_AVPC) { - struct page *page; - - page = as->smmu->avp_vector_page; - __smmu_iommu_map_pfn(as, 0, page_to_pfn(page)); - - pr_info("Reserve \"page zero\" for AVP vectors using a common dummy\n"); - } - - dev_dbg(smmu->dev, "%s is attached\n", dev_name(c->dev)); - return 0; - -err_client: - smmu_client_disable_hwgrp(client); - spin_unlock(&as->client_lock); -err_hwgrp: - devm_kfree(smmu->dev, client); - return err; -} - -static void smmu_iommu_detach_dev(struct iommu_domain *domain, - struct device *dev) -{ - struct smmu_as *as = domain->priv; - struct smmu_device *smmu = as->smmu; - struct smmu_client *c; - - spin_lock(&as->client_lock); - - list_for_each_entry(c, &as->client, list) { - if (c->dev == dev) { - smmu_client_disable_hwgrp(c); - list_del(&c->list); - devm_kfree(smmu->dev, c); - c->as = NULL; - dev_dbg(smmu->dev, - "%s is detached\n", dev_name(c->dev)); - goto out; - } - } - dev_err(smmu->dev, "Couldn't find %s\n", dev_name(c->dev)); -out: - spin_unlock(&as->client_lock); -} - -static int smmu_iommu_domain_init(struct iommu_domain *domain) -{ - int i; - unsigned long flags; - struct smmu_as *as; - struct smmu_device *smmu = smmu_handle; - - /* Look for a free AS with lock held */ - for (i = 0; i < smmu->num_as; i++) { - struct smmu_as *tmp = &smmu->as[i]; - - spin_lock_irqsave(&tmp->lock, flags); - if (!tmp->pdir_page) { - as = tmp; - goto found; - } - spin_unlock_irqrestore(&tmp->lock, flags); - } - dev_err(smmu->dev, "no free AS\n"); - return -ENODEV; - -found: - if (alloc_pdir(as) < 0) - goto err_alloc_pdir; - - spin_lock(&smmu->lock); - - /* Update PDIR register */ - smmu_write(smmu, SMMU_PTB_ASID_CUR(as->asid), SMMU_PTB_ASID); - smmu_write(smmu, - SMMU_MK_PDIR(as->pdir_page, as->pdir_attr), SMMU_PTB_DATA); - FLUSH_SMMU_REGS(smmu); - - spin_unlock(&smmu->lock); - - spin_unlock_irqrestore(&as->lock, flags); - domain->priv = as; - - dev_dbg(smmu->dev, "smmu_as@%p\n", as); - return 0; - 
-err_alloc_pdir: - spin_unlock_irqrestore(&as->lock, flags); - return -ENODEV; -} - -static void smmu_iommu_domain_destroy(struct iommu_domain *domain) -{ - struct smmu_as *as = domain->priv; - struct smmu_device *smmu = as->smmu; - unsigned long flags; - - spin_lock_irqsave(&as->lock, flags); - - if (as->pdir_page) { - spin_lock(&smmu->lock); - smmu_write(smmu, SMMU_PTB_ASID_CUR(as->asid), SMMU_PTB_ASID); - smmu_write(smmu, SMMU_PTB_DATA_RESET_VAL, SMMU_PTB_DATA); - FLUSH_SMMU_REGS(smmu); - spin_unlock(&smmu->lock); - - free_pdir(as); - } - - if (!list_empty(&as->client)) { - struct smmu_client *c; - - list_for_each_entry(c, &as->client, list) - smmu_iommu_detach_dev(domain, c->dev); - } - - spin_unlock_irqrestore(&as->lock, flags); - - domain->priv = NULL; - dev_dbg(smmu->dev, "smmu_as@%p\n", as); -} - -static struct iommu_ops smmu_iommu_ops = { - .domain_init = smmu_iommu_domain_init, - .domain_destroy = smmu_iommu_domain_destroy, - .attach_dev = smmu_iommu_attach_dev, - .detach_dev = smmu_iommu_detach_dev, - .map = smmu_iommu_map, - .unmap = smmu_iommu_unmap, - .iova_to_phys = smmu_iommu_iova_to_phys, - .domain_has_cap = smmu_iommu_domain_has_cap, - .pgsize_bitmap = SMMU_IOMMU_PGSIZES, -}; - -static int tegra_smmu_suspend(struct device *dev) -{ - struct smmu_device *smmu = dev_get_drvdata(dev); - - smmu->translation_enable_0 = smmu_read(smmu, SMMU_TRANSLATION_ENABLE_0); - smmu->translation_enable_1 = smmu_read(smmu, SMMU_TRANSLATION_ENABLE_1); - smmu->translation_enable_2 = smmu_read(smmu, SMMU_TRANSLATION_ENABLE_2); - smmu->asid_security = smmu_read(smmu, SMMU_ASID_SECURITY); - return 0; -} - -static int tegra_smmu_resume(struct device *dev) -{ - struct smmu_device *smmu = dev_get_drvdata(dev); - unsigned long flags; - - spin_lock_irqsave(&smmu->lock, flags); - smmu_setup_regs(smmu); - spin_unlock_irqrestore(&smmu->lock, flags); - return 0; -} - -static int tegra_smmu_probe(struct platform_device *pdev) -{ - struct smmu_device *smmu; - struct resource *regs, 
*regs2, *window; - struct device *dev = &pdev->dev; - int i, err = 0; - - if (smmu_handle) - return -EIO; - - BUILD_BUG_ON(PAGE_SHIFT != SMMU_PAGE_SHIFT); - - regs = platform_get_resource(pdev, IORESOURCE_MEM, 0); - regs2 = platform_get_resource(pdev, IORESOURCE_MEM, 1); - window = platform_get_resource(pdev, IORESOURCE_MEM, 2); - if (!regs || !regs2 || !window) { - dev_err(dev, "No SMMU resources\n"); - return -ENODEV; - } - - smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL); - if (!smmu) { - dev_err(dev, "failed to allocate smmu_device\n"); - return -ENOMEM; - } - - smmu->dev = dev; - smmu->num_as = SMMU_NUM_ASIDS; - smmu->iovmm_base = (unsigned long)window->start; - smmu->page_count = resource_size(window) >> SMMU_PAGE_SHIFT; - smmu->regs = devm_ioremap(dev, regs->start, resource_size(regs)); - smmu->regs_ahbarb = devm_ioremap(dev, regs2->start, - resource_size(regs2)); - if (!smmu->regs || !smmu->regs_ahbarb) { - dev_err(dev, "failed to remap SMMU registers\n"); - err = -ENXIO; - goto fail; - } - - smmu->translation_enable_0 = ~0; - smmu->translation_enable_1 = ~0; - smmu->translation_enable_2 = ~0; - smmu->asid_security = 0; - - smmu->as = devm_kzalloc(dev, - sizeof(smmu->as[0]) * smmu->num_as, GFP_KERNEL); - if (!smmu->as) { - dev_err(dev, "failed to allocate smmu_as\n"); - err = -ENOMEM; - goto fail; - } - - for (i = 0; i < smmu->num_as; i++) { - struct smmu_as *as = &smmu->as[i]; - - as->smmu = smmu; - as->asid = i; - as->pdir_attr = _PDIR_ATTR; - as->pde_attr = _PDE_ATTR; - as->pte_attr = _PTE_ATTR; - - spin_lock_init(&as->lock); - INIT_LIST_HEAD(&as->client); - } - spin_lock_init(&smmu->lock); - smmu_setup_regs(smmu); - platform_set_drvdata(pdev, smmu); - - smmu->avp_vector_page = alloc_page(GFP_KERNEL); - if (!smmu->avp_vector_page) - goto fail; - - smmu_handle = smmu; - return 0; - -fail: - if (smmu->avp_vector_page) - __free_page(smmu->avp_vector_page); - if (smmu->regs) - devm_iounmap(dev, smmu->regs); - if (smmu->regs_ahbarb) - devm_iounmap(dev, 
smmu->regs_ahbarb); - if (smmu && smmu->as) { - for (i = 0; i < smmu->num_as; i++) { - if (smmu->as[i].pdir_page) { - ClearPageReserved(smmu->as[i].pdir_page); - __free_page(smmu->as[i].pdir_page); - } - } - devm_kfree(dev, smmu->as); - } - devm_kfree(dev, smmu); - return err; -} - -static int tegra_smmu_remove(struct platform_device *pdev) -{ - struct smmu_device *smmu = platform_get_drvdata(pdev); - struct device *dev = smmu->dev; - - smmu_write(smmu, SMMU_CONFIG_DISABLE, SMMU_CONFIG); - platform_set_drvdata(pdev, NULL); - if (smmu->as) { - int i; - - for (i = 0; i < smmu->num_as; i++) - free_pdir(&smmu->as[i]); - devm_kfree(dev, smmu->as); - } - if (smmu->avp_vector_page) - __free_page(smmu->avp_vector_page); - if (smmu->regs) - devm_iounmap(dev, smmu->regs); - if (smmu->regs_ahbarb) - devm_iounmap(dev, smmu->regs_ahbarb); - devm_kfree(dev, smmu); - smmu_handle = NULL; - return 0; -} - -const struct dev_pm_ops tegra_smmu_pm_ops = { - .suspend = tegra_smmu_suspend, - .resume = tegra_smmu_resume, -}; - -static struct platform_driver tegra_smmu_driver = { - .probe = tegra_smmu_probe, - .remove = tegra_smmu_remove, - .driver = { - .owner = THIS_MODULE, - .name = "tegra-smmu", - .pm = &tegra_smmu_pm_ops, - }, -}; - -static int __devinit tegra_smmu_init(void) -{ - bus_set_iommu(&platform_bus_type, &smmu_iommu_ops); - return platform_driver_register(&tegra_smmu_driver); -} - -static void __exit tegra_smmu_exit(void) -{ - platform_driver_unregister(&tegra_smmu_driver); -} - -subsys_initcall(tegra_smmu_init); -module_exit(tegra_smmu_exit); - -MODULE_DESCRIPTION("IOMMU API for SMMU in Tegra30"); -MODULE_AUTHOR("Hiroshi DOYU "); -MODULE_LICENSE("GPL v2"); diff --git a/trunk/drivers/message/fusion/mptbase.c b/trunk/drivers/message/fusion/mptbase.c index a5c591ffe395..a7dc4672d996 100644 --- a/trunk/drivers/message/fusion/mptbase.c +++ b/trunk/drivers/message/fusion/mptbase.c @@ -346,7 +346,7 @@ static int mpt_remove_dead_ioc_func(void *arg) if ((pdev == NULL)) return -1; - 
pci_stop_and_remove_bus_device(pdev); + pci_remove_bus_device(pdev); return 0; } diff --git a/trunk/drivers/mtd/ubi/build.c b/trunk/drivers/mtd/ubi/build.c index 0fde9fc7d2e5..115749f20f9e 100644 --- a/trunk/drivers/mtd/ubi/build.c +++ b/trunk/drivers/mtd/ubi/build.c @@ -945,8 +945,12 @@ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, int vid_hdr_offset) goto out_free; err = -ENOMEM; - ubi->peb_buf = vmalloc(ubi->peb_size); - if (!ubi->peb_buf) + ubi->peb_buf1 = vmalloc(ubi->peb_size); + if (!ubi->peb_buf1) + goto out_free; + + ubi->peb_buf2 = vmalloc(ubi->peb_size); + if (!ubi->peb_buf2) goto out_free; err = ubi_debugging_init_dev(ubi); @@ -1025,7 +1029,8 @@ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, int vid_hdr_offset) out_debugging: ubi_debugging_exit_dev(ubi); out_free: - vfree(ubi->peb_buf); + vfree(ubi->peb_buf1); + vfree(ubi->peb_buf2); if (ref) put_device(&ubi->dev); else @@ -1096,7 +1101,8 @@ int ubi_detach_mtd_dev(int ubi_num, int anyway) vfree(ubi->vtbl); put_mtd_device(ubi->mtd); ubi_debugging_exit_dev(ubi); - vfree(ubi->peb_buf); + vfree(ubi->peb_buf1); + vfree(ubi->peb_buf2); ubi_msg("mtd%d is detached from ubi%d", ubi->mtd->index, ubi->ubi_num); put_device(&ubi->dev); return 0; diff --git a/trunk/drivers/mtd/ubi/eba.c b/trunk/drivers/mtd/ubi/eba.c index 2455d620d96b..cd26da8ad225 100644 --- a/trunk/drivers/mtd/ubi/eba.c +++ b/trunk/drivers/mtd/ubi/eba.c @@ -529,18 +529,18 @@ static int recover_peb(struct ubi_device *ubi, int pnum, int vol_id, int lnum, data_size = offset + len; mutex_lock(&ubi->buf_mutex); - memset(ubi->peb_buf + offset, 0xFF, len); + memset(ubi->peb_buf1 + offset, 0xFF, len); /* Read everything before the area where the write failure happened */ if (offset > 0) { - err = ubi_io_read_data(ubi, ubi->peb_buf, pnum, 0, offset); + err = ubi_io_read_data(ubi, ubi->peb_buf1, pnum, 0, offset); if (err && err != UBI_IO_BITFLIPS) goto out_unlock; } - memcpy(ubi->peb_buf + offset, buf, len); + memcpy(ubi->peb_buf1 + 
offset, buf, len); - err = ubi_io_write_data(ubi, ubi->peb_buf, new_pnum, 0, data_size); + err = ubi_io_write_data(ubi, ubi->peb_buf1, new_pnum, 0, data_size); if (err) { mutex_unlock(&ubi->buf_mutex); goto write_error; @@ -979,7 +979,7 @@ static int is_error_sane(int err) * physical eraseblock @to. The @vid_hdr buffer may be changed by this * function. Returns: * o %0 in case of success; - * o %MOVE_CANCEL_RACE, %MOVE_TARGET_WR_ERR, %MOVE_TARGET_BITFLIPS, etc; + * o %MOVE_CANCEL_RACE, %MOVE_TARGET_WR_ERR, %MOVE_CANCEL_BITFLIPS, etc; * o a negative error code in case of failure. */ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to, @@ -1053,13 +1053,13 @@ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to, /* * OK, now the LEB is locked and we can safely start moving it. Since - * this function utilizes the @ubi->peb_buf buffer which is shared + * this function utilizes the @ubi->peb_buf1 buffer which is shared * with some other functions - we lock the buffer by taking the * @ubi->buf_mutex. 
*/ mutex_lock(&ubi->buf_mutex); dbg_wl("read %d bytes of data", aldata_size); - err = ubi_io_read_data(ubi, ubi->peb_buf, from, 0, aldata_size); + err = ubi_io_read_data(ubi, ubi->peb_buf1, from, 0, aldata_size); if (err && err != UBI_IO_BITFLIPS) { ubi_warn("error %d while reading data from PEB %d", err, from); @@ -1079,10 +1079,10 @@ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to, */ if (vid_hdr->vol_type == UBI_VID_DYNAMIC) aldata_size = data_size = - ubi_calc_data_len(ubi, ubi->peb_buf, data_size); + ubi_calc_data_len(ubi, ubi->peb_buf1, data_size); cond_resched(); - crc = crc32(UBI_CRC32_INIT, ubi->peb_buf, data_size); + crc = crc32(UBI_CRC32_INIT, ubi->peb_buf1, data_size); cond_resched(); /* @@ -1116,12 +1116,12 @@ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to, if (is_error_sane(err)) err = MOVE_TARGET_RD_ERR; } else - err = MOVE_TARGET_BITFLIPS; + err = MOVE_CANCEL_BITFLIPS; goto out_unlock_buf; } if (data_size > 0) { - err = ubi_io_write_data(ubi, ubi->peb_buf, to, 0, aldata_size); + err = ubi_io_write_data(ubi, ubi->peb_buf1, to, 0, aldata_size); if (err) { if (err == -EIO) err = MOVE_TARGET_WR_ERR; @@ -1134,8 +1134,8 @@ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to, * We've written the data and are going to read it back to make * sure it was written correctly. 
*/ - memset(ubi->peb_buf, 0xFF, aldata_size); - err = ubi_io_read_data(ubi, ubi->peb_buf, to, 0, aldata_size); + + err = ubi_io_read_data(ubi, ubi->peb_buf2, to, 0, aldata_size); if (err) { if (err != UBI_IO_BITFLIPS) { ubi_warn("error %d while reading data back " @@ -1143,13 +1143,13 @@ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to, if (is_error_sane(err)) err = MOVE_TARGET_RD_ERR; } else - err = MOVE_TARGET_BITFLIPS; + err = MOVE_CANCEL_BITFLIPS; goto out_unlock_buf; } cond_resched(); - if (crc != crc32(UBI_CRC32_INIT, ubi->peb_buf, data_size)) { + if (memcmp(ubi->peb_buf1, ubi->peb_buf2, aldata_size)) { ubi_warn("read data back from PEB %d and it is " "different", to); err = -EINVAL; diff --git a/trunk/drivers/mtd/ubi/io.c b/trunk/drivers/mtd/ubi/io.c index 43f1a0011a55..5cde4e5ca3e5 100644 --- a/trunk/drivers/mtd/ubi/io.c +++ b/trunk/drivers/mtd/ubi/io.c @@ -431,11 +431,11 @@ static int torture_peb(struct ubi_device *ubi, int pnum) goto out; /* Make sure the PEB contains only 0xFF bytes */ - err = ubi_io_read(ubi, ubi->peb_buf, pnum, 0, ubi->peb_size); + err = ubi_io_read(ubi, ubi->peb_buf1, pnum, 0, ubi->peb_size); if (err) goto out; - err = ubi_check_pattern(ubi->peb_buf, 0xFF, ubi->peb_size); + err = ubi_check_pattern(ubi->peb_buf1, 0xFF, ubi->peb_size); if (err == 0) { ubi_err("erased PEB %d, but a non-0xFF byte found", pnum); @@ -444,17 +444,17 @@ static int torture_peb(struct ubi_device *ubi, int pnum) } /* Write a pattern and check it */ - memset(ubi->peb_buf, patterns[i], ubi->peb_size); - err = ubi_io_write(ubi, ubi->peb_buf, pnum, 0, ubi->peb_size); + memset(ubi->peb_buf1, patterns[i], ubi->peb_size); + err = ubi_io_write(ubi, ubi->peb_buf1, pnum, 0, ubi->peb_size); if (err) goto out; - memset(ubi->peb_buf, ~patterns[i], ubi->peb_size); - err = ubi_io_read(ubi, ubi->peb_buf, pnum, 0, ubi->peb_size); + memset(ubi->peb_buf1, ~patterns[i], ubi->peb_size); + err = ubi_io_read(ubi, ubi->peb_buf1, pnum, 0, ubi->peb_size); if (err) goto out; 
- err = ubi_check_pattern(ubi->peb_buf, patterns[i], + err = ubi_check_pattern(ubi->peb_buf1, patterns[i], ubi->peb_size); if (err == 0) { ubi_err("pattern %x checking failed for PEB %d", diff --git a/trunk/drivers/mtd/ubi/scan.c b/trunk/drivers/mtd/ubi/scan.c index 12c43b44f815..0cb17d936b5a 100644 --- a/trunk/drivers/mtd/ubi/scan.c +++ b/trunk/drivers/mtd/ubi/scan.c @@ -789,9 +789,9 @@ static int check_corruption(struct ubi_device *ubi, struct ubi_vid_hdr *vid_hdr, int err; mutex_lock(&ubi->buf_mutex); - memset(ubi->peb_buf, 0x00, ubi->leb_size); + memset(ubi->peb_buf1, 0x00, ubi->leb_size); - err = ubi_io_read(ubi, ubi->peb_buf, pnum, ubi->leb_start, + err = ubi_io_read(ubi, ubi->peb_buf1, pnum, ubi->leb_start, ubi->leb_size); if (err == UBI_IO_BITFLIPS || mtd_is_eccerr(err)) { /* @@ -808,7 +808,7 @@ static int check_corruption(struct ubi_device *ubi, struct ubi_vid_hdr *vid_hdr, if (err) goto out_unlock; - if (ubi_check_pattern(ubi->peb_buf, 0xFF, ubi->leb_size)) + if (ubi_check_pattern(ubi->peb_buf1, 0xFF, ubi->leb_size)) goto out_unlock; ubi_err("PEB %d contains corrupted VID header, and the data does not " @@ -818,7 +818,7 @@ static int check_corruption(struct ubi_device *ubi, struct ubi_vid_hdr *vid_hdr, dbg_msg("hexdump of PEB %d offset %d, length %d", pnum, ubi->leb_start, ubi->leb_size); ubi_dbg_print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1, - ubi->peb_buf, ubi->leb_size, 1); + ubi->peb_buf1, ubi->leb_size, 1); err = 1; out_unlock: @@ -1174,7 +1174,7 @@ struct ubi_scan_info *ubi_scan(struct ubi_device *ubi) ech = kzalloc(ubi->ec_hdr_alsize, GFP_KERNEL); if (!ech) - goto out_si; + goto out_slab; vidh = ubi_zalloc_vid_hdr(ubi, GFP_KERNEL); if (!vidh) @@ -1235,6 +1235,8 @@ struct ubi_scan_info *ubi_scan(struct ubi_device *ubi) ubi_free_vid_hdr(ubi, vidh); out_ech: kfree(ech); +out_slab: + kmem_cache_destroy(si->scan_leb_slab); out_si: ubi_scan_destroy_si(si); return ERR_PTR(err); @@ -1323,9 +1325,7 @@ void ubi_scan_destroy_si(struct ubi_scan_info 
*si) } } - if (si->scan_leb_slab) - kmem_cache_destroy(si->scan_leb_slab); - + kmem_cache_destroy(si->scan_leb_slab); kfree(si); } diff --git a/trunk/drivers/mtd/ubi/ubi.h b/trunk/drivers/mtd/ubi/ubi.h index b162790790a9..d51d75d34446 100644 --- a/trunk/drivers/mtd/ubi/ubi.h +++ b/trunk/drivers/mtd/ubi/ubi.h @@ -118,7 +118,7 @@ enum { * PEB * MOVE_TARGET_WR_ERR: canceled because there was a write error to the target * PEB - * MOVE_TARGET_BITFLIPS: canceled because a bit-flip was detected in the + * MOVE_CANCEL_BITFLIPS: canceled because a bit-flip was detected in the * target PEB * MOVE_RETRY: retry scrubbing the PEB */ @@ -127,7 +127,7 @@ enum { MOVE_SOURCE_RD_ERR, MOVE_TARGET_RD_ERR, MOVE_TARGET_WR_ERR, - MOVE_TARGET_BITFLIPS, + MOVE_CANCEL_BITFLIPS, MOVE_RETRY, }; @@ -387,8 +387,9 @@ struct ubi_wl_entry; * time (MTD write buffer size) * @mtd: MTD device descriptor * - * @peb_buf: a buffer of PEB size used for different purposes - * @buf_mutex: protects @peb_buf + * @peb_buf1: a buffer of PEB size used for different purposes + * @peb_buf2: another buffer of PEB size used for different purposes + * @buf_mutex: protects @peb_buf1 and @peb_buf2 * @ckvol_mutex: serializes static volume checking when opening * * @dbg: debugging information for this UBI device @@ -470,7 +471,8 @@ struct ubi_device { int max_write_size; struct mtd_info *mtd; - void *peb_buf; + void *peb_buf1; + void *peb_buf2; struct mutex buf_mutex; struct mutex ckvol_mutex; diff --git a/trunk/drivers/mtd/ubi/wl.c b/trunk/drivers/mtd/ubi/wl.c index 7c1a9bf8ac86..0696e36b0539 100644 --- a/trunk/drivers/mtd/ubi/wl.c +++ b/trunk/drivers/mtd/ubi/wl.c @@ -350,19 +350,18 @@ static void prot_queue_add(struct ubi_device *ubi, struct ubi_wl_entry *e) /** * find_wl_entry - find wear-leveling entry closest to certain erase counter. 
* @root: the RB-tree where to look for - * @diff: maximum possible difference from the smallest erase counter + * @max: highest possible erase counter * * This function looks for a wear leveling entry with erase counter closest to - * min + @diff, where min is the smallest erase counter. + * @max and less than @max. */ -static struct ubi_wl_entry *find_wl_entry(struct rb_root *root, int diff) +static struct ubi_wl_entry *find_wl_entry(struct rb_root *root, int max) { struct rb_node *p; struct ubi_wl_entry *e; - int max; e = rb_entry(rb_first(root), struct ubi_wl_entry, u.rb); - max = e->ec + diff; + max += e->ec; p = root->rb_node; while (p) { @@ -390,7 +389,7 @@ static struct ubi_wl_entry *find_wl_entry(struct rb_root *root, int diff) */ int ubi_wl_get_peb(struct ubi_device *ubi, int dtype) { - int err; + int err, medium_ec; struct ubi_wl_entry *e, *first, *last; ubi_assert(dtype == UBI_LONGTERM || dtype == UBI_SHORTTERM || @@ -428,7 +427,7 @@ int ubi_wl_get_peb(struct ubi_device *ubi, int dtype) * For unknown data we pick a physical eraseblock with medium * erase counter. But we by no means can pick a physical * eraseblock with erase counter greater or equivalent than the - * lowest erase counter plus %WL_FREE_MAX_DIFF/2. + * lowest erase counter plus %WL_FREE_MAX_DIFF. 
*/ first = rb_entry(rb_first(&ubi->free), struct ubi_wl_entry, u.rb); @@ -437,8 +436,10 @@ int ubi_wl_get_peb(struct ubi_device *ubi, int dtype) if (last->ec - first->ec < WL_FREE_MAX_DIFF) e = rb_entry(ubi->free.rb_node, struct ubi_wl_entry, u.rb); - else - e = find_wl_entry(&ubi->free, WL_FREE_MAX_DIFF/2); + else { + medium_ec = (first->ec + WL_FREE_MAX_DIFF)/2; + e = find_wl_entry(&ubi->free, medium_ec); + } break; case UBI_SHORTTERM: /* @@ -798,7 +799,7 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk, scrubbing = 1; goto out_not_moved; } - if (err == MOVE_TARGET_BITFLIPS || err == MOVE_TARGET_WR_ERR || + if (err == MOVE_CANCEL_BITFLIPS || err == MOVE_TARGET_WR_ERR || err == MOVE_TARGET_RD_ERR) { /* * Target PEB had bit-flips or write error - torture it. diff --git a/trunk/drivers/net/bonding/bond_main.c b/trunk/drivers/net/bonding/bond_main.c index b920d829692a..0730203a19f2 100644 --- a/trunk/drivers/net/bonding/bond_main.c +++ b/trunk/drivers/net/bonding/bond_main.c @@ -2573,16 +2573,12 @@ void bond_mii_monitor(struct work_struct *work) static int bond_has_this_ip(struct bonding *bond, __be32 ip) { struct vlan_entry *vlan; - struct net_device *vlan_dev; - if (ip == bond_confirm_addr(bond->dev, 0, ip)) + if (ip == bond->master_ip) return 1; list_for_each_entry(vlan, &bond->vlan_list, vlan_list) { - rcu_read_lock(); - vlan_dev = __vlan_find_dev_deep(bond->dev, vlan->vlan_id); - rcu_read_unlock(); - if (vlan_dev && ip == bond_confirm_addr(vlan_dev, 0, ip)) + if (ip == vlan->vlan_ip) return 1; } @@ -2624,19 +2620,17 @@ static void bond_arp_send_all(struct bonding *bond, struct slave *slave) int i, vlan_id; __be32 *targets = bond->params.arp_targets; struct vlan_entry *vlan; - struct net_device *vlan_dev = NULL; + struct net_device *vlan_dev; struct rtable *rt; for (i = 0; (i < BOND_MAX_ARP_TARGETS); i++) { - __be32 addr; if (!targets[i]) break; pr_debug("basa: target %x\n", targets[i]); if (!bond_vlan_used(bond)) { pr_debug("basa: 
empty vlan: arp_send\n"); - addr = bond_confirm_addr(bond->dev, targets[i], 0); bond_arp_send(slave->dev, ARPOP_REQUEST, targets[i], - addr, 0); + bond->master_ip, 0); continue; } @@ -2661,9 +2655,8 @@ static void bond_arp_send_all(struct bonding *bond, struct slave *slave) if (rt->dst.dev == bond->dev) { ip_rt_put(rt); pr_debug("basa: rtdev == bond->dev: arp_send\n"); - addr = bond_confirm_addr(bond->dev, targets[i], 0); bond_arp_send(slave->dev, ARPOP_REQUEST, targets[i], - addr, 0); + bond->master_ip, 0); continue; } @@ -2681,11 +2674,10 @@ static void bond_arp_send_all(struct bonding *bond, struct slave *slave) } } - if (vlan_id && vlan_dev) { + if (vlan_id) { ip_rt_put(rt); - addr = bond_confirm_addr(vlan_dev, targets[i], 0); bond_arp_send(slave->dev, ARPOP_REQUEST, targets[i], - addr, vlan_id); + vlan->vlan_ip, vlan_id); continue; } @@ -3307,10 +3299,68 @@ static int bond_netdev_event(struct notifier_block *this, return NOTIFY_DONE; } +/* + * bond_inetaddr_event: handle inetaddr notifier chain events. + * + * We keep track of device IPs primarily to use as source addresses in + * ARP monitor probes (rather than spewing out broadcasts all the time). + * + * We track one IP for the main device (if it has one), plus one per VLAN. 
+ */ +static int bond_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr) +{ + struct in_ifaddr *ifa = ptr; + struct net_device *vlan_dev, *event_dev = ifa->ifa_dev->dev; + struct bond_net *bn = net_generic(dev_net(event_dev), bond_net_id); + struct bonding *bond; + struct vlan_entry *vlan; + + /* we only care about primary address */ + if(ifa->ifa_flags & IFA_F_SECONDARY) + return NOTIFY_DONE; + + list_for_each_entry(bond, &bn->dev_list, bond_list) { + if (bond->dev == event_dev) { + switch (event) { + case NETDEV_UP: + bond->master_ip = ifa->ifa_local; + return NOTIFY_OK; + case NETDEV_DOWN: + bond->master_ip = 0; + return NOTIFY_OK; + default: + return NOTIFY_DONE; + } + } + + list_for_each_entry(vlan, &bond->vlan_list, vlan_list) { + vlan_dev = __vlan_find_dev_deep(bond->dev, + vlan->vlan_id); + if (vlan_dev == event_dev) { + switch (event) { + case NETDEV_UP: + vlan->vlan_ip = ifa->ifa_local; + return NOTIFY_OK; + case NETDEV_DOWN: + vlan->vlan_ip = 0; + return NOTIFY_OK; + default: + return NOTIFY_DONE; + } + } + } + } + return NOTIFY_DONE; +} + static struct notifier_block bond_netdev_notifier = { .notifier_call = bond_netdev_event, }; +static struct notifier_block bond_inetaddr_notifier = { + .notifier_call = bond_inetaddr_event, +}; + /*---------------------------- Hashing Policies -----------------------------*/ /* @@ -4879,6 +4929,7 @@ static int __init bonding_init(void) } register_netdevice_notifier(&bond_netdev_notifier); + register_inetaddr_notifier(&bond_inetaddr_notifier); out: return res; err: @@ -4892,6 +4943,7 @@ static int __init bonding_init(void) static void __exit bonding_exit(void) { unregister_netdevice_notifier(&bond_netdev_notifier); + unregister_inetaddr_notifier(&bond_inetaddr_notifier); bond_destroy_debugfs(); diff --git a/trunk/drivers/net/bonding/bonding.h b/trunk/drivers/net/bonding/bonding.h index 9f2bae6616d3..1aecc37e5b4d 100644 --- a/trunk/drivers/net/bonding/bonding.h +++ 
b/trunk/drivers/net/bonding/bonding.h @@ -21,7 +21,6 @@ #include #include #include -#include #include "bond_3ad.h" #include "bond_alb.h" @@ -167,6 +166,7 @@ struct bond_parm_tbl { struct vlan_entry { struct list_head vlan_list; + __be32 vlan_ip; unsigned short vlan_id; }; @@ -232,6 +232,7 @@ struct bonding { struct list_head bond_list; struct netdev_hw_addr_list mc_list; int (*xmit_hash_policy)(struct sk_buff *, int); + __be32 master_ip; u16 rr_tx_counter; struct ad_bond_info ad_info; struct alb_bond_info alb_info; @@ -377,21 +378,6 @@ static inline bool bond_is_slave_inactive(struct slave *slave) return slave->inactive; } -static inline __be32 bond_confirm_addr(struct net_device *dev, __be32 dst, __be32 local) -{ - struct in_device *in_dev; - __be32 addr = 0; - - rcu_read_lock(); - in_dev = __in_dev_get_rcu(dev); - - if (in_dev) - addr = inet_confirm_addr(in_dev, dst, local, RT_SCOPE_HOST); - - rcu_read_unlock(); - return addr; -} - struct bond_net; struct vlan_entry *bond_next_vlan(struct bonding *bond, struct vlan_entry *curr); diff --git a/trunk/drivers/net/ethernet/broadcom/cnic.c b/trunk/drivers/net/ethernet/broadcom/cnic.c index c95e7b5e2b85..7b65716b8734 100644 --- a/trunk/drivers/net/ethernet/broadcom/cnic.c +++ b/trunk/drivers/net/ethernet/broadcom/cnic.c @@ -47,7 +47,6 @@ #include "bnx2x/bnx2x_hsi.h" #include "../../../scsi/bnx2i/57xx_iscsi_constants.h" #include "../../../scsi/bnx2i/57xx_iscsi_hsi.h" -#include "../../../scsi/bnx2fc/bnx2fc_constants.h" #include "cnic.h" #include "cnic_defs.h" @@ -2548,7 +2547,7 @@ static void cnic_bnx2x_kwqe_err(struct cnic_dev *dev, struct kwqe *kwqe) } kcqe.kcqe_op_flag = kcqe_op << KCQE_FLAGS_OPCODE_SHIFT; kcqe.kcqe_op_flag |= KCQE_FLAGS_LAYER_MASK_L5_FCOE; - kcqe.kcqe_info1 = FCOE_KCQE_COMPLETION_STATUS_PARITY_ERROR; + kcqe.kcqe_info1 = FCOE_KCQE_COMPLETION_STATUS_NIC_ERROR; kcqe.kcqe_info2 = cid; kcqe.kcqe_info0 = l5_cid; @@ -2559,7 +2558,7 @@ static void cnic_bnx2x_kwqe_err(struct cnic_dev *dev, struct kwqe *kwqe) 
kcqe.kcqe_op_flag = (opcode + 0x10) << KCQE_FLAGS_OPCODE_SHIFT; kcqe.kcqe_op_flag |= KCQE_FLAGS_LAYER_MASK_L5_ISCSI; - kcqe.kcqe_info1 = ISCSI_KCQE_COMPLETION_STATUS_PARITY_ERR; + kcqe.kcqe_info1 = ISCSI_KCQE_COMPLETION_STATUS_NIC_ERROR; kcqe.kcqe_info2 = cid; cnic_get_l5_cid(cp, BNX2X_SW_CID(cid), &kcqe.kcqe_info0); @@ -2578,7 +2577,7 @@ static void cnic_bnx2x_kwqe_err(struct cnic_dev *dev, struct kwqe *kwqe) kcqe.kcqe_op_flag = (kcqe_op << KCQE_FLAGS_OPCODE_SHIFT) | KCQE_FLAGS_LAYER_MASK_L4; - l4kcqe->status = L4_KCQE_COMPLETION_STATUS_PARITY_ERROR; + l4kcqe->status = L4_KCQE_COMPLETION_STATUS_NIC_ERROR; l4kcqe->cid = cid; cnic_get_l5_cid(cp, BNX2X_SW_CID(cid), &l4kcqe->conn_id); } else { @@ -3934,8 +3933,7 @@ static void cnic_cm_process_kcqe(struct cnic_dev *dev, struct kcqe *kcqe) case L4_KCQE_OPCODE_VALUE_CONNECT_COMPLETE: if (l4kcqe->status == 0) set_bit(SK_F_OFFLD_COMPLETE, &csk->flags); - else if (l4kcqe->status == - L4_KCQE_COMPLETION_STATUS_PARITY_ERROR) + else if (l4kcqe->status == L4_KCQE_COMPLETION_STATUS_NIC_ERROR) set_bit(SK_F_HW_ERR, &csk->flags); smp_mb__before_clear_bit(); @@ -3948,7 +3946,7 @@ static void cnic_cm_process_kcqe(struct cnic_dev *dev, struct kcqe *kcqe) case L4_KCQE_OPCODE_VALUE_RESET_COMP: case L5CM_RAMROD_CMD_ID_SEARCHER_DELETE: case L5CM_RAMROD_CMD_ID_TERMINATE_OFFLOAD: - if (l4kcqe->status == L4_KCQE_COMPLETION_STATUS_PARITY_ERROR) + if (l4kcqe->status == L4_KCQE_COMPLETION_STATUS_NIC_ERROR) set_bit(SK_F_HW_ERR, &csk->flags); cp->close_conn(csk, opcode); diff --git a/trunk/drivers/net/ethernet/broadcom/cnic_defs.h b/trunk/drivers/net/ethernet/broadcom/cnic_defs.h index 382c98b0cc0c..06ca00266d70 100644 --- a/trunk/drivers/net/ethernet/broadcom/cnic_defs.h +++ b/trunk/drivers/net/ethernet/broadcom/cnic_defs.h @@ -35,6 +35,16 @@ #define L5CM_RAMROD_CMD_ID_SEARCHER_DELETE (L5CM_RAMROD_CMD_ID_BASE + 14) #define L5CM_RAMROD_CMD_ID_TERMINATE_OFFLOAD (L5CM_RAMROD_CMD_ID_BASE + 15) +#define FCOE_KCQE_OPCODE_INIT_FUNC (0x10) +#define 
FCOE_KCQE_OPCODE_DESTROY_FUNC (0x11) +#define FCOE_KCQE_OPCODE_STAT_FUNC (0x12) +#define FCOE_KCQE_OPCODE_OFFLOAD_CONN (0x15) +#define FCOE_KCQE_OPCODE_ENABLE_CONN (0x16) +#define FCOE_KCQE_OPCODE_DISABLE_CONN (0x17) +#define FCOE_KCQE_OPCODE_DESTROY_CONN (0x18) +#define FCOE_KCQE_OPCODE_CQ_EVENT_NOTIFICATION (0x20) +#define FCOE_KCQE_OPCODE_FCOE_ERROR (0x21) + #define FCOE_RAMROD_CMD_ID_INIT_FUNC (FCOE_KCQE_OPCODE_INIT_FUNC) #define FCOE_RAMROD_CMD_ID_DESTROY_FUNC (FCOE_KCQE_OPCODE_DESTROY_FUNC) #define FCOE_RAMROD_CMD_ID_STAT_FUNC (FCOE_KCQE_OPCODE_STAT_FUNC) @@ -44,6 +54,23 @@ #define FCOE_RAMROD_CMD_ID_DESTROY_CONN (FCOE_KCQE_OPCODE_DESTROY_CONN) #define FCOE_RAMROD_CMD_ID_TERMINATE_CONN (0x81) +#define FCOE_KWQE_OPCODE_INIT1 (0) +#define FCOE_KWQE_OPCODE_INIT2 (1) +#define FCOE_KWQE_OPCODE_INIT3 (2) +#define FCOE_KWQE_OPCODE_OFFLOAD_CONN1 (3) +#define FCOE_KWQE_OPCODE_OFFLOAD_CONN2 (4) +#define FCOE_KWQE_OPCODE_OFFLOAD_CONN3 (5) +#define FCOE_KWQE_OPCODE_OFFLOAD_CONN4 (6) +#define FCOE_KWQE_OPCODE_ENABLE_CONN (7) +#define FCOE_KWQE_OPCODE_DISABLE_CONN (8) +#define FCOE_KWQE_OPCODE_DESTROY_CONN (9) +#define FCOE_KWQE_OPCODE_DESTROY (10) +#define FCOE_KWQE_OPCODE_STAT (11) + +#define FCOE_KCQE_COMPLETION_STATUS_ERROR (0x1) +#define FCOE_KCQE_COMPLETION_STATUS_CTX_ALLOC_FAILURE (0x3) +#define FCOE_KCQE_COMPLETION_STATUS_NIC_ERROR (0x5) + /* KCQ (kernel completion queue) response op codes */ #define L4_KCQE_OPCODE_VALUE_CLOSE_COMP (53) #define L4_KCQE_OPCODE_VALUE_RESET_COMP (54) @@ -60,7 +87,6 @@ /* KCQ (kernel completion queue) completion status */ #define L4_KCQE_COMPLETION_STATUS_SUCCESS (0) #define L4_KCQE_COMPLETION_STATUS_NIC_ERROR (4) -#define L4_KCQE_COMPLETION_STATUS_PARITY_ERROR (0x81) #define L4_KCQE_COMPLETION_STATUS_TIMEOUT (0x93) #define L4_KCQE_COMPLETION_STATUS_CTX_ALLOC_FAIL (0x83) diff --git a/trunk/drivers/net/ethernet/broadcom/cnic_if.h b/trunk/drivers/net/ethernet/broadcom/cnic_if.h index 289274e546be..60deb84d36bd 100644 --- 
a/trunk/drivers/net/ethernet/broadcom/cnic_if.h +++ b/trunk/drivers/net/ethernet/broadcom/cnic_if.h @@ -12,8 +12,8 @@ #ifndef CNIC_IF_H #define CNIC_IF_H -#define CNIC_MODULE_VERSION "2.5.10" -#define CNIC_MODULE_RELDATE "March 21, 2012" +#define CNIC_MODULE_VERSION "2.5.9" +#define CNIC_MODULE_RELDATE "Feb 8, 2012" #define CNIC_ULP_RDMA 0 #define CNIC_ULP_ISCSI 1 diff --git a/trunk/drivers/net/ethernet/broadcom/tg3.c b/trunk/drivers/net/ethernet/broadcom/tg3.c index 7b71387cf93c..b0657466041d 100644 --- a/trunk/drivers/net/ethernet/broadcom/tg3.c +++ b/trunk/drivers/net/ethernet/broadcom/tg3.c @@ -89,10 +89,10 @@ static inline void _tg3_flag_clear(enum TG3_FLAGS flag, unsigned long *bits) #define DRV_MODULE_NAME "tg3" #define TG3_MAJ_NUM 3 -#define TG3_MIN_NUM 123 +#define TG3_MIN_NUM 122 #define DRV_MODULE_VERSION \ __stringify(TG3_MAJ_NUM) "." __stringify(TG3_MIN_NUM) -#define DRV_MODULE_RELDATE "March 21, 2012" +#define DRV_MODULE_RELDATE "December 7, 2011" #define RESET_KIND_SHUTDOWN 0 #define RESET_KIND_INIT 1 @@ -5953,10 +5953,8 @@ static int tg3_rx(struct tg3_napi *tnapi, int budget) tpr->rx_std_prod_idx = std_prod_idx & tp->rx_std_ring_mask; tpr->rx_jmb_prod_idx = jmb_prod_idx & tp->rx_jmb_ring_mask; - if (tnapi != &tp->napi[1]) { - tp->rx_refill = true; + if (tnapi != &tp->napi[1]) napi_schedule(&tp->napi[1].napi); - } } return received; @@ -6136,7 +6134,6 @@ static int tg3_poll_work(struct tg3_napi *tnapi, int work_done, int budget) u32 std_prod_idx = dpr->rx_std_prod_idx; u32 jmb_prod_idx = dpr->rx_jmb_prod_idx; - tp->rx_refill = false; for (i = 1; i < tp->irq_cnt; i++) err |= tg3_rx_prodring_xfer(tp, dpr, &tp->napi[i].prodring); @@ -6200,25 +6197,9 @@ static int tg3_poll_msix(struct napi_struct *napi, int budget) /* check for RX/TX work to do */ if (likely(sblk->idx[0].tx_consumer == tnapi->tx_cons && *(tnapi->rx_rcb_prod_idx) == tnapi->rx_rcb_ptr)) { - - /* This test here is not race free, but will reduce - * the number of interrupts by looping again. 
- */ - if (tnapi == &tp->napi[1] && tp->rx_refill) - continue; - napi_complete(napi); /* Reenable interrupts. */ tw32_mailbox(tnapi->int_mbox, tnapi->last_tag << 24); - - /* This test here is synchronized by napi_schedule() - * and napi_complete() to close the race condition. - */ - if (unlikely(tnapi == &tp->napi[1] && tp->rx_refill)) { - tw32(HOSTCC_MODE, tp->coalesce_mode | - HOSTCC_MODE_ENABLE | - tnapi->coal_now); - } mmiowb(); break; } diff --git a/trunk/drivers/net/ethernet/broadcom/tg3.h b/trunk/drivers/net/ethernet/broadcom/tg3.h index 93865f899a4f..66bcfca55261 100644 --- a/trunk/drivers/net/ethernet/broadcom/tg3.h +++ b/trunk/drivers/net/ethernet/broadcom/tg3.h @@ -3007,7 +3007,6 @@ struct tg3 { u32 rx_std_max_post; u32 rx_offset; u32 rx_pkt_map_sz; - bool rx_refill; /* begin "everything else" cacheline(s) section */ diff --git a/trunk/drivers/net/ethernet/marvell/sky2.c b/trunk/drivers/net/ethernet/marvell/sky2.c index 423a1a2a702e..82c2c86a1951 100644 --- a/trunk/drivers/net/ethernet/marvell/sky2.c +++ b/trunk/drivers/net/ethernet/marvell/sky2.c @@ -95,10 +95,6 @@ static int disable_msi = 0; module_param(disable_msi, int, 0); MODULE_PARM_DESC(disable_msi, "Disable Message Signaled Interrupt (MSI)"); -static int legacy_pme = 0; -module_param(legacy_pme, int, 0); -MODULE_PARM_DESC(legacy_pme, "Legacy power management"); - static DEFINE_PCI_DEVICE_TABLE(sky2_id_table) = { { PCI_DEVICE(PCI_VENDOR_ID_SYSKONNECT, 0x9000) }, /* SK-9Sxx */ { PCI_DEVICE(PCI_VENDOR_ID_SYSKONNECT, 0x9E00) }, /* SK-9Exx */ @@ -871,13 +867,6 @@ static void sky2_wol_init(struct sky2_port *sky2) /* Disable PiG firmware */ sky2_write16(hw, B0_CTST, Y2_HW_WOL_OFF); - /* Needed by some broken BIOSes, use PCI rather than PCI-e for WOL */ - if (legacy_pme) { - u32 reg1 = sky2_pci_read32(hw, PCI_DEV_REG1); - reg1 |= PCI_Y2_PME_LEGACY; - sky2_pci_write32(hw, PCI_DEV_REG1, reg1); - } - /* block receiver */ sky2_write8(hw, SK_REG(port, RX_GMF_CTRL_T), GMF_RST_SET); sky2_read32(hw, B0_CTST); 
diff --git a/trunk/drivers/net/usb/usbnet.c b/trunk/drivers/net/usb/usbnet.c index b7b3f5b0d406..4b8b52ca09d8 100644 --- a/trunk/drivers/net/usb/usbnet.c +++ b/trunk/drivers/net/usb/usbnet.c @@ -493,7 +493,6 @@ static void rx_complete (struct urb *urb) if (netif_running (dev->net) && !test_bit (EVENT_RX_HALT, &dev->flags)) { rx_submit (dev, urb, GFP_ATOMIC); - usb_mark_last_busy(dev->udev); return; } usb_free_urb (urb); @@ -590,14 +589,6 @@ static int unlink_urbs (struct usbnet *dev, struct sk_buff_head *q) entry = (struct skb_data *) skb->cb; urb = entry->urb; - /* - * Get reference count of the URB to avoid it to be - * freed during usb_unlink_urb, which may trigger - * use-after-free problem inside usb_unlink_urb since - * usb_unlink_urb is always racing with .complete - * handler(include defer_bh). - */ - usb_get_urb(urb); spin_unlock_irqrestore(&q->lock, flags); // during some PM-driven resume scenarios, // these (async) unlinks complete immediately @@ -606,7 +597,6 @@ static int unlink_urbs (struct usbnet *dev, struct sk_buff_head *q) netdev_dbg(dev->net, "unlink urb err, %d\n", retval); else count++; - usb_put_urb(urb); spin_lock_irqsave(&q->lock, flags); } spin_unlock_irqrestore (&q->lock, flags); @@ -1038,6 +1028,7 @@ static void tx_complete (struct urb *urb) } usb_autopm_put_interface_async(dev->intf); + urb->dev = NULL; entry->state = tx_done; defer_bh(dev, skb, &dev->txq); } diff --git a/trunk/drivers/parisc/dino.c b/trunk/drivers/parisc/dino.c index 0610e91bceb2..7ff10c1e8664 100644 --- a/trunk/drivers/parisc/dino.c +++ b/trunk/drivers/parisc/dino.c @@ -553,6 +553,7 @@ dino_fixup_bus(struct pci_bus *bus) struct list_head *ln; struct pci_dev *dev; struct dino_device *dino_dev = DINO_DEV(parisc_walk_tree(bus->bridge)); + int port_base = HBA_PORT_BASE(dino_dev->hba.hba_num); DBG(KERN_WARNING "%s(0x%p) bus %d platform_data 0x%p\n", __func__, bus, bus->secondary, @@ -598,6 +599,8 @@ dino_fixup_bus(struct pci_bus *bus) list_for_each(ln, &bus->devices) { + 
int i; + dev = pci_dev_b(ln); if (is_card_dino(&dino_dev->hba.dev->id)) dino_card_fixup(dev); @@ -609,6 +612,21 @@ dino_fixup_bus(struct pci_bus *bus) if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI) continue; + /* Adjust the I/O Port space addresses */ + for (i = 0; i < PCI_NUM_RESOURCES; i++) { + struct resource *res = &dev->resource[i]; + if (res->flags & IORESOURCE_IO) { + res->start |= port_base; + res->end |= port_base; + } +#ifdef __LP64__ + /* Sign Extend MMIO addresses */ + else if (res->flags & IORESOURCE_MEM) { + res->start |= F_EXTEND(0UL); + res->end |= F_EXTEND(0UL); + } +#endif + } /* null out the ROM resource if there is one (we don't * care about an expansion rom on parisc, since it * usually contains (x86) bios code) */ @@ -973,14 +991,11 @@ static int __init dino_probe(struct parisc_device *dev) dev->dev.platform_data = dino_dev; - pci_add_resource_offset(&resources, &dino_dev->hba.io_space, - HBA_PORT_BASE(dino_dev->hba.hba_num)); + pci_add_resource(&resources, &dino_dev->hba.io_space); if (dino_dev->hba.lmmio_space.flags) - pci_add_resource_offset(&resources, &dino_dev->hba.lmmio_space, - dino_dev->hba.lmmio_space_offset); + pci_add_resource(&resources, &dino_dev->hba.lmmio_space); if (dino_dev->hba.elmmio_space.flags) - pci_add_resource_offset(&resources, &dino_dev->hba.elmmio_space, - dino_dev->hba.lmmio_space_offset); + pci_add_resource(&resources, &dino_dev->hba.elmmio_space); if (dino_dev->hba.gmmio_space.flags) pci_add_resource(&resources, &dino_dev->hba.gmmio_space); diff --git a/trunk/drivers/parisc/lba_pci.c b/trunk/drivers/parisc/lba_pci.c index e8857647e210..d5f3d753a108 100644 --- a/trunk/drivers/parisc/lba_pci.c +++ b/trunk/drivers/parisc/lba_pci.c @@ -635,6 +635,7 @@ lba_fixup_bus(struct pci_bus *bus) u16 status; #endif struct lba_device *ldev = LBA_DEV(parisc_walk_tree(bus->bridge)); + int lba_portbase = HBA_PORT_BASE(ldev->hba.hba_num); DBG("lba_fixup_bus(0x%p) bus %d platform_data 0x%p\n", bus, bus->secondary, 
bus->bridge->platform_data); @@ -725,6 +726,27 @@ lba_fixup_bus(struct pci_bus *bus) if (!res->start) continue; + if (res->flags & IORESOURCE_IO) { + DBG("lba_fixup_bus() I/O Ports [%lx/%lx] -> ", + res->start, res->end); + res->start |= lba_portbase; + res->end |= lba_portbase; + DBG("[%lx/%lx]\n", res->start, res->end); + } else if (res->flags & IORESOURCE_MEM) { + /* + ** Convert PCI (IO_VIEW) addresses to + ** processor (PA_VIEW) addresses + */ + DBG("lba_fixup_bus() MMIO [%lx/%lx] -> ", + res->start, res->end); + res->start = PCI_HOST_ADDR(HBA_DATA(ldev), res->start); + res->end = PCI_HOST_ADDR(HBA_DATA(ldev), res->end); + DBG("[%lx/%lx]\n", res->start, res->end); + } else { + DBG("lba_fixup_bus() WTF? 0x%lx [%lx/%lx] XXX", + res->flags, res->start, res->end); + } + /* ** FIXME: this will result in whinging for devices ** that share expansion ROMs (think quad tulip), but @@ -1492,14 +1514,11 @@ lba_driver_probe(struct parisc_device *dev) lba_dev->hba.lmmio_space.flags = 0; } - pci_add_resource_offset(&resources, &lba_dev->hba.io_space, - HBA_PORT_BASE(lba_dev->hba.hba_num)); + pci_add_resource(&resources, &lba_dev->hba.io_space); if (lba_dev->hba.elmmio_space.start) - pci_add_resource_offset(&resources, &lba_dev->hba.elmmio_space, - lba_dev->hba.lmmio_space_offset); + pci_add_resource(&resources, &lba_dev->hba.elmmio_space); if (lba_dev->hba.lmmio_space.flags) - pci_add_resource_offset(&resources, &lba_dev->hba.lmmio_space, - lba_dev->hba.lmmio_space_offset); + pci_add_resource(&resources, &lba_dev->hba.lmmio_space); if (lba_dev->hba.gmmio_space.flags) pci_add_resource(&resources, &lba_dev->hba.gmmio_space); diff --git a/trunk/drivers/pci/Kconfig b/trunk/drivers/pci/Kconfig index 848bfb84c04c..37856f7c7781 100644 --- a/trunk/drivers/pci/Kconfig +++ b/trunk/drivers/pci/Kconfig @@ -31,19 +31,6 @@ config PCI_DEBUG When in doubt, say N. 
-config PCI_REALLOC_ENABLE_AUTO - bool "Enable PCI resource re-allocation detection" - depends on PCI - help - Say Y here if you want the PCI core to detect if PCI resource - re-allocation needs to be enabled. You can always use pci=realloc=on - or pci=realloc=off to override it. Note this feature is a no-op - unless PCI_IOV support is also enabled; in that case it will - automatically re-allocate PCI resources if SR-IOV BARs have not - been allocated by the BIOS. - - When in doubt, say N. - config PCI_STUB tristate "PCI Stub driver" depends on PCI diff --git a/trunk/drivers/pci/bus.c b/trunk/drivers/pci/bus.c index 4ce5ef2f2826..398f5d859791 100644 --- a/trunk/drivers/pci/bus.c +++ b/trunk/drivers/pci/bus.c @@ -18,36 +18,28 @@ #include "pci.h" -void pci_add_resource_offset(struct list_head *resources, struct resource *res, - resource_size_t offset) +void pci_add_resource(struct list_head *resources, struct resource *res) { - struct pci_host_bridge_window *window; + struct pci_bus_resource *bus_res; - window = kzalloc(sizeof(struct pci_host_bridge_window), GFP_KERNEL); - if (!window) { - printk(KERN_ERR "PCI: can't add host bridge window %pR\n", res); + bus_res = kzalloc(sizeof(struct pci_bus_resource), GFP_KERNEL); + if (!bus_res) { + printk(KERN_ERR "PCI: can't add bus resource %pR\n", res); return; } - window->res = res; - window->offset = offset; - list_add_tail(&window->list, resources); -} -EXPORT_SYMBOL(pci_add_resource_offset); - -void pci_add_resource(struct list_head *resources, struct resource *res) -{ - pci_add_resource_offset(resources, res, 0); + bus_res->res = res; + list_add_tail(&bus_res->list, resources); } EXPORT_SYMBOL(pci_add_resource); void pci_free_resource_list(struct list_head *resources) { - struct pci_host_bridge_window *window, *tmp; + struct pci_bus_resource *bus_res, *tmp; - list_for_each_entry_safe(window, tmp, resources, list) { - list_del(&window->list); - kfree(window); + list_for_each_entry_safe(bus_res, tmp, resources, list) { + 
list_del(&bus_res->list); + kfree(bus_res); } } EXPORT_SYMBOL(pci_free_resource_list); diff --git a/trunk/drivers/pci/hotplug/acpiphp_glue.c b/trunk/drivers/pci/hotplug/acpiphp_glue.c index 806c44fa645a..9ddf69e3bbef 100644 --- a/trunk/drivers/pci/hotplug/acpiphp_glue.c +++ b/trunk/drivers/pci/hotplug/acpiphp_glue.c @@ -800,10 +800,20 @@ static int __ref enable_device(struct acpiphp_slot *slot) if (slot->flags & SLOT_ENABLED) goto err_exit; + /* sanity check: dev should be NULL when hot-plugged in */ + dev = pci_get_slot(bus, PCI_DEVFN(slot->device, 0)); + if (dev) { + /* This case shouldn't happen */ + err("pci_dev structure already exists.\n"); + pci_dev_put(dev); + retval = -1; + goto err_exit; + } + num = pci_scan_slot(bus, PCI_DEVFN(slot->device, 0)); if (num == 0) { - /* Maybe only part of funcs are added. */ - dbg("No new device found\n"); + err("No new device found\n"); + retval = -1; goto err_exit; } @@ -838,16 +848,11 @@ static int __ref enable_device(struct acpiphp_slot *slot) pci_bus_add_devices(bus); - slot->flags |= SLOT_ENABLED; list_for_each_entry(func, &slot->funcs, sibling) { dev = pci_get_slot(bus, PCI_DEVFN(slot->device, func->function)); - if (!dev) { - /* Do not set SLOT_ENABLED flag if some funcs - are not added. */ - slot->flags &= (~SLOT_ENABLED); + if (!dev) continue; - } if (dev->hdr_type != PCI_HEADER_TYPE_BRIDGE && dev->hdr_type != PCI_HEADER_TYPE_CARDBUS) { @@ -862,6 +867,7 @@ static int __ref enable_device(struct acpiphp_slot *slot) pci_dev_put(dev); } + slot->flags |= SLOT_ENABLED; err_exit: return retval; @@ -886,12 +892,9 @@ static int disable_device(struct acpiphp_slot *slot) { struct acpiphp_func *func; struct pci_dev *pdev; - struct pci_bus *bus = slot->bridge->pci_bus; - /* The slot will be enabled when func 0 is added, so check - func 0 before disable the slot. */ - pdev = pci_get_slot(bus, PCI_DEVFN(slot->device, 0)); - if (!pdev) + /* is this slot already disabled? 
*/ + if (!(slot->flags & SLOT_ENABLED)) goto err_exit; list_for_each_entry(func, &slot->funcs, sibling) { @@ -910,7 +913,7 @@ static int disable_device(struct acpiphp_slot *slot) disable_bridges(pdev->subordinate); pci_disable_device(pdev); } - __pci_remove_bus_device(pdev); + pci_remove_bus_device(pdev); pci_dev_put(pdev); } } @@ -1067,7 +1070,7 @@ static void acpiphp_sanitize_bus(struct pci_bus *bus) res->end) { /* Could not assign a required resources * for this device, remove it */ - pci_stop_and_remove_bus_device(dev); + pci_remove_bus_device(dev); break; } } diff --git a/trunk/drivers/pci/hotplug/cpci_hotplug_pci.c b/trunk/drivers/pci/hotplug/cpci_hotplug_pci.c index ae853ccd0cd5..829c327cfb5e 100644 --- a/trunk/drivers/pci/hotplug/cpci_hotplug_pci.c +++ b/trunk/drivers/pci/hotplug/cpci_hotplug_pci.c @@ -341,7 +341,7 @@ int cpci_unconfigure_slot(struct slot* slot) dev = pci_get_slot(slot->bus, PCI_DEVFN(PCI_SLOT(slot->devfn), i)); if (dev) { - pci_stop_and_remove_bus_device(dev); + pci_remove_bus_device(dev); pci_dev_put(dev); } } diff --git a/trunk/drivers/pci/hotplug/cpcihp_generic.c b/trunk/drivers/pci/hotplug/cpcihp_generic.c index 81af764c629b..fb3f84661bdc 100644 --- a/trunk/drivers/pci/hotplug/cpcihp_generic.c +++ b/trunk/drivers/pci/hotplug/cpcihp_generic.c @@ -62,7 +62,7 @@ #define warn(format, arg...) 
printk(KERN_WARNING "%s: " format "\n", MY_NAME , ## arg) /* local variables */ -static bool debug; +static int debug; static char *bridge; static u8 bridge_busnr; static u8 bridge_slot; diff --git a/trunk/drivers/pci/hotplug/cpqphp_pci.c b/trunk/drivers/pci/hotplug/cpqphp_pci.c index 1c8494021a42..6173b9a4544e 100644 --- a/trunk/drivers/pci/hotplug/cpqphp_pci.c +++ b/trunk/drivers/pci/hotplug/cpqphp_pci.c @@ -127,7 +127,7 @@ int cpqhp_unconfigure_device(struct pci_func* func) struct pci_dev* temp = pci_get_bus_and_slot(func->bus, PCI_DEVFN(func->device, j)); if (temp) { pci_dev_put(temp); - pci_stop_and_remove_bus_device(temp); + pci_remove_bus_device(temp); } } return 0; diff --git a/trunk/drivers/pci/hotplug/fakephp.c b/trunk/drivers/pci/hotplug/fakephp.c index a019c9a712be..17d10e2e8fb6 100644 --- a/trunk/drivers/pci/hotplug/fakephp.c +++ b/trunk/drivers/pci/hotplug/fakephp.c @@ -40,7 +40,7 @@ static ssize_t legacy_show(struct kobject *kobj, struct attribute *attr, static void remove_callback(void *data) { - pci_stop_and_remove_bus_device((struct pci_dev *)data); + pci_remove_bus_device((struct pci_dev *)data); } static ssize_t legacy_store(struct kobject *kobj, struct attribute *attr, diff --git a/trunk/drivers/pci/hotplug/ibmphp_core.c b/trunk/drivers/pci/hotplug/ibmphp_core.c index 4fda7e6a86a7..5506e0e8fbc0 100644 --- a/trunk/drivers/pci/hotplug/ibmphp_core.c +++ b/trunk/drivers/pci/hotplug/ibmphp_core.c @@ -721,7 +721,7 @@ static void ibm_unconfigure_device(struct pci_func *func) for (j = 0; j < 0x08; j++) { temp = pci_get_bus_and_slot(func->busno, (func->device << 3) | j); if (temp) { - pci_stop_and_remove_bus_device(temp); + pci_remove_bus_device(temp); pci_dev_put(temp); } } diff --git a/trunk/drivers/pci/hotplug/ibmphp_ebda.c b/trunk/drivers/pci/hotplug/ibmphp_ebda.c index 714ca5c4ed50..2850e64dedae 100644 --- a/trunk/drivers/pci/hotplug/ibmphp_ebda.c +++ b/trunk/drivers/pci/hotplug/ibmphp_ebda.c @@ -368,10 +368,8 @@ int __init ibmphp_access_ebda 
(void) debug ("rio blk id: %x\n", blk_id); rio_table_ptr = kzalloc(sizeof(struct rio_table_hdr), GFP_KERNEL); - if (!rio_table_ptr) { - rc = -ENOMEM; - goto out; - } + if (!rio_table_ptr) + return -ENOMEM; rio_table_ptr->ver_num = readb (io_mem + offset); rio_table_ptr->scal_count = readb (io_mem + offset + 1); rio_table_ptr->riodev_count = readb (io_mem + offset + 2); diff --git a/trunk/drivers/pci/hotplug/pciehp_hpc.c b/trunk/drivers/pci/hotplug/pciehp_hpc.c index a960faec1021..bcdbb1643621 100644 --- a/trunk/drivers/pci/hotplug/pciehp_hpc.c +++ b/trunk/drivers/pci/hotplug/pciehp_hpc.c @@ -241,79 +241,34 @@ static int pcie_write_cmd(struct controller *ctrl, u16 cmd, u16 mask) return retval; } -static bool check_link_active(struct controller *ctrl) +static inline int check_link_active(struct controller *ctrl) { - bool ret = false; - u16 lnk_status; - - if (pciehp_readw(ctrl, PCI_EXP_LNKSTA, &lnk_status)) - return ret; - - ret = !!(lnk_status & PCI_EXP_LNKSTA_DLLLA); - - if (ret) - ctrl_dbg(ctrl, "%s: lnk_status = %x\n", __func__, lnk_status); + u16 link_status; - return ret; + if (pciehp_readw(ctrl, PCI_EXP_LNKSTA, &link_status)) + return 0; + return !!(link_status & PCI_EXP_LNKSTA_DLLLA); } -static void __pcie_wait_link_active(struct controller *ctrl, bool active) +static void pcie_wait_link_active(struct controller *ctrl) { int timeout = 1000; - if (check_link_active(ctrl) == active) + if (check_link_active(ctrl)) return; while (timeout > 0) { msleep(10); timeout -= 10; - if (check_link_active(ctrl) == active) + if (check_link_active(ctrl)) return; } - ctrl_dbg(ctrl, "Data Link Layer Link Active not %s in 1000 msec\n", - active ? 
"set" : "cleared"); -} - -static void pcie_wait_link_active(struct controller *ctrl) -{ - __pcie_wait_link_active(ctrl, true); -} - -static void pcie_wait_link_not_active(struct controller *ctrl) -{ - __pcie_wait_link_active(ctrl, false); -} - -static bool pci_bus_check_dev(struct pci_bus *bus, int devfn) -{ - u32 l; - int count = 0; - int delay = 1000, step = 20; - bool found = false; - - do { - found = pci_bus_read_dev_vendor_id(bus, devfn, &l, 0); - count++; - - if (found) - break; - - msleep(step); - delay -= step; - } while (delay > 0); - - if (count > 1 && pciehp_debug) - printk(KERN_DEBUG "pci %04x:%02x:%02x.%d id reading try %d times with interval %d ms to get %08x\n", - pci_domain_nr(bus), bus->number, PCI_SLOT(devfn), - PCI_FUNC(devfn), count, step, l); - - return found; + ctrl_dbg(ctrl, "Data Link Layer Link Active not set in 1000 msec\n"); } int pciehp_check_link_status(struct controller *ctrl) { u16 lnk_status; int retval = 0; - bool found = false; /* * Data Link Layer Link Active Reporting must be capable for @@ -325,10 +280,13 @@ int pciehp_check_link_status(struct controller *ctrl) else msleep(1000); - /* wait 100ms before read pci conf, and try in 1s */ - msleep(100); - found = pci_bus_check_dev(ctrl->pcie->port->subordinate, - PCI_DEVFN(0, 0)); + /* + * Need to wait for 1000 ms after Data Link Layer Link Active + * (DLLLA) bit reads 1b before sending configuration request. + * We need it before checking Link Training (LT) bit becuase + * LT is still set even after DLLLA bit is set on some platform. 
+ */ + msleep(1000); retval = pciehp_readw(ctrl, PCI_EXP_LNKSTA, &lnk_status); if (retval) { @@ -344,50 +302,19 @@ int pciehp_check_link_status(struct controller *ctrl) return retval; } - pcie_update_link_speed(ctrl->pcie->port->subordinate, lnk_status); - - if (!found && !retval) - retval = -1; - - return retval; -} - -static int __pciehp_link_set(struct controller *ctrl, bool enable) -{ - u16 lnk_ctrl; - int retval = 0; - - retval = pciehp_readw(ctrl, PCI_EXP_LNKCTL, &lnk_ctrl); - if (retval) { - ctrl_err(ctrl, "Cannot read LNKCTRL register\n"); - return retval; - } - - if (enable) - lnk_ctrl &= ~PCI_EXP_LNKCTL_LD; - else - lnk_ctrl |= PCI_EXP_LNKCTL_LD; + /* + * If the port supports Link speeds greater than 5.0 GT/s, we + * must wait for 100 ms after Link training completes before + * sending configuration request. + */ + if (ctrl->pcie->port->subordinate->max_bus_speed > PCIE_SPEED_5_0GT) + msleep(100); - retval = pciehp_writew(ctrl, PCI_EXP_LNKCTL, lnk_ctrl); - if (retval) { - ctrl_err(ctrl, "Cannot write LNKCTRL register\n"); - return retval; - } - ctrl_dbg(ctrl, "%s: lnk_ctrl = %x\n", __func__, lnk_ctrl); + pcie_update_link_speed(ctrl->pcie->port->subordinate, lnk_status); return retval; } -static int pciehp_link_enable(struct controller *ctrl) -{ - return __pciehp_link_set(ctrl, true); -} - -static int pciehp_link_disable(struct controller *ctrl) -{ - return __pciehp_link_set(ctrl, false); -} - int pciehp_get_attention_status(struct slot *slot, u8 *status) { struct controller *ctrl = slot->ctrl; @@ -606,10 +533,6 @@ int pciehp_power_on_slot(struct slot * slot) ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n", __func__, pci_pcie_cap(ctrl->pcie->port) + PCI_EXP_SLTCTL, slot_cmd); - retval = pciehp_link_enable(ctrl); - if (retval) - ctrl_err(ctrl, "%s: Can not enable the link!\n", __func__); - return retval; } @@ -620,14 +543,6 @@ int pciehp_power_off_slot(struct slot * slot) u16 cmd_mask; int retval; - /* Disable the link at first */ - 
pciehp_link_disable(ctrl); - /* wait the link is down */ - if (ctrl->link_active_reporting) - pcie_wait_link_not_active(ctrl); - else - msleep(1000); - slot_cmd = POWER_OFF; cmd_mask = PCI_EXP_SLTCTL_PCC; retval = pcie_write_cmd(ctrl, slot_cmd, cmd_mask); diff --git a/trunk/drivers/pci/hotplug/pciehp_pci.c b/trunk/drivers/pci/hotplug/pciehp_pci.c index 47d9dc06b109..a4031dfe938e 100644 --- a/trunk/drivers/pci/hotplug/pciehp_pci.c +++ b/trunk/drivers/pci/hotplug/pciehp_pci.c @@ -141,7 +141,7 @@ int pciehp_unconfigure_device(struct slot *p_slot) break; } } - pci_stop_and_remove_bus_device(temp); + pci_remove_bus_device(temp); /* * Ensure that no new Requests will be generated from * the device. diff --git a/trunk/drivers/pci/hotplug/rpadlpar_core.c b/trunk/drivers/pci/hotplug/rpadlpar_core.c index 1e117c2a3cad..c56a9413e1af 100644 --- a/trunk/drivers/pci/hotplug/rpadlpar_core.c +++ b/trunk/drivers/pci/hotplug/rpadlpar_core.c @@ -389,7 +389,7 @@ int dlpar_remove_pci_slot(char *drc_name, struct device_node *dn) BUG_ON(!bus->self); pr_debug("PCI: Now removing bridge device %s\n", pci_name(bus->self)); eeh_remove_bus_device(bus->self); - pci_stop_and_remove_bus_device(bus->self); + pci_remove_bus_device(bus->self); return 0; } diff --git a/trunk/drivers/pci/hotplug/sgi_hotplug.c b/trunk/drivers/pci/hotplug/sgi_hotplug.c index de573113c102..72d507b6a2aa 100644 --- a/trunk/drivers/pci/hotplug/sgi_hotplug.c +++ b/trunk/drivers/pci/hotplug/sgi_hotplug.c @@ -554,7 +554,7 @@ static int disable_slot(struct hotplug_slot *bss_hotplug_slot) PCI_FUNC(func))); if (dev) { sn_bus_free_data(dev); - pci_stop_and_remove_bus_device(dev); + pci_remove_bus_device(dev); pci_dev_put(dev); } } diff --git a/trunk/drivers/pci/hotplug/shpchp_pci.c b/trunk/drivers/pci/hotplug/shpchp_pci.c index df7e4bfadae3..a2ccfcd3c298 100644 --- a/trunk/drivers/pci/hotplug/shpchp_pci.c +++ b/trunk/drivers/pci/hotplug/shpchp_pci.c @@ -124,7 +124,7 @@ int shpchp_unconfigure_device(struct slot *p_slot) break; } } 
- pci_stop_and_remove_bus_device(temp); + pci_remove_bus_device(temp); pci_dev_put(temp); } return rc; diff --git a/trunk/drivers/pci/iov.c b/trunk/drivers/pci/iov.c index 6554e1a0f634..0dab5ecf61bb 100644 --- a/trunk/drivers/pci/iov.c +++ b/trunk/drivers/pci/iov.c @@ -142,7 +142,7 @@ static int virtfn_add(struct pci_dev *dev, int id, int reset) failed1: pci_dev_put(dev); mutex_lock(&iov->dev->sriov->lock); - pci_stop_and_remove_bus_device(virtfn); + pci_remove_bus_device(virtfn); virtfn_remove_bus(dev->bus, virtfn_bus(dev, id)); mutex_unlock(&iov->dev->sriov->lock); @@ -173,16 +173,10 @@ static void virtfn_remove(struct pci_dev *dev, int id, int reset) sprintf(buf, "virtfn%u", id); sysfs_remove_link(&dev->dev.kobj, buf); - /* - * pci_stop_dev() could have been called for this virtfn already, - * so the directory for the virtfn may have been removed before. - * Double check to avoid spurious sysfs warnings. - */ - if (virtfn->dev.kobj.sd) - sysfs_remove_link(&virtfn->dev.kobj, "physfn"); + sysfs_remove_link(&virtfn->dev.kobj, "physfn"); mutex_lock(&iov->dev->sriov->lock); - pci_stop_and_remove_bus_device(virtfn); + pci_remove_bus_device(virtfn); virtfn_remove_bus(dev->bus, virtfn_bus(dev, id)); mutex_unlock(&iov->dev->sriov->lock); diff --git a/trunk/drivers/pci/pci-driver.c b/trunk/drivers/pci/pci-driver.c index 6b54b23b990b..8d9616b821ca 100644 --- a/trunk/drivers/pci/pci-driver.c +++ b/trunk/drivers/pci/pci-driver.c @@ -419,16 +419,6 @@ static void pci_device_shutdown(struct device *dev) drv->shutdown(pci_dev); pci_msi_shutdown(pci_dev); pci_msix_shutdown(pci_dev); - - /* - * Devices may be enabled to wake up by runtime PM, but they need not - * be supposed to wake up the system from its "power off" state (e.g. - * ACPI S5). Therefore disable wakeup for all devices that aren't - * supposed to wake up the system at this point. The state argument - * will be ignored by pci_enable_wake(). 
- */ - if (!device_may_wakeup(dev)) - pci_enable_wake(pci_dev, PCI_UNKNOWN, false); } #ifdef CONFIG_PM diff --git a/trunk/drivers/pci/pci-sysfs.c b/trunk/drivers/pci/pci-sysfs.c index a55e248618cd..a3cd8cad532a 100644 --- a/trunk/drivers/pci/pci-sysfs.c +++ b/trunk/drivers/pci/pci-sysfs.c @@ -330,7 +330,7 @@ static void remove_callback(struct device *dev) struct pci_dev *pdev = to_pci_dev(dev); mutex_lock(&pci_remove_rescan_mutex); - pci_stop_and_remove_bus_device(pdev); + pci_remove_bus_device(pdev); mutex_unlock(&pci_remove_rescan_mutex); } @@ -366,10 +366,7 @@ dev_bus_rescan_store(struct device *dev, struct device_attribute *attr, if (val) { mutex_lock(&pci_remove_rescan_mutex); - if (!pci_is_root_bus(bus) && list_empty(&bus->devices)) - pci_rescan_bus_bridge_resize(bus->self); - else - pci_rescan_bus(bus); + pci_rescan_bus(bus); mutex_unlock(&pci_remove_rescan_mutex); } return count; diff --git a/trunk/drivers/pci/pci.c b/trunk/drivers/pci/pci.c index 815674415267..053670e09e2b 100644 --- a/trunk/drivers/pci/pci.c +++ b/trunk/drivers/pci/pci.c @@ -94,9 +94,6 @@ u8 pci_cache_line_size; */ unsigned int pcibios_max_latency = 255; -/* If set, the PCIe ARI capability will not be used. 
*/ -static bool pcie_ari_disabled; - /** * pci_bus_max_busnr - returns maximum PCI bus number of given bus' children * @bus: pointer to PCI bus structure to search @@ -828,19 +825,6 @@ EXPORT_SYMBOL(pci_choose_state); #define pcie_cap_has_sltctl2(type, flags) \ ((flags & PCI_EXP_FLAGS_VERS) > 1) -static struct pci_cap_saved_state *pci_find_saved_cap( - struct pci_dev *pci_dev, char cap) -{ - struct pci_cap_saved_state *tmp; - struct hlist_node *pos; - - hlist_for_each_entry(tmp, pos, &pci_dev->saved_cap_space, next) { - if (tmp->cap.cap_nr == cap) - return tmp; - } - return NULL; -} - static int pci_save_pcie_state(struct pci_dev *dev) { int pos, i = 0; @@ -975,7 +959,6 @@ void pci_restore_state(struct pci_dev *dev) { int i; u32 val; - int tries; if (!dev->state_saved) return; @@ -990,16 +973,12 @@ void pci_restore_state(struct pci_dev *dev) */ for (i = 15; i >= 0; i--) { pci_read_config_dword(dev, i * 4, &val); - tries = 10; - while (tries && val != dev->saved_config_space[i]) { + if (val != dev->saved_config_space[i]) { dev_dbg(&dev->dev, "restoring config " "space at offset %#x (was %#x, writing %#x)\n", i, val, (int)dev->saved_config_space[i]); pci_write_config_dword(dev,i * 4, dev->saved_config_space[i]); - pci_read_config_dword(dev, i * 4, &val); - mdelay(10); - tries--; } } pci_restore_pcix_state(dev); @@ -1885,12 +1864,6 @@ void platform_pci_wakeup_init(struct pci_dev *dev) platform_pci_sleep_wake(dev, false); } -static void pci_add_saved_cap(struct pci_dev *pci_dev, - struct pci_cap_saved_state *new_cap) -{ - hlist_add_head(&new_cap->next, &pci_dev->saved_cap_space); -} - /** * pci_add_save_buffer - allocate buffer for saving given capability registers * @dev: the PCI device @@ -1938,15 +1911,6 @@ void pci_allocate_cap_save_buffers(struct pci_dev *dev) "unable to preallocate PCI-X save buffer\n"); } -void pci_free_cap_save_buffers(struct pci_dev *dev) -{ - struct pci_cap_saved_state *tmp; - struct hlist_node *pos, *n; - - hlist_for_each_entry_safe(tmp, 
pos, n, &dev->saved_cap_space, next) - kfree(tmp); -} - /** * pci_enable_ari - enable ARI forwarding if hardware support it * @dev: the PCI device @@ -1958,7 +1922,7 @@ void pci_enable_ari(struct pci_dev *dev) u16 flags, ctrl; struct pci_dev *bridge; - if (pcie_ari_disabled || !pci_is_pcie(dev) || dev->devfn) + if (!pci_is_pcie(dev) || dev->devfn) return; pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ARI); @@ -3697,68 +3661,6 @@ int pci_is_reassigndev(struct pci_dev *dev) return (pci_specified_resource_alignment(dev) != 0); } -/* - * This function disables memory decoding and releases memory resources - * of the device specified by kernel's boot parameter 'pci=resource_alignment='. - * It also rounds up size to specified alignment. - * Later on, the kernel will assign page-aligned memory resource back - * to the device. - */ -void pci_reassigndev_resource_alignment(struct pci_dev *dev) -{ - int i; - struct resource *r; - resource_size_t align, size; - u16 command; - - if (!pci_is_reassigndev(dev)) - return; - - if (dev->hdr_type == PCI_HEADER_TYPE_NORMAL && - (dev->class >> 8) == PCI_CLASS_BRIDGE_HOST) { - dev_warn(&dev->dev, - "Can't reassign resources to host bridge.\n"); - return; - } - - dev_info(&dev->dev, - "Disabling memory decoding and releasing memory resources.\n"); - pci_read_config_word(dev, PCI_COMMAND, &command); - command &= ~PCI_COMMAND_MEMORY; - pci_write_config_word(dev, PCI_COMMAND, command); - - align = pci_specified_resource_alignment(dev); - for (i = 0; i < PCI_BRIDGE_RESOURCES; i++) { - r = &dev->resource[i]; - if (!(r->flags & IORESOURCE_MEM)) - continue; - size = resource_size(r); - if (size < align) { - size = align; - dev_info(&dev->dev, - "Rounding up size of resource #%d to %#llx.\n", - i, (unsigned long long)size); - } - r->end = size - 1; - r->start = 0; - } - /* Need to disable bridge's resource window, - * to enable the kernel to reassign new resource - * window later on. 
- */ - if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE && - (dev->class >> 8) == PCI_CLASS_BRIDGE_PCI) { - for (i = PCI_BRIDGE_RESOURCES; i < PCI_NUM_RESOURCES; i++) { - r = &dev->resource[i]; - if (!(r->flags & IORESOURCE_MEM)) - continue; - r->end = resource_size(r) - 1; - r->start = 0; - } - pci_disable_bridge_window(dev); - } -} - ssize_t pci_set_resource_alignment_param(const char *buf, size_t count) { if (count > RESOURCE_ALIGNMENT_PARAM_SIZE - 1) @@ -3837,14 +3739,10 @@ static int __init pci_setup(char *str) pci_no_msi(); } else if (!strcmp(str, "noaer")) { pci_no_aer(); - } else if (!strncmp(str, "realloc=", 8)) { - pci_realloc_get_opt(str + 8); } else if (!strncmp(str, "realloc", 7)) { - pci_realloc_get_opt("on"); + pci_realloc(); } else if (!strcmp(str, "nodomains")) { pci_no_domains(); - } else if (!strncmp(str, "noari", 5)) { - pcie_ari_disabled = true; } else if (!strncmp(str, "cbiosize=", 9)) { pci_cardbus_io_size = memparse(str + 9, &str); } else if (!strncmp(str, "cbmemsize=", 10)) { diff --git a/trunk/drivers/pci/pci.h b/trunk/drivers/pci/pci.h index e4943479b234..1009a5e88e53 100644 --- a/trunk/drivers/pci/pci.h +++ b/trunk/drivers/pci/pci.h @@ -73,7 +73,6 @@ extern int __pci_pme_wakeup(struct pci_dev *dev, void *ign); extern void pci_pm_init(struct pci_dev *dev); extern void platform_pci_wakeup_init(struct pci_dev *dev); extern void pci_allocate_cap_save_buffers(struct pci_dev *dev); -void pci_free_cap_save_buffers(struct pci_dev *dev); static inline void pci_wakeup_event(struct pci_dev *dev) { @@ -149,7 +148,7 @@ static inline void pci_no_msi(void) { } static inline void pci_msi_init_pci_dev(struct pci_dev *dev) { } #endif -void pci_realloc_get_opt(char *); +extern void pci_realloc(void); static inline int pci_no_d1d2(struct pci_dev *dev) { @@ -208,8 +207,6 @@ enum pci_bar_type { pci_bar_mem64, /* A 64-bit memory BAR */ }; -bool pci_bus_read_dev_vendor_id(struct pci_bus *bus, int devfn, u32 *pl, - int crs_timeout); extern int pci_setup_device(struct 
pci_dev *dev); extern int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type, struct resource *res, unsigned int reg); @@ -228,8 +225,11 @@ static inline int pci_ari_enabled(struct pci_bus *bus) return bus->self && bus->self->ari_enabled; } -void pci_reassigndev_resource_alignment(struct pci_dev *dev); +#ifdef CONFIG_PCI_QUIRKS +extern int pci_is_reassigndev(struct pci_dev *dev); +resource_size_t pci_specified_resource_alignment(struct pci_dev *dev); extern void pci_disable_bridge_window(struct pci_dev *dev); +#endif /* Single Root I/O Virtualization */ struct pci_sriov { diff --git a/trunk/drivers/pci/pcie/Kconfig b/trunk/drivers/pci/pcie/Kconfig index 6c8bc5809787..72962cc92e0a 100644 --- a/trunk/drivers/pci/pcie/Kconfig +++ b/trunk/drivers/pci/pcie/Kconfig @@ -55,31 +55,6 @@ config PCIEASPM_DEBUG This enables PCI Express ASPM debug support. It will add per-device interface to control ASPM. -choice - prompt "Default ASPM policy" - default PCIEASPM_DEFAULT - depends on PCIEASPM - -config PCIEASPM_DEFAULT - bool "BIOS default" - depends on PCIEASPM - help - Use the BIOS defaults for PCI Express ASPM. - -config PCIEASPM_POWERSAVE - bool "Powersave" - depends on PCIEASPM - help - Enable PCI Express ASPM L0s and L1 where possible, even if the - BIOS did not. - -config PCIEASPM_PERFORMANCE - bool "Performance" - depends on PCIEASPM - help - Disable PCI Express ASPM L0s and L1, even if the BIOS enabled them. 
-endchoice - config PCIE_PME def_bool y depends on PCIEPORTBUS && PM_RUNTIME && EXPERIMENTAL && ACPI diff --git a/trunk/drivers/pci/pcie/aspm.c b/trunk/drivers/pci/pcie/aspm.c index 4bdef24cd412..24f049e73952 100644 --- a/trunk/drivers/pci/pcie/aspm.c +++ b/trunk/drivers/pci/pcie/aspm.c @@ -76,15 +76,7 @@ static LIST_HEAD(link_list); #define POLICY_DEFAULT 0 /* BIOS default setting */ #define POLICY_PERFORMANCE 1 /* high performance */ #define POLICY_POWERSAVE 2 /* high power saving */ - -#ifdef CONFIG_PCIEASPM_PERFORMANCE -static int aspm_policy = POLICY_PERFORMANCE; -#elif defined CONFIG_PCIEASPM_POWERSAVE -static int aspm_policy = POLICY_POWERSAVE; -#else static int aspm_policy; -#endif - static const char *policy_str[] = { [POLICY_DEFAULT] = "default", [POLICY_PERFORMANCE] = "performance", diff --git a/trunk/drivers/pci/pcie/portdrv.h b/trunk/drivers/pci/pcie/portdrv.h index eea2ca2375e6..bd00a01aef14 100644 --- a/trunk/drivers/pci/pcie/portdrv.h +++ b/trunk/drivers/pci/pcie/portdrv.h @@ -34,18 +34,6 @@ struct pci_dev; extern void pcie_clear_root_pme_status(struct pci_dev *dev); -#ifdef CONFIG_HOTPLUG_PCI_PCIE -extern bool pciehp_msi_disabled; - -static inline bool pciehp_no_msi(void) -{ - return pciehp_msi_disabled; -} - -#else /* !CONFIG_HOTPLUG_PCI_PCIE */ -static inline bool pciehp_no_msi(void) { return false; } -#endif /* !CONFIG_HOTPLUG_PCI_PCIE */ - #ifdef CONFIG_PCIE_PME extern bool pcie_pme_msi_disabled; diff --git a/trunk/drivers/pci/pcie/portdrv_core.c b/trunk/drivers/pci/pcie/portdrv_core.c index 2f589a54f9bd..595654a1a6a6 100644 --- a/trunk/drivers/pci/pcie/portdrv_core.c +++ b/trunk/drivers/pci/pcie/portdrv_core.c @@ -19,17 +19,6 @@ #include "../pci.h" #include "portdrv.h" -bool pciehp_msi_disabled; - -static int __init pciehp_setup(char *str) -{ - if (!strncmp(str, "nomsi", 5)) - pciehp_msi_disabled = true; - - return 1; -} -__setup("pcie_hp=", pciehp_setup); - /** * release_pcie_device - free PCI Express port service device structure * @dev: 
Port service device to release @@ -200,9 +189,8 @@ static int init_service_irqs(struct pci_dev *dev, int *irqs, int mask) { int i, irq = -1; - /* We have to use INTx if MSI cannot be used for PCIe PME or pciehp. */ - if (((mask & PCIE_PORT_SERVICE_PME) && pcie_pme_no_msi()) || - ((mask & PCIE_PORT_SERVICE_HP) && pciehp_no_msi())) { + /* We have to use INTx if MSI cannot be used for PCIe PME. */ + if ((mask & PCIE_PORT_SERVICE_PME) && pcie_pme_no_msi()) { if (dev->pin) irq = dev->irq; goto no_msi; diff --git a/trunk/drivers/pci/probe.c b/trunk/drivers/pci/probe.c index 5e1ca3c58a7d..71eac9cd724d 100644 --- a/trunk/drivers/pci/probe.c +++ b/trunk/drivers/pci/probe.c @@ -15,8 +15,6 @@ #define CARDBUS_LATENCY_TIMER 176 /* secondary latency timer */ #define CARDBUS_RESERVE_BUSNR 3 -static LIST_HEAD(pci_host_bridges); - /* Ugh. Need to stop exporting this to modules. */ LIST_HEAD(pci_root_buses); EXPORT_SYMBOL(pci_root_buses); @@ -44,82 +42,6 @@ int no_pci_devices(void) } EXPORT_SYMBOL(no_pci_devices); -static struct pci_host_bridge *pci_host_bridge(struct pci_dev *dev) -{ - struct pci_bus *bus; - struct pci_host_bridge *bridge; - - bus = dev->bus; - while (bus->parent) - bus = bus->parent; - - list_for_each_entry(bridge, &pci_host_bridges, list) { - if (bridge->bus == bus) - return bridge; - } - - return NULL; -} - -static bool resource_contains(struct resource *res1, struct resource *res2) -{ - return res1->start <= res2->start && res1->end >= res2->end; -} - -void pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region, - struct resource *res) -{ - struct pci_host_bridge *bridge = pci_host_bridge(dev); - struct pci_host_bridge_window *window; - resource_size_t offset = 0; - - list_for_each_entry(window, &bridge->windows, list) { - if (resource_type(res) != resource_type(window->res)) - continue; - - if (resource_contains(window->res, res)) { - offset = window->offset; - break; - } - } - - region->start = res->start - offset; - region->end = res->end 
- offset; -} -EXPORT_SYMBOL(pcibios_resource_to_bus); - -static bool region_contains(struct pci_bus_region *region1, - struct pci_bus_region *region2) -{ - return region1->start <= region2->start && region1->end >= region2->end; -} - -void pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res, - struct pci_bus_region *region) -{ - struct pci_host_bridge *bridge = pci_host_bridge(dev); - struct pci_host_bridge_window *window; - struct pci_bus_region bus_region; - resource_size_t offset = 0; - - list_for_each_entry(window, &bridge->windows, list) { - if (resource_type(res) != resource_type(window->res)) - continue; - - bus_region.start = window->res->start - window->offset; - bus_region.end = window->res->end - window->offset; - - if (region_contains(&bus_region, region)) { - offset = window->offset; - break; - } - } - - res->start = region->start + offset; - res->end = region->end + offset; -} -EXPORT_SYMBOL(pcibios_bus_to_resource); - /* * PCI Bus Class */ @@ -213,7 +135,6 @@ int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type, { u32 l, sz, mask; u16 orig_cmd; - struct pci_bus_region region; mask = type ? 
PCI_ROM_ADDRESS_MASK : ~0; @@ -293,13 +214,11 @@ int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type, /* Address above 32-bit boundary; disable the BAR */ pci_write_config_dword(dev, pos, 0); pci_write_config_dword(dev, pos + 4, 0); - region.start = 0; - region.end = sz64; - pcibios_bus_to_resource(dev, res, ®ion); + res->start = 0; + res->end = sz64; } else { - region.start = l64; - region.end = l64 + sz64; - pcibios_bus_to_resource(dev, res, ®ion); + res->start = l64; + res->end = l64 + sz64; dev_printk(KERN_DEBUG, &dev->dev, "reg %x: %pR\n", pos, res); } @@ -309,9 +228,8 @@ int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type, if (!sz) goto fail; - region.start = l; - region.end = l + sz; - pcibios_bus_to_resource(dev, res, ®ion); + res->start = l; + res->end = l + sz; dev_printk(KERN_DEBUG, &dev->dev, "reg %x: %pR\n", pos, res); } @@ -348,8 +266,7 @@ static void __devinit pci_read_bridge_io(struct pci_bus *child) struct pci_dev *dev = child->self; u8 io_base_lo, io_limit_lo; unsigned long base, limit; - struct pci_bus_region region; - struct resource *res, res2; + struct resource *res; res = child->resource[0]; pci_read_config_byte(dev, PCI_IO_BASE, &io_base_lo); @@ -367,14 +284,10 @@ static void __devinit pci_read_bridge_io(struct pci_bus *child) if (base && base <= limit) { res->flags = (io_base_lo & PCI_IO_RANGE_TYPE_MASK) | IORESOURCE_IO; - res2.flags = res->flags; - region.start = base; - region.end = limit + 0xfff; - pcibios_bus_to_resource(dev, &res2, ®ion); if (!res->start) - res->start = res2.start; + res->start = base; if (!res->end) - res->end = res2.end; + res->end = limit + 0xfff; dev_printk(KERN_DEBUG, &dev->dev, " bridge window %pR\n", res); } } @@ -384,7 +297,6 @@ static void __devinit pci_read_bridge_mmio(struct pci_bus *child) struct pci_dev *dev = child->self; u16 mem_base_lo, mem_limit_lo; unsigned long base, limit; - struct pci_bus_region region; struct resource *res; res = child->resource[1]; @@ -394,9 +306,8 @@ 
static void __devinit pci_read_bridge_mmio(struct pci_bus *child) limit = (mem_limit_lo & PCI_MEMORY_RANGE_MASK) << 16; if (base && base <= limit) { res->flags = (mem_base_lo & PCI_MEMORY_RANGE_TYPE_MASK) | IORESOURCE_MEM; - region.start = base; - region.end = limit + 0xfffff; - pcibios_bus_to_resource(dev, res, ®ion); + res->start = base; + res->end = limit + 0xfffff; dev_printk(KERN_DEBUG, &dev->dev, " bridge window %pR\n", res); } } @@ -406,7 +317,6 @@ static void __devinit pci_read_bridge_mmio_pref(struct pci_bus *child) struct pci_dev *dev = child->self; u16 mem_base_lo, mem_limit_lo; unsigned long base, limit; - struct pci_bus_region region; struct resource *res; res = child->resource[2]; @@ -443,9 +353,8 @@ static void __devinit pci_read_bridge_mmio_pref(struct pci_bus *child) IORESOURCE_MEM | IORESOURCE_PREFETCH; if (res->flags & PCI_PREF_RANGE_TYPE_64) res->flags |= IORESOURCE_MEM_64; - region.start = base; - region.end = limit + 0xfffff; - pcibios_bus_to_resource(dev, res, ®ion); + res->start = base; + res->end = limit + 0xfffff; dev_printk(KERN_DEBUG, &dev->dev, " bridge window %pR\n", res); } } @@ -991,8 +900,6 @@ int pci_setup_device(struct pci_dev *dev) u8 hdr_type; struct pci_slot *slot; int pos = 0; - struct pci_bus_region region; - struct resource *res; if (pci_read_config_byte(dev, PCI_HEADER_TYPE, &hdr_type)) return -EIO; @@ -1019,10 +926,12 @@ int pci_setup_device(struct pci_dev *dev) pci_read_config_dword(dev, PCI_CLASS_REVISION, &class); dev->revision = class & 0xff; - dev->class = class >> 8; /* upper 3 bytes */ + class >>= 8; /* upper 3 bytes */ + dev->class = class; + class >>= 8; - dev_printk(KERN_DEBUG, &dev->dev, "[%04x:%04x] type %02x class %#08x\n", - dev->vendor, dev->device, dev->hdr_type, dev->class); + dev_printk(KERN_DEBUG, &dev->dev, "[%04x:%04x] type %d class %#08x\n", + dev->vendor, dev->device, dev->hdr_type, class); /* need to have dev->class ready */ dev->cfg_size = pci_cfg_space_size(dev); @@ -1054,28 +963,20 @@ int 
pci_setup_device(struct pci_dev *dev) u8 progif; pci_read_config_byte(dev, PCI_CLASS_PROG, &progif); if ((progif & 1) == 0) { - region.start = 0x1F0; - region.end = 0x1F7; - res = &dev->resource[0]; - res->flags = LEGACY_IO_RESOURCE; - pcibios_bus_to_resource(dev, res, ®ion); - region.start = 0x3F6; - region.end = 0x3F6; - res = &dev->resource[1]; - res->flags = LEGACY_IO_RESOURCE; - pcibios_bus_to_resource(dev, res, ®ion); + dev->resource[0].start = 0x1F0; + dev->resource[0].end = 0x1F7; + dev->resource[0].flags = LEGACY_IO_RESOURCE; + dev->resource[1].start = 0x3F6; + dev->resource[1].end = 0x3F6; + dev->resource[1].flags = LEGACY_IO_RESOURCE; } if ((progif & 4) == 0) { - region.start = 0x170; - region.end = 0x177; - res = &dev->resource[2]; - res->flags = LEGACY_IO_RESOURCE; - pcibios_bus_to_resource(dev, res, ®ion); - region.start = 0x376; - region.end = 0x376; - res = &dev->resource[3]; - res->flags = LEGACY_IO_RESOURCE; - pcibios_bus_to_resource(dev, res, ®ion); + dev->resource[2].start = 0x170; + dev->resource[2].end = 0x177; + dev->resource[2].flags = LEGACY_IO_RESOURCE; + dev->resource[3].start = 0x376; + dev->resource[3].end = 0x376; + dev->resource[3].flags = LEGACY_IO_RESOURCE; } } break; @@ -1112,8 +1013,8 @@ int pci_setup_device(struct pci_dev *dev) return -EIO; bad: - dev_err(&dev->dev, "ignoring class %#08x (doesn't match header " - "type %02x)\n", dev->class, dev->hdr_type); + dev_err(&dev->dev, "ignoring class %02x (doesn't match header " + "type %02x)\n", class, dev->hdr_type); dev->class = PCI_CLASS_NOT_DEFINED; } @@ -1125,7 +1026,6 @@ static void pci_release_capabilities(struct pci_dev *dev) { pci_vpd_release(dev); pci_iov_release(dev); - pci_free_cap_save_buffers(dev); } /** @@ -1218,54 +1118,40 @@ struct pci_dev *alloc_pci_dev(void) } EXPORT_SYMBOL(alloc_pci_dev); -bool pci_bus_read_dev_vendor_id(struct pci_bus *bus, int devfn, u32 *l, - int crs_timeout) +/* + * Read the config data for a PCI device, sanity-check it + * and fill in the dev 
structure... + */ +static struct pci_dev *pci_scan_device(struct pci_bus *bus, int devfn) { + struct pci_dev *dev; + u32 l; int delay = 1; - if (pci_bus_read_config_dword(bus, devfn, PCI_VENDOR_ID, l)) - return false; + if (pci_bus_read_config_dword(bus, devfn, PCI_VENDOR_ID, &l)) + return NULL; /* some broken boards return 0 or ~0 if a slot is empty: */ - if (*l == 0xffffffff || *l == 0x00000000 || - *l == 0x0000ffff || *l == 0xffff0000) - return false; + if (l == 0xffffffff || l == 0x00000000 || + l == 0x0000ffff || l == 0xffff0000) + return NULL; /* Configuration request Retry Status */ - while (*l == 0xffff0001) { - if (!crs_timeout) - return false; - + while (l == 0xffff0001) { msleep(delay); delay *= 2; - if (pci_bus_read_config_dword(bus, devfn, PCI_VENDOR_ID, l)) - return false; + if (pci_bus_read_config_dword(bus, devfn, PCI_VENDOR_ID, &l)) + return NULL; /* Card hasn't responded in 60 seconds? Must be stuck. */ - if (delay > crs_timeout) { + if (delay > 60 * 1000) { printk(KERN_WARNING "pci %04x:%02x:%02x.%d: not " "responding\n", pci_domain_nr(bus), bus->number, PCI_SLOT(devfn), PCI_FUNC(devfn)); - return false; + return NULL; } } - return true; -} -EXPORT_SYMBOL(pci_bus_read_dev_vendor_id); - -/* - * Read the config data for a PCI device, sanity-check it - * and fill in the dev structure... - */ -static struct pci_dev *pci_scan_device(struct pci_bus *bus, int devfn) -{ - struct pci_dev *dev; - u32 l; - - if (!pci_bus_read_dev_vendor_id(bus, devfn, &l, 60*1000)) - return NULL; - dev = alloc_pci_dev(); if (!dev) return NULL; @@ -1326,9 +1212,6 @@ void pci_device_add(struct pci_dev *dev, struct pci_bus *bus) /* Fix up broken headers */ pci_fixup_device(pci_fixup_header, dev); - /* moved out from quirk header fixup code */ - pci_reassigndev_resource_alignment(dev); - /* Clear the state_saved flag. 
*/ dev->state_saved = false; @@ -1647,27 +1530,21 @@ unsigned int __devinit pci_scan_child_bus(struct pci_bus *bus) struct pci_bus *pci_create_root_bus(struct device *parent, int bus, struct pci_ops *ops, void *sysdata, struct list_head *resources) { - int error; - struct pci_host_bridge *bridge; + int error, i; struct pci_bus *b, *b2; struct device *dev; - struct pci_host_bridge_window *window, *n; + struct pci_bus_resource *bus_res, *n; struct resource *res; - resource_size_t offset; - char bus_addr[64]; - char *fmt; - - bridge = kzalloc(sizeof(*bridge), GFP_KERNEL); - if (!bridge) - return NULL; b = pci_alloc_bus(); if (!b) - goto err_bus; + return NULL; dev = kzalloc(sizeof(*dev), GFP_KERNEL); - if (!dev) - goto err_dev; + if (!dev) { + kfree(b); + return NULL; + } b->sysdata = sysdata; b->ops = ops; @@ -1679,6 +1556,10 @@ struct pci_bus *pci_create_root_bus(struct device *parent, int bus, goto err_out; } + down_write(&pci_bus_sem); + list_add_tail(&b->node, &pci_root_buses); + up_write(&pci_bus_sem); + dev->parent = parent; dev->release = pci_release_bus_bridge_dev; dev_set_name(dev, "pci%04x:%02x", pci_domain_nr(b), bus); @@ -1704,53 +1585,31 @@ struct pci_bus *pci_create_root_bus(struct device *parent, int bus, b->number = b->secondary = bus; - bridge->bus = b; - INIT_LIST_HEAD(&bridge->windows); + /* Add initial resources to the bus */ + list_for_each_entry_safe(bus_res, n, resources, list) + list_move_tail(&bus_res->list, &b->resources); if (parent) dev_info(parent, "PCI host bridge to bus %s\n", dev_name(&b->dev)); else printk(KERN_INFO "PCI host bridge to bus %s\n", dev_name(&b->dev)); - /* Add initial resources to the bus */ - list_for_each_entry_safe(window, n, resources, list) { - list_move_tail(&window->list, &bridge->windows); - res = window->res; - offset = window->offset; - pci_bus_add_resource(b, res, 0); - if (offset) { - if (resource_type(res) == IORESOURCE_IO) - fmt = " (bus address [%#06llx-%#06llx])"; - else - fmt = " (bus address 
[%#010llx-%#010llx])"; - snprintf(bus_addr, sizeof(bus_addr), fmt, - (unsigned long long) (res->start - offset), - (unsigned long long) (res->end - offset)); - } else - bus_addr[0] = '\0'; - dev_info(&b->dev, "root bus resource %pR%s\n", res, bus_addr); + pci_bus_for_each_resource(b, res, i) { + if (res) + dev_info(&b->dev, "root bus resource %pR\n", res); } - down_write(&pci_bus_sem); - list_add_tail(&bridge->list, &pci_host_bridges); - list_add_tail(&b->node, &pci_root_buses); - up_write(&pci_bus_sem); - return b; class_dev_reg_err: device_unregister(dev); dev_reg_err: down_write(&pci_bus_sem); - list_del(&bridge->list); list_del(&b->node); up_write(&pci_bus_sem); err_out: kfree(dev); -err_dev: kfree(b); -err_bus: - kfree(bridge); return NULL; } @@ -1808,29 +1667,36 @@ EXPORT_SYMBOL(pci_scan_bus); #ifdef CONFIG_HOTPLUG /** - * pci_rescan_bus_bridge_resize - scan a PCI bus for devices. - * @bridge: PCI bridge for the bus to scan + * pci_rescan_bus - scan a PCI bus for devices. + * @bus: PCI bus to scan * - * Scan a PCI bus and child buses for new devices, add them, - * and enable them, resizing bridge mmio/io resource if necessary - * and possible. The caller must ensure the child devices are already - * removed for resizing to occur. + * Scan a PCI bus and child buses for new devices, adds them, + * and enables them. * * Returns the max number of subordinate bus discovered. 
*/ -unsigned int __ref pci_rescan_bus_bridge_resize(struct pci_dev *bridge) +unsigned int __ref pci_rescan_bus(struct pci_bus *bus) { unsigned int max; - struct pci_bus *bus = bridge->subordinate; + struct pci_dev *dev; max = pci_scan_child_bus(bus); - pci_assign_unassigned_bridge_resources(bridge); + down_read(&pci_bus_sem); + list_for_each_entry(dev, &bus->devices, bus_list) + if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE || + dev->hdr_type == PCI_HEADER_TYPE_CARDBUS) + if (dev->subordinate) + pci_bus_size_bridges(dev->subordinate); + up_read(&pci_bus_sem); + pci_bus_assign_resources(bus); + pci_enable_bridges(bus); pci_bus_add_devices(bus); return max; } +EXPORT_SYMBOL_GPL(pci_rescan_bus); EXPORT_SYMBOL(pci_add_new_bus); EXPORT_SYMBOL(pci_scan_slot); diff --git a/trunk/drivers/pci/quirks.c b/trunk/drivers/pci/quirks.c index 4bf71028556b..f722c5f6951a 100644 --- a/trunk/drivers/pci/quirks.c +++ b/trunk/drivers/pci/quirks.c @@ -26,11 +26,72 @@ #include #include #include -#include -#include #include /* isa_dma_bridge_buggy */ #include "pci.h" +/* + * This quirk function disables memory decoding and releases memory resources + * of the device specified by kernel's boot parameter 'pci=resource_alignment='. + * It also rounds up size to specified alignment. + * Later on, the kernel will assign page-aligned memory resource back + * to the device. 
+ */ +static void __devinit quirk_resource_alignment(struct pci_dev *dev) +{ + int i; + struct resource *r; + resource_size_t align, size; + u16 command; + + if (!pci_is_reassigndev(dev)) + return; + + if (dev->hdr_type == PCI_HEADER_TYPE_NORMAL && + (dev->class >> 8) == PCI_CLASS_BRIDGE_HOST) { + dev_warn(&dev->dev, + "Can't reassign resources to host bridge.\n"); + return; + } + + dev_info(&dev->dev, + "Disabling memory decoding and releasing memory resources.\n"); + pci_read_config_word(dev, PCI_COMMAND, &command); + command &= ~PCI_COMMAND_MEMORY; + pci_write_config_word(dev, PCI_COMMAND, command); + + align = pci_specified_resource_alignment(dev); + for (i=0; i < PCI_BRIDGE_RESOURCES; i++) { + r = &dev->resource[i]; + if (!(r->flags & IORESOURCE_MEM)) + continue; + size = resource_size(r); + if (size < align) { + size = align; + dev_info(&dev->dev, + "Rounding up size of resource #%d to %#llx.\n", + i, (unsigned long long)size); + } + r->end = size - 1; + r->start = 0; + } + /* Need to disable bridge's resource window, + * to enable the kernel to reassign new resource + * window later on. + */ + if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE && + (dev->class >> 8) == PCI_CLASS_BRIDGE_PCI) { + for (i = PCI_BRIDGE_RESOURCES; i < PCI_NUM_RESOURCES; i++) { + r = &dev->resource[i]; + if (!(r->flags & IORESOURCE_MEM)) + continue; + r->end = resource_size(r) - 1; + r->start = 0; + } + pci_disable_bridge_window(dev); + } +} +DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, quirk_resource_alignment); + /* * Decoding should be disabled for a PCI device during BAR sizing to avoid * conflict. 
But doing so may cause problems on host bridge and perhaps other @@ -39,10 +100,10 @@ */ static void __devinit quirk_mmio_always_on(struct pci_dev *dev) { - dev->mmio_always_on = 1; + if ((dev->class >> 8) == PCI_CLASS_BRIDGE_HOST) + dev->mmio_always_on = 1; } -DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_ANY_ID, PCI_ANY_ID, - PCI_CLASS_BRIDGE_HOST, 8, quirk_mmio_always_on); +DECLARE_PCI_FIXUP_EARLY(PCI_ANY_ID, PCI_ANY_ID, quirk_mmio_always_on); /* The Mellanox Tavor device gives false positive parity errors * Mark this device with a broken_parity_status, to allow @@ -941,12 +1002,12 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C597_0, quirk_vt */ static void quirk_cardbus_legacy(struct pci_dev *dev) { + if ((PCI_CLASS_BRIDGE_CARDBUS << 8) ^ dev->class) + return; pci_write_config_dword(dev, PCI_CB_LEGACY_MODE_BASE, 0); } -DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_ANY_ID, PCI_ANY_ID, - PCI_CLASS_BRIDGE_CARDBUS, 8, quirk_cardbus_legacy); -DECLARE_PCI_FIXUP_CLASS_RESUME_EARLY(PCI_ANY_ID, PCI_ANY_ID, - PCI_CLASS_BRIDGE_CARDBUS, 8, quirk_cardbus_legacy); +DECLARE_PCI_FIXUP_FINAL(PCI_ANY_ID, PCI_ANY_ID, quirk_cardbus_legacy); +DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_ANY_ID, PCI_ANY_ID, quirk_cardbus_legacy); /* * Following the PCI ordering rules is optional on the AMD762. I'm not @@ -1103,20 +1164,17 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_10, qui static void __devinit quirk_no_ata_d3(struct pci_dev *pdev) { - pdev->dev_flags |= PCI_DEV_FLAGS_NO_D3; + /* Quirk the legacy ATA devices only. The AHCI ones are ok */ + if ((pdev->class >> 8) == PCI_CLASS_STORAGE_IDE) + pdev->dev_flags |= PCI_DEV_FLAGS_NO_D3; } -/* Quirk the legacy ATA devices only. 
The AHCI ones are ok */ -DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_SERVERWORKS, PCI_ANY_ID, - PCI_CLASS_STORAGE_IDE, 8, quirk_no_ata_d3); -DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_ATI, PCI_ANY_ID, - PCI_CLASS_STORAGE_IDE, 8, quirk_no_ata_d3); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SERVERWORKS, PCI_ANY_ID, quirk_no_ata_d3); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_ATI, PCI_ANY_ID, quirk_no_ata_d3); /* ALi loses some register settings that we cannot then restore */ -DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_AL, PCI_ANY_ID, - PCI_CLASS_STORAGE_IDE, 8, quirk_no_ata_d3); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_AL, PCI_ANY_ID, quirk_no_ata_d3); /* VIA comes back fine but we need to keep it alive or ACPI GTM failures occur when mode detecting */ -DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_VIA, PCI_ANY_ID, - PCI_CLASS_STORAGE_IDE, 8, quirk_no_ata_d3); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_VIA, PCI_ANY_ID, quirk_no_ata_d3); /* This was originally an Alpha specific thing, but it really fits here. * The i82375 PCI/EISA bridge appears as non-classified. Fix that. 
@@ -1815,7 +1873,8 @@ static void __devinit quirk_netmos(struct pci_dev *dev) case PCI_DEVICE_ID_NETMOS_9745: case PCI_DEVICE_ID_NETMOS_9845: case PCI_DEVICE_ID_NETMOS_9855: - if (num_parallel) { + if ((dev->class >> 8) == PCI_CLASS_COMMUNICATION_SERIAL && + num_parallel) { dev_info(&dev->dev, "Netmos %04x (%u parallel, " "%u serial); changing class SERIAL to OTHER " "(use parport_serial)\n", @@ -1825,8 +1884,7 @@ static void __devinit quirk_netmos(struct pci_dev *dev) } } } -DECLARE_PCI_FIXUP_CLASS_HEADER(PCI_VENDOR_ID_NETMOS, PCI_ANY_ID, - PCI_CLASS_COMMUNICATION_SERIAL, 8, quirk_netmos); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NETMOS, PCI_ANY_ID, quirk_netmos); static void __devinit quirk_e100_interrupt(struct pci_dev *dev) { @@ -1894,8 +1952,7 @@ static void __devinit quirk_e100_interrupt(struct pci_dev *dev) iounmap(csr); } -DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, - PCI_CLASS_NETWORK_ETHERNET, 8, quirk_e100_interrupt); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, quirk_e100_interrupt); /* * The 82575 and 82598 may experience data corruption issues when transitioning @@ -2777,11 +2834,12 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x3c28, vtd_mask_spec_errors); static void __devinit fixup_ti816x_class(struct pci_dev* dev) { /* TI 816x devices do not have class code set when in PCIe boot mode */ - dev_info(&dev->dev, "Setting PCI class for 816x PCIe device\n"); - dev->class = PCI_CLASS_MULTIMEDIA_VIDEO; + if (dev->class == PCI_CLASS_NOT_DEFINED) { + dev_info(&dev->dev, "Setting PCI class for 816x PCIe device\n"); + dev->class = PCI_CLASS_MULTIMEDIA_VIDEO; + } } -DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_TI, 0xb800, - PCI_CLASS_NOT_DEFINED, 0, fixup_ti816x_class); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_TI, 0xb800, fixup_ti816x_class); /* Some PCIe devices do not work reliably with the claimed maximum * payload size supported. 
@@ -2866,73 +2924,17 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x65f8, quirk_intel_mc_errata); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x65f9, quirk_intel_mc_errata); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x65fa, quirk_intel_mc_errata); - -static void do_one_fixup_debug(void (*fn)(struct pci_dev *dev), struct pci_dev *dev) -{ - ktime_t calltime, delta, rettime; - unsigned long long duration; - - printk(KERN_DEBUG "calling %pF @ %i for %s\n", - fn, task_pid_nr(current), dev_name(&dev->dev)); - calltime = ktime_get(); - fn(dev); - rettime = ktime_get(); - delta = ktime_sub(rettime, calltime); - duration = (unsigned long long) ktime_to_ns(delta) >> 10; - printk(KERN_DEBUG "pci fixup %pF returned after %lld usecs for %s\n", - fn, duration, dev_name(&dev->dev)); -} - -/* - * Some BIOS implementations leave the Intel GPU interrupts enabled, - * even though no one is handling them (f.e. i915 driver is never loaded). - * Additionally the interrupt destination is not set up properly - * and the interrupt ends up -somewhere-. - * - * These spurious interrupts are "sticky" and the kernel disables - * the (shared) interrupt line after 100.000+ generated interrupts. - * - * Fix it by disabling the still enabled interrupts. - * This resolves crashes often seen on monitor unplug. 
- */ -#define I915_DEIER_REG 0x4400c -static void __devinit disable_igfx_irq(struct pci_dev *dev) -{ - void __iomem *regs = pci_iomap(dev, 0, 0); - if (regs == NULL) { - dev_warn(&dev->dev, "igfx quirk: Can't iomap PCI device\n"); - return; - } - - /* Check if any interrupt line is still enabled */ - if (readl(regs + I915_DEIER_REG) != 0) { - dev_warn(&dev->dev, "BIOS left Intel GPU interrupts enabled; " - "disabling\n"); - - writel(0, regs + I915_DEIER_REG); - } - - pci_iounmap(dev, regs); -} -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0102, disable_igfx_irq); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x010a, disable_igfx_irq); - static void pci_do_fixups(struct pci_dev *dev, struct pci_fixup *f, struct pci_fixup *end) { - for (; f < end; f++) - if ((f->class == (u32) (dev->class >> f->class_shift) || - f->class == (u32) PCI_ANY_ID) && - (f->vendor == dev->vendor || - f->vendor == (u16) PCI_ANY_ID) && - (f->device == dev->device || - f->device == (u16) PCI_ANY_ID)) { + while (f < end) { + if ((f->vendor == dev->vendor || f->vendor == (u16) PCI_ANY_ID) && + (f->device == dev->device || f->device == (u16) PCI_ANY_ID)) { dev_dbg(&dev->dev, "calling %pF\n", f->hook); - if (initcall_debug) - do_one_fixup_debug(f->hook, dev); - else - f->hook(dev); + f->hook(dev); } + f++; + } } extern struct pci_fixup __start_pci_fixups_early[]; diff --git a/trunk/drivers/pci/remove.c b/trunk/drivers/pci/remove.c index fd77e2bde2e8..ef8b18c48f26 100644 --- a/trunk/drivers/pci/remove.c +++ b/trunk/drivers/pci/remove.c @@ -79,7 +79,7 @@ EXPORT_SYMBOL(pci_remove_bus); static void __pci_remove_behind_bridge(struct pci_dev *dev); /** - * pci_stop_and_remove_bus_device - remove a PCI device and any children + * pci_remove_bus_device - remove a PCI device and any children * @dev: the device to remove * * Remove a PCI device from the device lists, informing the drivers @@ -90,7 +90,7 @@ static void __pci_remove_behind_bridge(struct pci_dev *dev); * device lists, remove the /proc entry, 
and notify userspace * (/sbin/hotplug). */ -void __pci_remove_bus_device(struct pci_dev *dev) +static void __pci_remove_bus_device(struct pci_dev *dev) { if (dev->subordinate) { struct pci_bus *b = dev->subordinate; @@ -102,9 +102,7 @@ void __pci_remove_bus_device(struct pci_dev *dev) pci_destroy_dev(dev); } -EXPORT_SYMBOL(__pci_remove_bus_device); - -void pci_stop_and_remove_bus_device(struct pci_dev *dev) +void pci_remove_bus_device(struct pci_dev *dev) { pci_stop_bus_device(dev); __pci_remove_bus_device(dev); @@ -129,15 +127,14 @@ static void pci_stop_behind_bridge(struct pci_dev *dev) } /** - * pci_stop_and_remove_behind_bridge - stop and remove all devices behind - * a PCI bridge + * pci_remove_behind_bridge - remove all devices behind a PCI bridge * @dev: PCI bridge device * * Remove all devices on the bus, except for the parent bridge. * This also removes any child buses, and any devices they may * contain in a depth-first manner. */ -void pci_stop_and_remove_behind_bridge(struct pci_dev *dev) +void pci_remove_behind_bridge(struct pci_dev *dev) { pci_stop_behind_bridge(dev); __pci_remove_behind_bridge(dev); @@ -147,15 +144,7 @@ static void pci_stop_bus_devices(struct pci_bus *bus) { struct list_head *l, *n; - /* - * VFs could be removed by pci_stop_and_remove_bus_device() in the - * pci_stop_bus_devices() code path for PF. - * aka, bus->devices get updated in the process. - * but VFs are inserted after PFs when SRIOV is enabled for PF, - * We can iterate the list backwards to get prev valid PF instead - * of removed VF. 
- */ - list_for_each_prev_safe(l, n, &bus->devices) { + list_for_each_safe(l, n, &bus->devices) { struct pci_dev *dev = pci_dev_b(l); pci_stop_bus_device(dev); } @@ -177,6 +166,6 @@ void pci_stop_bus_device(struct pci_dev *dev) pci_stop_dev(dev); } -EXPORT_SYMBOL(pci_stop_and_remove_bus_device); -EXPORT_SYMBOL(pci_stop_and_remove_behind_bridge); +EXPORT_SYMBOL(pci_remove_bus_device); +EXPORT_SYMBOL(pci_remove_behind_bridge); EXPORT_SYMBOL_GPL(pci_stop_bus_device); diff --git a/trunk/drivers/pci/setup-bus.c b/trunk/drivers/pci/setup-bus.c index 8fa2d4be88de..86b69f85f900 100644 --- a/trunk/drivers/pci/setup-bus.c +++ b/trunk/drivers/pci/setup-bus.c @@ -25,13 +25,10 @@ #include #include #include -#include #include "pci.h" -unsigned int pci_flags; - -struct pci_dev_resource { - struct list_head list; +struct resource_list_x { + struct resource_list_x *next; struct resource *res; struct pci_dev *dev; resource_size_t start; @@ -41,14 +38,21 @@ struct pci_dev_resource { unsigned long flags; }; -static void free_list(struct list_head *head) +#define free_list(type, head) do { \ + struct type *list, *tmp; \ + for (list = (head)->next; list;) { \ + tmp = list; \ + list = list->next; \ + kfree(tmp); \ + } \ + (head)->next = NULL; \ +} while (0) + +int pci_realloc_enable = 0; +#define pci_realloc_enabled() pci_realloc_enable +void pci_realloc(void) { - struct pci_dev_resource *dev_res, *tmp; - - list_for_each_entry_safe(dev_res, tmp, head, list) { - list_del(&dev_res->list); - kfree(dev_res); - } + pci_realloc_enable = 1; } /** @@ -60,18 +64,21 @@ static void free_list(struct list_head *head) * @add_size: additional size to be optionally added * to the resource */ -static int add_to_list(struct list_head *head, +static void add_to_list(struct resource_list_x *head, struct pci_dev *dev, struct resource *res, resource_size_t add_size, resource_size_t min_align) { - struct pci_dev_resource *tmp; + struct resource_list_x *list = head; + struct resource_list_x *ln = list->next; + 
struct resource_list_x *tmp; - tmp = kzalloc(sizeof(*tmp), GFP_KERNEL); + tmp = kmalloc(sizeof(*tmp), GFP_KERNEL); if (!tmp) { pr_warning("add_to_list: kmalloc() failed!\n"); - return -ENOMEM; + return; } + tmp->next = ln; tmp->res = res; tmp->dev = dev; tmp->start = res->start; @@ -79,100 +86,19 @@ static int add_to_list(struct list_head *head, tmp->flags = res->flags; tmp->add_size = add_size; tmp->min_align = min_align; - - list_add(&tmp->list, head); - - return 0; + list->next = tmp; } -static void remove_from_list(struct list_head *head, - struct resource *res) +static void add_to_failed_list(struct resource_list_x *head, + struct pci_dev *dev, struct resource *res) { - struct pci_dev_resource *dev_res, *tmp; - - list_for_each_entry_safe(dev_res, tmp, head, list) { - if (dev_res->res == res) { - list_del(&dev_res->list); - kfree(dev_res); - break; - } - } -} - -static resource_size_t get_res_add_size(struct list_head *head, - struct resource *res) -{ - struct pci_dev_resource *dev_res; - - list_for_each_entry(dev_res, head, list) { - if (dev_res->res == res) { - int idx = res - &dev_res->dev->resource[0]; - - dev_printk(KERN_DEBUG, &dev_res->dev->dev, - "res[%d]=%pR get_res_add_size add_size %llx\n", - idx, dev_res->res, - (unsigned long long)dev_res->add_size); - - return dev_res->add_size; - } - } - - return 0; -} - -/* Sort resources by alignment */ -static void pdev_sort_resources(struct pci_dev *dev, struct list_head *head) -{ - int i; - - for (i = 0; i < PCI_NUM_RESOURCES; i++) { - struct resource *r; - struct pci_dev_resource *dev_res, *tmp; - resource_size_t r_align; - struct list_head *n; - - r = &dev->resource[i]; - - if (r->flags & IORESOURCE_PCI_FIXED) - continue; - - if (!(r->flags) || r->parent) - continue; - - r_align = pci_resource_alignment(dev, r); - if (!r_align) { - dev_warn(&dev->dev, "BAR %d: %pR has bogus alignment\n", - i, r); - continue; - } - - tmp = kzalloc(sizeof(*tmp), GFP_KERNEL); - if (!tmp) - panic("pdev_sort_resources(): " - 
"kmalloc() failed!\n"); - tmp->res = r; - tmp->dev = dev; - - /* fallback is smallest one or list is empty*/ - n = head; - list_for_each_entry(dev_res, head, list) { - resource_size_t align; - - align = pci_resource_alignment(dev_res->dev, - dev_res->res); - - if (r_align > align) { - n = &dev_res->list; - break; - } - } - /* Insert it just before n*/ - list_add_tail(&tmp->list, n); - } + add_to_list(head, dev, res, + 0 /* dont care */, + 0 /* dont care */); } static void __dev_sort_resources(struct pci_dev *dev, - struct list_head *head) + struct resource_list *head) { u16 class = dev->class >> 8; @@ -210,54 +136,49 @@ static inline void reset_resource(struct resource *res) * additional resources for the element, provided the element * is in the head list. */ -static void reassign_resources_sorted(struct list_head *realloc_head, - struct list_head *head) +static void reassign_resources_sorted(struct resource_list_x *realloc_head, + struct resource_list *head) { struct resource *res; - struct pci_dev_resource *add_res, *tmp; - struct pci_dev_resource *dev_res; + struct resource_list_x *list, *tmp, *prev; + struct resource_list *hlist; resource_size_t add_size; int idx; - list_for_each_entry_safe(add_res, tmp, realloc_head, list) { - bool found_match = false; - - res = add_res->res; + prev = realloc_head; + for (list = realloc_head->next; list;) { + res = list->res; /* skip resource that has been reset */ if (!res->flags) goto out; /* skip this resource if not found in head list */ - list_for_each_entry(dev_res, head, list) { - if (dev_res->res == res) { - found_match = true; - break; - } - } - if (!found_match)/* just skip */ + for (hlist = head->next; hlist && hlist->res != res; + hlist = hlist->next); + if (!hlist) { /* just skip */ + prev = list; + list = list->next; continue; + } - idx = res - &add_res->dev->resource[0]; - add_size = add_res->add_size; + idx = res - &list->dev->resource[0]; + add_size=list->add_size; if (!resource_size(res)) { - res->start = 
add_res->start; + res->start = list->start; res->end = res->start + add_size - 1; - if (pci_assign_resource(add_res->dev, idx)) + if(pci_assign_resource(list->dev, idx)) reset_resource(res); } else { - resource_size_t align = add_res->min_align; - res->flags |= add_res->flags & - (IORESOURCE_STARTALIGN|IORESOURCE_SIZEALIGN); - if (pci_reassign_resource(add_res->dev, idx, - add_size, align)) - dev_printk(KERN_DEBUG, &add_res->dev->dev, - "failed to add %llx res[%d]=%pR\n", - (unsigned long long)add_size, - idx, res); + resource_size_t align = list->min_align; + res->flags |= list->flags & (IORESOURCE_STARTALIGN|IORESOURCE_SIZEALIGN); + if (pci_reassign_resource(list->dev, idx, add_size, align)) + dev_printk(KERN_DEBUG, &list->dev->dev, "failed to add optional resources res=%pR\n", + res); } out: - list_del(&add_res->list); - kfree(add_res); + tmp = list; + prev->next = list = list->next; + kfree(tmp); } } @@ -271,99 +192,35 @@ static void reassign_resources_sorted(struct list_head *realloc_head, * Satisfy resource requests of each element in the list. Add * requests that could not satisfied to the failed_list. 
*/ -static void assign_requested_resources_sorted(struct list_head *head, - struct list_head *fail_head) +static void assign_requested_resources_sorted(struct resource_list *head, + struct resource_list_x *fail_head) { struct resource *res; - struct pci_dev_resource *dev_res; + struct resource_list *list; int idx; - list_for_each_entry(dev_res, head, list) { - res = dev_res->res; - idx = res - &dev_res->dev->resource[0]; - if (resource_size(res) && - pci_assign_resource(dev_res->dev, idx)) { - if (fail_head && !pci_is_root_bus(dev_res->dev->bus)) { + for (list = head->next; list; list = list->next) { + res = list->res; + idx = res - &list->dev->resource[0]; + if (resource_size(res) && pci_assign_resource(list->dev, idx)) { + if (fail_head && !pci_is_root_bus(list->dev->bus)) { /* * if the failed res is for ROM BAR, and it will * be enabled later, don't add it to the list */ if (!((idx == PCI_ROM_RESOURCE) && (!(res->flags & IORESOURCE_ROM_ENABLE)))) - add_to_list(fail_head, - dev_res->dev, res, - 0 /* dont care */, - 0 /* dont care */); + add_to_failed_list(fail_head, list->dev, res); } reset_resource(res); } } } -static void __assign_resources_sorted(struct list_head *head, - struct list_head *realloc_head, - struct list_head *fail_head) +static void __assign_resources_sorted(struct resource_list *head, + struct resource_list_x *realloc_head, + struct resource_list_x *fail_head) { - /* - * Should not assign requested resources at first. - * they could be adjacent, so later reassign can not reallocate - * them one by one in parent resource window. - * Try to assign requested + add_size at begining - * if could do that, could get out early. - * if could not do that, we still try to assign requested at first, - * then try to reassign add_size for some resources. 
- */ - LIST_HEAD(save_head); - LIST_HEAD(local_fail_head); - struct pci_dev_resource *save_res; - struct pci_dev_resource *dev_res; - - /* Check if optional add_size is there */ - if (!realloc_head || list_empty(realloc_head)) - goto requested_and_reassign; - - /* Save original start, end, flags etc at first */ - list_for_each_entry(dev_res, head, list) { - if (add_to_list(&save_head, dev_res->dev, dev_res->res, 0, 0)) { - free_list(&save_head); - goto requested_and_reassign; - } - } - - /* Update res in head list with add_size in realloc_head list */ - list_for_each_entry(dev_res, head, list) - dev_res->res->end += get_res_add_size(realloc_head, - dev_res->res); - - /* Try updated head list with add_size added */ - assign_requested_resources_sorted(head, &local_fail_head); - - /* all assigned with add_size ? */ - if (list_empty(&local_fail_head)) { - /* Remove head list from realloc_head list */ - list_for_each_entry(dev_res, head, list) - remove_from_list(realloc_head, dev_res->res); - free_list(&save_head); - free_list(head); - return; - } - - free_list(&local_fail_head); - /* Release assigned resource */ - list_for_each_entry(dev_res, head, list) - if (dev_res->res->parent) - release_resource(dev_res->res); - /* Restore start/end/flags from saved list */ - list_for_each_entry(save_res, &save_head, list) { - struct resource *res = save_res->res; - - res->start = save_res->start; - res->end = save_res->end; - res->flags = save_res->flags; - } - free_list(&save_head); - -requested_and_reassign: /* Satisfy the must-have resource requests */ assign_requested_resources_sorted(head, fail_head); @@ -371,27 +228,28 @@ static void __assign_resources_sorted(struct list_head *head, requests */ if (realloc_head) reassign_resources_sorted(realloc_head, head); - free_list(head); + free_list(resource_list, head); } static void pdev_assign_resources_sorted(struct pci_dev *dev, - struct list_head *add_head, - struct list_head *fail_head) + struct resource_list_x *fail_head) { - 
LIST_HEAD(head); + struct resource_list head; + head.next = NULL; __dev_sort_resources(dev, &head); - __assign_resources_sorted(&head, add_head, fail_head); + __assign_resources_sorted(&head, NULL, fail_head); } static void pbus_assign_resources_sorted(const struct pci_bus *bus, - struct list_head *realloc_head, - struct list_head *fail_head) + struct resource_list_x *realloc_head, + struct resource_list_x *fail_head) { struct pci_dev *dev; - LIST_HEAD(head); + struct resource_list head; + head.next = NULL; list_for_each_entry(dev, &bus->devices, bus_list) __dev_sort_resources(dev, &head); @@ -690,6 +548,20 @@ static resource_size_t calculate_memsize(resource_size_t size, return size; } +static resource_size_t get_res_add_size(struct resource_list_x *realloc_head, + struct resource *res) +{ + struct resource_list_x *list; + + /* check if it is in realloc_head list */ + for (list = realloc_head->next; list && list->res != res; + list = list->next); + if (list) + return list->add_size; + + return 0; +} + /** * pbus_size_io() - size the io window of a given bus * @@ -704,7 +576,7 @@ static resource_size_t calculate_memsize(resource_size_t size, * We must be careful with the ISA aliasing though. */ static void pbus_size_io(struct pci_bus *bus, resource_size_t min_size, - resource_size_t add_size, struct list_head *realloc_head) + resource_size_t add_size, struct resource_list_x *realloc_head) { struct pci_dev *dev; struct resource *b_res = find_free_bus_resource(bus, IORESOURCE_IO); @@ -740,7 +612,7 @@ static void pbus_size_io(struct pci_bus *bus, resource_size_t min_size, if (children_add_size > add_size) add_size = children_add_size; size1 = (!realloc_head || (realloc_head && !add_size)) ? 
size0 : - calculate_iosize(size, min_size, add_size + size1, + calculate_iosize(size, min_size+add_size, size1, resource_size(b_res), 4096); if (!size0 && !size1) { if (b_res->start || b_res->end) @@ -754,12 +626,8 @@ static void pbus_size_io(struct pci_bus *bus, resource_size_t min_size, b_res->start = 4096; b_res->end = b_res->start + size0 - 1; b_res->flags |= IORESOURCE_STARTALIGN; - if (size1 > size0 && realloc_head) { + if (size1 > size0 && realloc_head) add_to_list(realloc_head, bus->self, b_res, size1-size0, 4096); - dev_printk(KERN_DEBUG, &bus->self->dev, "bridge window " - "%pR to [bus %02x-%02x] add_size %lx\n", b_res, - bus->secondary, bus->subordinate, size1-size0); - } } /** @@ -776,7 +644,7 @@ static void pbus_size_io(struct pci_bus *bus, resource_size_t min_size, static int pbus_size_mem(struct pci_bus *bus, unsigned long mask, unsigned long type, resource_size_t min_size, resource_size_t add_size, - struct list_head *realloc_head) + struct resource_list_x *realloc_head) { struct pci_dev *dev; resource_size_t min_align, align, size, size0, size1; @@ -858,7 +726,7 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask, if (children_add_size > add_size) add_size = children_add_size; size1 = (!realloc_head || (realloc_head && !add_size)) ? 
size0 : - calculate_memsize(size, min_size, add_size, + calculate_memsize(size, min_size+add_size, 0, resource_size(b_res), min_align); if (!size0 && !size1) { if (b_res->start || b_res->end) @@ -871,12 +739,8 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask, b_res->start = min_align; b_res->end = size0 + min_align - 1; b_res->flags |= IORESOURCE_STARTALIGN | mem64_mask; - if (size1 > size0 && realloc_head) { + if (size1 > size0 && realloc_head) add_to_list(realloc_head, bus->self, b_res, size1-size0, min_align); - dev_printk(KERN_DEBUG, &bus->self->dev, "bridge window " - "%pR to [bus %02x-%02x] add_size %llx\n", b_res, - bus->secondary, bus->subordinate, (unsigned long long)size1-size0); - } return 1; } @@ -890,48 +754,25 @@ unsigned long pci_cardbus_resource_alignment(struct resource *res) } static void pci_bus_size_cardbus(struct pci_bus *bus, - struct list_head *realloc_head) + struct resource_list_x *realloc_head) { struct pci_dev *bridge = bus->self; struct resource *b_res = &bridge->resource[PCI_BRIDGE_RESOURCES]; - resource_size_t b_res_3_size = pci_cardbus_mem_size * 2; u16 ctrl; - if (b_res[0].parent) - goto handle_b_res_1; /* * Reserve some resources for CardBus. We reserve * a fixed amount of bus space for CardBus bridges. 
*/ - b_res[0].start = pci_cardbus_io_size; - b_res[0].end = b_res[0].start + pci_cardbus_io_size - 1; - b_res[0].flags |= IORESOURCE_IO | IORESOURCE_STARTALIGN; - if (realloc_head) { - b_res[0].end -= pci_cardbus_io_size; - add_to_list(realloc_head, bridge, b_res, pci_cardbus_io_size, - pci_cardbus_io_size); - } - -handle_b_res_1: - if (b_res[1].parent) - goto handle_b_res_2; - b_res[1].start = pci_cardbus_io_size; - b_res[1].end = b_res[1].start + pci_cardbus_io_size - 1; - b_res[1].flags |= IORESOURCE_IO | IORESOURCE_STARTALIGN; - if (realloc_head) { - b_res[1].end -= pci_cardbus_io_size; - add_to_list(realloc_head, bridge, b_res+1, pci_cardbus_io_size, - pci_cardbus_io_size); - } + b_res[0].start = 0; + b_res[0].flags |= IORESOURCE_IO | IORESOURCE_SIZEALIGN; + if (realloc_head) + add_to_list(realloc_head, bridge, b_res, pci_cardbus_io_size, 0 /* dont care */); -handle_b_res_2: - /* MEM1 must not be pref mmio */ - pci_read_config_word(bridge, PCI_CB_BRIDGE_CONTROL, &ctrl); - if (ctrl & PCI_CB_BRIDGE_CTL_PREFETCH_MEM1) { - ctrl &= ~PCI_CB_BRIDGE_CTL_PREFETCH_MEM1; - pci_write_config_word(bridge, PCI_CB_BRIDGE_CONTROL, ctrl); - pci_read_config_word(bridge, PCI_CB_BRIDGE_CONTROL, &ctrl); - } + b_res[1].start = 0; + b_res[1].flags |= IORESOURCE_IO | IORESOURCE_SIZEALIGN; + if (realloc_head) + add_to_list(realloc_head, bridge, b_res+1, pci_cardbus_io_size, 0 /* dont care */); /* * Check whether prefetchable memory is supported @@ -944,46 +785,38 @@ static void pci_bus_size_cardbus(struct pci_bus *bus, pci_read_config_word(bridge, PCI_CB_BRIDGE_CONTROL, &ctrl); } - if (b_res[2].parent) - goto handle_b_res_3; /* * If we have prefetchable memory support, allocate * two regions. Otherwise, allocate one region of * twice the size. 
*/ if (ctrl & PCI_CB_BRIDGE_CTL_PREFETCH_MEM0) { - b_res[2].start = pci_cardbus_mem_size; - b_res[2].end = b_res[2].start + pci_cardbus_mem_size - 1; - b_res[2].flags |= IORESOURCE_MEM | IORESOURCE_PREFETCH | - IORESOURCE_STARTALIGN; - if (realloc_head) { - b_res[2].end -= pci_cardbus_mem_size; - add_to_list(realloc_head, bridge, b_res+2, - pci_cardbus_mem_size, pci_cardbus_mem_size); - } - - /* reduce that to half */ - b_res_3_size = pci_cardbus_mem_size; - } - -handle_b_res_3: - if (b_res[3].parent) - goto handle_done; - b_res[3].start = pci_cardbus_mem_size; - b_res[3].end = b_res[3].start + b_res_3_size - 1; - b_res[3].flags |= IORESOURCE_MEM | IORESOURCE_STARTALIGN; - if (realloc_head) { - b_res[3].end -= b_res_3_size; - add_to_list(realloc_head, bridge, b_res+3, b_res_3_size, - pci_cardbus_mem_size); + b_res[2].start = 0; + b_res[2].flags |= IORESOURCE_MEM | IORESOURCE_PREFETCH | IORESOURCE_SIZEALIGN; + if (realloc_head) + add_to_list(realloc_head, bridge, b_res+2, pci_cardbus_mem_size, 0 /* dont care */); + + b_res[3].start = 0; + b_res[3].flags |= IORESOURCE_MEM | IORESOURCE_SIZEALIGN; + if (realloc_head) + add_to_list(realloc_head, bridge, b_res+3, pci_cardbus_mem_size, 0 /* dont care */); + } else { + b_res[3].start = 0; + b_res[3].flags |= IORESOURCE_MEM | IORESOURCE_SIZEALIGN; + if (realloc_head) + add_to_list(realloc_head, bridge, b_res+3, pci_cardbus_mem_size * 2, 0 /* dont care */); } -handle_done: - ; + /* set the size of the resource to zero, so that the resource does not + * get assigned during required-resource allocation cycle but gets assigned + * during the optional-resource allocation cycle. 
+ */ + b_res[0].start = b_res[1].start = b_res[2].start = b_res[3].start = 1; + b_res[0].end = b_res[1].end = b_res[2].end = b_res[3].end = 0; } void __ref __pci_bus_size_bridges(struct pci_bus *bus, - struct list_head *realloc_head) + struct resource_list_x *realloc_head) { struct pci_dev *dev; unsigned long mask, prefmask; @@ -1025,8 +858,7 @@ void __ref __pci_bus_size_bridges(struct pci_bus *bus, * Follow thru */ default: - pbus_size_io(bus, realloc_head ? 0 : additional_io_size, - additional_io_size, realloc_head); + pbus_size_io(bus, 0, additional_io_size, realloc_head); /* If the bridge supports prefetchable range, size it separately. If it doesn't, or its prefetchable window has already been allocated by arch code, try @@ -1034,15 +866,11 @@ void __ref __pci_bus_size_bridges(struct pci_bus *bus, resources. */ mask = IORESOURCE_MEM; prefmask = IORESOURCE_MEM | IORESOURCE_PREFETCH; - if (pbus_size_mem(bus, prefmask, prefmask, - realloc_head ? 0 : additional_mem_size, - additional_mem_size, realloc_head)) + if (pbus_size_mem(bus, prefmask, prefmask, 0, additional_mem_size, realloc_head)) mask = prefmask; /* Success, size non-prefetch only. */ else additional_mem_size += additional_mem_size; - pbus_size_mem(bus, mask, IORESOURCE_MEM, - realloc_head ? 
0 : additional_mem_size, - additional_mem_size, realloc_head); + pbus_size_mem(bus, mask, IORESOURCE_MEM, 0, additional_mem_size, realloc_head); break; } } @@ -1054,8 +882,8 @@ void __ref pci_bus_size_bridges(struct pci_bus *bus) EXPORT_SYMBOL(pci_bus_size_bridges); static void __ref __pci_bus_assign_resources(const struct pci_bus *bus, - struct list_head *realloc_head, - struct list_head *fail_head) + struct resource_list_x *realloc_head, + struct resource_list_x *fail_head) { struct pci_bus *b; struct pci_dev *dev; @@ -1094,19 +922,17 @@ void __ref pci_bus_assign_resources(const struct pci_bus *bus) EXPORT_SYMBOL(pci_bus_assign_resources); static void __ref __pci_bridge_assign_resources(const struct pci_dev *bridge, - struct list_head *add_head, - struct list_head *fail_head) + struct resource_list_x *fail_head) { struct pci_bus *b; - pdev_assign_resources_sorted((struct pci_dev *)bridge, - add_head, fail_head); + pdev_assign_resources_sorted((struct pci_dev *)bridge, fail_head); b = bridge->subordinate; if (!b) return; - __pci_bus_assign_resources(b, add_head, fail_head); + __pci_bus_assign_resources(b, NULL, fail_head); switch (bridge->class >> 8) { case PCI_CLASS_BRIDGE_PCI: @@ -1269,58 +1095,6 @@ static int __init pci_get_max_depth(void) return depth; } -/* - * -1: undefined, will auto detect later - * 0: disabled by user - * 1: disabled by auto detect - * 2: enabled by user - * 3: enabled by auto detect - */ -enum enable_type { - undefined = -1, - user_disabled, - auto_disabled, - user_enabled, - auto_enabled, -}; - -static enum enable_type pci_realloc_enable __initdata = undefined; -void __init pci_realloc_get_opt(char *str) -{ - if (!strncmp(str, "off", 3)) - pci_realloc_enable = user_disabled; - else if (!strncmp(str, "on", 2)) - pci_realloc_enable = user_enabled; -} -static bool __init pci_realloc_enabled(void) -{ - return pci_realloc_enable >= user_enabled; -} - -static void __init pci_realloc_detect(void) -{ -#if defined(CONFIG_PCI_IOV) && 
defined(CONFIG_PCI_REALLOC_ENABLE_AUTO) - struct pci_dev *dev = NULL; - - if (pci_realloc_enable != undefined) - return; - - for_each_pci_dev(dev) { - int i; - - for (i = PCI_IOV_RESOURCES; i <= PCI_IOV_RESOURCE_END; i++) { - struct resource *r = &dev->resource[i]; - - /* Not assigned, or rejected by kernel ? */ - if (r->flags && !r->start) { - pci_realloc_enable = auto_enabled; - - return; - } - } - } -#endif -} /* * first try will not touch pci bridge res @@ -1331,57 +1105,59 @@ void __init pci_assign_unassigned_resources(void) { struct pci_bus *bus; - LIST_HEAD(realloc_head); /* list of resources that + struct resource_list_x realloc_list; /* list of resources that want additional resources */ - struct list_head *add_list = NULL; int tried_times = 0; enum release_type rel_type = leaf_only; - LIST_HEAD(fail_head); - struct pci_dev_resource *fail_res; + struct resource_list_x head, *list; unsigned long type_mask = IORESOURCE_IO | IORESOURCE_MEM | IORESOURCE_PREFETCH; - int pci_try_num = 1; + unsigned long failed_type; + int max_depth = pci_get_max_depth(); + int pci_try_num; - /* don't realloc if asked to do so */ - pci_realloc_detect(); - if (pci_realloc_enabled()) { - int max_depth = pci_get_max_depth(); - pci_try_num = max_depth + 1; - printk(KERN_DEBUG "PCI: max bus depth: %d pci_try_num: %d\n", - max_depth, pci_try_num); - } + head.next = NULL; + realloc_list.next = NULL; + + pci_try_num = max_depth + 1; + printk(KERN_DEBUG "PCI: max bus depth: %d pci_try_num: %d\n", + max_depth, pci_try_num); again: - /* - * last try will use add_list, otherwise will try good to have as - * must have, so can realloc parent bridge resource - */ - if (tried_times + 1 == pci_try_num) - add_list = &realloc_head; /* Depth first, calculate sizes and alignments of all subordinate buses. */ list_for_each_entry(bus, &pci_root_buses, node) - __pci_bus_size_bridges(bus, add_list); + __pci_bus_size_bridges(bus, &realloc_list); /* Depth last, allocate resources and update the hardware. 
*/ list_for_each_entry(bus, &pci_root_buses, node) - __pci_bus_assign_resources(bus, add_list, &fail_head); - if (add_list) - BUG_ON(!list_empty(add_list)); + __pci_bus_assign_resources(bus, &realloc_list, &head); + BUG_ON(realloc_list.next); tried_times++; /* any device complain? */ - if (list_empty(&fail_head)) + if (!head.next) goto enable_and_dump; - if (tried_times >= pci_try_num) { - if (pci_realloc_enable == undefined) - printk(KERN_INFO "Some PCI device resources are unassigned, try booting with pci=realloc\n"); - else if (pci_realloc_enable == auto_enabled) - printk(KERN_INFO "Automatically enabled pci realloc, if you have problem, try booting with pci=realloc=off\n"); + /* don't realloc if asked to do so */ + if (!pci_realloc_enabled()) { + free_list(resource_list_x, &head); + goto enable_and_dump; + } - free_list(&fail_head); + failed_type = 0; + for (list = head.next; list;) { + failed_type |= list->flags; + list = list->next; + } + /* + * io port are tight, don't try extra + * or if reach the limit, don't want to try more + */ + failed_type &= type_mask; + if ((failed_type == IORESOURCE_IO) || (tried_times >= pci_try_num)) { + free_list(resource_list_x, &head); goto enable_and_dump; } @@ -1396,23 +1172,25 @@ pci_assign_unassigned_resources(void) * Try to release leaf bridge's resources that doesn't fit resource of * child device under that bridge */ - list_for_each_entry(fail_res, &fail_head, list) { - bus = fail_res->dev->bus; - pci_bus_release_bridge_resources(bus, - fail_res->flags & type_mask, - rel_type); + for (list = head.next; list;) { + bus = list->dev->bus; + pci_bus_release_bridge_resources(bus, list->flags & type_mask, + rel_type); + list = list->next; } /* restore size and flags */ - list_for_each_entry(fail_res, &fail_head, list) { - struct resource *res = fail_res->res; + for (list = head.next; list;) { + struct resource *res = list->res; - res->start = fail_res->start; - res->end = fail_res->end; - res->flags = fail_res->flags; - if 
(fail_res->dev->subordinate) + res->start = list->start; + res->end = list->end; + res->flags = list->flags; + if (list->dev->subordinate) res->flags = 0; + + list = list->next; } - free_list(&fail_head); + free_list(resource_list_x, &head); goto again; @@ -1429,27 +1207,26 @@ pci_assign_unassigned_resources(void) void pci_assign_unassigned_bridge_resources(struct pci_dev *bridge) { struct pci_bus *parent = bridge->subordinate; - LIST_HEAD(add_list); /* list of resources that - want additional resources */ int tried_times = 0; - LIST_HEAD(fail_head); - struct pci_dev_resource *fail_res; + struct resource_list_x head, *list; int retval; unsigned long type_mask = IORESOURCE_IO | IORESOURCE_MEM | IORESOURCE_PREFETCH; + head.next = NULL; + again: - __pci_bus_size_bridges(parent, &add_list); - __pci_bridge_assign_resources(bridge, &add_list, &fail_head); - BUG_ON(!list_empty(&add_list)); + pci_bus_size_bridges(parent); + __pci_bridge_assign_resources(bridge, &head); + tried_times++; - if (list_empty(&fail_head)) + if (!head.next) goto enable_all; if (tried_times >= 2) { /* still fail, don't need to try more */ - free_list(&fail_head); + free_list(resource_list_x, &head); goto enable_all; } @@ -1460,24 +1237,27 @@ void pci_assign_unassigned_bridge_resources(struct pci_dev *bridge) * Try to release leaf bridge's resources that doesn't fit resource of * child device under that bridge */ - list_for_each_entry(fail_res, &fail_head, list) { - struct pci_bus *bus = fail_res->dev->bus; - unsigned long flags = fail_res->flags; + for (list = head.next; list;) { + struct pci_bus *bus = list->dev->bus; + unsigned long flags = list->flags; pci_bus_release_bridge_resources(bus, flags & type_mask, whole_subtree); + list = list->next; } /* restore size and flags */ - list_for_each_entry(fail_res, &fail_head, list) { - struct resource *res = fail_res->res; + for (list = head.next; list;) { + struct resource *res = list->res; - res->start = fail_res->start; - res->end = fail_res->end; - 
res->flags = fail_res->flags; - if (fail_res->dev->subordinate) + res->start = list->start; + res->end = list->end; + res->flags = list->flags; + if (list->dev->subordinate) res->flags = 0; + + list = list->next; } - free_list(&fail_head); + free_list(resource_list_x, &head); goto again; @@ -1487,41 +1267,3 @@ void pci_assign_unassigned_bridge_resources(struct pci_dev *bridge) pci_enable_bridges(parent); } EXPORT_SYMBOL_GPL(pci_assign_unassigned_bridge_resources); - -#ifdef CONFIG_HOTPLUG -/** - * pci_rescan_bus - scan a PCI bus for devices. - * @bus: PCI bus to scan - * - * Scan a PCI bus and child buses for new devices, adds them, - * and enables them. - * - * Returns the max number of subordinate bus discovered. - */ -unsigned int __ref pci_rescan_bus(struct pci_bus *bus) -{ - unsigned int max; - struct pci_dev *dev; - LIST_HEAD(add_list); /* list of resources that - want additional resources */ - - max = pci_scan_child_bus(bus); - - down_read(&pci_bus_sem); - list_for_each_entry(dev, &bus->devices, bus_list) - if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE || - dev->hdr_type == PCI_HEADER_TYPE_CARDBUS) - if (dev->subordinate) - __pci_bus_size_bridges(dev->subordinate, - &add_list); - up_read(&pci_bus_sem); - __pci_bus_assign_resources(bus, &add_list, NULL); - BUG_ON(!list_empty(&add_list)); - - pci_enable_bridges(bus); - pci_bus_add_devices(bus); - - return max; -} -EXPORT_SYMBOL_GPL(pci_rescan_bus); -#endif diff --git a/trunk/drivers/pci/setup-res.c b/trunk/drivers/pci/setup-res.c index eea85dafc763..b66bfdbd21f7 100644 --- a/trunk/drivers/pci/setup-res.c +++ b/trunk/drivers/pci/setup-res.c @@ -114,6 +114,7 @@ int pci_claim_resource(struct pci_dev *dev, int resource) } EXPORT_SYMBOL(pci_claim_resource); +#ifdef CONFIG_PCI_QUIRKS void pci_disable_bridge_window(struct pci_dev *dev) { dev_info(&dev->dev, "disabling bridge mem windows\n"); @@ -126,6 +127,9 @@ void pci_disable_bridge_window(struct pci_dev *dev) pci_write_config_dword(dev, PCI_PREF_MEMORY_BASE, 
0x0000fff0); pci_write_config_dword(dev, PCI_PREF_BASE_UPPER32, 0xffffffff); } +#endif /* CONFIG_PCI_QUIRKS */ + + static int __pci_assign_resource(struct pci_bus *bus, struct pci_dev *dev, int resno, resource_size_t size, resource_size_t align) @@ -154,44 +158,22 @@ static int __pci_assign_resource(struct pci_bus *bus, struct pci_dev *dev, return ret; } -/* - * Generic function that returns a value indicating that the device's - * original BIOS BAR address was not saved and so is not available for - * reinstatement. - * - * Can be over-ridden by architecture specific code that implements - * reinstatement functionality rather than leaving it disabled when - * normal allocation attempts fail. - */ -resource_size_t __weak pcibios_retrieve_fw_addr(struct pci_dev *dev, int idx) -{ - return 0; -} - static int pci_revert_fw_address(struct resource *res, struct pci_dev *dev, int resno, resource_size_t size) { struct resource *root, *conflict; - resource_size_t fw_addr, start, end; + resource_size_t start, end; int ret = 0; - fw_addr = pcibios_retrieve_fw_addr(dev, resno); - if (!fw_addr) - return 1; + if (res->flags & IORESOURCE_IO) + root = &ioport_resource; + else + root = &iomem_resource; start = res->start; end = res->end; - res->start = fw_addr; + res->start = dev->fw_addr[resno]; res->end = res->start + size - 1; - - root = pci_find_parent_resource(dev, res); - if (!root) { - if (res->flags & IORESOURCE_IO) - root = &ioport_resource; - else - root = &iomem_resource; - } - dev_info(&dev->dev, "BAR %d: trying firmware assignment %pR\n", resno, res); conflict = request_resource_conflict(root, res); @@ -246,17 +228,16 @@ int pci_reassign_resource(struct pci_dev *dev, int resno, resource_size_t addsiz int ret; if (!res->parent) { - dev_info(&dev->dev, "BAR %d: can't reassign an unassigned resource %pR " + dev_info(&dev->dev, "BAR %d: can't reassign an unassigned resouce %pR " "\n", resno, res); return -EINVAL; } - /* already aligned with min_align */ - new_size = 
resource_size(res) + addsize; + new_size = resource_size(res) + addsize + min_align; ret = _pci_assign_resource(dev, resno, new_size, min_align); if (!ret) { res->flags &= ~IORESOURCE_STARTALIGN; - dev_info(&dev->dev, "BAR %d: reassigned %pR\n", resno, res); + dev_info(&dev->dev, "BAR %d: assigned %pR\n", resno, res); if (resno < PCI_BRIDGE_RESOURCES) pci_update_resource(dev, resno); } @@ -286,7 +267,7 @@ int pci_assign_resource(struct pci_dev *dev, int resno) * where firmware left it. That at least has a chance of * working, which is better than just leaving it disabled. */ - if (ret < 0) + if (ret < 0 && dev->fw_addr[resno]) ret = pci_revert_fw_address(res, dev, resno, size); if (!ret) { @@ -298,6 +279,53 @@ int pci_assign_resource(struct pci_dev *dev, int resno) return ret; } + +/* Sort resources by alignment */ +void pdev_sort_resources(struct pci_dev *dev, struct resource_list *head) +{ + int i; + + for (i = 0; i < PCI_NUM_RESOURCES; i++) { + struct resource *r; + struct resource_list *list, *tmp; + resource_size_t r_align; + + r = &dev->resource[i]; + + if (r->flags & IORESOURCE_PCI_FIXED) + continue; + + if (!(r->flags) || r->parent) + continue; + + r_align = pci_resource_alignment(dev, r); + if (!r_align) { + dev_warn(&dev->dev, "BAR %d: %pR has bogus alignment\n", + i, r); + continue; + } + for (list = head; ; list = list->next) { + resource_size_t align = 0; + struct resource_list *ln = list->next; + + if (ln) + align = pci_resource_alignment(ln->dev, ln->res); + + if (r_align > align) { + tmp = kmalloc(sizeof(*tmp), GFP_KERNEL); + if (!tmp) + panic("pdev_sort_resources(): " + "kmalloc() failed!\n"); + tmp->next = ln; + tmp->res = r; + tmp->dev = dev; + list->next = tmp; + break; + } + } + } +} + int pci_enable_resources(struct pci_dev *dev, int mask) { u16 cmd, old_cmd; diff --git a/trunk/drivers/pci/xen-pcifront.c b/trunk/drivers/pci/xen-pcifront.c index fd00ff02ab4d..401090110922 100644 --- a/trunk/drivers/pci/xen-pcifront.c +++ 
b/trunk/drivers/pci/xen-pcifront.c @@ -544,7 +544,7 @@ static void free_root_bus_devs(struct pci_bus *bus) dev = container_of(bus->devices.next, struct pci_dev, bus_list); dev_dbg(&dev->dev, "removing device\n"); - pci_stop_and_remove_bus_device(dev); + pci_remove_bus_device(dev); } } @@ -1044,7 +1044,7 @@ static int pcifront_detach_devices(struct pcifront_device *pdev) domain, bus, slot, func); continue; } - pci_stop_and_remove_bus_device(pci_dev); + pci_remove_bus_device(pci_dev); pci_dev_put(pci_dev); dev_dbg(&pdev->xdev->dev, diff --git a/trunk/drivers/pcmcia/cardbus.c b/trunk/drivers/pcmcia/cardbus.c index 6e75153c5b4f..9a58862f1401 100644 --- a/trunk/drivers/pcmcia/cardbus.c +++ b/trunk/drivers/pcmcia/cardbus.c @@ -108,5 +108,5 @@ void cb_free(struct pcmcia_socket *s) struct pci_dev *bridge = s->cb_dev; if (bridge) - pci_stop_and_remove_behind_bridge(bridge); + pci_remove_behind_bridge(bridge); } diff --git a/trunk/drivers/platform/x86/asus-wmi.c b/trunk/drivers/platform/x86/asus-wmi.c index 9929246895de..72d731c21d45 100644 --- a/trunk/drivers/platform/x86/asus-wmi.c +++ b/trunk/drivers/platform/x86/asus-wmi.c @@ -571,7 +571,7 @@ static void asus_rfkill_hotplug(struct asus_wmi *asus) } else { dev = pci_get_slot(bus, 0); if (dev) { - pci_stop_and_remove_bus_device(dev); + pci_remove_bus_device(dev); pci_dev_put(dev); } } diff --git a/trunk/drivers/platform/x86/eeepc-laptop.c b/trunk/drivers/platform/x86/eeepc-laptop.c index d9a9e2bedb30..ea44abd8df48 100644 --- a/trunk/drivers/platform/x86/eeepc-laptop.c +++ b/trunk/drivers/platform/x86/eeepc-laptop.c @@ -646,7 +646,7 @@ static void eeepc_rfkill_hotplug(struct eeepc_laptop *eeepc, acpi_handle handle) } else { dev = pci_get_slot(bus, 0); if (dev) { - pci_stop_and_remove_bus_device(dev); + pci_remove_bus_device(dev); pci_dev_put(dev); } } diff --git a/trunk/drivers/scsi/bnx2fc/bnx2fc_constants.h b/trunk/drivers/scsi/bnx2fc/bnx2fc_constants.h index dad9924abbbb..c12702bb16d6 100644 --- 
a/trunk/drivers/scsi/bnx2fc/bnx2fc_constants.h +++ b/trunk/drivers/scsi/bnx2fc/bnx2fc_constants.h @@ -47,7 +47,6 @@ #define FCOE_KCQE_COMPLETION_STATUS_CTX_FREE_FAILURE (0x4) #define FCOE_KCQE_COMPLETION_STATUS_NIC_ERROR (0x5) #define FCOE_KCQE_COMPLETION_STATUS_WRONG_HSI_VERSION (0x6) -#define FCOE_KCQE_COMPLETION_STATUS_PARITY_ERROR (0x81) /* CQE type */ #define FCOE_PENDING_CQE_TYPE 0 diff --git a/trunk/drivers/scsi/bnx2i/57xx_iscsi_constants.h b/trunk/drivers/scsi/bnx2i/57xx_iscsi_constants.h index 495a841645f9..57515f1f1690 100644 --- a/trunk/drivers/scsi/bnx2i/57xx_iscsi_constants.h +++ b/trunk/drivers/scsi/bnx2i/57xx_iscsi_constants.h @@ -122,7 +122,6 @@ #define ISCSI_KCQE_COMPLETION_STATUS_LOM_ISCSI_NOT_ENABLED (0x51) #define ISCSI_KCQE_COMPLETION_STATUS_CID_BUSY (0x80) -#define ISCSI_KCQE_COMPLETION_STATUS_PARITY_ERR (0x81) /* SQ/RQ/CQ DB structure sizes */ #define ISCSI_SQ_DB_SIZE (16) diff --git a/trunk/drivers/scsi/mpt2sas/mpt2sas_base.c b/trunk/drivers/scsi/mpt2sas/mpt2sas_base.c index 5e69f468535f..82fa6ce481f0 100644 --- a/trunk/drivers/scsi/mpt2sas/mpt2sas_base.c +++ b/trunk/drivers/scsi/mpt2sas/mpt2sas_base.c @@ -132,7 +132,7 @@ static int mpt2sas_remove_dead_ioc_func(void *arg) pdev = ioc->pdev; if ((pdev == NULL)) return -1; - pci_stop_and_remove_bus_device(pdev); + pci_remove_bus_device(pdev); return 0; } diff --git a/trunk/drivers/usb/host/pci-quirks.c b/trunk/drivers/usb/host/pci-quirks.c index 11de5f1be981..7732d69e49e0 100644 --- a/trunk/drivers/usb/host/pci-quirks.c +++ b/trunk/drivers/usb/host/pci-quirks.c @@ -893,5 +893,4 @@ static void __devinit quirk_usb_early_handoff(struct pci_dev *pdev) quirk_usb_handoff_xhci(pdev); pci_disable_device(pdev); } -DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_ANY_ID, PCI_ANY_ID, - PCI_CLASS_SERIAL_USB, 8, quirk_usb_early_handoff); +DECLARE_PCI_FIXUP_FINAL(PCI_ANY_ID, PCI_ANY_ID, quirk_usb_early_handoff); diff --git a/trunk/fs/cifs/README b/trunk/fs/cifs/README index b7d782bab797..895da1dc1550 100644 --- 
a/trunk/fs/cifs/README +++ b/trunk/fs/cifs/README @@ -753,6 +753,10 @@ module loading or during the runtime by using the interface i.e. echo "value" > /sys/module/cifs/parameters/ -1. enable_oplocks - Enable or disable oplocks. Oplocks are enabled by default. +1. echo_retries - The number of echo attempts before giving up and + reconnecting to the server. The default is 5. The value 0 + means never reconnect. + +2. enable_oplocks - Enable or disable oplocks. Oplocks are enabled by default. [Y/y/1]. To disable use any of [N/n/0]. diff --git a/trunk/fs/cifs/cifs_debug.c b/trunk/fs/cifs/cifs_debug.c index 573b899b5a5d..24b3dfc05282 100644 --- a/trunk/fs/cifs/cifs_debug.c +++ b/trunk/fs/cifs/cifs_debug.c @@ -171,7 +171,8 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v) seq_printf(m, "TCP status: %d\n\tLocal Users To " "Server: %d SecMode: 0x%x Req On Wire: %d", server->tcpStatus, server->srv_count, - server->sec_mode, in_flight(server)); + server->sec_mode, + atomic_read(&server->inFlight)); #ifdef CONFIG_CIFS_STATS2 seq_printf(m, " In Send: %d In MaxReq Wait: %d", diff --git a/trunk/fs/cifs/cifsfs.c b/trunk/fs/cifs/cifsfs.c index eee522c56ef0..418fc42fb8b2 100644 --- a/trunk/fs/cifs/cifsfs.c +++ b/trunk/fs/cifs/cifsfs.c @@ -76,7 +76,12 @@ MODULE_PARM_DESC(cifs_min_small, "Small network buffers in pool. Default: 30 " unsigned int cifs_max_pending = CIFS_MAX_REQ; module_param(cifs_max_pending, int, 0444); MODULE_PARM_DESC(cifs_max_pending, "Simultaneous requests to server. " - "Default: 32767 Range: 2 to 32767."); + "Default: 50 Range: 2 to 256"); +unsigned short echo_retries = 5; +module_param(echo_retries, ushort, 0644); +MODULE_PARM_DESC(echo_retries, "Number of echo attempts before giving up and " + "reconnecting server. Default: 5. 0 means " + "never reconnect."); module_param(enable_oplocks, bool, 0644); MODULE_PARM_DESC(enable_oplocks, "Enable or disable oplocks (bool). 
Default:" "y/Y/1"); @@ -1106,9 +1111,9 @@ init_cifs(void) if (cifs_max_pending < 2) { cifs_max_pending = 2; cFYI(1, "cifs_max_pending set to min of 2"); - } else if (cifs_max_pending > CIFS_MAX_REQ) { - cifs_max_pending = CIFS_MAX_REQ; - cFYI(1, "cifs_max_pending set to max of %u", CIFS_MAX_REQ); + } else if (cifs_max_pending > 256) { + cifs_max_pending = 256; + cFYI(1, "cifs_max_pending set to max of 256"); } rc = cifs_fscache_register(); @@ -1170,8 +1175,11 @@ static void __exit exit_cifs(void) { cFYI(DBG2, "exit_cifs"); - unregister_filesystem(&cifs_fs_type); + cifs_proc_clean(); + cifs_fscache_unregister(); +#ifdef CONFIG_CIFS_DFS_UPCALL cifs_dfs_release_automount_timer(); +#endif #ifdef CONFIG_CIFS_ACL cifs_destroy_idmaptrees(); exit_cifs_idmap(); @@ -1179,11 +1187,10 @@ exit_cifs(void) #ifdef CONFIG_CIFS_UPCALL unregister_key_type(&cifs_spnego_key_type); #endif - cifs_destroy_request_bufs(); - cifs_destroy_mids(); + unregister_filesystem(&cifs_fs_type); cifs_destroy_inodecache(); - cifs_fscache_unregister(); - cifs_proc_clean(); + cifs_destroy_mids(); + cifs_destroy_request_bufs(); } MODULE_AUTHOR("Steve French "); diff --git a/trunk/fs/cifs/cifsglob.h b/trunk/fs/cifs/cifsglob.h index 339ebe3ebc0d..76e7d8b6da17 100644 --- a/trunk/fs/cifs/cifsglob.h +++ b/trunk/fs/cifs/cifsglob.h @@ -55,9 +55,14 @@ /* * MAX_REQ is the maximum number of requests that WE will send - * on one socket concurrently. + * on one socket concurrently. It also matches the most common + * value of max multiplex returned by servers. We may + * eventually want to use the negotiated value (in case + * future servers can handle more) when we are more confident that + * we will not have problems oveloading the socket with pending + * write data. 
*/ -#define CIFS_MAX_REQ 32767 +#define CIFS_MAX_REQ 50 #define RFC1001_NAME_LEN 15 #define RFC1001_NAME_LEN_WITH_NULL (RFC1001_NAME_LEN + 1) @@ -250,9 +255,7 @@ struct TCP_Server_Info { bool noblocksnd; /* use blocking sendmsg */ bool noautotune; /* do not autotune send buf sizes */ bool tcp_nodelay; - int credits; /* send no more requests at once */ - unsigned int in_flight; /* number of requests on the wire to server */ - spinlock_t req_lock; /* protect the two values above */ + atomic_t inFlight; /* number of requests on the wire to server */ struct mutex srv_mutex; struct task_struct *tsk; char server_GUID[16]; @@ -260,7 +263,6 @@ struct TCP_Server_Info { bool session_estab; /* mark when very first sess is established */ u16 dialect; /* dialect index that server chose */ enum securityEnum secType; - bool oplocks:1; /* enable oplocks */ unsigned int maxReq; /* Clients should submit no more */ /* than maxReq distinct unanswered SMBs to the server when using */ /* multiplexed reads or writes */ @@ -305,36 +307,6 @@ struct TCP_Server_Info { #endif }; -static inline unsigned int -in_flight(struct TCP_Server_Info *server) -{ - unsigned int num; - spin_lock(&server->req_lock); - num = server->in_flight; - spin_unlock(&server->req_lock); - return num; -} - -static inline int* -get_credits_field(struct TCP_Server_Info *server) -{ - /* - * This will change to switch statement when we reserve slots for echos - * and oplock breaks. - */ - return &server->credits; -} - -static inline bool -has_credits(struct TCP_Server_Info *server, int *credits) -{ - int num; - spin_lock(&server->req_lock); - num = *credits; - spin_unlock(&server->req_lock); - return num > 0; -} - /* * Macros to allow the TCP_Server_Info->net field and related code to drop out * when CONFIG_NET_NS isn't set. 
@@ -1038,6 +1010,9 @@ GLOBAL_EXTERN unsigned int cifs_min_rcv; /* min size of big ntwrk buf pool */ GLOBAL_EXTERN unsigned int cifs_min_small; /* min size of small buf pool */ GLOBAL_EXTERN unsigned int cifs_max_pending; /* MAX requests at once to server*/ +/* reconnect after this many failed echo attempts */ +GLOBAL_EXTERN unsigned short echo_retries; + #ifdef CONFIG_CIFS_ACL GLOBAL_EXTERN struct rb_root uidtree; GLOBAL_EXTERN struct rb_root gidtree; diff --git a/trunk/fs/cifs/cifsproto.h b/trunk/fs/cifs/cifsproto.h index 503e73d8bdb7..6f4e243e0f62 100644 --- a/trunk/fs/cifs/cifsproto.h +++ b/trunk/fs/cifs/cifsproto.h @@ -88,9 +88,6 @@ extern int SendReceiveBlockingLock(const unsigned int xid, struct smb_hdr *in_buf , struct smb_hdr *out_buf, int *bytes_returned); -extern void cifs_add_credits(struct TCP_Server_Info *server, - const unsigned int add); -extern void cifs_set_credits(struct TCP_Server_Info *server, const int val); extern int checkSMB(struct smb_hdr *smb, __u16 mid, unsigned int length); extern bool is_valid_oplock_break(struct smb_hdr *smb, struct TCP_Server_Info *); @@ -171,13 +168,7 @@ extern struct smb_vol *cifs_get_volume_info(char *mount_data, const char *devname); extern int cifs_mount(struct cifs_sb_info *, struct smb_vol *); extern void cifs_umount(struct cifs_sb_info *); - -#if IS_ENABLED(CONFIG_CIFS_DFS_UPCALL) extern void cifs_dfs_release_automount_timer(void); -#else /* ! IS_ENABLED(CONFIG_CIFS_DFS_UPCALL) */ -#define cifs_dfs_release_automount_timer() do { } while (0) -#endif /* ! 
IS_ENABLED(CONFIG_CIFS_DFS_UPCALL) */ - void cifs_proc_init(void); void cifs_proc_clean(void); diff --git a/trunk/fs/cifs/cifssmb.c b/trunk/fs/cifs/cifssmb.c index 70aac35c398f..8b7794c31591 100644 --- a/trunk/fs/cifs/cifssmb.c +++ b/trunk/fs/cifs/cifssmb.c @@ -458,10 +458,7 @@ CIFSSMBNegotiate(unsigned int xid, struct cifs_ses *ses) goto neg_err_exit; } server->sec_mode = (__u8)le16_to_cpu(rsp->SecurityMode); - server->maxReq = min_t(unsigned int, - le16_to_cpu(rsp->MaxMpxCount), - cifs_max_pending); - cifs_set_credits(server, server->maxReq); + server->maxReq = le16_to_cpu(rsp->MaxMpxCount); server->maxBuf = le16_to_cpu(rsp->MaxBufSize); server->max_vcs = le16_to_cpu(rsp->MaxNumberVcs); /* even though we do not use raw we might as well set this @@ -567,9 +564,7 @@ CIFSSMBNegotiate(unsigned int xid, struct cifs_ses *ses) /* one byte, so no need to convert this or EncryptionKeyLen from little endian */ - server->maxReq = min_t(unsigned int, le16_to_cpu(pSMBr->MaxMpxCount), - cifs_max_pending); - cifs_set_credits(server, server->maxReq); + server->maxReq = le16_to_cpu(pSMBr->MaxMpxCount); /* probably no need to store and check maxvcs */ server->maxBuf = le32_to_cpu(pSMBr->MaxBufferSize); server->max_rw = le32_to_cpu(pSMBr->MaxRawSize); @@ -721,7 +716,8 @@ cifs_echo_callback(struct mid_q_entry *mid) struct TCP_Server_Info *server = mid->callback_data; DeleteMidQEntry(mid); - cifs_add_credits(server, 1); + atomic_dec(&server->inFlight); + wake_up(&server->request_q); } int @@ -1673,7 +1669,8 @@ cifs_readv_callback(struct mid_q_entry *mid) queue_work(system_nrt_wq, &rdata->work); DeleteMidQEntry(mid); - cifs_add_credits(server, 1); + atomic_dec(&server->inFlight); + wake_up(&server->request_q); } /* cifs_async_readv - send an async write, and set up mid to handle result */ @@ -2113,7 +2110,8 @@ cifs_writev_callback(struct mid_q_entry *mid) queue_work(system_nrt_wq, &wdata->work); DeleteMidQEntry(mid); - cifs_add_credits(tcon->ses->server, 1); + 
atomic_dec(&tcon->ses->server->inFlight); + wake_up(&tcon->ses->server->request_q); } /* cifs_async_writev - send an async write, and set up mid to handle result */ diff --git a/trunk/fs/cifs/connect.c b/trunk/fs/cifs/connect.c index 5560e1d5e54b..602f77c304c9 100644 --- a/trunk/fs/cifs/connect.c +++ b/trunk/fs/cifs/connect.c @@ -373,22 +373,12 @@ allocate_buffers(struct TCP_Server_Info *server) static bool server_unresponsive(struct TCP_Server_Info *server) { - /* - * We need to wait 2 echo intervals to make sure we handle such - * situations right: - * 1s client sends a normal SMB request - * 2s client gets a response - * 30s echo workqueue job pops, and decides we got a response recently - * and don't need to send another - * ... - * 65s kernel_recvmsg times out, and we see that we haven't gotten - * a response in >60s. - */ - if (server->tcpStatus == CifsGood && - time_after(jiffies, server->lstrp + 2 * SMB_ECHO_INTERVAL)) { + if (echo_retries > 0 && server->tcpStatus == CifsGood && + time_after(jiffies, server->lstrp + + (echo_retries * SMB_ECHO_INTERVAL))) { cERROR(1, "Server %s has not responded in %d seconds. " "Reconnecting...", server->hostname, - (2 * SMB_ECHO_INTERVAL) / HZ); + (echo_retries * SMB_ECHO_INTERVAL / HZ)); cifs_reconnect(server); wake_up(&server->response_q); return true; @@ -652,11 +642,19 @@ static void clean_demultiplex_info(struct TCP_Server_Info *server) spin_unlock(&GlobalMid_Lock); wake_up_all(&server->response_q); - /* check if we have blocked requests that need to free */ - spin_lock(&server->req_lock); - if (server->credits <= 0) - server->credits = 1; - spin_unlock(&server->req_lock); + /* + * Check if we have blocked requests that need to free. Note that + * cifs_max_pending is normally 50, but can be set at module install + * time to as little as two. 
+ */ + spin_lock(&GlobalMid_Lock); + if (atomic_read(&server->inFlight) >= cifs_max_pending) + atomic_set(&server->inFlight, cifs_max_pending - 1); + /* + * We do not want to set the max_pending too low or we could end up + * with the counter going negative. + */ + spin_unlock(&GlobalMid_Lock); /* * Although there should not be any requests blocked on this queue it * can not hurt to be paranoid and try to wake up requests that may @@ -1911,8 +1909,7 @@ cifs_get_tcp_session(struct smb_vol *volume_info) tcp_ses->noblocksnd = volume_info->noblocksnd; tcp_ses->noautotune = volume_info->noautotune; tcp_ses->tcp_nodelay = volume_info->sockopt_tcp_nodelay; - tcp_ses->in_flight = 0; - tcp_ses->credits = 1; + atomic_set(&tcp_ses->inFlight, 0); init_waitqueue_head(&tcp_ses->response_q); init_waitqueue_head(&tcp_ses->request_q); INIT_LIST_HEAD(&tcp_ses->pending_mid_q); @@ -3374,7 +3371,7 @@ cifs_ra_pages(struct cifs_sb_info *cifs_sb) int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb_vol *volume_info) { - int rc; + int rc = 0; int xid; struct cifs_ses *pSesInfo; struct cifs_tcon *tcon; @@ -3401,7 +3398,6 @@ cifs_mount(struct cifs_sb_info *cifs_sb, struct smb_vol *volume_info) FreeXid(xid); } #endif - rc = 0; tcon = NULL; pSesInfo = NULL; srvTcp = NULL; @@ -3763,11 +3759,9 @@ int cifs_negotiate_protocol(unsigned int xid, struct cifs_ses *ses) if (server->maxBuf != 0) return 0; - cifs_set_credits(server, 1); rc = CIFSSMBNegotiate(xid, ses); if (rc == -EAGAIN) { /* retry only once on 1st time connection */ - cifs_set_credits(server, 1); rc = CIFSSMBNegotiate(xid, ses); if (rc == -EAGAIN) rc = -EHOSTDOWN; diff --git a/trunk/fs/cifs/dir.c b/trunk/fs/cifs/dir.c index d172c8ed9017..bc7e24420ac0 100644 --- a/trunk/fs/cifs/dir.c +++ b/trunk/fs/cifs/dir.c @@ -171,7 +171,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, umode_t mode, } tcon = tlink_tcon(tlink); - if (tcon->ses->server->oplocks) + if (enable_oplocks) oplock = REQ_OPLOCK; if (nd) @@ -492,7 +492,7 @@ 
cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, { int xid; int rc = 0; /* to get around spurious gcc warning, set to zero here */ - __u32 oplock; + __u32 oplock = enable_oplocks ? REQ_OPLOCK : 0; __u16 fileHandle = 0; bool posix_open = false; struct cifs_sb_info *cifs_sb; @@ -518,8 +518,6 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, } pTcon = tlink_tcon(tlink); - oplock = pTcon->ses->server->oplocks ? REQ_OPLOCK : 0; - /* * Don't allow the separator character in a path component. * The VFS will not allow "/", but "\" is allowed by posix. diff --git a/trunk/fs/cifs/file.c b/trunk/fs/cifs/file.c index 159fcc56dc2d..5e64748a2917 100644 --- a/trunk/fs/cifs/file.c +++ b/trunk/fs/cifs/file.c @@ -380,7 +380,7 @@ int cifs_open(struct inode *inode, struct file *file) cFYI(1, "inode = 0x%p file flags are 0x%x for %s", inode, file->f_flags, full_path); - if (tcon->ses->server->oplocks) + if (enable_oplocks) oplock = REQ_OPLOCK; else oplock = 0; @@ -505,7 +505,7 @@ static int cifs_reopen_file(struct cifsFileInfo *pCifsFile, bool can_flush) cFYI(1, "inode = 0x%p file flags 0x%x for %s", inode, pCifsFile->f_flags, full_path); - if (tcon->ses->server->oplocks) + if (enable_oplocks) oplock = REQ_OPLOCK; else oplock = 0; @@ -960,9 +960,9 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile) INIT_LIST_HEAD(&locks_to_send); /* - * Allocating count locks is enough because no FL_POSIX locks can be - * added to the list while we are holding cinode->lock_mutex that - * protects locking operations of this inode. + * Allocating count locks is enough because no locks can be added to + * the list while we are holding cinode->lock_mutex that protects + * locking operations of this inode. 
*/ for (; i < count; i++) { lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL); @@ -973,20 +973,18 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile) list_add_tail(&lck->llist, &locks_to_send); } + i = 0; el = locks_to_send.next; lock_flocks(); cifs_for_each_lock(cfile->dentry->d_inode, before) { - flock = *before; - if ((flock->fl_flags & FL_POSIX) == 0) - continue; if (el == &locks_to_send) { - /* - * The list ended. We don't have enough allocated - * structures - something is really wrong. - */ + /* something is really wrong */ cERROR(1, "Can't push all brlocks!"); break; } + flock = *before; + if ((flock->fl_flags & FL_POSIX) == 0) + continue; length = 1 + flock->fl_end - flock->fl_start; if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK) type = CIFS_RDLCK; @@ -998,6 +996,7 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile) lck->length = length; lck->type = type; lck->offset = flock->fl_start; + i++; el = el->next; } unlock_flocks(); diff --git a/trunk/fs/cifs/misc.c b/trunk/fs/cifs/misc.c index c273c12de98e..703ef5c6fdb1 100644 --- a/trunk/fs/cifs/misc.c +++ b/trunk/fs/cifs/misc.c @@ -690,22 +690,3 @@ backup_cred(struct cifs_sb_info *cifs_sb) return false; } - -void -cifs_add_credits(struct TCP_Server_Info *server, const unsigned int add) -{ - spin_lock(&server->req_lock); - server->credits += add; - server->in_flight--; - spin_unlock(&server->req_lock); - wake_up(&server->request_q); -} - -void -cifs_set_credits(struct TCP_Server_Info *server, const int val) -{ - spin_lock(&server->req_lock); - server->credits = val; - server->oplocks = val > 1 ? 
enable_oplocks : false; - spin_unlock(&server->req_lock); -} diff --git a/trunk/fs/cifs/transport.c b/trunk/fs/cifs/transport.c index 310918b6fcb4..0cc9584f5889 100644 --- a/trunk/fs/cifs/transport.c +++ b/trunk/fs/cifs/transport.c @@ -254,60 +254,44 @@ smb_send(struct TCP_Server_Info *server, struct smb_hdr *smb_buffer, return smb_sendv(server, &iov, 1); } -static int -wait_for_free_credits(struct TCP_Server_Info *server, const int optype, - int *credits) +static int wait_for_free_request(struct TCP_Server_Info *server, + const int long_op) { - int rc; - - spin_lock(&server->req_lock); - if (optype == CIFS_ASYNC_OP) { + if (long_op == CIFS_ASYNC_OP) { /* oplock breaks must not be held up */ - server->in_flight++; - *credits -= 1; - spin_unlock(&server->req_lock); + atomic_inc(&server->inFlight); return 0; } + spin_lock(&GlobalMid_Lock); while (1) { - if (*credits <= 0) { - spin_unlock(&server->req_lock); + if (atomic_read(&server->inFlight) >= cifs_max_pending) { + spin_unlock(&GlobalMid_Lock); cifs_num_waiters_inc(server); - rc = wait_event_killable(server->request_q, - has_credits(server, credits)); + wait_event(server->request_q, + atomic_read(&server->inFlight) + < cifs_max_pending); cifs_num_waiters_dec(server); - if (rc) - return rc; - spin_lock(&server->req_lock); + spin_lock(&GlobalMid_Lock); } else { if (server->tcpStatus == CifsExiting) { - spin_unlock(&server->req_lock); + spin_unlock(&GlobalMid_Lock); return -ENOENT; } - /* - * Can not count locking commands against total - * as they are allowed to block on server. 
- */ + /* can not count locking commands against total + as they are allowed to block on server */ /* update # of requests on the wire to server */ - if (optype != CIFS_BLOCKING_OP) { - *credits -= 1; - server->in_flight++; - } - spin_unlock(&server->req_lock); + if (long_op != CIFS_BLOCKING_OP) + atomic_inc(&server->inFlight); + spin_unlock(&GlobalMid_Lock); break; } } return 0; } -static int -wait_for_free_request(struct TCP_Server_Info *server, const int optype) -{ - return wait_for_free_credits(server, optype, get_credits_field(server)); -} - static int allocate_mid(struct cifs_ses *ses, struct smb_hdr *in_buf, struct mid_q_entry **ppmidQ) { @@ -375,7 +359,7 @@ cifs_call_async(struct TCP_Server_Info *server, struct kvec *iov, mid = AllocMidQEntry(hdr, server); if (mid == NULL) { mutex_unlock(&server->srv_mutex); - cifs_add_credits(server, 1); + atomic_dec(&server->inFlight); wake_up(&server->request_q); return -ENOMEM; } @@ -408,7 +392,7 @@ cifs_call_async(struct TCP_Server_Info *server, struct kvec *iov, return rc; out_err: delete_mid(mid); - cifs_add_credits(server, 1); + atomic_dec(&server->inFlight); wake_up(&server->request_q); return rc; } @@ -580,7 +564,8 @@ SendReceive2(const unsigned int xid, struct cifs_ses *ses, mutex_unlock(&ses->server->srv_mutex); cifs_small_buf_release(in_buf); /* Update # of requests on wire to server */ - cifs_add_credits(ses->server, 1); + atomic_dec(&ses->server->inFlight); + wake_up(&ses->server->request_q); return rc; } rc = cifs_sign_smb2(iov, n_vec, ses->server, &midQ->sequence_number); @@ -616,7 +601,8 @@ SendReceive2(const unsigned int xid, struct cifs_ses *ses, midQ->callback = DeleteMidQEntry; spin_unlock(&GlobalMid_Lock); cifs_small_buf_release(in_buf); - cifs_add_credits(ses->server, 1); + atomic_dec(&ses->server->inFlight); + wake_up(&ses->server->request_q); return rc; } spin_unlock(&GlobalMid_Lock); @@ -626,7 +612,8 @@ SendReceive2(const unsigned int xid, struct cifs_ses *ses, rc = cifs_sync_mid_result(midQ, 
ses->server); if (rc != 0) { - cifs_add_credits(ses->server, 1); + atomic_dec(&ses->server->inFlight); + wake_up(&ses->server->request_q); return rc; } @@ -650,7 +637,8 @@ SendReceive2(const unsigned int xid, struct cifs_ses *ses, midQ->resp_buf = NULL; out: delete_mid(midQ); - cifs_add_credits(ses->server, 1); + atomic_dec(&ses->server->inFlight); + wake_up(&ses->server->request_q); return rc; } @@ -700,7 +688,8 @@ SendReceive(const unsigned int xid, struct cifs_ses *ses, if (rc) { mutex_unlock(&ses->server->srv_mutex); /* Update # of requests on wire to server */ - cifs_add_credits(ses->server, 1); + atomic_dec(&ses->server->inFlight); + wake_up(&ses->server->request_q); return rc; } @@ -732,7 +721,8 @@ SendReceive(const unsigned int xid, struct cifs_ses *ses, /* no longer considered to be "in-flight" */ midQ->callback = DeleteMidQEntry; spin_unlock(&GlobalMid_Lock); - cifs_add_credits(ses->server, 1); + atomic_dec(&ses->server->inFlight); + wake_up(&ses->server->request_q); return rc; } spin_unlock(&GlobalMid_Lock); @@ -740,7 +730,8 @@ SendReceive(const unsigned int xid, struct cifs_ses *ses, rc = cifs_sync_mid_result(midQ, ses->server); if (rc != 0) { - cifs_add_credits(ses->server, 1); + atomic_dec(&ses->server->inFlight); + wake_up(&ses->server->request_q); return rc; } @@ -756,7 +747,8 @@ SendReceive(const unsigned int xid, struct cifs_ses *ses, rc = cifs_check_receive(midQ, ses->server, 0); out: delete_mid(midQ); - cifs_add_credits(ses->server, 1); + atomic_dec(&ses->server->inFlight); + wake_up(&ses->server->request_q); return rc; } diff --git a/trunk/fs/lockd/clnt4xdr.c b/trunk/fs/lockd/clnt4xdr.c index 3ddcbb1c0a43..f848b52c67b1 100644 --- a/trunk/fs/lockd/clnt4xdr.c +++ b/trunk/fs/lockd/clnt4xdr.c @@ -598,7 +598,7 @@ static struct rpc_procinfo nlm4_procedures[] = { PROC(GRANTED_RES, res, norep), }; -const struct rpc_version nlm_version4 = { +struct rpc_version nlm_version4 = { .number = 4, .nrprocs = ARRAY_SIZE(nlm4_procedures), .procs = 
nlm4_procedures, diff --git a/trunk/fs/lockd/clntlock.c b/trunk/fs/lockd/clntlock.c index ba1dc2eebd1e..8d4ea8351e3d 100644 --- a/trunk/fs/lockd/clntlock.c +++ b/trunk/fs/lockd/clntlock.c @@ -62,8 +62,7 @@ struct nlm_host *nlmclnt_init(const struct nlmclnt_initdata *nlm_init) host = nlmclnt_lookup_host(nlm_init->address, nlm_init->addrlen, nlm_init->protocol, nlm_version, - nlm_init->hostname, nlm_init->noresvport, - nlm_init->net); + nlm_init->hostname, nlm_init->noresvport); if (host == NULL) { lockd_down(); return ERR_PTR(-ENOLCK); diff --git a/trunk/fs/lockd/clntxdr.c b/trunk/fs/lockd/clntxdr.c index 3d35e3e80c1c..180ac34feb9a 100644 --- a/trunk/fs/lockd/clntxdr.c +++ b/trunk/fs/lockd/clntxdr.c @@ -596,19 +596,19 @@ static struct rpc_procinfo nlm_procedures[] = { PROC(GRANTED_RES, res, norep), }; -static const struct rpc_version nlm_version1 = { +static struct rpc_version nlm_version1 = { .number = 1, .nrprocs = ARRAY_SIZE(nlm_procedures), .procs = nlm_procedures, }; -static const struct rpc_version nlm_version3 = { +static struct rpc_version nlm_version3 = { .number = 3, .nrprocs = ARRAY_SIZE(nlm_procedures), .procs = nlm_procedures, }; -static const struct rpc_version *nlm_versions[] = { +static struct rpc_version *nlm_versions[] = { [1] = &nlm_version1, [3] = &nlm_version3, #ifdef CONFIG_LOCKD_V4 @@ -618,7 +618,7 @@ static const struct rpc_version *nlm_versions[] = { static struct rpc_stat nlm_rpc_stats; -const struct rpc_program nlm_program = { +struct rpc_program nlm_program = { .name = "lockd", .number = NLM_PROGRAM, .nrvers = ARRAY_SIZE(nlm_versions), diff --git a/trunk/fs/lockd/host.c b/trunk/fs/lockd/host.c index eb75ca7c2d6e..6f29836ec0cb 100644 --- a/trunk/fs/lockd/host.c +++ b/trunk/fs/lockd/host.c @@ -17,8 +17,6 @@ #include #include -#include - #include #define NLMDBG_FACILITY NLMDBG_HOSTCACHE @@ -56,7 +54,6 @@ struct nlm_lookup_host_info { const char *hostname; /* remote's hostname */ const size_t hostname_len; /* it's length */ const int 
noresvport; /* use non-priv port */ - struct net *net; /* network namespace to bind */ }; /* @@ -158,7 +155,6 @@ static struct nlm_host *nlm_alloc_host(struct nlm_lookup_host_info *ni, INIT_LIST_HEAD(&host->h_reclaim); host->h_nsmhandle = nsm; host->h_addrbuf = nsm->sm_addrbuf; - host->net = ni->net; out: return host; @@ -210,8 +206,7 @@ struct nlm_host *nlmclnt_lookup_host(const struct sockaddr *sap, const unsigned short protocol, const u32 version, const char *hostname, - int noresvport, - struct net *net) + int noresvport) { struct nlm_lookup_host_info ni = { .server = 0, @@ -222,7 +217,6 @@ struct nlm_host *nlmclnt_lookup_host(const struct sockaddr *sap, .hostname = hostname, .hostname_len = strlen(hostname), .noresvport = noresvport, - .net = net, }; struct hlist_head *chain; struct hlist_node *pos; @@ -237,8 +231,6 @@ struct nlm_host *nlmclnt_lookup_host(const struct sockaddr *sap, chain = &nlm_client_hosts[nlm_hash_address(sap)]; hlist_for_each_entry(host, pos, chain, h_hash) { - if (host->net != net) - continue; if (!rpc_cmp_addr(nlm_addr(host), sap)) continue; @@ -326,7 +318,6 @@ struct nlm_host *nlmsvc_lookup_host(const struct svc_rqst *rqstp, struct nsm_handle *nsm = NULL; struct sockaddr *src_sap = svc_daddr(rqstp); size_t src_len = rqstp->rq_daddrlen; - struct net *net = rqstp->rq_xprt->xpt_net; struct nlm_lookup_host_info ni = { .server = 1, .sap = svc_addr(rqstp), @@ -335,7 +326,6 @@ struct nlm_host *nlmsvc_lookup_host(const struct svc_rqst *rqstp, .version = rqstp->rq_vers, .hostname = hostname, .hostname_len = hostname_len, - .net = net, }; dprintk("lockd: %s(host='%*s', vers=%u, proto=%s)\n", __func__, @@ -349,8 +339,6 @@ struct nlm_host *nlmsvc_lookup_host(const struct svc_rqst *rqstp, chain = &nlm_server_hosts[nlm_hash_address(ni.sap)]; hlist_for_each_entry(host, pos, chain, h_hash) { - if (host->net != net) - continue; if (!rpc_cmp_addr(nlm_addr(host), ni.sap)) continue; @@ -443,7 +431,7 @@ nlm_bind_host(struct nlm_host *host) .to_retries = 5U, 
}; struct rpc_create_args args = { - .net = host->net, + .net = &init_net, .protocol = host->h_proto, .address = nlm_addr(host), .addrsize = host->h_addrlen, @@ -565,8 +553,12 @@ void nlm_host_rebooted(const struct nlm_reboot *info) nsm_release(nsm); } +/* + * Shut down the hosts module. + * Note that this routine is called only at server shutdown time. + */ void -nlm_shutdown_hosts_net(struct net *net) +nlm_shutdown_hosts(void) { struct hlist_head *chain; struct hlist_node *pos; @@ -578,8 +570,6 @@ nlm_shutdown_hosts_net(struct net *net) /* First, make all hosts eligible for gc */ dprintk("lockd: nuking all hosts...\n"); for_each_host(host, pos, chain, nlm_server_hosts) { - if (net && host->net != net) - continue; host->h_expires = jiffies - 1; if (host->h_rpcclnt) { rpc_shutdown_client(host->h_rpcclnt); @@ -590,29 +580,15 @@ nlm_shutdown_hosts_net(struct net *net) /* Then, perform a garbage collection pass */ nlm_gc_hosts(); mutex_unlock(&nlm_host_mutex); -} - -/* - * Shut down the hosts module. - * Note that this routine is called only at server shutdown time. 
- */ -void -nlm_shutdown_hosts(void) -{ - struct hlist_head *chain; - struct hlist_node *pos; - struct nlm_host *host; - - nlm_shutdown_hosts_net(NULL); /* complain if any hosts are left */ if (nrhosts != 0) { printk(KERN_WARNING "lockd: couldn't shutdown host module!\n"); dprintk("lockd: %lu hosts left:\n", nrhosts); for_each_host(host, pos, chain, nlm_server_hosts) { - dprintk(" %s (cnt %d use %d exp %ld net %p)\n", + dprintk(" %s (cnt %d use %d exp %ld)\n", host->h_name, atomic_read(&host->h_count), - host->h_inuse, host->h_expires, host->net); + host->h_inuse, host->h_expires); } } } diff --git a/trunk/fs/lockd/mon.c b/trunk/fs/lockd/mon.c index 7ef14b3c5bee..65ba36b80a9e 100644 --- a/trunk/fs/lockd/mon.c +++ b/trunk/fs/lockd/mon.c @@ -47,7 +47,7 @@ struct nsm_res { u32 state; }; -static const struct rpc_program nsm_program; +static struct rpc_program nsm_program; static LIST_HEAD(nsm_handles); static DEFINE_SPINLOCK(nsm_lock); @@ -62,14 +62,14 @@ static inline struct sockaddr *nsm_addr(const struct nsm_handle *nsm) return (struct sockaddr *)&nsm->sm_addr; } -static struct rpc_clnt *nsm_create(struct net *net) +static struct rpc_clnt *nsm_create(void) { struct sockaddr_in sin = { .sin_family = AF_INET, .sin_addr.s_addr = htonl(INADDR_LOOPBACK), }; struct rpc_create_args args = { - .net = net, + .net = &init_net, .protocol = XPRT_TRANSPORT_UDP, .address = (struct sockaddr *)&sin, .addrsize = sizeof(sin), @@ -83,8 +83,7 @@ static struct rpc_clnt *nsm_create(struct net *net) return rpc_create(&args); } -static int nsm_mon_unmon(struct nsm_handle *nsm, u32 proc, struct nsm_res *res, - struct net *net) +static int nsm_mon_unmon(struct nsm_handle *nsm, u32 proc, struct nsm_res *res) { struct rpc_clnt *clnt; int status; @@ -100,7 +99,7 @@ static int nsm_mon_unmon(struct nsm_handle *nsm, u32 proc, struct nsm_res *res, .rpc_resp = res, }; - clnt = nsm_create(net); + clnt = nsm_create(); if (IS_ERR(clnt)) { status = PTR_ERR(clnt); dprintk("lockd: failed to create NSM 
upcall transport, " @@ -150,7 +149,7 @@ int nsm_monitor(const struct nlm_host *host) */ nsm->sm_mon_name = nsm_use_hostnames ? nsm->sm_name : nsm->sm_addrbuf; - status = nsm_mon_unmon(nsm, NSMPROC_MON, &res, host->net); + status = nsm_mon_unmon(nsm, NSMPROC_MON, &res); if (unlikely(res.status != 0)) status = -EIO; if (unlikely(status < 0)) { @@ -184,7 +183,7 @@ void nsm_unmonitor(const struct nlm_host *host) && nsm->sm_monitored && !nsm->sm_sticky) { dprintk("lockd: nsm_unmonitor(%s)\n", nsm->sm_name); - status = nsm_mon_unmon(nsm, NSMPROC_UNMON, &res, host->net); + status = nsm_mon_unmon(nsm, NSMPROC_UNMON, &res); if (res.status != 0) status = -EIO; if (status < 0) @@ -535,19 +534,19 @@ static struct rpc_procinfo nsm_procedures[] = { }, }; -static const struct rpc_version nsm_version1 = { +static struct rpc_version nsm_version1 = { .number = 1, .nrprocs = ARRAY_SIZE(nsm_procedures), .procs = nsm_procedures }; -static const struct rpc_version *nsm_version[] = { +static struct rpc_version * nsm_version[] = { [1] = &nsm_version1, }; static struct rpc_stat nsm_stats; -static const struct rpc_program nsm_program = { +static struct rpc_program nsm_program = { .name = "statd", .number = NSM_PROGRAM, .nrvers = ARRAY_SIZE(nsm_version), diff --git a/trunk/fs/lockd/netns.h b/trunk/fs/lockd/netns.h deleted file mode 100644 index ce227e0fbc5c..000000000000 --- a/trunk/fs/lockd/netns.h +++ /dev/null @@ -1,12 +0,0 @@ -#ifndef __LOCKD_NETNS_H__ -#define __LOCKD_NETNS_H__ - -#include - -struct lockd_net { - unsigned int nlmsvc_users; -}; - -extern int lockd_net_id; - -#endif diff --git a/trunk/fs/lockd/svc.c b/trunk/fs/lockd/svc.c index 2774e1013b34..c061b9aa7ddb 100644 --- a/trunk/fs/lockd/svc.c +++ b/trunk/fs/lockd/svc.c @@ -35,8 +35,6 @@ #include #include -#include "netns.h" - #define NLMDBG_FACILITY NLMDBG_SVC #define LOCKD_BUFSIZE (1024 + NLMSVC_XDRSIZE) #define ALLOWED_SIGS (sigmask(SIGKILL)) @@ -52,8 +50,6 @@ static struct task_struct *nlmsvc_task; static struct svc_rqst 
*nlmsvc_rqst; unsigned long nlmsvc_timeout; -int lockd_net_id; - /* * These can be set at insmod time (useful for NFS as root filesystem), * and also changed through the sysctl interface. -- Jamie Lokier, Aug 2003 @@ -193,29 +189,27 @@ lockd(void *vrqstp) } static int create_lockd_listener(struct svc_serv *serv, const char *name, - struct net *net, const int family, - const unsigned short port) + const int family, const unsigned short port) { struct svc_xprt *xprt; - xprt = svc_find_xprt(serv, name, net, family, 0); + xprt = svc_find_xprt(serv, name, family, 0); if (xprt == NULL) - return svc_create_xprt(serv, name, net, family, port, + return svc_create_xprt(serv, name, &init_net, family, port, SVC_SOCK_DEFAULTS); svc_xprt_put(xprt); return 0; } -static int create_lockd_family(struct svc_serv *serv, struct net *net, - const int family) +static int create_lockd_family(struct svc_serv *serv, const int family) { int err; - err = create_lockd_listener(serv, "udp", net, family, nlm_udpport); + err = create_lockd_listener(serv, "udp", family, nlm_udpport); if (err < 0) return err; - return create_lockd_listener(serv, "tcp", net, family, nlm_tcpport); + return create_lockd_listener(serv, "tcp", family, nlm_tcpport); } /* @@ -228,16 +222,16 @@ static int create_lockd_family(struct svc_serv *serv, struct net *net, * Returns zero if all listeners are available; otherwise a * negative errno value is returned. 
*/ -static int make_socks(struct svc_serv *serv, struct net *net) +static int make_socks(struct svc_serv *serv) { static int warned; int err; - err = create_lockd_family(serv, net, PF_INET); + err = create_lockd_family(serv, PF_INET); if (err < 0) goto out_err; - err = create_lockd_family(serv, net, PF_INET6); + err = create_lockd_family(serv, PF_INET6); if (err < 0 && err != -EAFNOSUPPORT) goto out_err; @@ -251,47 +245,6 @@ static int make_socks(struct svc_serv *serv, struct net *net) return err; } -static int lockd_up_net(struct net *net) -{ - struct lockd_net *ln = net_generic(net, lockd_net_id); - struct svc_serv *serv = nlmsvc_rqst->rq_server; - int error; - - if (ln->nlmsvc_users) - return 0; - - error = svc_rpcb_setup(serv, net); - if (error) - goto err_rpcb; - - error = make_socks(serv, net); - if (error < 0) - goto err_socks; - return 0; - -err_socks: - svc_rpcb_cleanup(serv, net); -err_rpcb: - return error; -} - -static void lockd_down_net(struct net *net) -{ - struct lockd_net *ln = net_generic(net, lockd_net_id); - struct svc_serv *serv = nlmsvc_rqst->rq_server; - - if (ln->nlmsvc_users) { - if (--ln->nlmsvc_users == 0) { - nlm_shutdown_hosts_net(net); - svc_shutdown_net(serv, net); - } - } else { - printk(KERN_ERR "lockd_down_net: no users! task=%p, net=%p\n", - nlmsvc_task, net); - BUG(); - } -} - /* * Bring up the lockd process if it's not already up. */ @@ -299,16 +252,13 @@ int lockd_up(void) { struct svc_serv *serv; int error = 0; - struct net *net = current->nsproxy->net_ns; mutex_lock(&nlmsvc_mutex); /* * Check whether we're already up and running. 
*/ - if (nlmsvc_rqst) { - error = lockd_up_net(net); + if (nlmsvc_rqst) goto out; - } /* * Sanity check: if there's no pid, @@ -325,7 +275,7 @@ int lockd_up(void) goto out; } - error = make_socks(serv, net); + error = make_socks(serv); if (error < 0) goto destroy_and_out; @@ -363,12 +313,8 @@ int lockd_up(void) destroy_and_out: svc_destroy(serv); out: - if (!error) { - struct lockd_net *ln = net_generic(net, lockd_net_id); - - ln->nlmsvc_users++; + if (!error) nlmsvc_users++; - } mutex_unlock(&nlmsvc_mutex); return error; } @@ -382,10 +328,8 @@ lockd_down(void) { mutex_lock(&nlmsvc_mutex); if (nlmsvc_users) { - if (--nlmsvc_users) { - lockd_down_net(current->nsproxy->net_ns); + if (--nlmsvc_users) goto out; - } } else { printk(KERN_ERR "lockd_down: no users! task=%p\n", nlmsvc_task); @@ -553,55 +497,24 @@ module_param_call(nlm_tcpport, param_set_port, param_get_int, module_param(nsm_use_hostnames, bool, 0644); module_param(nlm_max_connections, uint, 0644); -static int lockd_init_net(struct net *net) -{ - return 0; -} - -static void lockd_exit_net(struct net *net) -{ -} - -static struct pernet_operations lockd_net_ops = { - .init = lockd_init_net, - .exit = lockd_exit_net, - .id = &lockd_net_id, - .size = sizeof(struct lockd_net), -}; - - /* * Initialising and terminating the module. */ static int __init init_nlm(void) { - int err; - #ifdef CONFIG_SYSCTL - err = -ENOMEM; nlm_sysctl_table = register_sysctl_table(nlm_sysctl_root); - if (nlm_sysctl_table == NULL) - goto err_sysctl; -#endif - err = register_pernet_subsys(&lockd_net_ops); - if (err) - goto err_pernet; + return nlm_sysctl_table ? 
0 : -ENOMEM; +#else return 0; - -err_pernet: -#ifdef CONFIG_SYSCTL - unregister_sysctl_table(nlm_sysctl_table); #endif -err_sysctl: - return err; } static void __exit exit_nlm(void) { /* FIXME: delete all NLM clients */ nlm_shutdown_hosts(); - unregister_pernet_subsys(&lockd_net_ops); #ifdef CONFIG_SYSCTL unregister_sysctl_table(nlm_sysctl_table); #endif diff --git a/trunk/fs/lockd/svclock.c b/trunk/fs/lockd/svclock.c index e46353f41a42..f0179c3745d2 100644 --- a/trunk/fs/lockd/svclock.c +++ b/trunk/fs/lockd/svclock.c @@ -46,6 +46,7 @@ static void nlmsvc_remove_block(struct nlm_block *block); static int nlmsvc_setgrantargs(struct nlm_rqst *call, struct nlm_lock *lock); static void nlmsvc_freegrantargs(struct nlm_rqst *call); static const struct rpc_call_ops nlmsvc_grant_ops; +static const char *nlmdbg_cookie2a(const struct nlm_cookie *cookie); /* * The list of blocked locks to retry @@ -53,35 +54,6 @@ static const struct rpc_call_ops nlmsvc_grant_ops; static LIST_HEAD(nlm_blocked); static DEFINE_SPINLOCK(nlm_blocked_lock); -#ifdef LOCKD_DEBUG -static const char *nlmdbg_cookie2a(const struct nlm_cookie *cookie) -{ - /* - * We can get away with a static buffer because we're only - * called with BKL held. - */ - static char buf[2*NLM_MAXCOOKIELEN+1]; - unsigned int i, len = sizeof(buf); - char *p = buf; - - len--; /* allow for trailing \0 */ - if (len < 3) - return "???"; - for (i = 0 ; i < cookie->len ; i++) { - if (len < 2) { - strcpy(p-3, "..."); - break; - } - sprintf(p, "%02x", cookie->data[i]); - p += 2; - len -= 2; - } - *p = '\0'; - - return buf; -} -#endif - /* * Insert a blocked lock into the global list */ @@ -963,3 +935,32 @@ nlmsvc_retry_blocked(void) return timeout; } + +#ifdef RPC_DEBUG +static const char *nlmdbg_cookie2a(const struct nlm_cookie *cookie) +{ + /* + * We can get away with a static buffer because we're only + * called with BKL held. 
+ */ + static char buf[2*NLM_MAXCOOKIELEN+1]; + unsigned int i, len = sizeof(buf); + char *p = buf; + + len--; /* allow for trailing \0 */ + if (len < 3) + return "???"; + for (i = 0 ; i < cookie->len ; i++) { + if (len < 2) { + strcpy(p-3, "..."); + break; + } + sprintf(p, "%02x", cookie->data[i]); + p += 2; + len -= 2; + } + *p = '\0'; + + return buf; +} +#endif diff --git a/trunk/fs/nfs/Kconfig b/trunk/fs/nfs/Kconfig index 2a0e6c599147..dbcd82126aed 100644 --- a/trunk/fs/nfs/Kconfig +++ b/trunk/fs/nfs/Kconfig @@ -64,7 +64,6 @@ config NFS_V4 bool "NFS client support for NFS version 4" depends on NFS_FS select SUNRPC_GSS - select KEYS help This option enables support for version 4 of the NFS protocol (RFC 3530) in the kernel's NFS client. @@ -99,18 +98,6 @@ config PNFS_OBJLAYOUT depends on NFS_FS && NFS_V4_1 && SCSI_OSD_ULD default m -config NFS_V4_1_IMPLEMENTATION_ID_DOMAIN - string "NFSv4.1 Implementation ID Domain" - depends on NFS_V4_1 - default "kernel.org" - help - This option defines the domain portion of the implementation ID that - may be sent in the NFS exchange_id operation. The value must be in - the format of a DNS domain name and should be set to the DNS domain - name of the distribution. - If the NFS client is unchanged from the upstream kernel, this - option should be set to the default "kernel.org". - config ROOT_NFS bool "Root file system on NFS" depends on NFS_FS=y && IP_PNP @@ -143,10 +130,16 @@ config NFS_USE_KERNEL_DNS bool depends on NFS_V4 && !NFS_USE_LEGACY_DNS select DNS_RESOLVER + select KEYS default y -config NFS_DEBUG - bool - depends on NFS_FS && SUNRPC_DEBUG - select CRC32 - default y +config NFS_USE_NEW_IDMAPPER + bool "Use the new idmapper upcall routine" + depends on NFS_V4 && KEYS + help + Say Y here if you want NFS to use the new idmapper upcall functions. + You will need /sbin/request-key (usually provided by the keyutils + package). For details, read + . + + If you are unsure, say N. 
diff --git a/trunk/fs/nfs/blocklayout/blocklayout.c b/trunk/fs/nfs/blocklayout/blocklayout.c index 9c94297bb70e..48cfac31f64c 100644 --- a/trunk/fs/nfs/blocklayout/blocklayout.c +++ b/trunk/fs/nfs/blocklayout/blocklayout.c @@ -46,6 +46,9 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Andy Adamson "); MODULE_DESCRIPTION("The NFSv4.1 pNFS Block layout driver"); +struct dentry *bl_device_pipe; +wait_queue_head_t bl_wq; + static void print_page(struct page *page) { dprintk("PRINTPAGE page %p\n", page); @@ -233,11 +236,12 @@ bl_read_pagelist(struct nfs_read_data *rdata) sector_t isect, extent_length = 0; struct parallel_io *par; loff_t f_offset = rdata->args.offset; + size_t count = rdata->args.count; struct page **pages = rdata->args.pages; int pg_index = rdata->args.pgbase >> PAGE_CACHE_SHIFT; - dprintk("%s enter nr_pages %u offset %lld count %u\n", __func__, - rdata->npages, f_offset, (unsigned int)rdata->args.count); + dprintk("%s enter nr_pages %u offset %lld count %Zd\n", __func__, + rdata->npages, f_offset, count); par = alloc_parallel(rdata); if (!par) @@ -1021,128 +1025,10 @@ static const struct rpc_pipe_ops bl_upcall_ops = { .destroy_msg = bl_pipe_destroy_msg, }; -static struct dentry *nfs4blocklayout_register_sb(struct super_block *sb, - struct rpc_pipe *pipe) -{ - struct dentry *dir, *dentry; - - dir = rpc_d_lookup_sb(sb, NFS_PIPE_DIRNAME); - if (dir == NULL) - return ERR_PTR(-ENOENT); - dentry = rpc_mkpipe_dentry(dir, "blocklayout", NULL, pipe); - dput(dir); - return dentry; -} - -static void nfs4blocklayout_unregister_sb(struct super_block *sb, - struct rpc_pipe *pipe) -{ - if (pipe->dentry) - rpc_unlink(pipe->dentry); -} - -static int rpc_pipefs_event(struct notifier_block *nb, unsigned long event, - void *ptr) -{ - struct super_block *sb = ptr; - struct net *net = sb->s_fs_info; - struct nfs_net *nn = net_generic(net, nfs_net_id); - struct dentry *dentry; - int ret = 0; - - if (!try_module_get(THIS_MODULE)) - return 0; - - if (nn->bl_device_pipe == NULL) { - 
module_put(THIS_MODULE); - return 0; - } - - switch (event) { - case RPC_PIPEFS_MOUNT: - dentry = nfs4blocklayout_register_sb(sb, nn->bl_device_pipe); - if (IS_ERR(dentry)) { - ret = PTR_ERR(dentry); - break; - } - nn->bl_device_pipe->dentry = dentry; - break; - case RPC_PIPEFS_UMOUNT: - if (nn->bl_device_pipe->dentry) - nfs4blocklayout_unregister_sb(sb, nn->bl_device_pipe); - break; - default: - ret = -ENOTSUPP; - break; - } - module_put(THIS_MODULE); - return ret; -} - -static struct notifier_block nfs4blocklayout_block = { - .notifier_call = rpc_pipefs_event, -}; - -static struct dentry *nfs4blocklayout_register_net(struct net *net, - struct rpc_pipe *pipe) -{ - struct super_block *pipefs_sb; - struct dentry *dentry; - - pipefs_sb = rpc_get_sb_net(net); - if (!pipefs_sb) - return NULL; - dentry = nfs4blocklayout_register_sb(pipefs_sb, pipe); - rpc_put_sb_net(net); - return dentry; -} - -static void nfs4blocklayout_unregister_net(struct net *net, - struct rpc_pipe *pipe) -{ - struct super_block *pipefs_sb; - - pipefs_sb = rpc_get_sb_net(net); - if (pipefs_sb) { - nfs4blocklayout_unregister_sb(pipefs_sb, pipe); - rpc_put_sb_net(net); - } -} - -static int nfs4blocklayout_net_init(struct net *net) -{ - struct nfs_net *nn = net_generic(net, nfs_net_id); - struct dentry *dentry; - - init_waitqueue_head(&nn->bl_wq); - nn->bl_device_pipe = rpc_mkpipe_data(&bl_upcall_ops, 0); - if (IS_ERR(nn->bl_device_pipe)) - return PTR_ERR(nn->bl_device_pipe); - dentry = nfs4blocklayout_register_net(net, nn->bl_device_pipe); - if (IS_ERR(dentry)) { - rpc_destroy_pipe_data(nn->bl_device_pipe); - return PTR_ERR(dentry); - } - nn->bl_device_pipe->dentry = dentry; - return 0; -} - -static void nfs4blocklayout_net_exit(struct net *net) -{ - struct nfs_net *nn = net_generic(net, nfs_net_id); - - nfs4blocklayout_unregister_net(net, nn->bl_device_pipe); - rpc_destroy_pipe_data(nn->bl_device_pipe); - nn->bl_device_pipe = NULL; -} - -static struct pernet_operations nfs4blocklayout_net_ops = { - 
.init = nfs4blocklayout_net_init, - .exit = nfs4blocklayout_net_exit, -}; - static int __init nfs4blocklayout_init(void) { + struct vfsmount *mnt; + struct path path; int ret; dprintk("%s: NFSv4 Block Layout Driver Registering...\n", __func__); @@ -1151,17 +1037,32 @@ static int __init nfs4blocklayout_init(void) if (ret) goto out; - ret = rpc_pipefs_notifier_register(&nfs4blocklayout_block); - if (ret) + init_waitqueue_head(&bl_wq); + + mnt = rpc_get_mount(); + if (IS_ERR(mnt)) { + ret = PTR_ERR(mnt); goto out_remove; - ret = register_pernet_subsys(&nfs4blocklayout_net_ops); + } + + ret = vfs_path_lookup(mnt->mnt_root, + mnt, + NFS_PIPE_DIRNAME, 0, &path); if (ret) - goto out_notifier; + goto out_putrpc; + + bl_device_pipe = rpc_mkpipe(path.dentry, "blocklayout", NULL, + &bl_upcall_ops, 0); + path_put(&path); + if (IS_ERR(bl_device_pipe)) { + ret = PTR_ERR(bl_device_pipe); + goto out_putrpc; + } out: return ret; -out_notifier: - rpc_pipefs_notifier_unregister(&nfs4blocklayout_block); +out_putrpc: + rpc_put_mount(); out_remove: pnfs_unregister_layoutdriver(&blocklayout_type); return ret; @@ -1172,9 +1073,9 @@ static void __exit nfs4blocklayout_exit(void) dprintk("%s: NFSv4 Block Layout Driver Unregistering...\n", __func__); - rpc_pipefs_notifier_unregister(&nfs4blocklayout_block); - unregister_pernet_subsys(&nfs4blocklayout_net_ops); pnfs_unregister_layoutdriver(&blocklayout_type); + rpc_unlink(bl_device_pipe); + rpc_put_mount(); } MODULE_ALIAS("nfs-layouttype4-3"); diff --git a/trunk/fs/nfs/blocklayout/blocklayout.h b/trunk/fs/nfs/blocklayout/blocklayout.h index 03350690118e..e31a2df28e70 100644 --- a/trunk/fs/nfs/blocklayout/blocklayout.h +++ b/trunk/fs/nfs/blocklayout/blocklayout.h @@ -37,7 +37,6 @@ #include #include "../pnfs.h" -#include "../netns.h" #define PAGE_CACHE_SECTORS (PAGE_CACHE_SIZE >> SECTOR_SHIFT) #define PAGE_CACHE_SECTOR_SHIFT (PAGE_CACHE_SHIFT - SECTOR_SHIFT) @@ -51,7 +50,6 @@ struct pnfs_block_dev { struct list_head bm_node; struct nfs4_deviceid 
bm_mdevid; /* associated devid */ struct block_device *bm_mdev; /* meta device itself */ - struct net *net; }; enum exstate4 { @@ -153,9 +151,9 @@ BLK_LSEG2EXT(struct pnfs_layout_segment *lseg) return BLK_LO2EXT(lseg->pls_layout); } -struct bl_pipe_msg { - struct rpc_pipe_msg msg; - wait_queue_head_t *bl_wq; +struct bl_dev_msg { + int32_t status; + uint32_t major, minor; }; struct bl_msg_hdr { @@ -163,6 +161,9 @@ struct bl_msg_hdr { u16 totallen; /* length of entire message, including hdr itself */ }; +extern struct dentry *bl_device_pipe; +extern wait_queue_head_t bl_wq; + #define BL_DEVICE_UMOUNT 0x0 /* Umount--delete devices */ #define BL_DEVICE_MOUNT 0x1 /* Mount--create devices*/ #define BL_DEVICE_REQUEST_INIT 0x0 /* Start request */ diff --git a/trunk/fs/nfs/blocklayout/blocklayoutdev.c b/trunk/fs/nfs/blocklayout/blocklayoutdev.c index a5c88a554d92..d08ba9107fde 100644 --- a/trunk/fs/nfs/blocklayout/blocklayoutdev.c +++ b/trunk/fs/nfs/blocklayout/blocklayoutdev.c @@ -46,7 +46,7 @@ static int decode_sector_number(__be32 **rp, sector_t *sp) *rp = xdr_decode_hyper(*rp, &s); if (s & 0x1ff) { - printk(KERN_WARNING "NFS: %s: sector not aligned\n", __func__); + printk(KERN_WARNING "%s: sector not aligned\n", __func__); return -1; } *sp = s >> SECTOR_SHIFT; @@ -79,30 +79,27 @@ int nfs4_blkdev_put(struct block_device *bdev) return blkdev_put(bdev, FMODE_READ); } +static struct bl_dev_msg bl_mount_reply; + ssize_t bl_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) { - struct nfs_net *nn = net_generic(filp->f_dentry->d_sb->s_fs_info, - nfs_net_id); - if (mlen != sizeof (struct bl_dev_msg)) return -EINVAL; - if (copy_from_user(&nn->bl_mount_reply, src, mlen) != 0) + if (copy_from_user(&bl_mount_reply, src, mlen) != 0) return -EFAULT; - wake_up(&nn->bl_wq); + wake_up(&bl_wq); return mlen; } void bl_pipe_destroy_msg(struct rpc_pipe_msg *msg) { - struct bl_pipe_msg *bl_pipe_msg = container_of(msg, struct bl_pipe_msg, msg); - if (msg->errno >= 0) 
return; - wake_up(bl_pipe_msg->bl_wq); + wake_up(&bl_wq); } /* @@ -114,33 +111,29 @@ nfs4_blk_decode_device(struct nfs_server *server, { struct pnfs_block_dev *rv; struct block_device *bd = NULL; - struct bl_pipe_msg bl_pipe_msg; - struct rpc_pipe_msg *msg = &bl_pipe_msg.msg; + struct rpc_pipe_msg msg; struct bl_msg_hdr bl_msg = { .type = BL_DEVICE_MOUNT, .totallen = dev->mincount, }; uint8_t *dataptr; DECLARE_WAITQUEUE(wq, current); + struct bl_dev_msg *reply = &bl_mount_reply; int offset, len, i, rc; - struct net *net = server->nfs_client->net; - struct nfs_net *nn = net_generic(net, nfs_net_id); - struct bl_dev_msg *reply = &nn->bl_mount_reply; dprintk("%s CREATING PIPEFS MESSAGE\n", __func__); dprintk("%s: deviceid: %s, mincount: %d\n", __func__, dev->dev_id.data, dev->mincount); - bl_pipe_msg.bl_wq = &nn->bl_wq; - memset(msg, 0, sizeof(*msg)); - msg->data = kzalloc(sizeof(bl_msg) + dev->mincount, GFP_NOFS); - if (!msg->data) { + memset(&msg, 0, sizeof(msg)); + msg.data = kzalloc(sizeof(bl_msg) + dev->mincount, GFP_NOFS); + if (!msg.data) { rv = ERR_PTR(-ENOMEM); goto out; } - memcpy(msg->data, &bl_msg, sizeof(bl_msg)); - dataptr = (uint8_t *) msg->data; + memcpy(msg.data, &bl_msg, sizeof(bl_msg)); + dataptr = (uint8_t *) msg.data; len = dev->mincount; offset = sizeof(bl_msg); for (i = 0; len > 0; i++) { @@ -149,13 +142,13 @@ nfs4_blk_decode_device(struct nfs_server *server, len -= PAGE_CACHE_SIZE; offset += PAGE_CACHE_SIZE; } - msg->len = sizeof(bl_msg) + dev->mincount; + msg.len = sizeof(bl_msg) + dev->mincount; dprintk("%s CALLING USERSPACE DAEMON\n", __func__); - add_wait_queue(&nn->bl_wq, &wq); - rc = rpc_queue_upcall(nn->bl_device_pipe, msg); + add_wait_queue(&bl_wq, &wq); + rc = rpc_queue_upcall(bl_device_pipe->d_inode, &msg); if (rc < 0) { - remove_wait_queue(&nn->bl_wq, &wq); + remove_wait_queue(&bl_wq, &wq); rv = ERR_PTR(rc); goto out; } @@ -163,7 +156,7 @@ nfs4_blk_decode_device(struct nfs_server *server, set_current_state(TASK_UNINTERRUPTIBLE); 
schedule(); __set_current_state(TASK_RUNNING); - remove_wait_queue(&nn->bl_wq, &wq); + remove_wait_queue(&bl_wq, &wq); if (reply->status != BL_DEVICE_REQUEST_PROC) { dprintk("%s failed to open device: %d\n", @@ -188,14 +181,13 @@ nfs4_blk_decode_device(struct nfs_server *server, rv->bm_mdev = bd; memcpy(&rv->bm_mdevid, &dev->dev_id, sizeof(struct nfs4_deviceid)); - rv->net = net; dprintk("%s Created device %s with bd_block_size %u\n", __func__, bd->bd_disk->disk_name, bd->bd_block_size); out: - kfree(msg->data); + kfree(msg.data); return rv; } diff --git a/trunk/fs/nfs/blocklayout/blocklayoutdm.c b/trunk/fs/nfs/blocklayout/blocklayoutdm.c index 737d839bc17b..d055c7558073 100644 --- a/trunk/fs/nfs/blocklayout/blocklayoutdm.c +++ b/trunk/fs/nfs/blocklayout/blocklayoutdm.c @@ -38,10 +38,9 @@ #define NFSDBG_FACILITY NFSDBG_PNFS_LD -static void dev_remove(struct net *net, dev_t dev) +static void dev_remove(dev_t dev) { - struct bl_pipe_msg bl_pipe_msg; - struct rpc_pipe_msg *msg = &bl_pipe_msg.msg; + struct rpc_pipe_msg msg; struct bl_dev_msg bl_umount_request; struct bl_msg_hdr bl_msg = { .type = BL_DEVICE_UMOUNT, @@ -49,38 +48,36 @@ static void dev_remove(struct net *net, dev_t dev) }; uint8_t *dataptr; DECLARE_WAITQUEUE(wq, current); - struct nfs_net *nn = net_generic(net, nfs_net_id); dprintk("Entering %s\n", __func__); - bl_pipe_msg.bl_wq = &nn->bl_wq; - memset(msg, 0, sizeof(*msg)); - msg->data = kzalloc(1 + sizeof(bl_umount_request), GFP_NOFS); - if (!msg->data) + memset(&msg, 0, sizeof(msg)); + msg.data = kzalloc(1 + sizeof(bl_umount_request), GFP_NOFS); + if (!msg.data) goto out; memset(&bl_umount_request, 0, sizeof(bl_umount_request)); bl_umount_request.major = MAJOR(dev); bl_umount_request.minor = MINOR(dev); - memcpy(msg->data, &bl_msg, sizeof(bl_msg)); - dataptr = (uint8_t *) msg->data; + memcpy(msg.data, &bl_msg, sizeof(bl_msg)); + dataptr = (uint8_t *) msg.data; memcpy(&dataptr[sizeof(bl_msg)], &bl_umount_request, sizeof(bl_umount_request)); - msg->len = 
sizeof(bl_msg) + bl_msg.totallen; + msg.len = sizeof(bl_msg) + bl_msg.totallen; - add_wait_queue(&nn->bl_wq, &wq); - if (rpc_queue_upcall(nn->bl_device_pipe, msg) < 0) { - remove_wait_queue(&nn->bl_wq, &wq); + add_wait_queue(&bl_wq, &wq); + if (rpc_queue_upcall(bl_device_pipe->d_inode, &msg) < 0) { + remove_wait_queue(&bl_wq, &wq); goto out; } set_current_state(TASK_UNINTERRUPTIBLE); schedule(); __set_current_state(TASK_RUNNING); - remove_wait_queue(&nn->bl_wq, &wq); + remove_wait_queue(&bl_wq, &wq); out: - kfree(msg->data); + kfree(msg.data); } /* @@ -93,10 +90,10 @@ static void nfs4_blk_metadev_release(struct pnfs_block_dev *bdev) dprintk("%s Releasing\n", __func__); rv = nfs4_blkdev_put(bdev->bm_mdev); if (rv) - printk(KERN_ERR "NFS: %s nfs4_blkdev_put returns %d\n", + printk(KERN_ERR "%s nfs4_blkdev_put returns %d\n", __func__, rv); - dev_remove(bdev->net, bdev->bm_mdev->bd_dev); + dev_remove(bdev->bm_mdev->bd_dev); } void bl_free_block_dev(struct pnfs_block_dev *bdev) diff --git a/trunk/fs/nfs/blocklayout/extents.c b/trunk/fs/nfs/blocklayout/extents.c index 1f9a6032796b..1abac09f7cd5 100644 --- a/trunk/fs/nfs/blocklayout/extents.c +++ b/trunk/fs/nfs/blocklayout/extents.c @@ -147,7 +147,7 @@ static int _preload_range(struct pnfs_inval_markings *marks, count = (int)(end - start) / (int)tree->mtt_step_size; /* Pre-malloc what memory we might need */ - storage = kcalloc(count, sizeof(*storage), GFP_NOFS); + storage = kmalloc(sizeof(*storage) * count, GFP_NOFS); if (!storage) return -ENOMEM; for (i = 0; i < count; i++) { diff --git a/trunk/fs/nfs/cache_lib.c b/trunk/fs/nfs/cache_lib.c index dded26368111..c98b439332fc 100644 --- a/trunk/fs/nfs/cache_lib.c +++ b/trunk/fs/nfs/cache_lib.c @@ -13,7 +13,6 @@ #include #include #include -#include #include "cache_lib.h" @@ -112,54 +111,30 @@ int nfs_cache_wait_for_upcall(struct nfs_cache_defer_req *dreq) return 0; } -int nfs_cache_register_sb(struct super_block *sb, struct cache_detail *cd) +int nfs_cache_register(struct 
cache_detail *cd) { + struct vfsmount *mnt; + struct path path; int ret; - struct dentry *dir; - dir = rpc_d_lookup_sb(sb, "cache"); - BUG_ON(dir == NULL); - ret = sunrpc_cache_register_pipefs(dir, cd->name, 0600, cd); - dput(dir); + mnt = rpc_get_mount(); + if (IS_ERR(mnt)) + return PTR_ERR(mnt); + ret = vfs_path_lookup(mnt->mnt_root, mnt, "/cache", 0, &path); + if (ret) + goto err; + ret = sunrpc_cache_register_pipefs(path.dentry, cd->name, 0600, cd); + path_put(&path); + if (!ret) + return ret; +err: + rpc_put_mount(); return ret; } -int nfs_cache_register_net(struct net *net, struct cache_detail *cd) +void nfs_cache_unregister(struct cache_detail *cd) { - struct super_block *pipefs_sb; - int ret = 0; - - pipefs_sb = rpc_get_sb_net(net); - if (pipefs_sb) { - ret = nfs_cache_register_sb(pipefs_sb, cd); - rpc_put_sb_net(net); - } - return ret; -} - -void nfs_cache_unregister_sb(struct super_block *sb, struct cache_detail *cd) -{ - if (cd->u.pipefs.dir) - sunrpc_cache_unregister_pipefs(cd); -} - -void nfs_cache_unregister_net(struct net *net, struct cache_detail *cd) -{ - struct super_block *pipefs_sb; - - pipefs_sb = rpc_get_sb_net(net); - if (pipefs_sb) { - nfs_cache_unregister_sb(pipefs_sb, cd); - rpc_put_sb_net(net); - } -} - -void nfs_cache_init(struct cache_detail *cd) -{ - sunrpc_init_cache_detail(cd); + sunrpc_cache_unregister_pipefs(cd); + rpc_put_mount(); } -void nfs_cache_destroy(struct cache_detail *cd) -{ - sunrpc_destroy_cache_detail(cd); -} diff --git a/trunk/fs/nfs/cache_lib.h b/trunk/fs/nfs/cache_lib.h index 317db95e37f8..7cf6cafcc007 100644 --- a/trunk/fs/nfs/cache_lib.h +++ b/trunk/fs/nfs/cache_lib.h @@ -23,11 +23,5 @@ extern struct nfs_cache_defer_req *nfs_cache_defer_req_alloc(void); extern void nfs_cache_defer_req_put(struct nfs_cache_defer_req *dreq); extern int nfs_cache_wait_for_upcall(struct nfs_cache_defer_req *dreq); -extern void nfs_cache_init(struct cache_detail *cd); -extern void nfs_cache_destroy(struct cache_detail *cd); -extern int 
nfs_cache_register_net(struct net *net, struct cache_detail *cd); -extern void nfs_cache_unregister_net(struct net *net, struct cache_detail *cd); -extern int nfs_cache_register_sb(struct super_block *sb, - struct cache_detail *cd); -extern void nfs_cache_unregister_sb(struct super_block *sb, - struct cache_detail *cd); +extern int nfs_cache_register(struct cache_detail *cd); +extern void nfs_cache_unregister(struct cache_detail *cd); diff --git a/trunk/fs/nfs/callback.c b/trunk/fs/nfs/callback.c index eb95f5091c1a..516f3375e067 100644 --- a/trunk/fs/nfs/callback.c +++ b/trunk/fs/nfs/callback.c @@ -85,7 +85,7 @@ nfs4_callback_svc(void *vrqstp) } if (err < 0) { if (err != preverr) { - printk(KERN_WARNING "NFS: %s: unexpected error " + printk(KERN_WARNING "%s: unexpected error " "from svc_recv (%d)\n", __func__, err); preverr = err; } @@ -101,12 +101,12 @@ nfs4_callback_svc(void *vrqstp) /* * Prepare to bring up the NFSv4 callback service */ -static struct svc_rqst * -nfs4_callback_up(struct svc_serv *serv, struct rpc_xprt *xprt) +struct svc_rqst * +nfs4_callback_up(struct svc_serv *serv) { int ret; - ret = svc_create_xprt(serv, "tcp", xprt->xprt_net, PF_INET, + ret = svc_create_xprt(serv, "tcp", &init_net, PF_INET, nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS); if (ret <= 0) goto out_err; @@ -114,7 +114,7 @@ nfs4_callback_up(struct svc_serv *serv, struct rpc_xprt *xprt) dprintk("NFS: Callback listener port = %u (af %u)\n", nfs_callback_tcpport, PF_INET); - ret = svc_create_xprt(serv, "tcp", xprt->xprt_net, PF_INET6, + ret = svc_create_xprt(serv, "tcp", &init_net, PF_INET6, nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS); if (ret > 0) { nfs_callback_tcpport6 = ret; @@ -172,7 +172,7 @@ nfs41_callback_svc(void *vrqstp) /* * Bring up the NFSv4.1 callback service */ -static struct svc_rqst * +struct svc_rqst * nfs41_callback_up(struct svc_serv *serv, struct rpc_xprt *xprt) { struct svc_rqst *rqstp; @@ -183,7 +183,7 @@ nfs41_callback_up(struct svc_serv *serv, struct 
rpc_xprt *xprt) * fore channel connection. * Returns the input port (0) and sets the svc_serv bc_xprt on success */ - ret = svc_create_xprt(serv, "tcp-bc", xprt->xprt_net, PF_INET, 0, + ret = svc_create_xprt(serv, "tcp-bc", &init_net, PF_INET, 0, SVC_SOCK_ANONYMOUS); if (ret < 0) { rqstp = ERR_PTR(ret); @@ -269,7 +269,7 @@ int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt) serv, xprt, &rqstp, &callback_svc); if (!minorversion_setup) { /* v4.0 callback setup */ - rqstp = nfs4_callback_up(serv, xprt); + rqstp = nfs4_callback_up(serv); callback_svc = nfs4_callback_svc; } @@ -332,6 +332,7 @@ void nfs_callback_down(int minorversion) int check_gss_callback_principal(struct nfs_client *clp, struct svc_rqst *rqstp) { + struct rpc_clnt *r = clp->cl_rpcclient; char *p = svc_gss_principal(rqstp); if (rqstp->rq_authop->flavour != RPC_AUTH_GSS) @@ -352,7 +353,7 @@ check_gss_callback_principal(struct nfs_client *clp, struct svc_rqst *rqstp) if (memcmp(p, "nfs@", 4) != 0) return 0; p += 4; - if (strcmp(p, clp->cl_hostname) != 0) + if (strcmp(p, r->cl_server) != 0) return 0; return 1; } diff --git a/trunk/fs/nfs/callback.h b/trunk/fs/nfs/callback.h index a5527c90a5aa..c89d3b9e483c 100644 --- a/trunk/fs/nfs/callback.h +++ b/trunk/fs/nfs/callback.h @@ -38,8 +38,7 @@ enum nfs4_callback_opnum { struct cb_process_state { __be32 drc_status; struct nfs_client *clp; - u32 slotid; - struct net *net; + int slotid; }; struct cb_compound_hdr_arg { diff --git a/trunk/fs/nfs/callback_proc.c b/trunk/fs/nfs/callback_proc.c index 1b5d809a105e..54cea8ad5a76 100644 --- a/trunk/fs/nfs/callback_proc.c +++ b/trunk/fs/nfs/callback_proc.c @@ -8,7 +8,6 @@ #include #include #include -#include #include "nfs4_fs.h" #include "callback.h" #include "delegation.h" @@ -34,7 +33,7 @@ __be32 nfs4_callback_getattr(struct cb_getattrargs *args, res->bitmap[0] = res->bitmap[1] = 0; res->status = htonl(NFS4ERR_BADHANDLE); - dprintk_rcu("NFS: GETATTR callback request from %s\n", + dprintk("NFS: GETATTR callback 
request from %s\n", rpc_peeraddr2str(cps->clp->cl_rpcclient, RPC_DISPLAY_ADDR)); inode = nfs_delegation_find_inode(cps->clp, &args->fh); @@ -74,7 +73,7 @@ __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy, if (!cps->clp) /* Always set for v4.0. Set in cb_sequence for v4.1 */ goto out; - dprintk_rcu("NFS: RECALL callback request from %s\n", + dprintk("NFS: RECALL callback request from %s\n", rpc_peeraddr2str(cps->clp->cl_rpcclient, RPC_DISPLAY_ADDR)); res = htonl(NFS4ERR_BADHANDLE); @@ -87,7 +86,8 @@ __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy, res = 0; break; case -ENOENT: - res = htonl(NFS4ERR_BAD_STATEID); + if (res != 0) + res = htonl(NFS4ERR_BAD_STATEID); break; default: res = htonl(NFS4ERR_RESOURCE); @@ -98,64 +98,52 @@ __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy, return res; } +int nfs4_validate_delegation_stateid(struct nfs_delegation *delegation, const nfs4_stateid *stateid) +{ + if (delegation == NULL || memcmp(delegation->stateid.data, stateid->data, + sizeof(delegation->stateid.data)) != 0) + return 0; + return 1; +} + #if defined(CONFIG_NFS_V4_1) -/* - * Lookup a layout by filehandle. - * - * Note: gets a refcount on the layout hdr and on its respective inode. - * Caller must put the layout hdr and the inode. - * - * TODO: keep track of all layouts (and delegations) in a hash table - * hashed by filehandle. 
- */ -static struct pnfs_layout_hdr * get_layout_by_fh_locked(struct nfs_client *clp, struct nfs_fh *fh) +static u32 initiate_file_draining(struct nfs_client *clp, + struct cb_layoutrecallargs *args) { struct nfs_server *server; - struct inode *ino; struct pnfs_layout_hdr *lo; + struct inode *ino; + bool found = false; + u32 rv = NFS4ERR_NOMATCHING_LAYOUT; + LIST_HEAD(free_me_list); + spin_lock(&clp->cl_lock); + rcu_read_lock(); list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { list_for_each_entry(lo, &server->layouts, plh_layouts) { - if (nfs_compare_fh(fh, &NFS_I(lo->plh_inode)->fh)) + if (nfs_compare_fh(&args->cbl_fh, + &NFS_I(lo->plh_inode)->fh)) continue; ino = igrab(lo->plh_inode); if (!ino) continue; + found = true; + /* Without this, layout can be freed as soon + * as we release cl_lock. + */ get_layout_hdr(lo); - return lo; + break; } + if (found) + break; } - - return NULL; -} - -static struct pnfs_layout_hdr * get_layout_by_fh(struct nfs_client *clp, struct nfs_fh *fh) -{ - struct pnfs_layout_hdr *lo; - - spin_lock(&clp->cl_lock); - rcu_read_lock(); - lo = get_layout_by_fh_locked(clp, fh); rcu_read_unlock(); spin_unlock(&clp->cl_lock); - return lo; -} - -static u32 initiate_file_draining(struct nfs_client *clp, - struct cb_layoutrecallargs *args) -{ - struct inode *ino; - struct pnfs_layout_hdr *lo; - u32 rv = NFS4ERR_NOMATCHING_LAYOUT; - LIST_HEAD(free_me_list); - - lo = get_layout_by_fh(clp, &args->cbl_fh); - if (!lo) + if (!found) return NFS4ERR_NOMATCHING_LAYOUT; - ino = lo->plh_inode; spin_lock(&ino->i_lock); if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) || mark_matching_lsegs_invalid(lo, &free_me_list, @@ -225,13 +213,17 @@ static u32 initiate_bulk_draining(struct nfs_client *clp, static u32 do_callback_layoutrecall(struct nfs_client *clp, struct cb_layoutrecallargs *args) { - u32 res; + u32 res = NFS4ERR_DELAY; dprintk("%s enter, type=%i\n", __func__, args->cbl_recall_type); + if (test_and_set_bit(NFS4CLNT_LAYOUTRECALL, 
&clp->cl_state)) + goto out; if (args->cbl_recall_type == RETURN_FILE) res = initiate_file_draining(clp, args); else res = initiate_bulk_draining(clp, args); + clear_bit(NFS4CLNT_LAYOUTRECALL, &clp->cl_state); +out: dprintk("%s returning %i\n", __func__, res); return res; @@ -311,6 +303,21 @@ __be32 nfs4_callback_devicenotify(struct cb_devicenotifyargs *args, return res; } +int nfs41_validate_delegation_stateid(struct nfs_delegation *delegation, const nfs4_stateid *stateid) +{ + if (delegation == NULL) + return 0; + + if (stateid->stateid.seqid != 0) + return 0; + if (memcmp(&delegation->stateid.stateid.other, + &stateid->stateid.other, + NFS4_STATEID_OTHER_SIZE)) + return 0; + + return 1; +} + /* * Validate the sequenceID sent by the server. * Return success if the sequenceID is one more than what we last saw on @@ -434,7 +441,7 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args, int i; __be32 status = htonl(NFS4ERR_BADSESSION); - clp = nfs4_find_client_sessionid(cps->net, args->csa_addr, &args->csa_sessionid); + clp = nfs4_find_client_sessionid(args->csa_addr, &args->csa_sessionid); if (clp == NULL) goto out; @@ -510,7 +517,7 @@ __be32 nfs4_callback_recallany(struct cb_recallanyargs *args, void *dummy, if (!cps->clp) /* set in cb_sequence */ goto out; - dprintk_rcu("NFS: RECALL_ANY callback request from %s\n", + dprintk("NFS: RECALL_ANY callback request from %s\n", rpc_peeraddr2str(cps->clp->cl_rpcclient, RPC_DISPLAY_ADDR)); status = cpu_to_be32(NFS4ERR_INVAL); @@ -545,7 +552,7 @@ __be32 nfs4_callback_recallslot(struct cb_recallslotargs *args, void *dummy, if (!cps->clp) /* set in cb_sequence */ goto out; - dprintk_rcu("NFS: CB_RECALL_SLOT request from %s target max slots %d\n", + dprintk("NFS: CB_RECALL_SLOT request from %s target max slots %d\n", rpc_peeraddr2str(cps->clp->cl_rpcclient, RPC_DISPLAY_ADDR), args->crsa_target_max_slots); diff --git a/trunk/fs/nfs/callback_xdr.c b/trunk/fs/nfs/callback_xdr.c index 95bfc243992c..d50b2742f23b 100644 --- 
a/trunk/fs/nfs/callback_xdr.c +++ b/trunk/fs/nfs/callback_xdr.c @@ -9,8 +9,6 @@ #include #include #include -#include -#include #include #include #include "nfs4_fs.h" @@ -75,7 +73,7 @@ static __be32 *read_buf(struct xdr_stream *xdr, int nbytes) p = xdr_inline_decode(xdr, nbytes); if (unlikely(p == NULL)) - printk(KERN_WARNING "NFS: NFSv4 callback reply buffer overflowed!\n"); + printk(KERN_WARNING "NFSv4 callback reply buffer overflowed!\n"); return p; } @@ -140,10 +138,10 @@ static __be32 decode_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid) { __be32 *p; - p = read_buf(xdr, NFS4_STATEID_SIZE); + p = read_buf(xdr, 16); if (unlikely(p == NULL)) return htonl(NFS4ERR_RESOURCE); - memcpy(stateid, p, NFS4_STATEID_SIZE); + memcpy(stateid->data, p, 16); return 0; } @@ -157,7 +155,7 @@ static __be32 decode_compound_hdr_arg(struct xdr_stream *xdr, struct cb_compound return status; /* We do not like overly long tags! */ if (hdr->taglen > CB_OP_TAGLEN_MAXSZ - 12) { - printk("NFS: NFSv4 CALLBACK %s: client sent tag of length %u\n", + printk("NFSv4 CALLBACK %s: client sent tag of length %u\n", __func__, hdr->taglen); return htonl(NFS4ERR_RESOURCE); } @@ -169,7 +167,7 @@ static __be32 decode_compound_hdr_arg(struct xdr_stream *xdr, struct cb_compound if (hdr->minorversion <= 1) { hdr->cb_ident = ntohl(*p++); /* ignored by v4.1 */ } else { - pr_warn_ratelimited("NFS: %s: NFSv4 server callback with " + printk(KERN_WARNING "%s: NFSv4 server callback with " "illegal minor version %u!\n", __func__, hdr->minorversion); return htonl(NFS4ERR_MINOR_VERS_MISMATCH); @@ -761,14 +759,14 @@ static void nfs4_callback_free_slot(struct nfs4_session *session) * Let the state manager know callback processing done. 
* A single slot, so highest used slotid is either 0 or -1 */ - tbl->highest_used_slotid = NFS4_NO_SLOT; + tbl->highest_used_slotid = -1; nfs4_check_drain_bc_complete(session); spin_unlock(&tbl->slot_tbl_lock); } static void nfs4_cb_free_slot(struct cb_process_state *cps) { - if (cps->slotid != NFS4_NO_SLOT) + if (cps->slotid != -1) nfs4_callback_free_slot(cps->clp->cl_session); } @@ -862,8 +860,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r struct cb_process_state cps = { .drc_status = 0, .clp = NULL, - .slotid = NFS4_NO_SLOT, - .net = rqstp->rq_xprt->xpt_net, + .slotid = -1, }; unsigned int nops = 0; @@ -879,7 +876,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r return rpc_garbage_args; if (hdr_arg.minorversion == 0) { - cps.clp = nfs4_find_client_ident(rqstp->rq_xprt->xpt_net, hdr_arg.cb_ident); + cps.clp = nfs4_find_client_ident(hdr_arg.cb_ident); if (!cps.clp || !check_gss_callback_principal(cps.clp, rqstp)) return rpc_drop_reply; } diff --git a/trunk/fs/nfs/client.c b/trunk/fs/nfs/client.c index 4a108a0a2a60..d4f772ebd1ef 100644 --- a/trunk/fs/nfs/client.c +++ b/trunk/fs/nfs/client.c @@ -40,8 +40,6 @@ #include #include #include -#include -#include #include @@ -52,12 +50,15 @@ #include "internal.h" #include "fscache.h" #include "pnfs.h" -#include "netns.h" #define NFSDBG_FACILITY NFSDBG_CLIENT +static DEFINE_SPINLOCK(nfs_client_lock); +static LIST_HEAD(nfs_client_list); +static LIST_HEAD(nfs_volume_list); static DECLARE_WAIT_QUEUE_HEAD(nfs_client_active_wq); #ifdef CONFIG_NFS_V4 +static DEFINE_IDR(cb_ident_idr); /* Protected by nfs_client_lock */ /* * Get a unique NFSv4.0 callback identifier which will be used @@ -66,16 +67,15 @@ static DECLARE_WAIT_QUEUE_HEAD(nfs_client_active_wq); static int nfs_get_cb_ident_idr(struct nfs_client *clp, int minorversion) { int ret = 0; - struct nfs_net *nn = net_generic(clp->net, nfs_net_id); if (clp->rpc_ops->version != 4 || minorversion != 0) return 
ret; retry: - if (!idr_pre_get(&nn->cb_ident_idr, GFP_KERNEL)) + if (!idr_pre_get(&cb_ident_idr, GFP_KERNEL)) return -ENOMEM; - spin_lock(&nn->nfs_client_lock); - ret = idr_get_new(&nn->cb_ident_idr, clp, &clp->cl_cb_ident); - spin_unlock(&nn->nfs_client_lock); + spin_lock(&nfs_client_lock); + ret = idr_get_new(&cb_ident_idr, clp, &clp->cl_cb_ident); + spin_unlock(&nfs_client_lock); if (ret == -EAGAIN) goto retry; return ret; @@ -90,7 +90,7 @@ static bool nfs4_disable_idmapping = true; /* * RPC cruft for NFS */ -static const struct rpc_version *nfs_version[5] = { +static struct rpc_version *nfs_version[5] = { [2] = &nfs_version2, #ifdef CONFIG_NFS_V3 [3] = &nfs_version3, @@ -100,7 +100,7 @@ static const struct rpc_version *nfs_version[5] = { #endif }; -const struct rpc_program nfs_program = { +struct rpc_program nfs_program = { .name = "nfs", .number = NFS_PROGRAM, .nrvers = ARRAY_SIZE(nfs_version), @@ -116,11 +116,11 @@ struct rpc_stat nfs_rpcstat = { #ifdef CONFIG_NFS_V3_ACL static struct rpc_stat nfsacl_rpcstat = { &nfsacl_program }; -static const struct rpc_version *nfsacl_version[] = { +static struct rpc_version * nfsacl_version[] = { [3] = &nfsacl_version3, }; -const struct rpc_program nfsacl_program = { +struct rpc_program nfsacl_program = { .name = "nfsacl", .number = NFS_ACL_PROGRAM, .nrvers = ARRAY_SIZE(nfsacl_version), @@ -136,7 +136,6 @@ struct nfs_client_initdata { const struct nfs_rpc_ops *rpc_ops; int proto; u32 minorversion; - struct net *net; }; /* @@ -173,7 +172,6 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_ clp->cl_rpcclient = ERR_PTR(-EINVAL); clp->cl_proto = cl_init->proto; - clp->net = get_net(cl_init->net); #ifdef CONFIG_NFS_V4 err = nfs_get_cb_ident_idr(clp, cl_init->minorversion); @@ -205,11 +203,8 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_ #ifdef CONFIG_NFS_V4_1 static void nfs4_shutdown_session(struct nfs_client *clp) { - if (nfs4_has_session(clp)) { - 
nfs4_deviceid_purge_client(clp); + if (nfs4_has_session(clp)) nfs4_destroy_session(clp->cl_session); - } - } #else /* CONFIG_NFS_V4_1 */ static void nfs4_shutdown_session(struct nfs_client *clp) @@ -239,20 +234,16 @@ static void nfs4_shutdown_client(struct nfs_client *clp) } /* idr_remove_all is not needed as all id's are removed by nfs_put_client */ -void nfs_cleanup_cb_ident_idr(struct net *net) +void nfs_cleanup_cb_ident_idr(void) { - struct nfs_net *nn = net_generic(net, nfs_net_id); - - idr_destroy(&nn->cb_ident_idr); + idr_destroy(&cb_ident_idr); } /* nfs_client_lock held */ static void nfs_cb_idr_remove_locked(struct nfs_client *clp) { - struct nfs_net *nn = net_generic(clp->net, nfs_net_id); - if (clp->cl_cb_ident) - idr_remove(&nn->cb_ident_idr, clp->cl_cb_ident); + idr_remove(&cb_ident_idr, clp->cl_cb_ident); } static void pnfs_init_server(struct nfs_server *server) @@ -270,7 +261,7 @@ static void nfs4_shutdown_client(struct nfs_client *clp) { } -void nfs_cleanup_cb_ident_idr(struct net *net) +void nfs_cleanup_cb_ident_idr(void) { } @@ -302,10 +293,10 @@ static void nfs_free_client(struct nfs_client *clp) if (clp->cl_machine_cred != NULL) put_rpccred(clp->cl_machine_cred); - put_net(clp->net); + nfs4_deviceid_purge_client(clp); + kfree(clp->cl_hostname); kfree(clp->server_scope); - kfree(clp->impl_id); kfree(clp); dprintk("<-- nfs_free_client()\n"); @@ -316,18 +307,15 @@ static void nfs_free_client(struct nfs_client *clp) */ void nfs_put_client(struct nfs_client *clp) { - struct nfs_net *nn; - if (!clp) return; dprintk("--> nfs_put_client({%d})\n", atomic_read(&clp->cl_count)); - nn = net_generic(clp->net, nfs_net_id); - if (atomic_dec_and_lock(&clp->cl_count, &nn->nfs_client_lock)) { + if (atomic_dec_and_lock(&clp->cl_count, &nfs_client_lock)) { list_del(&clp->cl_share_link); nfs_cb_idr_remove_locked(clp); - spin_unlock(&nn->nfs_client_lock); + spin_unlock(&nfs_client_lock); BUG_ON(!list_empty(&clp->cl_superblocks)); @@ -405,7 +393,6 @@ static int 
nfs_sockaddr_cmp_ip4(const struct sockaddr *sa1, (sin1->sin_port == sin2->sin_port); } -#if defined(CONFIG_NFS_V4_1) /* * Test if two socket addresses represent the same actual socket, * by comparing (only) relevant fields, excluding the port number. @@ -424,7 +411,6 @@ static int nfs_sockaddr_match_ipaddr(const struct sockaddr *sa1, } return 0; } -#endif /* CONFIG_NFS_V4_1 */ /* * Test if two socket addresses represent the same actual socket, @@ -445,10 +431,10 @@ static int nfs_sockaddr_cmp(const struct sockaddr *sa1, return 0; } -#if defined(CONFIG_NFS_V4_1) /* Common match routine for v4.0 and v4.1 callback services */ -static bool nfs4_cb_match_client(const struct sockaddr *addr, - struct nfs_client *clp, u32 minorversion) +bool +nfs4_cb_match_client(const struct sockaddr *addr, struct nfs_client *clp, + u32 minorversion) { struct sockaddr *clap = (struct sockaddr *)&clp->cl_addr; @@ -468,7 +454,6 @@ static bool nfs4_cb_match_client(const struct sockaddr *addr, return true; } -#endif /* CONFIG_NFS_V4_1 */ /* * Find an nfs_client on the list that matches the initialisation data @@ -478,9 +463,8 @@ static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *dat { struct nfs_client *clp; const struct sockaddr *sap = data->addr; - struct nfs_net *nn = net_generic(data->net, nfs_net_id); - list_for_each_entry(clp, &nn->nfs_client_list, cl_share_link) { + list_for_each_entry(clp, &nfs_client_list, cl_share_link) { const struct sockaddr *clap = (struct sockaddr *)&clp->cl_addr; /* Don't match clients that failed to initialise properly */ if (clp->cl_cons_state < 0) @@ -518,14 +502,13 @@ nfs_get_client(const struct nfs_client_initdata *cl_init, { struct nfs_client *clp, *new = NULL; int error; - struct nfs_net *nn = net_generic(cl_init->net, nfs_net_id); dprintk("--> nfs_get_client(%s,v%u)\n", cl_init->hostname ?: "", cl_init->rpc_ops->version); /* see if the client already exists */ do { - spin_lock(&nn->nfs_client_lock); + 
spin_lock(&nfs_client_lock); clp = nfs_match_client(cl_init); if (clp) @@ -533,7 +516,7 @@ nfs_get_client(const struct nfs_client_initdata *cl_init, if (new) goto install_client; - spin_unlock(&nn->nfs_client_lock); + spin_unlock(&nfs_client_lock); new = nfs_alloc_client(cl_init); } while (!IS_ERR(new)); @@ -544,8 +527,8 @@ nfs_get_client(const struct nfs_client_initdata *cl_init, /* install a new client and return with it unready */ install_client: clp = new; - list_add(&clp->cl_share_link, &nn->nfs_client_list); - spin_unlock(&nn->nfs_client_lock); + list_add(&clp->cl_share_link, &nfs_client_list); + spin_unlock(&nfs_client_lock); error = cl_init->rpc_ops->init_client(clp, timeparms, ip_addr, authflavour, noresvport); @@ -560,7 +543,7 @@ nfs_get_client(const struct nfs_client_initdata *cl_init, * - make sure it's ready before returning */ found_client: - spin_unlock(&nn->nfs_client_lock); + spin_unlock(&nfs_client_lock); if (new) nfs_free_client(new); @@ -660,7 +643,7 @@ static int nfs_create_rpc_client(struct nfs_client *clp, { struct rpc_clnt *clnt = NULL; struct rpc_create_args args = { - .net = clp->net, + .net = &init_net, .protocol = clp->cl_proto, .address = (struct sockaddr *)&clp->cl_addr, .addrsize = clp->cl_addrlen, @@ -714,7 +697,6 @@ static int nfs_start_lockd(struct nfs_server *server) .nfs_version = clp->rpc_ops->version, .noresvport = server->flags & NFS_MOUNT_NORESVPORT ? 
1 : 0, - .net = clp->net, }; if (nlm_init.nfs_version > 3) @@ -850,7 +832,6 @@ static int nfs_init_server(struct nfs_server *server, .addrlen = data->nfs_server.addrlen, .rpc_ops = &nfs_v2_clientops, .proto = data->nfs_server.protocol, - .net = data->net, }; struct rpc_timeout timeparms; struct nfs_client *clp; @@ -1049,30 +1030,25 @@ static void nfs_server_copy_userdata(struct nfs_server *target, struct nfs_serve static void nfs_server_insert_lists(struct nfs_server *server) { struct nfs_client *clp = server->nfs_client; - struct nfs_net *nn = net_generic(clp->net, nfs_net_id); - spin_lock(&nn->nfs_client_lock); + spin_lock(&nfs_client_lock); list_add_tail_rcu(&server->client_link, &clp->cl_superblocks); - list_add_tail(&server->master_link, &nn->nfs_volume_list); + list_add_tail(&server->master_link, &nfs_volume_list); clear_bit(NFS_CS_STOP_RENEW, &clp->cl_res_state); - spin_unlock(&nn->nfs_client_lock); + spin_unlock(&nfs_client_lock); } static void nfs_server_remove_lists(struct nfs_server *server) { struct nfs_client *clp = server->nfs_client; - struct nfs_net *nn; - if (clp == NULL) - return; - nn = net_generic(clp->net, nfs_net_id); - spin_lock(&nn->nfs_client_lock); + spin_lock(&nfs_client_lock); list_del_rcu(&server->client_link); - if (list_empty(&clp->cl_superblocks)) + if (clp && list_empty(&clp->cl_superblocks)) set_bit(NFS_CS_STOP_RENEW, &clp->cl_res_state); list_del(&server->master_link); - spin_unlock(&nn->nfs_client_lock); + spin_unlock(&nfs_client_lock); synchronize_rcu(); } @@ -1111,8 +1087,6 @@ static struct nfs_server *nfs_alloc_server(void) return NULL; } - ida_init(&server->openowner_id); - ida_init(&server->lockowner_id); pnfs_init_server(server); return server; @@ -1138,8 +1112,6 @@ void nfs_free_server(struct nfs_server *server) nfs_put_client(server->nfs_client); - ida_destroy(&server->lockowner_id); - ida_destroy(&server->openowner_id); nfs_free_iostats(server->io_stats); bdi_destroy(&server->backing_dev_info); kfree(server); @@ -1215,22 
+1187,48 @@ struct nfs_server *nfs_create_server(const struct nfs_parsed_mount_data *data, } #ifdef CONFIG_NFS_V4 +/* + * NFSv4.0 callback thread helper + * + * Find a client by IP address, protocol version, and minorversion + * + * Called from the pg_authenticate method. The callback identifier + * is not used as it has not been decoded. + * + * Returns NULL if no such client + */ +struct nfs_client * +nfs4_find_client_no_ident(const struct sockaddr *addr) +{ + struct nfs_client *clp; + + spin_lock(&nfs_client_lock); + list_for_each_entry(clp, &nfs_client_list, cl_share_link) { + if (nfs4_cb_match_client(addr, clp, 0) == false) + continue; + atomic_inc(&clp->cl_count); + spin_unlock(&nfs_client_lock); + return clp; + } + spin_unlock(&nfs_client_lock); + return NULL; +} + /* * NFSv4.0 callback thread helper * * Find a client by callback identifier */ struct nfs_client * -nfs4_find_client_ident(struct net *net, int cb_ident) +nfs4_find_client_ident(int cb_ident) { struct nfs_client *clp; - struct nfs_net *nn = net_generic(net, nfs_net_id); - spin_lock(&nn->nfs_client_lock); - clp = idr_find(&nn->cb_ident_idr, cb_ident); + spin_lock(&nfs_client_lock); + clp = idr_find(&cb_ident_idr, cb_ident); if (clp) atomic_inc(&clp->cl_count); - spin_unlock(&nn->nfs_client_lock); + spin_unlock(&nfs_client_lock); return clp; } @@ -1243,14 +1241,13 @@ nfs4_find_client_ident(struct net *net, int cb_ident) * Returns NULL if no such client */ struct nfs_client * -nfs4_find_client_sessionid(struct net *net, const struct sockaddr *addr, +nfs4_find_client_sessionid(const struct sockaddr *addr, struct nfs4_sessionid *sid) { struct nfs_client *clp; - struct nfs_net *nn = net_generic(net, nfs_net_id); - spin_lock(&nn->nfs_client_lock); - list_for_each_entry(clp, &nn->nfs_client_list, cl_share_link) { + spin_lock(&nfs_client_lock); + list_for_each_entry(clp, &nfs_client_list, cl_share_link) { if (nfs4_cb_match_client(addr, clp, 1) == false) continue; @@ -1263,17 +1260,17 @@ 
nfs4_find_client_sessionid(struct net *net, const struct sockaddr *addr, continue; atomic_inc(&clp->cl_count); - spin_unlock(&nn->nfs_client_lock); + spin_unlock(&nfs_client_lock); return clp; } - spin_unlock(&nn->nfs_client_lock); + spin_unlock(&nfs_client_lock); return NULL; } #else /* CONFIG_NFS_V4_1 */ struct nfs_client * -nfs4_find_client_sessionid(struct net *net, const struct sockaddr *addr, +nfs4_find_client_sessionid(const struct sockaddr *addr, struct nfs4_sessionid *sid) { return NULL; @@ -1288,18 +1285,16 @@ static int nfs4_init_callback(struct nfs_client *clp) int error; if (clp->rpc_ops->version == 4) { - struct rpc_xprt *xprt; - - xprt = rcu_dereference_raw(clp->cl_rpcclient->cl_xprt); - if (nfs4_has_session(clp)) { - error = xprt_setup_backchannel(xprt, + error = xprt_setup_backchannel( + clp->cl_rpcclient->cl_xprt, NFS41_BC_MIN_CALLBACKS); if (error < 0) return error; } - error = nfs_callback_up(clp->cl_mvops->minor_version, xprt); + error = nfs_callback_up(clp->cl_mvops->minor_version, + clp->cl_rpcclient->cl_xprt); if (error < 0) { dprintk("%s: failed to start callback. 
Error = %d\n", __func__, error); @@ -1350,7 +1345,6 @@ int nfs4_init_client(struct nfs_client *clp, rpc_authflavor_t authflavour, int noresvport) { - char buf[INET6_ADDRSTRLEN + 1]; int error; if (clp->cl_cons_state == NFS_CS_READY) { @@ -1366,20 +1360,6 @@ int nfs4_init_client(struct nfs_client *clp, 1, noresvport); if (error < 0) goto error; - - /* If no clientaddr= option was specified, find a usable cb address */ - if (ip_addr == NULL) { - struct sockaddr_storage cb_addr; - struct sockaddr *sap = (struct sockaddr *)&cb_addr; - - error = rpc_localaddr(clp->cl_rpcclient, sap, sizeof(cb_addr)); - if (error < 0) - goto error; - error = rpc_ntop(sap, buf, sizeof(buf)); - if (error < 0) - goto error; - ip_addr = (const char *)buf; - } strlcpy(clp->cl_ipaddr, ip_addr, sizeof(clp->cl_ipaddr)); error = nfs_idmap_new(clp); @@ -1414,7 +1394,7 @@ static int nfs4_set_client(struct nfs_server *server, const char *ip_addr, rpc_authflavor_t authflavour, int proto, const struct rpc_timeout *timeparms, - u32 minorversion, struct net *net) + u32 minorversion) { struct nfs_client_initdata cl_init = { .hostname = hostname, @@ -1423,7 +1403,6 @@ static int nfs4_set_client(struct nfs_server *server, .rpc_ops = &nfs_v4_clientops, .proto = proto, .minorversion = minorversion, - .net = net, }; struct nfs_client *clp; int error; @@ -1475,7 +1454,6 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp, .rpc_ops = &nfs_v4_clientops, .proto = ds_proto, .minorversion = mds_clp->cl_minorversion, - .net = mds_clp->net, }; struct rpc_timeout ds_timeout = { .to_initval = 15 * HZ, @@ -1603,8 +1581,7 @@ static int nfs4_init_server(struct nfs_server *server, data->auth_flavors[0], data->nfs_server.protocol, &timeparms, - data->minorversion, - data->net); + data->minorversion); if (error < 0) goto error; @@ -1699,10 +1676,9 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data, data->addrlen, parent_client->cl_ipaddr, data->authflavor, - 
rpc_protocol(parent_server->client), + parent_server->client->cl_xprt->prot, parent_server->client->cl_timeout, - parent_client->cl_mvops->minor_version, - parent_client->net); + parent_client->cl_mvops->minor_version); if (error < 0) goto error; @@ -1795,18 +1771,6 @@ struct nfs_server *nfs_clone_server(struct nfs_server *source, return ERR_PTR(error); } -void nfs_clients_init(struct net *net) -{ - struct nfs_net *nn = net_generic(net, nfs_net_id); - - INIT_LIST_HEAD(&nn->nfs_client_list); - INIT_LIST_HEAD(&nn->nfs_volume_list); -#ifdef CONFIG_NFS_V4 - idr_init(&nn->cb_ident_idr); -#endif - spin_lock_init(&nn->nfs_client_lock); -} - #ifdef CONFIG_PROC_FS static struct proc_dir_entry *proc_fs_nfs; @@ -1860,15 +1824,13 @@ static int nfs_server_list_open(struct inode *inode, struct file *file) { struct seq_file *m; int ret; - struct pid_namespace *pid_ns = file->f_dentry->d_sb->s_fs_info; - struct net *net = pid_ns->child_reaper->nsproxy->net_ns; ret = seq_open(file, &nfs_server_list_ops); if (ret < 0) return ret; m = file->private_data; - m->private = net; + m->private = PDE(inode)->data; return 0; } @@ -1878,11 +1840,9 @@ static int nfs_server_list_open(struct inode *inode, struct file *file) */ static void *nfs_server_list_start(struct seq_file *m, loff_t *_pos) { - struct nfs_net *nn = net_generic(m->private, nfs_net_id); - /* lock the list against modification */ - spin_lock(&nn->nfs_client_lock); - return seq_list_start_head(&nn->nfs_client_list, *_pos); + spin_lock(&nfs_client_lock); + return seq_list_start_head(&nfs_client_list, *_pos); } /* @@ -1890,9 +1850,7 @@ static void *nfs_server_list_start(struct seq_file *m, loff_t *_pos) */ static void *nfs_server_list_next(struct seq_file *p, void *v, loff_t *pos) { - struct nfs_net *nn = net_generic(p->private, nfs_net_id); - - return seq_list_next(v, &nn->nfs_client_list, pos); + return seq_list_next(v, &nfs_client_list, pos); } /* @@ -1900,9 +1858,7 @@ static void *nfs_server_list_next(struct seq_file *p, void 
*v, loff_t *pos) */ static void nfs_server_list_stop(struct seq_file *p, void *v) { - struct nfs_net *nn = net_generic(p->private, nfs_net_id); - - spin_unlock(&nn->nfs_client_lock); + spin_unlock(&nfs_client_lock); } /* @@ -1911,10 +1867,9 @@ static void nfs_server_list_stop(struct seq_file *p, void *v) static int nfs_server_list_show(struct seq_file *m, void *v) { struct nfs_client *clp; - struct nfs_net *nn = net_generic(m->private, nfs_net_id); /* display header on line 1 */ - if (v == &nn->nfs_client_list) { + if (v == &nfs_client_list) { seq_puts(m, "NV SERVER PORT USE HOSTNAME\n"); return 0; } @@ -1926,14 +1881,12 @@ static int nfs_server_list_show(struct seq_file *m, void *v) if (clp->cl_cons_state != NFS_CS_READY) return 0; - rcu_read_lock(); seq_printf(m, "v%u %s %s %3d %s\n", clp->rpc_ops->version, rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_ADDR), rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_PORT), atomic_read(&clp->cl_count), clp->cl_hostname); - rcu_read_unlock(); return 0; } @@ -1945,15 +1898,13 @@ static int nfs_volume_list_open(struct inode *inode, struct file *file) { struct seq_file *m; int ret; - struct pid_namespace *pid_ns = file->f_dentry->d_sb->s_fs_info; - struct net *net = pid_ns->child_reaper->nsproxy->net_ns; ret = seq_open(file, &nfs_volume_list_ops); if (ret < 0) return ret; m = file->private_data; - m->private = net; + m->private = PDE(inode)->data; return 0; } @@ -1963,11 +1914,9 @@ static int nfs_volume_list_open(struct inode *inode, struct file *file) */ static void *nfs_volume_list_start(struct seq_file *m, loff_t *_pos) { - struct nfs_net *nn = net_generic(m->private, nfs_net_id); - /* lock the list against modification */ - spin_lock(&nn->nfs_client_lock); - return seq_list_start_head(&nn->nfs_volume_list, *_pos); + spin_lock(&nfs_client_lock); + return seq_list_start_head(&nfs_volume_list, *_pos); } /* @@ -1975,9 +1924,7 @@ static void *nfs_volume_list_start(struct seq_file *m, loff_t *_pos) */ static void 
*nfs_volume_list_next(struct seq_file *p, void *v, loff_t *pos) { - struct nfs_net *nn = net_generic(p->private, nfs_net_id); - - return seq_list_next(v, &nn->nfs_volume_list, pos); + return seq_list_next(v, &nfs_volume_list, pos); } /* @@ -1985,9 +1932,7 @@ static void *nfs_volume_list_next(struct seq_file *p, void *v, loff_t *pos) */ static void nfs_volume_list_stop(struct seq_file *p, void *v) { - struct nfs_net *nn = net_generic(p->private, nfs_net_id); - - spin_unlock(&nn->nfs_client_lock); + spin_unlock(&nfs_client_lock); } /* @@ -1998,10 +1943,9 @@ static int nfs_volume_list_show(struct seq_file *m, void *v) struct nfs_server *server; struct nfs_client *clp; char dev[8], fsid[17]; - struct nfs_net *nn = net_generic(m->private, nfs_net_id); /* display header on line 1 */ - if (v == &nn->nfs_volume_list) { + if (v == &nfs_volume_list) { seq_puts(m, "NV SERVER PORT DEV FSID FSC\n"); return 0; } @@ -2016,7 +1960,6 @@ static int nfs_volume_list_show(struct seq_file *m, void *v) (unsigned long long) server->fsid.major, (unsigned long long) server->fsid.minor); - rcu_read_lock(); seq_printf(m, "v%u %s %s %-7s %-17s %s\n", clp->rpc_ops->version, rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_ADDR), @@ -2024,7 +1967,6 @@ static int nfs_volume_list_show(struct seq_file *m, void *v) dev, fsid, nfs_server_fscache_state(server)); - rcu_read_unlock(); return 0; } diff --git a/trunk/fs/nfs/delegation.c b/trunk/fs/nfs/delegation.c index 89af1d269274..7f2654069806 100644 --- a/trunk/fs/nfs/delegation.c +++ b/trunk/fs/nfs/delegation.c @@ -105,7 +105,7 @@ static int nfs_delegation_claim_opens(struct inode *inode, const nfs4_stateid *s continue; if (!test_bit(NFS_DELEGATED_STATE, &state->flags)) continue; - if (!nfs4_stateid_match(&state->stateid, stateid)) + if (memcmp(state->stateid.data, stateid->data, sizeof(state->stateid.data)) != 0) continue; get_nfs_open_context(ctx); spin_unlock(&inode->i_lock); @@ -139,7 +139,8 @@ void nfs_inode_reclaim_delegation(struct inode 
*inode, struct rpc_cred *cred, if (delegation != NULL) { spin_lock(&delegation->lock); if (delegation->inode != NULL) { - nfs4_stateid_copy(&delegation->stateid, &res->delegation); + memcpy(delegation->stateid.data, res->delegation.data, + sizeof(delegation->stateid.data)); delegation->type = res->delegation_type; delegation->maxsize = res->maxsize; oldcred = delegation->cred; @@ -235,7 +236,8 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct delegation = kmalloc(sizeof(*delegation), GFP_NOFS); if (delegation == NULL) return -ENOMEM; - nfs4_stateid_copy(&delegation->stateid, &res->delegation); + memcpy(delegation->stateid.data, res->delegation.data, + sizeof(delegation->stateid.data)); delegation->type = res->delegation_type; delegation->maxsize = res->maxsize; delegation->change_attr = inode->i_version; @@ -248,22 +250,19 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct old_delegation = rcu_dereference_protected(nfsi->delegation, lockdep_is_held(&clp->cl_lock)); if (old_delegation != NULL) { - if (nfs4_stateid_match(&delegation->stateid, - &old_delegation->stateid) && + if (memcmp(&delegation->stateid, &old_delegation->stateid, + sizeof(old_delegation->stateid)) == 0 && delegation->type == old_delegation->type) { goto out; } /* * Deal with broken servers that hand out two * delegations for the same file. - * Allow for upgrades to a WRITE delegation, but - * nothing else. 
*/ dfprintk(FILE, "%s: server %s handed out " "a duplicate delegation!\n", __func__, clp->cl_hostname); - if (delegation->type == old_delegation->type || - !(delegation->type & FMODE_WRITE)) { + if (delegation->type <= old_delegation->type) { freeme = delegation; delegation = NULL; goto out; @@ -456,23 +455,16 @@ static void nfs_client_mark_return_all_delegation_types(struct nfs_client *clp, rcu_read_unlock(); } -static void nfs_delegation_run_state_manager(struct nfs_client *clp) +static void nfs_client_mark_return_all_delegations(struct nfs_client *clp) { - if (test_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state)) - nfs4_schedule_state_manager(clp); + nfs_client_mark_return_all_delegation_types(clp, FMODE_READ|FMODE_WRITE); } -void nfs_remove_bad_delegation(struct inode *inode) +static void nfs_delegation_run_state_manager(struct nfs_client *clp) { - struct nfs_delegation *delegation; - - delegation = nfs_detach_delegation(NFS_I(inode), NFS_SERVER(inode)); - if (delegation) { - nfs_inode_find_state_and_recover(inode, &delegation->stateid); - nfs_free_delegation(delegation); - } + if (test_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state)) + nfs4_schedule_state_manager(clp); } -EXPORT_SYMBOL_GPL(nfs_remove_bad_delegation); /** * nfs_expire_all_delegation_types @@ -496,6 +488,18 @@ void nfs_expire_all_delegations(struct nfs_client *clp) nfs_expire_all_delegation_types(clp, FMODE_READ|FMODE_WRITE); } +/** + * nfs_handle_cb_pathdown - return all delegations after NFS4ERR_CB_PATH_DOWN + * @clp: client to process + * + */ +void nfs_handle_cb_pathdown(struct nfs_client *clp) +{ + if (clp == NULL) + return; + nfs_client_mark_return_all_delegations(clp); +} + static void nfs_mark_return_unreferenced_delegations(struct nfs_server *server) { struct nfs_delegation *delegation; @@ -527,7 +531,7 @@ void nfs_expire_unreferenced_delegations(struct nfs_client *clp) /** * nfs_async_inode_return_delegation - asynchronously return a delegation * @inode: inode to process - * @stateid: state ID 
information + * @stateid: state ID information from CB_RECALL arguments * * Returns zero on success, or a negative errno value. */ @@ -541,7 +545,7 @@ int nfs_async_inode_return_delegation(struct inode *inode, rcu_read_lock(); delegation = rcu_dereference(NFS_I(inode)->delegation); - if (!clp->cl_mvops->match_stateid(&delegation->stateid, stateid)) { + if (!clp->cl_mvops->validate_stateid(delegation, stateid)) { rcu_read_unlock(); return -ENOENT; } @@ -680,25 +684,21 @@ int nfs_delegations_present(struct nfs_client *clp) * nfs4_copy_delegation_stateid - Copy inode's state ID information * @dst: stateid data structure to fill in * @inode: inode to check - * @flags: delegation type requirement * - * Returns "true" and fills in "dst->data" * if inode had a delegation, - * otherwise "false" is returned. + * Returns one and fills in "dst->data" * if inode had a delegation, + * otherwise zero is returned. */ -bool nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode, - fmode_t flags) +int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode) { struct nfs_inode *nfsi = NFS_I(inode); struct nfs_delegation *delegation; - bool ret; + int ret = 0; - flags &= FMODE_READ|FMODE_WRITE; rcu_read_lock(); delegation = rcu_dereference(nfsi->delegation); - ret = (delegation != NULL && (delegation->type & flags) == flags); - if (ret) { - nfs4_stateid_copy(dst, &delegation->stateid); - nfs_mark_delegation_referenced(delegation); + if (delegation != NULL) { + memcpy(dst->data, delegation->stateid.data, sizeof(dst->data)); + ret = 1; } rcu_read_unlock(); return ret; diff --git a/trunk/fs/nfs/delegation.h b/trunk/fs/nfs/delegation.h index cd6a7a8dadae..d9322e490c56 100644 --- a/trunk/fs/nfs/delegation.h +++ b/trunk/fs/nfs/delegation.h @@ -42,9 +42,9 @@ void nfs_super_return_all_delegations(struct super_block *sb); void nfs_expire_all_delegations(struct nfs_client *clp); void nfs_expire_all_delegation_types(struct nfs_client *clp, fmode_t flags); void 
nfs_expire_unreferenced_delegations(struct nfs_client *clp); +void nfs_handle_cb_pathdown(struct nfs_client *clp); int nfs_client_return_marked_delegations(struct nfs_client *clp); int nfs_delegations_present(struct nfs_client *clp); -void nfs_remove_bad_delegation(struct inode *inode); void nfs_delegation_mark_reclaim(struct nfs_client *clp); void nfs_delegation_reap_unclaimed(struct nfs_client *clp); @@ -53,7 +53,7 @@ void nfs_delegation_reap_unclaimed(struct nfs_client *clp); int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid, int issync); int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid); int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl); -bool nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode, fmode_t flags); +int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode); void nfs_mark_delegation_referenced(struct nfs_delegation *delegation); int nfs_have_delegation(struct inode *inode, fmode_t flags); diff --git a/trunk/fs/nfs/dir.c b/trunk/fs/nfs/dir.c index 4aaf0316d76a..32aa6917265a 100644 --- a/trunk/fs/nfs/dir.c +++ b/trunk/fs/nfs/dir.c @@ -207,7 +207,7 @@ struct nfs_cache_array_entry { }; struct nfs_cache_array { - int size; + unsigned int size; int eof_index; u64 last_cookie; struct nfs_cache_array_entry array[0]; @@ -1429,7 +1429,6 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry } open_flags = nd->intent.open.flags; - attr.ia_valid = 0; ctx = create_nfs_open_context(dentry, open_flags); res = ERR_CAST(ctx); @@ -1438,14 +1437,11 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry if (nd->flags & LOOKUP_CREATE) { attr.ia_mode = nd->intent.open.create_mode; - attr.ia_valid |= ATTR_MODE; + attr.ia_valid = ATTR_MODE; attr.ia_mode &= ~current_umask(); - } else + } else { open_flags &= ~(O_EXCL | O_CREAT); - - if 
(open_flags & O_TRUNC) { - attr.ia_valid |= ATTR_SIZE; - attr.ia_size = 0; + attr.ia_valid = 0; } /* Open the file on the server */ @@ -1499,7 +1495,6 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd) struct inode *inode; struct inode *dir; struct nfs_open_context *ctx; - struct iattr attr; int openflags, ret = 0; if (nd->flags & LOOKUP_RCU) @@ -1528,27 +1523,19 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd) /* We cannot do exclusive creation on a positive dentry */ if ((openflags & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL)) goto no_open_dput; - /* We can't create new files here */ - openflags &= ~(O_CREAT|O_EXCL); + /* We can't create new files, or truncate existing ones here */ + openflags &= ~(O_CREAT|O_EXCL|O_TRUNC); ctx = create_nfs_open_context(dentry, openflags); ret = PTR_ERR(ctx); if (IS_ERR(ctx)) goto out; - - attr.ia_valid = 0; - if (openflags & O_TRUNC) { - attr.ia_valid |= ATTR_SIZE; - attr.ia_size = 0; - nfs_wb_all(inode); - } - /* * Note: we're not holding inode->i_mutex and so may be racing with * operations that change the directory. We therefore save the * change attribute *before* we do the RPC call. 
*/ - inode = NFS_PROTO(dir)->open_context(dir, ctx, openflags, &attr); + inode = NFS_PROTO(dir)->open_context(dir, ctx, openflags, NULL); if (IS_ERR(inode)) { ret = PTR_ERR(inode); switch (ret) { diff --git a/trunk/fs/nfs/direct.c b/trunk/fs/nfs/direct.c index 9c7f66ac6cc2..1940f1a56a5f 100644 --- a/trunk/fs/nfs/direct.c +++ b/trunk/fs/nfs/direct.c @@ -265,7 +265,9 @@ static void nfs_direct_read_release(void *calldata) } static const struct rpc_call_ops nfs_read_direct_ops = { +#if defined(CONFIG_NFS_V4_1) .rpc_call_prepare = nfs_read_prepare, +#endif /* CONFIG_NFS_V4_1 */ .rpc_call_done = nfs_direct_read_result, .rpc_release = nfs_direct_read_release, }; @@ -552,7 +554,9 @@ static void nfs_direct_commit_release(void *calldata) } static const struct rpc_call_ops nfs_commit_direct_ops = { +#if defined(CONFIG_NFS_V4_1) .rpc_call_prepare = nfs_write_prepare, +#endif /* CONFIG_NFS_V4_1 */ .rpc_call_done = nfs_direct_commit_result, .rpc_release = nfs_direct_commit_release, }; @@ -692,7 +696,9 @@ static void nfs_direct_write_release(void *calldata) } static const struct rpc_call_ops nfs_write_direct_ops = { +#if defined(CONFIG_NFS_V4_1) .rpc_call_prepare = nfs_write_prepare, +#endif /* CONFIG_NFS_V4_1 */ .rpc_call_done = nfs_direct_write_result, .rpc_release = nfs_direct_write_release, }; diff --git a/trunk/fs/nfs/dns_resolve.c b/trunk/fs/nfs/dns_resolve.c index b3924b8a6000..a6e711ad130f 100644 --- a/trunk/fs/nfs/dns_resolve.c +++ b/trunk/fs/nfs/dns_resolve.c @@ -10,9 +10,8 @@ #include #include -#include "dns_resolve.h" -ssize_t nfs_dns_resolve_name(struct net *net, char *name, size_t namelen, +ssize_t nfs_dns_resolve_name(char *name, size_t namelen, struct sockaddr *sa, size_t salen) { ssize_t ret; @@ -21,7 +20,7 @@ ssize_t nfs_dns_resolve_name(struct net *net, char *name, size_t namelen, ip_len = dns_query(NULL, name, namelen, NULL, &ip_addr, NULL); if (ip_len > 0) - ret = rpc_pton(net, ip_addr, ip_len, sa, salen); + ret = rpc_pton(ip_addr, ip_len, sa, salen); else 
ret = -ESRCH; kfree(ip_addr); @@ -41,15 +40,15 @@ ssize_t nfs_dns_resolve_name(struct net *net, char *name, size_t namelen, #include #include #include -#include #include "dns_resolve.h" #include "cache_lib.h" -#include "netns.h" #define NFS_DNS_HASHBITS 4 #define NFS_DNS_HASHTBL_SIZE (1 << NFS_DNS_HASHBITS) +static struct cache_head *nfs_dns_table[NFS_DNS_HASHTBL_SIZE]; + struct nfs_dns_ent { struct cache_head h; @@ -225,7 +224,7 @@ static int nfs_dns_parse(struct cache_detail *cd, char *buf, int buflen) len = qword_get(&buf, buf1, sizeof(buf1)); if (len <= 0) goto out; - key.addrlen = rpc_pton(cd->net, buf1, len, + key.addrlen = rpc_pton(buf1, len, (struct sockaddr *)&key.addr, sizeof(key.addr)); @@ -260,6 +259,21 @@ static int nfs_dns_parse(struct cache_detail *cd, char *buf, int buflen) return ret; } +static struct cache_detail nfs_dns_resolve = { + .owner = THIS_MODULE, + .hash_size = NFS_DNS_HASHTBL_SIZE, + .hash_table = nfs_dns_table, + .name = "dns_resolve", + .cache_put = nfs_dns_ent_put, + .cache_upcall = nfs_dns_upcall, + .cache_parse = nfs_dns_parse, + .cache_show = nfs_dns_show, + .match = nfs_dns_match, + .init = nfs_dns_ent_init, + .update = nfs_dns_ent_update, + .alloc = nfs_dns_ent_alloc, +}; + static int do_cache_lookup(struct cache_detail *cd, struct nfs_dns_ent *key, struct nfs_dns_ent **item, @@ -322,8 +336,8 @@ static int do_cache_lookup_wait(struct cache_detail *cd, return ret; } -ssize_t nfs_dns_resolve_name(struct net *net, char *name, - size_t namelen, struct sockaddr *sa, size_t salen) +ssize_t nfs_dns_resolve_name(char *name, size_t namelen, + struct sockaddr *sa, size_t salen) { struct nfs_dns_ent key = { .hostname = name, @@ -331,118 +345,28 @@ ssize_t nfs_dns_resolve_name(struct net *net, char *name, }; struct nfs_dns_ent *item = NULL; ssize_t ret; - struct nfs_net *nn = net_generic(net, nfs_net_id); - ret = do_cache_lookup_wait(nn->nfs_dns_resolve, &key, &item); + ret = do_cache_lookup_wait(&nfs_dns_resolve, &key, &item); if (ret == 
0) { if (salen >= item->addrlen) { memcpy(sa, &item->addr, item->addrlen); ret = item->addrlen; } else ret = -EOVERFLOW; - cache_put(&item->h, nn->nfs_dns_resolve); + cache_put(&item->h, &nfs_dns_resolve); } else if (ret == -ENOENT) ret = -ESRCH; return ret; } -int nfs_dns_resolver_cache_init(struct net *net) -{ - int err = -ENOMEM; - struct nfs_net *nn = net_generic(net, nfs_net_id); - struct cache_detail *cd; - struct cache_head **tbl; - - cd = kzalloc(sizeof(struct cache_detail), GFP_KERNEL); - if (cd == NULL) - goto err_cd; - - tbl = kzalloc(NFS_DNS_HASHTBL_SIZE * sizeof(struct cache_head *), - GFP_KERNEL); - if (tbl == NULL) - goto err_tbl; - - cd->owner = THIS_MODULE, - cd->hash_size = NFS_DNS_HASHTBL_SIZE, - cd->hash_table = tbl, - cd->name = "dns_resolve", - cd->cache_put = nfs_dns_ent_put, - cd->cache_upcall = nfs_dns_upcall, - cd->cache_parse = nfs_dns_parse, - cd->cache_show = nfs_dns_show, - cd->match = nfs_dns_match, - cd->init = nfs_dns_ent_init, - cd->update = nfs_dns_ent_update, - cd->alloc = nfs_dns_ent_alloc, - - nfs_cache_init(cd); - err = nfs_cache_register_net(net, cd); - if (err) - goto err_reg; - nn->nfs_dns_resolve = cd; - return 0; - -err_reg: - nfs_cache_destroy(cd); - kfree(cd->hash_table); -err_tbl: - kfree(cd); -err_cd: - return err; -} - -void nfs_dns_resolver_cache_destroy(struct net *net) -{ - struct nfs_net *nn = net_generic(net, nfs_net_id); - struct cache_detail *cd = nn->nfs_dns_resolve; - - nfs_cache_unregister_net(net, cd); - nfs_cache_destroy(cd); - kfree(cd->hash_table); - kfree(cd); -} - -static int rpc_pipefs_event(struct notifier_block *nb, unsigned long event, - void *ptr) -{ - struct super_block *sb = ptr; - struct net *net = sb->s_fs_info; - struct nfs_net *nn = net_generic(net, nfs_net_id); - struct cache_detail *cd = nn->nfs_dns_resolve; - int ret = 0; - - if (cd == NULL) - return 0; - - if (!try_module_get(THIS_MODULE)) - return 0; - - switch (event) { - case RPC_PIPEFS_MOUNT: - ret = nfs_cache_register_sb(sb, cd); - 
break; - case RPC_PIPEFS_UMOUNT: - nfs_cache_unregister_sb(sb, cd); - break; - default: - ret = -ENOTSUPP; - break; - } - module_put(THIS_MODULE); - return ret; -} - -static struct notifier_block nfs_dns_resolver_block = { - .notifier_call = rpc_pipefs_event, -}; - int nfs_dns_resolver_init(void) { - return rpc_pipefs_notifier_register(&nfs_dns_resolver_block); + return nfs_cache_register(&nfs_dns_resolve); } void nfs_dns_resolver_destroy(void) { - rpc_pipefs_notifier_unregister(&nfs_dns_resolver_block); + nfs_cache_unregister(&nfs_dns_resolve); } + #endif diff --git a/trunk/fs/nfs/dns_resolve.h b/trunk/fs/nfs/dns_resolve.h index 2e4f596d2923..199bb5543a91 100644 --- a/trunk/fs/nfs/dns_resolve.h +++ b/trunk/fs/nfs/dns_resolve.h @@ -15,22 +15,12 @@ static inline int nfs_dns_resolver_init(void) static inline void nfs_dns_resolver_destroy(void) {} - -static inline int nfs_dns_resolver_cache_init(struct net *net) -{ - return 0; -} - -static inline void nfs_dns_resolver_cache_destroy(struct net *net) -{} #else extern int nfs_dns_resolver_init(void); extern void nfs_dns_resolver_destroy(void); -extern int nfs_dns_resolver_cache_init(struct net *net); -extern void nfs_dns_resolver_cache_destroy(struct net *net); #endif -extern ssize_t nfs_dns_resolve_name(struct net *net, char *name, - size_t namelen, struct sockaddr *sa, size_t salen); +extern ssize_t nfs_dns_resolve_name(char *name, size_t namelen, + struct sockaddr *sa, size_t salen); #endif diff --git a/trunk/fs/nfs/file.c b/trunk/fs/nfs/file.c index 4fdaaa63cf1c..c43a452f7da2 100644 --- a/trunk/fs/nfs/file.c +++ b/trunk/fs/nfs/file.c @@ -530,8 +530,6 @@ static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) if (mapping != dentry->d_inode->i_mapping) goto out_unlock; - wait_on_page_writeback(page); - pagelen = nfs_page_length(page); if (pagelen == 0) goto out_unlock; diff --git a/trunk/fs/nfs/fscache.c b/trunk/fs/nfs/fscache.c index ae65c16b3670..419119c371bf 100644 --- 
a/trunk/fs/nfs/fscache.c +++ b/trunk/fs/nfs/fscache.c @@ -327,7 +327,7 @@ void nfs_fscache_reset_inode_cookie(struct inode *inode) { struct nfs_inode *nfsi = NFS_I(inode); struct nfs_server *nfss = NFS_SERVER(inode); - NFS_IFDEBUG(struct fscache_cookie *old = nfsi->fscache); + struct fscache_cookie *old = nfsi->fscache; nfs_fscache_inode_lock(inode); if (nfsi->fscache) { diff --git a/trunk/fs/nfs/idmap.c b/trunk/fs/nfs/idmap.c index b7f348bb618b..a1bbf7780dfc 100644 --- a/trunk/fs/nfs/idmap.c +++ b/trunk/fs/nfs/idmap.c @@ -34,29 +34,11 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include -#include -#include +#include +#include +#include #include -#include -#include #include -#include -#include -#include -#include -#include -#include - -#include "internal.h" -#include "netns.h" - -#define NFS_UINT_MAXLEN 11 - -/* Default cache timeout is 10 minutes */ -unsigned int nfs_idmap_cache_timeout = 600; -static const struct cred *id_resolver_cache; -static struct key_type key_type_id_resolver_legacy; - /** * nfs_fattr_init_names - initialise the nfs_fattr owner_name/group_name fields @@ -160,7 +142,24 @@ static int nfs_map_numeric_to_string(__u32 id, char *buf, size_t buflen) return snprintf(buf, buflen, "%u", id); } -static struct key_type key_type_id_resolver = { +#ifdef CONFIG_NFS_USE_NEW_IDMAPPER + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#define NFS_UINT_MAXLEN 11 + +const struct cred *id_resolver_cache; + +struct key_type key_type_id_resolver = { .name = "id_resolver", .instantiate = user_instantiate, .match = user_match, @@ -170,14 +169,13 @@ static struct key_type key_type_id_resolver = { .read = user_read, }; -static int nfs_idmap_init_keyring(void) +int nfs_idmap_init(void) { struct cred *cred; struct key *keyring; int ret = 0; - printk(KERN_NOTICE "NFS: Registering the %s key type\n", - key_type_id_resolver.name); + printk(KERN_NOTICE "Registering the %s key type\n", 
key_type_id_resolver.name); cred = prepare_kernel_cred(NULL); if (!cred) @@ -213,7 +211,7 @@ static int nfs_idmap_init_keyring(void) return ret; } -static void nfs_idmap_quit_keyring(void) +void nfs_idmap_quit(void) { key_revoke(id_resolver_cache->thread_keyring); unregister_key_type(&key_type_id_resolver); @@ -248,10 +246,8 @@ static ssize_t nfs_idmap_get_desc(const char *name, size_t namelen, return desclen; } -static ssize_t nfs_idmap_request_key(struct key_type *key_type, - const char *name, size_t namelen, - const char *type, void *data, - size_t data_size, struct idmap *idmap) +static ssize_t nfs_idmap_request_key(const char *name, size_t namelen, + const char *type, void *data, size_t data_size) { const struct cred *saved_cred; struct key *rkey; @@ -264,12 +260,8 @@ static ssize_t nfs_idmap_request_key(struct key_type *key_type, goto out; saved_cred = override_creds(id_resolver_cache); - if (idmap) - rkey = request_key_with_auxdata(key_type, desc, "", 0, idmap); - else - rkey = request_key(&key_type_id_resolver, desc, ""); + rkey = request_key(&key_type_id_resolver, desc, ""); revert_creds(saved_cred); - kfree(desc); if (IS_ERR(rkey)) { ret = PTR_ERR(rkey); @@ -302,46 +294,31 @@ static ssize_t nfs_idmap_request_key(struct key_type *key_type, return ret; } -static ssize_t nfs_idmap_get_key(const char *name, size_t namelen, - const char *type, void *data, - size_t data_size, struct idmap *idmap) -{ - ssize_t ret = nfs_idmap_request_key(&key_type_id_resolver, - name, namelen, type, data, - data_size, NULL); - if (ret < 0) { - ret = nfs_idmap_request_key(&key_type_id_resolver_legacy, - name, namelen, type, data, - data_size, idmap); - } - return ret; -} /* ID -> Name */ -static ssize_t nfs_idmap_lookup_name(__u32 id, const char *type, char *buf, - size_t buflen, struct idmap *idmap) +static ssize_t nfs_idmap_lookup_name(__u32 id, const char *type, char *buf, size_t buflen) { char id_str[NFS_UINT_MAXLEN]; int id_len; ssize_t ret; id_len = snprintf(id_str, 
sizeof(id_str), "%u", id); - ret = nfs_idmap_get_key(id_str, id_len, type, buf, buflen, idmap); + ret = nfs_idmap_request_key(id_str, id_len, type, buf, buflen); if (ret < 0) return -EINVAL; return ret; } /* Name -> ID */ -static int nfs_idmap_lookup_id(const char *name, size_t namelen, const char *type, - __u32 *id, struct idmap *idmap) +static int nfs_idmap_lookup_id(const char *name, size_t namelen, + const char *type, __u32 *id) { char id_str[NFS_UINT_MAXLEN]; long id_long; ssize_t data_size; int ret = 0; - data_size = nfs_idmap_get_key(name, namelen, type, id_str, NFS_UINT_MAXLEN, idmap); + data_size = nfs_idmap_request_key(name, namelen, type, id_str, NFS_UINT_MAXLEN); if (data_size <= 0) { ret = -EINVAL; } else { @@ -351,103 +328,114 @@ static int nfs_idmap_lookup_id(const char *name, size_t namelen, const char *typ return ret; } -/* idmap classic begins here */ -module_param(nfs_idmap_cache_timeout, int, 0644); +int nfs_map_name_to_uid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *uid) +{ + if (nfs_map_string_to_numeric(name, namelen, uid)) + return 0; + return nfs_idmap_lookup_id(name, namelen, "uid", uid); +} -struct idmap { - struct rpc_pipe *idmap_pipe; - struct key_construction *idmap_key_cons; +int nfs_map_group_to_gid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *gid) +{ + if (nfs_map_string_to_numeric(name, namelen, gid)) + return 0; + return nfs_idmap_lookup_id(name, namelen, "gid", gid); +} + +int nfs_map_uid_to_name(const struct nfs_server *server, __u32 uid, char *buf, size_t buflen) +{ + int ret = -EINVAL; + + if (!(server->caps & NFS_CAP_UIDGID_NOMAP)) + ret = nfs_idmap_lookup_name(uid, "user", buf, buflen); + if (ret < 0) + ret = nfs_map_numeric_to_string(uid, buf, buflen); + return ret; +} +int nfs_map_gid_to_group(const struct nfs_server *server, __u32 gid, char *buf, size_t buflen) +{ + int ret = -EINVAL; + + if (!(server->caps & NFS_CAP_UIDGID_NOMAP)) + ret = 
nfs_idmap_lookup_name(gid, "group", buf, buflen); + if (ret < 0) + ret = nfs_map_numeric_to_string(gid, buf, buflen); + return ret; +} + +#else /* CONFIG_NFS_USE_NEW_IDMAPPER not defined */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "nfs4_fs.h" + +#define IDMAP_HASH_SZ 128 + +/* Default cache timeout is 10 minutes */ +unsigned int nfs_idmap_cache_timeout = 600 * HZ; + +static int param_set_idmap_timeout(const char *val, struct kernel_param *kp) +{ + char *endp; + int num = simple_strtol(val, &endp, 0); + int jif = num * HZ; + if (endp == val || *endp || num < 0 || jif < num) + return -EINVAL; + *((int *)kp->arg) = jif; + return 0; +} + +module_param_call(idmap_cache_timeout, param_set_idmap_timeout, param_get_int, + &nfs_idmap_cache_timeout, 0644); + +struct idmap_hashent { + unsigned long ih_expires; + __u32 ih_id; + size_t ih_namelen; + char ih_name[IDMAP_NAMESZ]; }; -enum { - Opt_find_uid, Opt_find_gid, Opt_find_user, Opt_find_group, Opt_find_err +struct idmap_hashtable { + __u8 h_type; + struct idmap_hashent h_entries[IDMAP_HASH_SZ]; }; -static const match_table_t nfs_idmap_tokens = { - { Opt_find_uid, "uid:%s" }, - { Opt_find_gid, "gid:%s" }, - { Opt_find_user, "user:%s" }, - { Opt_find_group, "group:%s" }, - { Opt_find_err, NULL } +struct idmap { + struct dentry *idmap_dentry; + wait_queue_head_t idmap_wq; + struct idmap_msg idmap_im; + struct mutex idmap_lock; /* Serializes upcalls */ + struct mutex idmap_im_lock; /* Protects the hashtable */ + struct idmap_hashtable idmap_user_hash; + struct idmap_hashtable idmap_group_hash; }; -static int nfs_idmap_legacy_upcall(struct key_construction *, const char *, void *); static ssize_t idmap_pipe_downcall(struct file *, const char __user *, size_t); static void idmap_pipe_destroy_msg(struct rpc_pipe_msg *); +static unsigned int fnvhash32(const void *, size_t); + static const struct rpc_pipe_ops idmap_upcall_ops = { .upcall = 
rpc_pipe_generic_upcall, .downcall = idmap_pipe_downcall, .destroy_msg = idmap_pipe_destroy_msg, }; -static struct key_type key_type_id_resolver_legacy = { - .name = "id_resolver", - .instantiate = user_instantiate, - .match = user_match, - .revoke = user_revoke, - .destroy = user_destroy, - .describe = user_describe, - .read = user_read, - .request_key = nfs_idmap_legacy_upcall, -}; - -static void __nfs_idmap_unregister(struct rpc_pipe *pipe) -{ - if (pipe->dentry) - rpc_unlink(pipe->dentry); -} - -static int __nfs_idmap_register(struct dentry *dir, - struct idmap *idmap, - struct rpc_pipe *pipe) -{ - struct dentry *dentry; - - dentry = rpc_mkpipe_dentry(dir, "idmap", idmap, pipe); - if (IS_ERR(dentry)) - return PTR_ERR(dentry); - pipe->dentry = dentry; - return 0; -} - -static void nfs_idmap_unregister(struct nfs_client *clp, - struct rpc_pipe *pipe) -{ - struct net *net = clp->net; - struct super_block *pipefs_sb; - - pipefs_sb = rpc_get_sb_net(net); - if (pipefs_sb) { - __nfs_idmap_unregister(pipe); - rpc_put_sb_net(net); - } -} - -static int nfs_idmap_register(struct nfs_client *clp, - struct idmap *idmap, - struct rpc_pipe *pipe) -{ - struct net *net = clp->net; - struct super_block *pipefs_sb; - int err = 0; - - pipefs_sb = rpc_get_sb_net(net); - if (pipefs_sb) { - if (clp->cl_rpcclient->cl_dentry) - err = __nfs_idmap_register(clp->cl_rpcclient->cl_dentry, - idmap, pipe); - rpc_put_sb_net(net); - } - return err; -} - int nfs_idmap_new(struct nfs_client *clp) { struct idmap *idmap; - struct rpc_pipe *pipe; int error; BUG_ON(clp->cl_idmap != NULL); @@ -456,19 +444,19 @@ nfs_idmap_new(struct nfs_client *clp) if (idmap == NULL) return -ENOMEM; - pipe = rpc_mkpipe_data(&idmap_upcall_ops, 0); - if (IS_ERR(pipe)) { - error = PTR_ERR(pipe); + idmap->idmap_dentry = rpc_mkpipe(clp->cl_rpcclient->cl_path.dentry, + "idmap", idmap, &idmap_upcall_ops, 0); + if (IS_ERR(idmap->idmap_dentry)) { + error = PTR_ERR(idmap->idmap_dentry); kfree(idmap); return error; } - error = 
nfs_idmap_register(clp, idmap, pipe); - if (error) { - rpc_destroy_pipe_data(pipe); - kfree(idmap); - return error; - } - idmap->idmap_pipe = pipe; + + mutex_init(&idmap->idmap_lock); + mutex_init(&idmap->idmap_im_lock); + init_waitqueue_head(&idmap->idmap_wq); + idmap->idmap_user_hash.h_type = IDMAP_TYPE_USER; + idmap->idmap_group_hash.h_type = IDMAP_TYPE_GROUP; clp->cl_idmap = idmap; return 0; @@ -481,220 +469,211 @@ nfs_idmap_delete(struct nfs_client *clp) if (!idmap) return; - nfs_idmap_unregister(clp, idmap->idmap_pipe); - rpc_destroy_pipe_data(idmap->idmap_pipe); + rpc_unlink(idmap->idmap_dentry); clp->cl_idmap = NULL; kfree(idmap); } -static int __rpc_pipefs_event(struct nfs_client *clp, unsigned long event, - struct super_block *sb) +/* + * Helper routines for manipulating the hashtable + */ +static inline struct idmap_hashent * +idmap_name_hash(struct idmap_hashtable* h, const char *name, size_t len) { - int err = 0; - - switch (event) { - case RPC_PIPEFS_MOUNT: - BUG_ON(clp->cl_rpcclient->cl_dentry == NULL); - err = __nfs_idmap_register(clp->cl_rpcclient->cl_dentry, - clp->cl_idmap, - clp->cl_idmap->idmap_pipe); - break; - case RPC_PIPEFS_UMOUNT: - if (clp->cl_idmap->idmap_pipe) { - struct dentry *parent; - - parent = clp->cl_idmap->idmap_pipe->dentry->d_parent; - __nfs_idmap_unregister(clp->cl_idmap->idmap_pipe); - /* - * Note: This is a dirty hack. SUNRPC hook has been - * called already but simple_rmdir() call for the - * directory returned with error because of idmap pipe - * inside. Thus now we have to remove this directory - * here. 
- */ - if (rpc_rmdir(parent)) - printk(KERN_ERR "NFS: %s: failed to remove " - "clnt dir!\n", __func__); - } - break; - default: - printk(KERN_ERR "NFS: %s: unknown event: %ld\n", __func__, - event); - return -ENOTSUPP; - } - return err; -} - -static struct nfs_client *nfs_get_client_for_event(struct net *net, int event) -{ - struct nfs_net *nn = net_generic(net, nfs_net_id); - struct dentry *cl_dentry; - struct nfs_client *clp; - - spin_lock(&nn->nfs_client_lock); - list_for_each_entry(clp, &nn->nfs_client_list, cl_share_link) { - if (clp->rpc_ops != &nfs_v4_clientops) - continue; - cl_dentry = clp->cl_idmap->idmap_pipe->dentry; - if (((event == RPC_PIPEFS_MOUNT) && cl_dentry) || - ((event == RPC_PIPEFS_UMOUNT) && !cl_dentry)) - continue; - atomic_inc(&clp->cl_count); - spin_unlock(&nn->nfs_client_lock); - return clp; - } - spin_unlock(&nn->nfs_client_lock); - return NULL; + return &h->h_entries[fnvhash32(name, len) % IDMAP_HASH_SZ]; } -static int rpc_pipefs_event(struct notifier_block *nb, unsigned long event, - void *ptr) +static struct idmap_hashent * +idmap_lookup_name(struct idmap_hashtable *h, const char *name, size_t len) { - struct super_block *sb = ptr; - struct nfs_client *clp; - int error = 0; + struct idmap_hashent *he = idmap_name_hash(h, name, len); - while ((clp = nfs_get_client_for_event(sb->s_fs_info, event))) { - error = __rpc_pipefs_event(clp, event, sb); - nfs_put_client(clp); - if (error) - break; - } - return error; + if (he->ih_namelen != len || memcmp(he->ih_name, name, len) != 0) + return NULL; + if (time_after(jiffies, he->ih_expires)) + return NULL; + return he; } -#define PIPEFS_NFS_PRIO 1 +static inline struct idmap_hashent * +idmap_id_hash(struct idmap_hashtable* h, __u32 id) +{ + return &h->h_entries[fnvhash32(&id, sizeof(id)) % IDMAP_HASH_SZ]; +} -static struct notifier_block nfs_idmap_block = { - .notifier_call = rpc_pipefs_event, - .priority = SUNRPC_PIPEFS_NFS_PRIO, -}; +static struct idmap_hashent * +idmap_lookup_id(struct 
idmap_hashtable *h, __u32 id) +{ + struct idmap_hashent *he = idmap_id_hash(h, id); + if (he->ih_id != id || he->ih_namelen == 0) + return NULL; + if (time_after(jiffies, he->ih_expires)) + return NULL; + return he; +} -int nfs_idmap_init(void) +/* + * Routines for allocating new entries in the hashtable. + * For now, we just have 1 entry per bucket, so it's all + * pretty trivial. + */ +static inline struct idmap_hashent * +idmap_alloc_name(struct idmap_hashtable *h, char *name, size_t len) { - int ret; - ret = nfs_idmap_init_keyring(); - if (ret != 0) - goto out; - ret = rpc_pipefs_notifier_register(&nfs_idmap_block); - if (ret != 0) - nfs_idmap_quit_keyring(); -out: - return ret; + return idmap_name_hash(h, name, len); } -void nfs_idmap_quit(void) +static inline struct idmap_hashent * +idmap_alloc_id(struct idmap_hashtable *h, __u32 id) { - rpc_pipefs_notifier_unregister(&nfs_idmap_block); - nfs_idmap_quit_keyring(); + return idmap_id_hash(h, id); } -static int nfs_idmap_prepare_message(char *desc, struct idmap_msg *im, - struct rpc_pipe_msg *msg) +static void +idmap_update_entry(struct idmap_hashent *he, const char *name, + size_t namelen, __u32 id) { - substring_t substr; - int token, ret; + he->ih_id = id; + memcpy(he->ih_name, name, namelen); + he->ih_name[namelen] = '\0'; + he->ih_namelen = namelen; + he->ih_expires = jiffies + nfs_idmap_cache_timeout; +} - memset(im, 0, sizeof(*im)); - memset(msg, 0, sizeof(*msg)); +/* + * Name -> ID + */ +static int +nfs_idmap_id(struct idmap *idmap, struct idmap_hashtable *h, + const char *name, size_t namelen, __u32 *id) +{ + struct rpc_pipe_msg msg; + struct idmap_msg *im; + struct idmap_hashent *he; + DECLARE_WAITQUEUE(wq, current); + int ret = -EIO; + + im = &idmap->idmap_im; + + /* + * String sanity checks + * Note that the userland daemon expects NUL terminated strings + */ + for (;;) { + if (namelen == 0) + return -EINVAL; + if (name[namelen-1] != '\0') + break; + namelen--; + } + if (namelen >= IDMAP_NAMESZ) + 
return -EINVAL; - im->im_type = IDMAP_TYPE_GROUP; - token = match_token(desc, nfs_idmap_tokens, &substr); + mutex_lock(&idmap->idmap_lock); + mutex_lock(&idmap->idmap_im_lock); - switch (token) { - case Opt_find_uid: - im->im_type = IDMAP_TYPE_USER; - case Opt_find_gid: - im->im_conv = IDMAP_CONV_NAMETOID; - ret = match_strlcpy(im->im_name, &substr, IDMAP_NAMESZ); - break; + he = idmap_lookup_name(h, name, namelen); + if (he != NULL) { + *id = he->ih_id; + ret = 0; + goto out; + } - case Opt_find_user: - im->im_type = IDMAP_TYPE_USER; - case Opt_find_group: - im->im_conv = IDMAP_CONV_IDTONAME; - ret = match_int(&substr, &im->im_id); - break; + memset(im, 0, sizeof(*im)); + memcpy(im->im_name, name, namelen); - default: - ret = -EINVAL; + im->im_type = h->h_type; + im->im_conv = IDMAP_CONV_NAMETOID; + + memset(&msg, 0, sizeof(msg)); + msg.data = im; + msg.len = sizeof(*im); + + add_wait_queue(&idmap->idmap_wq, &wq); + if (rpc_queue_upcall(idmap->idmap_dentry->d_inode, &msg) < 0) { + remove_wait_queue(&idmap->idmap_wq, &wq); goto out; } - msg->data = im; - msg->len = sizeof(struct idmap_msg); + set_current_state(TASK_UNINTERRUPTIBLE); + mutex_unlock(&idmap->idmap_im_lock); + schedule(); + __set_current_state(TASK_RUNNING); + remove_wait_queue(&idmap->idmap_wq, &wq); + mutex_lock(&idmap->idmap_im_lock); -out: + if (im->im_status & IDMAP_STATUS_SUCCESS) { + *id = im->im_id; + ret = 0; + } + + out: + memset(im, 0, sizeof(*im)); + mutex_unlock(&idmap->idmap_im_lock); + mutex_unlock(&idmap->idmap_lock); return ret; } -static int nfs_idmap_legacy_upcall(struct key_construction *cons, - const char *op, - void *aux) +/* + * ID -> Name + */ +static int +nfs_idmap_name(struct idmap *idmap, struct idmap_hashtable *h, + __u32 id, char *name) { - struct rpc_pipe_msg *msg; + struct rpc_pipe_msg msg; struct idmap_msg *im; - struct idmap *idmap = (struct idmap *)aux; - struct key *key = cons->key; - int ret; - - /* msg and im are freed in idmap_pipe_destroy_msg */ - msg = 
kmalloc(sizeof(*msg), GFP_KERNEL); - if (IS_ERR(msg)) { - ret = PTR_ERR(msg); - goto out0; - } - - im = kmalloc(sizeof(*im), GFP_KERNEL); - if (IS_ERR(im)) { - ret = PTR_ERR(im); - goto out1; - } + struct idmap_hashent *he; + DECLARE_WAITQUEUE(wq, current); + int ret = -EIO; + unsigned int len; - ret = nfs_idmap_prepare_message(key->description, im, msg); - if (ret < 0) - goto out2; + im = &idmap->idmap_im; - idmap->idmap_key_cons = cons; + mutex_lock(&idmap->idmap_lock); + mutex_lock(&idmap->idmap_im_lock); - ret = rpc_queue_upcall(idmap->idmap_pipe, msg); - if (ret < 0) - goto out2; + he = idmap_lookup_id(h, id); + if (he) { + memcpy(name, he->ih_name, he->ih_namelen); + ret = he->ih_namelen; + goto out; + } - return ret; + memset(im, 0, sizeof(*im)); + im->im_type = h->h_type; + im->im_conv = IDMAP_CONV_IDTONAME; + im->im_id = id; -out2: - kfree(im); -out1: - kfree(msg); -out0: - key_revoke(cons->key); - key_revoke(cons->authkey); - return ret; -} + memset(&msg, 0, sizeof(msg)); + msg.data = im; + msg.len = sizeof(*im); -static int nfs_idmap_instantiate(struct key *key, struct key *authkey, char *data) -{ - return key_instantiate_and_link(key, data, strlen(data) + 1, - id_resolver_cache->thread_keyring, - authkey); -} + add_wait_queue(&idmap->idmap_wq, &wq); -static int nfs_idmap_read_message(struct idmap_msg *im, struct key *key, struct key *authkey) -{ - char id_str[NFS_UINT_MAXLEN]; - int ret = -EINVAL; + if (rpc_queue_upcall(idmap->idmap_dentry->d_inode, &msg) < 0) { + remove_wait_queue(&idmap->idmap_wq, &wq); + goto out; + } - switch (im->im_conv) { - case IDMAP_CONV_NAMETOID: - sprintf(id_str, "%d", im->im_id); - ret = nfs_idmap_instantiate(key, authkey, id_str); - break; - case IDMAP_CONV_IDTONAME: - ret = nfs_idmap_instantiate(key, authkey, im->im_name); - break; + set_current_state(TASK_UNINTERRUPTIBLE); + mutex_unlock(&idmap->idmap_im_lock); + schedule(); + __set_current_state(TASK_RUNNING); + remove_wait_queue(&idmap->idmap_wq, &wq); + 
mutex_lock(&idmap->idmap_im_lock); + + if (im->im_status & IDMAP_STATUS_SUCCESS) { + if ((len = strnlen(im->im_name, IDMAP_NAMESZ)) == 0) + goto out; + memcpy(name, im->im_name, len); + ret = len; } + out: + memset(im, 0, sizeof(*im)); + mutex_unlock(&idmap->idmap_im_lock); + mutex_unlock(&idmap->idmap_lock); return ret; } @@ -703,51 +682,115 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) { struct rpc_inode *rpci = RPC_I(filp->f_path.dentry->d_inode); struct idmap *idmap = (struct idmap *)rpci->private; - struct key_construction *cons = idmap->idmap_key_cons; - struct idmap_msg im; + struct idmap_msg im_in, *im = &idmap->idmap_im; + struct idmap_hashtable *h; + struct idmap_hashent *he = NULL; size_t namelen_in; int ret; - if (mlen != sizeof(im)) { - ret = -ENOSPC; + if (mlen != sizeof(im_in)) + return -ENOSPC; + + if (copy_from_user(&im_in, src, mlen) != 0) + return -EFAULT; + + mutex_lock(&idmap->idmap_im_lock); + + ret = mlen; + im->im_status = im_in.im_status; + /* If we got an error, terminate now, and wake up pending upcalls */ + if (!(im_in.im_status & IDMAP_STATUS_SUCCESS)) { + wake_up(&idmap->idmap_wq); goto out; } - if (copy_from_user(&im, src, mlen) != 0) { - ret = -EFAULT; + /* Sanity checking of strings */ + ret = -EINVAL; + namelen_in = strnlen(im_in.im_name, IDMAP_NAMESZ); + if (namelen_in == 0 || namelen_in == IDMAP_NAMESZ) goto out; - } - if (!(im.im_status & IDMAP_STATUS_SUCCESS)) { - ret = mlen; - complete_request_key(idmap->idmap_key_cons, -ENOKEY); - goto out_incomplete; + switch (im_in.im_type) { + case IDMAP_TYPE_USER: + h = &idmap->idmap_user_hash; + break; + case IDMAP_TYPE_GROUP: + h = &idmap->idmap_group_hash; + break; + default: + goto out; } - namelen_in = strnlen(im.im_name, IDMAP_NAMESZ); - if (namelen_in == 0 || namelen_in == IDMAP_NAMESZ) { - ret = -EINVAL; + switch (im_in.im_conv) { + case IDMAP_CONV_IDTONAME: + /* Did we match the current upcall? 
*/ + if (im->im_conv == IDMAP_CONV_IDTONAME + && im->im_type == im_in.im_type + && im->im_id == im_in.im_id) { + /* Yes: copy string, including the terminating '\0' */ + memcpy(im->im_name, im_in.im_name, namelen_in); + im->im_name[namelen_in] = '\0'; + wake_up(&idmap->idmap_wq); + } + he = idmap_alloc_id(h, im_in.im_id); + break; + case IDMAP_CONV_NAMETOID: + /* Did we match the current upcall? */ + if (im->im_conv == IDMAP_CONV_NAMETOID + && im->im_type == im_in.im_type + && strnlen(im->im_name, IDMAP_NAMESZ) == namelen_in + && memcmp(im->im_name, im_in.im_name, namelen_in) == 0) { + im->im_id = im_in.im_id; + wake_up(&idmap->idmap_wq); + } + he = idmap_alloc_name(h, im_in.im_name, namelen_in); + break; + default: goto out; } - ret = nfs_idmap_read_message(&im, cons->key, cons->authkey); - if (ret >= 0) { - key_set_timeout(cons->key, nfs_idmap_cache_timeout); - ret = mlen; - } - + /* If the entry is valid, also copy it to the cache */ + if (he != NULL) + idmap_update_entry(he, im_in.im_name, namelen_in, im_in.im_id); + ret = mlen; out: - complete_request_key(idmap->idmap_key_cons, ret); -out_incomplete: + mutex_unlock(&idmap->idmap_im_lock); return ret; } static void idmap_pipe_destroy_msg(struct rpc_pipe_msg *msg) { - /* Free memory allocated in nfs_idmap_legacy_upcall() */ - kfree(msg->data); - kfree(msg); + struct idmap_msg *im = msg->data; + struct idmap *idmap = container_of(im, struct idmap, idmap_im); + + if (msg->errno >= 0) + return; + mutex_lock(&idmap->idmap_im_lock); + im->im_status = IDMAP_STATUS_LOOKUPFAIL; + wake_up(&idmap->idmap_wq); + mutex_unlock(&idmap->idmap_im_lock); +} + +/* + * Fowler/Noll/Vo hash + * http://www.isthe.com/chongo/tech/comp/fnv/ + */ + +#define FNV_P_32 ((unsigned int)0x01000193) /* 16777619 */ +#define FNV_1_32 ((unsigned int)0x811c9dc5) /* 2166136261 */ + +static unsigned int fnvhash32(const void *buf, size_t buflen) +{ + const unsigned char *p, *end = (const unsigned char *)buf + buflen; + unsigned int hash = FNV_1_32; + + 
for (p = buf; p < end; p++) { + hash *= FNV_P_32; + hash ^= (unsigned int)*p; + } + + return hash; } int nfs_map_name_to_uid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *uid) @@ -756,16 +799,16 @@ int nfs_map_name_to_uid(const struct nfs_server *server, const char *name, size_ if (nfs_map_string_to_numeric(name, namelen, uid)) return 0; - return nfs_idmap_lookup_id(name, namelen, "uid", uid, idmap); + return nfs_idmap_id(idmap, &idmap->idmap_user_hash, name, namelen, uid); } -int nfs_map_group_to_gid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *gid) +int nfs_map_group_to_gid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *uid) { struct idmap *idmap = server->nfs_client->cl_idmap; - if (nfs_map_string_to_numeric(name, namelen, gid)) + if (nfs_map_string_to_numeric(name, namelen, uid)) return 0; - return nfs_idmap_lookup_id(name, namelen, "gid", gid, idmap); + return nfs_idmap_id(idmap, &idmap->idmap_group_hash, name, namelen, uid); } int nfs_map_uid_to_name(const struct nfs_server *server, __u32 uid, char *buf, size_t buflen) @@ -774,19 +817,21 @@ int nfs_map_uid_to_name(const struct nfs_server *server, __u32 uid, char *buf, s int ret = -EINVAL; if (!(server->caps & NFS_CAP_UIDGID_NOMAP)) - ret = nfs_idmap_lookup_name(uid, "user", buf, buflen, idmap); + ret = nfs_idmap_name(idmap, &idmap->idmap_user_hash, uid, buf); if (ret < 0) ret = nfs_map_numeric_to_string(uid, buf, buflen); return ret; } -int nfs_map_gid_to_group(const struct nfs_server *server, __u32 gid, char *buf, size_t buflen) +int nfs_map_gid_to_group(const struct nfs_server *server, __u32 uid, char *buf, size_t buflen) { struct idmap *idmap = server->nfs_client->cl_idmap; int ret = -EINVAL; if (!(server->caps & NFS_CAP_UIDGID_NOMAP)) - ret = nfs_idmap_lookup_name(gid, "group", buf, buflen, idmap); + ret = nfs_idmap_name(idmap, &idmap->idmap_group_hash, uid, buf); if (ret < 0) - ret = nfs_map_numeric_to_string(gid, buf, 
buflen); + ret = nfs_map_numeric_to_string(uid, buf, buflen); return ret; } + +#endif /* CONFIG_NFS_USE_NEW_IDMAPPER */ diff --git a/trunk/fs/nfs/inode.c b/trunk/fs/nfs/inode.c index 7bb4d13c1cd5..f649fba8c384 100644 --- a/trunk/fs/nfs/inode.c +++ b/trunk/fs/nfs/inode.c @@ -39,7 +39,6 @@ #include #include #include -#include #include #include @@ -52,7 +51,6 @@ #include "fscache.h" #include "dns_resolve.h" #include "pnfs.h" -#include "netns.h" #define NFSDBG_FACILITY NFSDBG_VFS @@ -390,10 +388,9 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) unlock_new_inode(inode); } else nfs_refresh_inode(inode, fattr); - dprintk("NFS: nfs_fhget(%s/%Ld fh_crc=0x%08x ct=%d)\n", + dprintk("NFS: nfs_fhget(%s/%Ld ct=%d)\n", inode->i_sb->s_id, (long long)NFS_FILEID(inode), - nfs_display_fhandle_hash(fh), atomic_read(&inode->i_count)); out: @@ -404,7 +401,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) goto out; } -#define NFS_VALID_ATTRS (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_SIZE|ATTR_ATIME|ATTR_ATIME_SET|ATTR_MTIME|ATTR_MTIME_SET|ATTR_FILE|ATTR_OPEN) +#define NFS_VALID_ATTRS (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_SIZE|ATTR_ATIME|ATTR_ATIME_SET|ATTR_MTIME|ATTR_MTIME_SET|ATTR_FILE) int nfs_setattr(struct dentry *dentry, struct iattr *attr) @@ -426,7 +423,7 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr) /* Optimization: if the end result is no change, don't RPC */ attr->ia_valid &= NFS_VALID_ATTRS; - if ((attr->ia_valid & ~(ATTR_FILE|ATTR_OPEN)) == 0) + if ((attr->ia_valid & ~ATTR_FILE) == 0) return 0; /* Write all dirty data */ @@ -1047,67 +1044,6 @@ struct nfs_fh *nfs_alloc_fhandle(void) return fh; } -#ifdef NFS_DEBUG -/* - * _nfs_display_fhandle_hash - calculate the crc32 hash for the filehandle - * in the same way that wireshark does - * - * @fh: file handle - * - * For debugging only. 
- */ -u32 _nfs_display_fhandle_hash(const struct nfs_fh *fh) -{ - /* wireshark uses 32-bit AUTODIN crc and does a bitwise - * not on the result */ - return ~crc32(0xFFFFFFFF, &fh->data[0], fh->size); -} - -/* - * _nfs_display_fhandle - display an NFS file handle on the console - * - * @fh: file handle to display - * @caption: display caption - * - * For debugging only. - */ -void _nfs_display_fhandle(const struct nfs_fh *fh, const char *caption) -{ - unsigned short i; - - if (fh == NULL || fh->size == 0) { - printk(KERN_DEFAULT "%s at %p is empty\n", caption, fh); - return; - } - - printk(KERN_DEFAULT "%s at %p is %u bytes, crc: 0x%08x:\n", - caption, fh, fh->size, _nfs_display_fhandle_hash(fh)); - for (i = 0; i < fh->size; i += 16) { - __be32 *pos = (__be32 *)&fh->data[i]; - - switch ((fh->size - i - 1) >> 2) { - case 0: - printk(KERN_DEFAULT " %08x\n", - be32_to_cpup(pos)); - break; - case 1: - printk(KERN_DEFAULT " %08x %08x\n", - be32_to_cpup(pos), be32_to_cpup(pos + 1)); - break; - case 2: - printk(KERN_DEFAULT " %08x %08x %08x\n", - be32_to_cpup(pos), be32_to_cpup(pos + 1), - be32_to_cpup(pos + 2)); - break; - default: - printk(KERN_DEFAULT " %08x %08x %08x %08x\n", - be32_to_cpup(pos), be32_to_cpup(pos + 1), - be32_to_cpup(pos + 2), be32_to_cpup(pos + 3)); - } - } -} -#endif - /** * nfs_inode_attrs_need_update - check if the inode attributes need updating * @inode - pointer to inode @@ -1275,9 +1211,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) unsigned long now = jiffies; unsigned long save_cache_validity; - dfprintk(VFS, "NFS: %s(%s/%ld fh_crc=0x%08x ct=%d info=0x%x)\n", + dfprintk(VFS, "NFS: %s(%s/%ld ct=%d info=0x%x)\n", __func__, inode->i_sb->s_id, inode->i_ino, - nfs_display_fhandle_hash(NFS_FH(inode)), atomic_read(&inode->i_count), fattr->valid); if ((fattr->valid & NFS_ATTR_FATTR_FILEID) && nfsi->fileid != fattr->fileid) @@ -1471,7 +1406,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) /* 
* Big trouble! The inode has become a different object. */ - printk(KERN_DEBUG "NFS: %s: inode %ld mode changed, %07o to %07o\n", + printk(KERN_DEBUG "%s: inode %ld mode changed, %07o to %07o\n", __func__, inode->i_ino, inode->i_mode, fattr->mode); out_err: /* @@ -1560,7 +1495,7 @@ static void init_once(void *foo) INIT_LIST_HEAD(&nfsi->open_files); INIT_LIST_HEAD(&nfsi->access_cache_entry_lru); INIT_LIST_HEAD(&nfsi->access_cache_inode_lru); - INIT_LIST_HEAD(&nfsi->commit_list); + INIT_RADIX_TREE(&nfsi->nfs_page_tree, GFP_ATOMIC); nfsi->npages = 0; nfsi->ncommit = 0; atomic_set(&nfsi->silly_count, 1); @@ -1617,28 +1552,6 @@ static void nfsiod_stop(void) destroy_workqueue(wq); } -int nfs_net_id; -EXPORT_SYMBOL_GPL(nfs_net_id); - -static int nfs_net_init(struct net *net) -{ - nfs_clients_init(net); - return nfs_dns_resolver_cache_init(net); -} - -static void nfs_net_exit(struct net *net) -{ - nfs_dns_resolver_cache_destroy(net); - nfs_cleanup_cb_ident_idr(net); -} - -static struct pernet_operations nfs_net_ops = { - .init = nfs_net_init, - .exit = nfs_net_exit, - .id = &nfs_net_id, - .size = sizeof(struct nfs_net), -}; - /* * Initialize NFS */ @@ -1647,14 +1560,10 @@ static int __init init_nfs_fs(void) int err; err = nfs_idmap_init(); - if (err < 0) - goto out10; - - err = nfs_dns_resolver_init(); if (err < 0) goto out9; - err = register_pernet_subsys(&nfs_net_ops); + err = nfs_dns_resolver_init(); if (err < 0) goto out8; @@ -1691,14 +1600,14 @@ static int __init init_nfs_fs(void) goto out0; #ifdef CONFIG_PROC_FS - rpc_proc_register(&init_net, &nfs_rpcstat); + rpc_proc_register(&nfs_rpcstat); #endif if ((err = register_nfs_fs()) != 0) goto out; return 0; out: #ifdef CONFIG_PROC_FS - rpc_proc_unregister(&init_net, "nfs"); + rpc_proc_unregister("nfs"); #endif nfs_destroy_directcache(); out0: @@ -1716,12 +1625,10 @@ static int __init init_nfs_fs(void) out6: nfs_fscache_unregister(); out7: - unregister_pernet_subsys(&nfs_net_ops); -out8: nfs_dns_resolver_destroy(); -out9: 
+out8: nfs_idmap_quit(); -out10: +out9: return err; } @@ -1733,12 +1640,12 @@ static void __exit exit_nfs_fs(void) nfs_destroy_inodecache(); nfs_destroy_nfspagecache(); nfs_fscache_unregister(); - unregister_pernet_subsys(&nfs_net_ops); nfs_dns_resolver_destroy(); nfs_idmap_quit(); #ifdef CONFIG_PROC_FS - rpc_proc_unregister(&init_net, "nfs"); + rpc_proc_unregister("nfs"); #endif + nfs_cleanup_cb_ident_idr(); unregister_nfs_fs(); nfs_fs_proc_exit(); nfsiod_stop(); diff --git a/trunk/fs/nfs/internal.h b/trunk/fs/nfs/internal.h index 2476dc69365f..8102db9b926c 100644 --- a/trunk/fs/nfs/internal.h +++ b/trunk/fs/nfs/internal.h @@ -123,7 +123,6 @@ struct nfs_parsed_mount_data { } nfs_server; struct security_mnt_opts lsm_opts; - struct net *net; }; /* mount_clnt.c */ @@ -138,22 +137,20 @@ struct nfs_mount_request { int noresvport; unsigned int *auth_flav_len; rpc_authflavor_t *auth_flavs; - struct net *net; }; extern int nfs_mount(struct nfs_mount_request *info); extern void nfs_umount(const struct nfs_mount_request *info); /* client.c */ -extern const struct rpc_program nfs_program; -extern void nfs_clients_init(struct net *net); +extern struct rpc_program nfs_program; -extern void nfs_cleanup_cb_ident_idr(struct net *); +extern void nfs_cleanup_cb_ident_idr(void); extern void nfs_put_client(struct nfs_client *); -extern struct nfs_client *nfs4_find_client_ident(struct net *, int); +extern struct nfs_client *nfs4_find_client_no_ident(const struct sockaddr *); +extern struct nfs_client *nfs4_find_client_ident(int); extern struct nfs_client * -nfs4_find_client_sessionid(struct net *, const struct sockaddr *, - struct nfs4_sessionid *); +nfs4_find_client_sessionid(const struct sockaddr *, struct nfs4_sessionid *); extern struct nfs_server *nfs_create_server( const struct nfs_parsed_mount_data *, struct nfs_fh *); @@ -332,8 +329,6 @@ void nfs_retry_commit(struct list_head *page_list, void nfs_commit_clear_lock(struct nfs_inode *nfsi); void nfs_commitdata_release(void 
*data); void nfs_commit_release_pages(struct nfs_write_data *data); -void nfs_request_add_commit_list(struct nfs_page *req, struct list_head *head); -void nfs_request_remove_commit_list(struct nfs_page *req); #ifdef CONFIG_MIGRATION extern int nfs_migrate_page(struct address_space *, diff --git a/trunk/fs/nfs/mount_clnt.c b/trunk/fs/nfs/mount_clnt.c index 8e65c7f1f87c..d4c2d6b7507e 100644 --- a/trunk/fs/nfs/mount_clnt.c +++ b/trunk/fs/nfs/mount_clnt.c @@ -16,7 +16,7 @@ #include #include "internal.h" -#ifdef NFS_DEBUG +#ifdef RPC_DEBUG # define NFSDBG_FACILITY NFSDBG_MOUNT #endif @@ -67,7 +67,7 @@ enum { MOUNTPROC3_EXPORT = 5, }; -static const struct rpc_program mnt_program; +static struct rpc_program mnt_program; /* * Defined by OpenGroup XNFS Version 3W, chapter 8 @@ -153,7 +153,7 @@ int nfs_mount(struct nfs_mount_request *info) .rpc_resp = &result, }; struct rpc_create_args args = { - .net = info->net, + .net = &init_net, .protocol = info->protocol, .address = info->sap, .addrsize = info->salen, @@ -225,7 +225,7 @@ void nfs_umount(const struct nfs_mount_request *info) .to_retries = 2, }; struct rpc_create_args args = { - .net = info->net, + .net = &init_net, .protocol = IPPROTO_UDP, .address = info->sap, .addrsize = info->salen, @@ -488,19 +488,19 @@ static struct rpc_procinfo mnt3_procedures[] = { }; -static const struct rpc_version mnt_version1 = { +static struct rpc_version mnt_version1 = { .number = 1, .nrprocs = ARRAY_SIZE(mnt_procedures), .procs = mnt_procedures, }; -static const struct rpc_version mnt_version3 = { +static struct rpc_version mnt_version3 = { .number = 3, .nrprocs = ARRAY_SIZE(mnt3_procedures), .procs = mnt3_procedures, }; -static const struct rpc_version *mnt_version[] = { +static struct rpc_version *mnt_version[] = { NULL, &mnt_version1, NULL, @@ -509,7 +509,7 @@ static const struct rpc_version *mnt_version[] = { static struct rpc_stat mnt_stats; -static const struct rpc_program mnt_program = { +static struct rpc_program mnt_program = { 
.name = "mount", .number = NFS_MNT_PROGRAM, .nrvers = ARRAY_SIZE(mnt_version), diff --git a/trunk/fs/nfs/namespace.c b/trunk/fs/nfs/namespace.c index 1807866bb3ab..8102391bb374 100644 --- a/trunk/fs/nfs/namespace.c +++ b/trunk/fs/nfs/namespace.c @@ -276,10 +276,7 @@ struct vfsmount *nfs_d_automount(struct path *path) nfs_free_fattr(fattr); nfs_free_fhandle(fh); out_nofree: - if (IS_ERR(mnt)) - dprintk("<-- %s(): error %ld\n", __func__, PTR_ERR(mnt)); - else - dprintk("<-- %s() = %p\n", __func__, mnt); + dprintk("<-- nfs_follow_mountpoint() = %p\n", mnt); return mnt; } diff --git a/trunk/fs/nfs/netns.h b/trunk/fs/nfs/netns.h deleted file mode 100644 index aa14ec303e94..000000000000 --- a/trunk/fs/nfs/netns.h +++ /dev/null @@ -1,27 +0,0 @@ -#ifndef __NFS_NETNS_H__ -#define __NFS_NETNS_H__ - -#include -#include - -struct bl_dev_msg { - int32_t status; - uint32_t major, minor; -}; - -struct nfs_net { - struct cache_detail *nfs_dns_resolve; - struct rpc_pipe *bl_device_pipe; - struct bl_dev_msg bl_mount_reply; - wait_queue_head_t bl_wq; - struct list_head nfs_client_list; - struct list_head nfs_volume_list; -#ifdef CONFIG_NFS_V4 - struct idr cb_ident_idr; /* Protected by nfs_client_lock */ -#endif - spinlock_t nfs_client_lock; -}; - -extern int nfs_net_id; - -#endif diff --git a/trunk/fs/nfs/nfs2xdr.c b/trunk/fs/nfs/nfs2xdr.c index 1f56000fabbd..792cb13a4304 100644 --- a/trunk/fs/nfs/nfs2xdr.c +++ b/trunk/fs/nfs/nfs2xdr.c @@ -1150,7 +1150,7 @@ struct rpc_procinfo nfs_procedures[] = { PROC(STATFS, fhandle, statfsres, 0), }; -const struct rpc_version nfs_version2 = { +struct rpc_version nfs_version2 = { .number = 2, .nrprocs = ARRAY_SIZE(nfs_procedures), .procs = nfs_procedures diff --git a/trunk/fs/nfs/nfs3acl.c b/trunk/fs/nfs/nfs3acl.c index e4498dc351a8..7ef23979896d 100644 --- a/trunk/fs/nfs/nfs3acl.c +++ b/trunk/fs/nfs/nfs3acl.c @@ -192,7 +192,7 @@ struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type) .pages = pages, }; struct nfs3_getaclres res = { - 
NULL, + 0 }; struct rpc_message msg = { .rpc_argp = &args, diff --git a/trunk/fs/nfs/nfs3proc.c b/trunk/fs/nfs/nfs3proc.c index 5242eae6711a..91943953a370 100644 --- a/trunk/fs/nfs/nfs3proc.c +++ b/trunk/fs/nfs/nfs3proc.c @@ -428,11 +428,6 @@ nfs3_proc_unlink_setup(struct rpc_message *msg, struct inode *dir) msg->rpc_proc = &nfs3_procedures[NFS3PROC_REMOVE]; } -static void nfs3_proc_unlink_rpc_prepare(struct rpc_task *task, struct nfs_unlinkdata *data) -{ - rpc_call_start(task); -} - static int nfs3_proc_unlink_done(struct rpc_task *task, struct inode *dir) { @@ -450,11 +445,6 @@ nfs3_proc_rename_setup(struct rpc_message *msg, struct inode *dir) msg->rpc_proc = &nfs3_procedures[NFS3PROC_RENAME]; } -static void nfs3_proc_rename_rpc_prepare(struct rpc_task *task, struct nfs_renamedata *data) -{ - rpc_call_start(task); -} - static int nfs3_proc_rename_done(struct rpc_task *task, struct inode *old_dir, struct inode *new_dir) @@ -824,11 +814,6 @@ static void nfs3_proc_read_setup(struct nfs_read_data *data, struct rpc_message msg->rpc_proc = &nfs3_procedures[NFS3PROC_READ]; } -static void nfs3_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data) -{ - rpc_call_start(task); -} - static int nfs3_write_done(struct rpc_task *task, struct nfs_write_data *data) { if (nfs3_async_handle_jukebox(task, data->inode)) @@ -843,11 +828,6 @@ static void nfs3_proc_write_setup(struct nfs_write_data *data, struct rpc_messag msg->rpc_proc = &nfs3_procedures[NFS3PROC_WRITE]; } -static void nfs3_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data) -{ - rpc_call_start(task); -} - static int nfs3_commit_done(struct rpc_task *task, struct nfs_write_data *data) { if (nfs3_async_handle_jukebox(task, data->inode)) @@ -884,11 +864,9 @@ const struct nfs_rpc_ops nfs_v3_clientops = { .create = nfs3_proc_create, .remove = nfs3_proc_remove, .unlink_setup = nfs3_proc_unlink_setup, - .unlink_rpc_prepare = nfs3_proc_unlink_rpc_prepare, .unlink_done = 
nfs3_proc_unlink_done, .rename = nfs3_proc_rename, .rename_setup = nfs3_proc_rename_setup, - .rename_rpc_prepare = nfs3_proc_rename_rpc_prepare, .rename_done = nfs3_proc_rename_done, .link = nfs3_proc_link, .symlink = nfs3_proc_symlink, @@ -901,10 +879,8 @@ const struct nfs_rpc_ops nfs_v3_clientops = { .pathconf = nfs3_proc_pathconf, .decode_dirent = nfs3_decode_dirent, .read_setup = nfs3_proc_read_setup, - .read_rpc_prepare = nfs3_proc_read_rpc_prepare, .read_done = nfs3_read_done, .write_setup = nfs3_proc_write_setup, - .write_rpc_prepare = nfs3_proc_write_rpc_prepare, .write_done = nfs3_write_done, .commit_setup = nfs3_proc_commit_setup, .commit_done = nfs3_commit_done, diff --git a/trunk/fs/nfs/nfs3xdr.c b/trunk/fs/nfs/nfs3xdr.c index a77cc9a3ce55..183c6b123d0f 100644 --- a/trunk/fs/nfs/nfs3xdr.c +++ b/trunk/fs/nfs/nfs3xdr.c @@ -2461,7 +2461,7 @@ struct rpc_procinfo nfs3_procedures[] = { PROC(COMMIT, commit, commit, 5), }; -const struct rpc_version nfs_version3 = { +struct rpc_version nfs_version3 = { .number = 3, .nrprocs = ARRAY_SIZE(nfs3_procedures), .procs = nfs3_procedures @@ -2489,7 +2489,7 @@ static struct rpc_procinfo nfs3_acl_procedures[] = { }, }; -const struct rpc_version nfsacl_version3 = { +struct rpc_version nfsacl_version3 = { .number = 3, .nrprocs = sizeof(nfs3_acl_procedures)/ sizeof(nfs3_acl_procedures[0]), diff --git a/trunk/fs/nfs/nfs4_fs.h b/trunk/fs/nfs/nfs4_fs.h index 97ecc863dd76..4d7d0aedc101 100644 --- a/trunk/fs/nfs/nfs4_fs.h +++ b/trunk/fs/nfs/nfs4_fs.h @@ -20,6 +20,7 @@ enum nfs4_client_state { NFS4CLNT_RECLAIM_REBOOT, NFS4CLNT_RECLAIM_NOGRACE, NFS4CLNT_DELEGRETURN, + NFS4CLNT_LAYOUTRECALL, NFS4CLNT_SESSION_RESET, NFS4CLNT_RECALL_SLOT, NFS4CLNT_LEASE_CONFIRM, @@ -43,7 +44,7 @@ struct nfs4_minor_version_ops { struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, int cache_reply); - bool (*match_stateid)(const nfs4_stateid *, + int (*validate_stateid)(struct nfs_delegation *, const nfs4_stateid *); int 
(*find_root_sec)(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); @@ -52,25 +53,26 @@ struct nfs4_minor_version_ops { const struct nfs4_state_maintenance_ops *state_renewal_ops; }; -struct nfs_unique_id { - struct rb_node rb_node; - __u64 id; +/* + * struct rpc_sequence ensures that RPC calls are sent in the exact + * order that they appear on the list. + */ +struct rpc_sequence { + struct rpc_wait_queue wait; /* RPC call delay queue */ + spinlock_t lock; /* Protects the list */ + struct list_head list; /* Defines sequence of RPC calls */ }; #define NFS_SEQID_CONFIRMED 1 struct nfs_seqid_counter { - int owner_id; + struct rpc_sequence *sequence; int flags; u32 counter; - spinlock_t lock; /* Protects the list */ - struct list_head list; /* Defines sequence of RPC calls */ - struct rpc_wait_queue wait; /* RPC call delay queue */ }; struct nfs_seqid { struct nfs_seqid_counter *sequence; struct list_head list; - struct rpc_task *task; }; static inline void nfs_confirm_seqid(struct nfs_seqid_counter *seqid, int status) @@ -79,12 +81,18 @@ static inline void nfs_confirm_seqid(struct nfs_seqid_counter *seqid, int status seqid->flags |= NFS_SEQID_CONFIRMED; } +struct nfs_unique_id { + struct rb_node rb_node; + __u64 id; +}; + /* * NFS4 state_owners and lock_owners are simply labels for ordered * sequences of RPC calls. Their sole purpose is to provide once-only * semantics by allowing the server to identify replayed requests. 
*/ struct nfs4_state_owner { + struct nfs_unique_id so_owner_id; struct nfs_server *so_server; struct list_head so_lru; unsigned long so_expires; @@ -97,6 +105,7 @@ struct nfs4_state_owner { unsigned long so_flags; struct list_head so_states; struct nfs_seqid_counter so_seqid; + struct rpc_sequence so_sequence; }; enum { @@ -137,6 +146,8 @@ struct nfs4_lock_state { #define NFS_LOCK_INITIALIZED 1 int ls_flags; struct nfs_seqid_counter ls_seqid; + struct rpc_sequence ls_sequence; + struct nfs_unique_id ls_id; nfs4_stateid ls_stateid; atomic_t ls_count; struct nfs4_lock_owner ls_owner; @@ -182,7 +193,6 @@ struct nfs4_exception { long timeout; int retry; struct nfs4_state *state; - struct inode *inode; }; struct nfs4_state_recovery_ops { @@ -214,7 +224,7 @@ extern int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait, boo extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle); extern int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name, struct nfs4_fs_locations *fs_locations, struct page *page); -extern int nfs4_release_lockowner(struct nfs4_lock_state *); +extern void nfs4_release_lockowner(const struct nfs4_lock_state *); extern const struct xattr_handler *nfs4_xattr_handlers[]; #if defined(CONFIG_NFS_V4_1) @@ -223,13 +233,12 @@ static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *ser return server->nfs_client->cl_session; } -extern bool nfs4_set_task_privileged(struct rpc_task *task, void *dummy); extern int nfs4_setup_sequence(const struct nfs_server *server, struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, - struct rpc_task *task); + int cache_reply, struct rpc_task *task); extern int nfs41_setup_sequence(struct nfs4_session *session, struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, - struct rpc_task *task); + int cache_reply, struct rpc_task *task); extern void nfs4_destroy_session(struct nfs4_session *session); extern struct nfs4_session 
*nfs4_alloc_session(struct nfs_client *clp); extern int nfs4_proc_create_session(struct nfs_client *); @@ -260,7 +269,7 @@ static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *ser static inline int nfs4_setup_sequence(const struct nfs_server *server, struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, - struct rpc_task *task) + int cache_reply, struct rpc_task *task) { return 0; } @@ -310,7 +319,7 @@ static inline void nfs4_schedule_session_recovery(struct nfs4_session *session) } #endif /* CONFIG_NFS_V4_1 */ -extern struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *, gfp_t); +extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *); extern void nfs4_put_state_owner(struct nfs4_state_owner *); extern void nfs4_purge_state_owners(struct nfs_server *); extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *); @@ -318,8 +327,6 @@ extern void nfs4_put_open_state(struct nfs4_state *); extern void nfs4_close_state(struct nfs4_state *, fmode_t); extern void nfs4_close_sync(struct nfs4_state *, fmode_t); extern void nfs4_state_set_mode_locked(struct nfs4_state *, fmode_t); -extern void nfs_inode_find_state_and_recover(struct inode *inode, - const nfs4_stateid *stateid); extern void nfs4_schedule_lease_recovery(struct nfs_client *); extern void nfs4_schedule_state_manager(struct nfs_client *); extern void nfs4_schedule_path_down_recovery(struct nfs_client *clp); @@ -330,8 +337,7 @@ extern void nfs41_handle_server_scope(struct nfs_client *, struct server_scope **); extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp); extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl); -extern void nfs4_select_rw_stateid(nfs4_stateid *, struct nfs4_state *, - fmode_t, fl_owner_t, pid_t); +extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t, pid_t); extern struct nfs_seqid 
*nfs_alloc_seqid(struct nfs_seqid_counter *counter, gfp_t gfp_mask); extern int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task); @@ -340,8 +346,6 @@ extern void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid); extern void nfs_release_seqid(struct nfs_seqid *seqid); extern void nfs_free_seqid(struct nfs_seqid *seqid); -extern void nfs4_free_lock_state(struct nfs_server *server, struct nfs4_lock_state *lsp); - extern const nfs4_stateid zero_stateid; /* nfs4xdr.c */ @@ -353,16 +357,6 @@ struct nfs4_mount_data; extern struct svc_version nfs4_callback_version1; extern struct svc_version nfs4_callback_version4; -static inline void nfs4_stateid_copy(nfs4_stateid *dst, const nfs4_stateid *src) -{ - memcpy(dst, src, sizeof(*dst)); -} - -static inline bool nfs4_stateid_match(const nfs4_stateid *dst, const nfs4_stateid *src) -{ - return memcmp(dst, src, sizeof(*dst)) == 0; -} - #else #define nfs4_close_state(a, b) do { } while (0) diff --git a/trunk/fs/nfs/nfs4filelayout.c b/trunk/fs/nfs/nfs4filelayout.c index 634c0bcb4fd6..71ec08617e23 100644 --- a/trunk/fs/nfs/nfs4filelayout.c +++ b/trunk/fs/nfs/nfs4filelayout.c @@ -33,10 +33,7 @@ #include #include -#include - #include "internal.h" -#include "delegation.h" #include "nfs4filelayout.h" #define NFSDBG_FACILITY NFSDBG_PNFS_LD @@ -87,27 +84,12 @@ static int filelayout_async_handle_error(struct rpc_task *task, struct nfs_client *clp, int *reset) { - struct nfs_server *mds_server = NFS_SERVER(state->inode); - struct nfs_client *mds_client = mds_server->nfs_client; - if (task->tk_status >= 0) return 0; + *reset = 0; switch (task->tk_status) { - /* MDS state errors */ - case -NFS4ERR_DELEG_REVOKED: - case -NFS4ERR_ADMIN_REVOKED: - case -NFS4ERR_BAD_STATEID: - nfs_remove_bad_delegation(state->inode); - case -NFS4ERR_OPENMODE: - nfs4_schedule_stateid_recovery(mds_server, state); - goto wait_on_recovery; - case -NFS4ERR_EXPIRED: - nfs4_schedule_stateid_recovery(mds_server, state); - 
nfs4_schedule_lease_recovery(mds_client); - goto wait_on_recovery; - /* DS session errors */ case -NFS4ERR_BADSESSION: case -NFS4ERR_BADSLOT: case -NFS4ERR_BAD_HIGH_SLOT: @@ -133,14 +115,8 @@ static int filelayout_async_handle_error(struct rpc_task *task, *reset = 1; break; } -out: task->tk_status = 0; return -EAGAIN; -wait_on_recovery: - rpc_sleep_on(&mds_client->cl_rpcwaitq, task, NULL); - if (test_bit(NFS4CLNT_MANAGER_RUNNING, &mds_client->cl_state) == 0) - rpc_wake_up_queued_task(&mds_client->cl_rpcwaitq, task); - goto out; } /* NFS_PROTO call done callback routines */ @@ -197,7 +173,7 @@ static void filelayout_read_prepare(struct rpc_task *task, void *data) if (nfs41_setup_sequence(rdata->ds_clp->cl_session, &rdata->args.seq_args, &rdata->res.seq_res, - task)) + 0, task)) return; rpc_call_start(task); @@ -213,18 +189,10 @@ static void filelayout_read_call_done(struct rpc_task *task, void *data) rdata->mds_ops->rpc_call_done(task, data); } -static void filelayout_read_count_stats(struct rpc_task *task, void *data) -{ - struct nfs_read_data *rdata = (struct nfs_read_data *)data; - - rpc_count_iostats(task, NFS_SERVER(rdata->inode)->client->cl_metrics); -} - static void filelayout_read_release(void *data) { struct nfs_read_data *rdata = (struct nfs_read_data *)data; - put_lseg(rdata->lseg); rdata->mds_ops->rpc_release(data); } @@ -286,7 +254,7 @@ static void filelayout_write_prepare(struct rpc_task *task, void *data) if (nfs41_setup_sequence(wdata->ds_clp->cl_session, &wdata->args.seq_args, &wdata->res.seq_res, - task)) + 0, task)) return; rpc_call_start(task); @@ -300,18 +268,10 @@ static void filelayout_write_call_done(struct rpc_task *task, void *data) wdata->mds_ops->rpc_call_done(task, data); } -static void filelayout_write_count_stats(struct rpc_task *task, void *data) -{ - struct nfs_write_data *wdata = (struct nfs_write_data *)data; - - rpc_count_iostats(task, NFS_SERVER(wdata->inode)->client->cl_metrics); -} - static void filelayout_write_release(void 
*data) { struct nfs_write_data *wdata = (struct nfs_write_data *)data; - put_lseg(wdata->lseg); wdata->mds_ops->rpc_release(data); } @@ -322,28 +282,24 @@ static void filelayout_commit_release(void *data) nfs_commit_release_pages(wdata); if (atomic_dec_and_test(&NFS_I(wdata->inode)->commits_outstanding)) nfs_commit_clear_lock(NFS_I(wdata->inode)); - put_lseg(wdata->lseg); nfs_commitdata_release(wdata); } -static const struct rpc_call_ops filelayout_read_call_ops = { +struct rpc_call_ops filelayout_read_call_ops = { .rpc_call_prepare = filelayout_read_prepare, .rpc_call_done = filelayout_read_call_done, - .rpc_count_stats = filelayout_read_count_stats, .rpc_release = filelayout_read_release, }; -static const struct rpc_call_ops filelayout_write_call_ops = { +struct rpc_call_ops filelayout_write_call_ops = { .rpc_call_prepare = filelayout_write_prepare, .rpc_call_done = filelayout_write_call_done, - .rpc_count_stats = filelayout_write_count_stats, .rpc_release = filelayout_write_release, }; -static const struct rpc_call_ops filelayout_commit_call_ops = { +struct rpc_call_ops filelayout_commit_call_ops = { .rpc_call_prepare = filelayout_write_prepare, .rpc_call_done = filelayout_write_call_done, - .rpc_count_stats = filelayout_write_count_stats, .rpc_release = filelayout_commit_release, }; @@ -411,8 +367,7 @@ filelayout_write_pagelist(struct nfs_write_data *data, int sync) idx = nfs4_fl_calc_ds_index(lseg, j); ds = nfs4_fl_prepare_ds(lseg, idx); if (!ds) { - printk(KERN_ERR "NFS: %s: prepare_ds failed, use MDS\n", - __func__); + printk(KERN_ERR "%s: prepare_ds failed, use MDS\n", __func__); set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags); set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags); return PNFS_NOT_ATTEMPTED; @@ -620,7 +575,7 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo, goto out_err_free; fl->fh_array[i]->size = be32_to_cpup(p++); if (sizeof(struct nfs_fh) < fl->fh_array[i]->size) { - printk(KERN_ERR "NFS: Too big fh %d 
received %d\n", + printk(KERN_ERR "Too big fh %d received %d\n", i, fl->fh_array[i]->size); goto out_err_free; } @@ -685,16 +640,14 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid, int size = (fl->stripe_type == STRIPE_SPARSE) ? fl->dsaddr->ds_num : fl->dsaddr->stripe_count; - fl->commit_buckets = kcalloc(size, sizeof(struct nfs4_fl_commit_bucket), gfp_flags); + fl->commit_buckets = kcalloc(size, sizeof(struct list_head), gfp_flags); if (!fl->commit_buckets) { filelayout_free_lseg(&fl->generic_hdr); return NULL; } fl->number_of_buckets = size; - for (i = 0; i < size; i++) { - INIT_LIST_HEAD(&fl->commit_buckets[i].written); - INIT_LIST_HEAD(&fl->commit_buckets[i].committing); - } + for (i = 0; i < size; i++) + INIT_LIST_HEAD(&fl->commit_buckets[i]); } return &fl->generic_hdr; } @@ -726,7 +679,7 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, return (p_stripe == r_stripe); } -static void +void filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) { @@ -743,7 +696,7 @@ filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio, nfs_pageio_reset_read_mds(pgio); } -static void +void filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) { @@ -772,6 +725,11 @@ static const struct nfs_pageio_ops filelayout_pg_write_ops = { .pg_doio = pnfs_generic_pg_writepages, }; +static bool filelayout_mark_pnfs_commit(struct pnfs_layout_segment *lseg) +{ + return !FILELAYOUT_LSEG(lseg)->commit_through_mds; +} + static u32 select_bucket_index(struct nfs4_filelayout_segment *fl, u32 j) { if (fl->stripe_type == STRIPE_SPARSE) @@ -780,49 +738,13 @@ static u32 select_bucket_index(struct nfs4_filelayout_segment *fl, u32 j) return j; } -/* The generic layer is about to remove the req from the commit list. - * If this will make the bucket empty, it will need to put the lseg reference. 
- */ -static void -filelayout_clear_request_commit(struct nfs_page *req) -{ - struct pnfs_layout_segment *freeme = NULL; - struct inode *inode = req->wb_context->dentry->d_inode; - - spin_lock(&inode->i_lock); - if (!test_and_clear_bit(PG_COMMIT_TO_DS, &req->wb_flags)) - goto out; - if (list_is_singular(&req->wb_list)) { - struct inode *inode = req->wb_context->dentry->d_inode; - struct pnfs_layout_segment *lseg; - - /* From here we can find the bucket, but for the moment, - * since there is only one relevant lseg... - */ - list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) { - if (lseg->pls_range.iomode == IOMODE_RW) { - freeme = lseg; - break; - } - } - } -out: - nfs_request_remove_commit_list(req); - spin_unlock(&inode->i_lock); - put_lseg(freeme); -} - -static struct list_head * -filelayout_choose_commit_list(struct nfs_page *req, - struct pnfs_layout_segment *lseg) +struct list_head *filelayout_choose_commit_list(struct nfs_page *req) { + struct pnfs_layout_segment *lseg = req->wb_commit_lseg; struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg); u32 i, j; struct list_head *list; - if (fl->commit_through_mds) - return &NFS_I(req->wb_context->dentry->d_inode)->commit_list; - /* Note that we are calling nfs4_fl_calc_j_index on each page * that ends up being committed to a data server. An attractive * alternative is to add a field to nfs_write_data and nfs_page @@ -832,30 +754,14 @@ filelayout_choose_commit_list(struct nfs_page *req, j = nfs4_fl_calc_j_index(lseg, (loff_t)req->wb_index << PAGE_CACHE_SHIFT); i = select_bucket_index(fl, j); - list = &fl->commit_buckets[i].written; + list = &fl->commit_buckets[i]; if (list_empty(list)) { - /* Non-empty buckets hold a reference on the lseg. That ref - * is normally transferred to the COMMIT call and released - * there. 
It could also be released if the last req is pulled - * off due to a rewrite, in which case it will be done in - * filelayout_remove_commit_req - */ + /* Non-empty buckets hold a reference on the lseg */ get_lseg(lseg); } - set_bit(PG_COMMIT_TO_DS, &req->wb_flags); return list; } -static void -filelayout_mark_request_commit(struct nfs_page *req, - struct pnfs_layout_segment *lseg) -{ - struct list_head *list; - - list = filelayout_choose_commit_list(req, lseg); - nfs_request_add_commit_list(req, list); -} - static u32 calc_ds_index_from_commit(struct pnfs_layout_segment *lseg, u32 i) { struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg); @@ -891,12 +797,11 @@ static int filelayout_initiate_commit(struct nfs_write_data *data, int how) idx = calc_ds_index_from_commit(lseg, data->ds_commit_index); ds = nfs4_fl_prepare_ds(lseg, idx); if (!ds) { - printk(KERN_ERR "NFS: %s: prepare_ds failed, use MDS\n", - __func__); + printk(KERN_ERR "%s: prepare_ds failed, use MDS\n", __func__); set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags); set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags); prepare_to_resend_writes(data); - filelayout_commit_release(data); + data->mds_ops->rpc_release(data); return -EAGAIN; } dprintk("%s ino %lu, how %d\n", __func__, data->inode->i_ino, how); @@ -912,87 +817,24 @@ static int filelayout_initiate_commit(struct nfs_write_data *data, int how) /* * This is only useful while we are using whole file layouts. 
*/ -static struct pnfs_layout_segment * -find_only_write_lseg_locked(struct inode *inode) -{ - struct pnfs_layout_segment *lseg; - - list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) - if (lseg->pls_range.iomode == IOMODE_RW) - return lseg; - return NULL; -} - static struct pnfs_layout_segment *find_only_write_lseg(struct inode *inode) { - struct pnfs_layout_segment *rv; + struct pnfs_layout_segment *lseg, *rv = NULL; spin_lock(&inode->i_lock); - rv = find_only_write_lseg_locked(inode); - if (rv) - get_lseg(rv); + list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) + if (lseg->pls_range.iomode == IOMODE_RW) + rv = get_lseg(lseg); spin_unlock(&inode->i_lock); return rv; } -static int -filelayout_scan_ds_commit_list(struct nfs4_fl_commit_bucket *bucket, int max, - spinlock_t *lock) -{ - struct list_head *src = &bucket->written; - struct list_head *dst = &bucket->committing; - struct nfs_page *req, *tmp; - int ret = 0; - - list_for_each_entry_safe(req, tmp, src, wb_list) { - if (!nfs_lock_request(req)) - continue; - if (cond_resched_lock(lock)) - list_safe_reset_next(req, tmp, wb_list); - nfs_request_remove_commit_list(req); - clear_bit(PG_COMMIT_TO_DS, &req->wb_flags); - nfs_list_add_request(req, dst); - ret++; - if (ret == max) - break; - } - return ret; -} - -/* Move reqs from written to committing lists, returning count of number moved. - * Note called with i_lock held. 
- */ -static int filelayout_scan_commit_lists(struct inode *inode, int max, - spinlock_t *lock) -{ - struct pnfs_layout_segment *lseg; - struct nfs4_filelayout_segment *fl; - int i, rv = 0, cnt; - - lseg = find_only_write_lseg_locked(inode); - if (!lseg) - goto out_done; - fl = FILELAYOUT_LSEG(lseg); - if (fl->commit_through_mds) - goto out_done; - for (i = 0; i < fl->number_of_buckets && max != 0; i++) { - cnt = filelayout_scan_ds_commit_list(&fl->commit_buckets[i], - max, lock); - max -= cnt; - rv += cnt; - } -out_done: - return rv; -} - -static unsigned int -alloc_ds_commits(struct inode *inode, struct list_head *list) +static int alloc_ds_commits(struct inode *inode, struct list_head *list) { struct pnfs_layout_segment *lseg; struct nfs4_filelayout_segment *fl; struct nfs_write_data *data; int i, j; - unsigned int nreq = 0; /* Won't need this when non-whole file layout segments are supported * instead we will use a pnfs_layout_hdr structure */ @@ -1001,27 +843,28 @@ alloc_ds_commits(struct inode *inode, struct list_head *list) return 0; fl = FILELAYOUT_LSEG(lseg); for (i = 0; i < fl->number_of_buckets; i++) { - if (list_empty(&fl->commit_buckets[i].committing)) + if (list_empty(&fl->commit_buckets[i])) continue; data = nfs_commitdata_alloc(); if (!data) - break; + goto out_bad; data->ds_commit_index = i; data->lseg = lseg; list_add(&data->pages, list); - nreq++; } + put_lseg(lseg); + return 0; - /* Clean up on error */ +out_bad: for (j = i; j < fl->number_of_buckets; j++) { - if (list_empty(&fl->commit_buckets[i].committing)) + if (list_empty(&fl->commit_buckets[i])) continue; - nfs_retry_commit(&fl->commit_buckets[i].committing, lseg); + nfs_retry_commit(&fl->commit_buckets[i], lseg); put_lseg(lseg); /* associated with emptying bucket */ } put_lseg(lseg); /* Caller will clean up entries put on list */ - return nreq; + return -ENOMEM; } /* This follows nfs_commit_list pretty closely */ @@ -1031,40 +874,40 @@ filelayout_commit_pagelist(struct inode *inode, 
struct list_head *mds_pages, { struct nfs_write_data *data, *tmp; LIST_HEAD(list); - unsigned int nreq = 0; if (!list_empty(mds_pages)) { data = nfs_commitdata_alloc(); - if (data != NULL) { - data->lseg = NULL; - list_add(&data->pages, &list); - nreq++; - } else - nfs_retry_commit(mds_pages, NULL); - } - - nreq += alloc_ds_commits(inode, &list); - - if (nreq == 0) { - nfs_commit_clear_lock(NFS_I(inode)); - goto out; + if (!data) + goto out_bad; + data->lseg = NULL; + list_add(&data->pages, &list); } - atomic_add(nreq, &NFS_I(inode)->commits_outstanding); + if (alloc_ds_commits(inode, &list)) + goto out_bad; list_for_each_entry_safe(data, tmp, &list, pages) { list_del_init(&data->pages); + atomic_inc(&NFS_I(inode)->commits_outstanding); if (!data->lseg) { nfs_init_commit(data, mds_pages, NULL); nfs_initiate_commit(data, NFS_CLIENT(inode), data->mds_ops, how); } else { - nfs_init_commit(data, &FILELAYOUT_LSEG(data->lseg)->commit_buckets[data->ds_commit_index].committing, data->lseg); + nfs_init_commit(data, &FILELAYOUT_LSEG(data->lseg)->commit_buckets[data->ds_commit_index], data->lseg); filelayout_initiate_commit(data, how); } } -out: - return PNFS_ATTEMPTED; + return 0; + out_bad: + list_for_each_entry_safe(data, tmp, &list, pages) { + nfs_retry_commit(&data->pages, data->lseg); + list_del_init(&data->pages); + nfs_commit_free(data); + } + nfs_retry_commit(mds_pages, NULL); + nfs_commit_clear_lock(NFS_I(inode)); + return -ENOMEM; } static void @@ -1081,9 +924,8 @@ static struct pnfs_layoutdriver_type filelayout_type = { .free_lseg = filelayout_free_lseg, .pg_read_ops = &filelayout_pg_read_ops, .pg_write_ops = &filelayout_pg_write_ops, - .mark_request_commit = filelayout_mark_request_commit, - .clear_request_commit = filelayout_clear_request_commit, - .scan_commit_lists = filelayout_scan_commit_lists, + .mark_pnfs_commit = filelayout_mark_pnfs_commit, + .choose_commit_list = filelayout_choose_commit_list, .commit_pagelist = filelayout_commit_pagelist, 
.read_pagelist = filelayout_read_pagelist, .write_pagelist = filelayout_write_pagelist, diff --git a/trunk/fs/nfs/nfs4filelayout.h b/trunk/fs/nfs/nfs4filelayout.h index 21190bb1f5e3..2e42284253fa 100644 --- a/trunk/fs/nfs/nfs4filelayout.h +++ b/trunk/fs/nfs/nfs4filelayout.h @@ -74,11 +74,6 @@ struct nfs4_file_layout_dsaddr { struct nfs4_pnfs_ds *ds_list[1]; }; -struct nfs4_fl_commit_bucket { - struct list_head written; - struct list_head committing; -}; - struct nfs4_filelayout_segment { struct pnfs_layout_segment generic_hdr; u32 stripe_type; @@ -89,7 +84,7 @@ struct nfs4_filelayout_segment { struct nfs4_file_layout_dsaddr *dsaddr; /* Point to GETDEVINFO data */ unsigned int num_fh; struct nfs_fh **fh_array; - struct nfs4_fl_commit_bucket *commit_buckets; /* Sort commits to ds */ + struct list_head *commit_buckets; /* Sort commits to ds */ int number_of_buckets; }; diff --git a/trunk/fs/nfs/nfs4filelayoutdev.c b/trunk/fs/nfs/nfs4filelayoutdev.c index a866bbd2890a..8ae91908f5aa 100644 --- a/trunk/fs/nfs/nfs4filelayoutdev.c +++ b/trunk/fs/nfs/nfs4filelayoutdev.c @@ -45,7 +45,7 @@ * - incremented when a device id maps a data server already in the cache. * - decremented when deviceid is removed from the cache. */ -static DEFINE_SPINLOCK(nfs4_ds_cache_lock); +DEFINE_SPINLOCK(nfs4_ds_cache_lock); static LIST_HEAD(nfs4_data_server_cache); /* Debug routines */ @@ -108,40 +108,58 @@ same_sockaddr(struct sockaddr *addr1, struct sockaddr *addr2) return false; } -static bool -_same_data_server_addrs_locked(const struct list_head *dsaddrs1, - const struct list_head *dsaddrs2) +/* + * Lookup DS by addresses. The first matching address returns true. 
+ * nfs4_ds_cache_lock is held + */ +static struct nfs4_pnfs_ds * +_data_server_lookup_locked(struct list_head *dsaddrs) { + struct nfs4_pnfs_ds *ds; struct nfs4_pnfs_ds_addr *da1, *da2; - /* step through both lists, comparing as we go */ - for (da1 = list_first_entry(dsaddrs1, typeof(*da1), da_node), - da2 = list_first_entry(dsaddrs2, typeof(*da2), da_node); - da1 != NULL && da2 != NULL; - da1 = list_entry(da1->da_node.next, typeof(*da1), da_node), - da2 = list_entry(da2->da_node.next, typeof(*da2), da_node)) { - if (!same_sockaddr((struct sockaddr *)&da1->da_addr, - (struct sockaddr *)&da2->da_addr)) - return false; + list_for_each_entry(da1, dsaddrs, da_node) { + list_for_each_entry(ds, &nfs4_data_server_cache, ds_node) { + list_for_each_entry(da2, &ds->ds_addrs, da_node) { + if (same_sockaddr( + (struct sockaddr *)&da1->da_addr, + (struct sockaddr *)&da2->da_addr)) + return ds; + } + } } - if (da1 == NULL && da2 == NULL) - return true; - - return false; + return NULL; } /* - * Lookup DS by addresses. nfs4_ds_cache_lock is held + * Compare two lists of addresses. 
*/ -static struct nfs4_pnfs_ds * -_data_server_lookup_locked(const struct list_head *dsaddrs) +static bool +_data_server_match_all_addrs_locked(struct list_head *dsaddrs1, + struct list_head *dsaddrs2) { - struct nfs4_pnfs_ds *ds; + struct nfs4_pnfs_ds_addr *da1, *da2; + size_t count1 = 0, + count2 = 0; + + list_for_each_entry(da1, dsaddrs1, da_node) + count1++; + + list_for_each_entry(da2, dsaddrs2, da_node) { + bool found = false; + count2++; + list_for_each_entry(da1, dsaddrs1, da_node) { + if (same_sockaddr((struct sockaddr *)&da1->da_addr, + (struct sockaddr *)&da2->da_addr)) { + found = true; + break; + } + } + if (!found) + return false; + } - list_for_each_entry(ds, &nfs4_data_server_cache, ds_node) - if (_same_data_server_addrs_locked(&ds->ds_addrs, dsaddrs)) - return ds; - return NULL; + return (count1 == count2); } /* @@ -338,6 +356,11 @@ nfs4_pnfs_ds_add(struct list_head *dsaddrs, gfp_t gfp_flags) dprintk("%s add new data server %s\n", __func__, ds->ds_remotestr); } else { + if (!_data_server_match_all_addrs_locked(&tmp_ds->ds_addrs, + dsaddrs)) { + dprintk("%s: multipath address mismatch: %s != %s", + __func__, tmp_ds->ds_remotestr, remotestr); + } kfree(remotestr); kfree(ds); atomic_inc(&tmp_ds->ds_count); @@ -355,7 +378,7 @@ nfs4_pnfs_ds_add(struct list_head *dsaddrs, gfp_t gfp_flags) * Currently only supports ipv4, ipv6 and one multi-path address. 
*/ static struct nfs4_pnfs_ds_addr * -decode_ds_addr(struct net *net, struct xdr_stream *streamp, gfp_t gfp_flags) +decode_ds_addr(struct xdr_stream *streamp, gfp_t gfp_flags) { struct nfs4_pnfs_ds_addr *da = NULL; char *buf, *portstr; @@ -434,7 +457,7 @@ decode_ds_addr(struct net *net, struct xdr_stream *streamp, gfp_t gfp_flags) INIT_LIST_HEAD(&da->da_node); - if (!rpc_pton(net, buf, portstr-buf, (struct sockaddr *)&da->da_addr, + if (!rpc_pton(buf, portstr-buf, (struct sockaddr *)&da->da_addr, sizeof(da->da_addr))) { dprintk("%s: error parsing address %s\n", __func__, buf); goto out_free_da; @@ -531,7 +554,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags) cnt = be32_to_cpup(p); dprintk("%s stripe count %d\n", __func__, cnt); if (cnt > NFS4_PNFS_MAX_STRIPE_CNT) { - printk(KERN_WARNING "NFS: %s: stripe count %d greater than " + printk(KERN_WARNING "%s: stripe count %d greater than " "supported maximum %d\n", __func__, cnt, NFS4_PNFS_MAX_STRIPE_CNT); goto out_err_free_scratch; @@ -562,7 +585,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags) num = be32_to_cpup(p); dprintk("%s ds_num %u\n", __func__, num); if (num > NFS4_PNFS_MAX_MULTI_CNT) { - printk(KERN_WARNING "NFS: %s: multipath count %d greater than " + printk(KERN_WARNING "%s: multipath count %d greater than " "supported maximum %d\n", __func__, num, NFS4_PNFS_MAX_MULTI_CNT); goto out_err_free_stripe_indices; @@ -570,7 +593,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags) /* validate stripe indices are all < num */ if (max_stripe_index >= num) { - printk(KERN_WARNING "NFS: %s: stripe index %u >= num ds %u\n", + printk(KERN_WARNING "%s: stripe index %u >= num ds %u\n", __func__, max_stripe_index, num); goto out_err_free_stripe_indices; } @@ -602,8 +625,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags) mp_count = be32_to_cpup(p); /* multipath count */ for (j = 0; j < mp_count; j++) { - da 
= decode_ds_addr(NFS_SERVER(ino)->nfs_client->net, - &stream, gfp_flags); + da = decode_ds_addr(&stream, gfp_flags); if (da) list_add_tail(&da->da_node, &dsaddrs); } @@ -664,7 +686,7 @@ decode_and_add_device(struct inode *inode, struct pnfs_device *dev, gfp_t gfp_fl new = decode_device(inode, dev, gfp_flags); if (!new) { - printk(KERN_WARNING "NFS: %s: Could not decode or add device\n", + printk(KERN_WARNING "%s: Could not decode or add device\n", __func__); return NULL; } @@ -813,7 +835,7 @@ nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx) struct nfs4_pnfs_ds *ds = dsaddr->ds_list[ds_idx]; if (ds == NULL) { - printk(KERN_ERR "NFS: %s: No data server for offset index %d\n", + printk(KERN_ERR "%s: No data server for offset index %d\n", __func__, ds_idx); return NULL; } diff --git a/trunk/fs/nfs/nfs4namespace.c b/trunk/fs/nfs/nfs4namespace.c index 9c8eca315f43..bb80c49b6533 100644 --- a/trunk/fs/nfs/nfs4namespace.c +++ b/trunk/fs/nfs/nfs4namespace.c @@ -94,14 +94,13 @@ static int nfs4_validate_fspath(struct dentry *dentry, } static size_t nfs_parse_server_name(char *string, size_t len, - struct sockaddr *sa, size_t salen, struct nfs_server *server) + struct sockaddr *sa, size_t salen) { - struct net *net = rpc_net_ns(server->client); ssize_t ret; - ret = rpc_pton(net, string, len, sa, salen); + ret = rpc_pton(string, len, sa, salen); if (ret == 0) { - ret = nfs_dns_resolve_name(net, string, len, sa, salen); + ret = nfs_dns_resolve_name(string, len, sa, salen); if (ret < 0) ret = 0; } @@ -138,8 +137,7 @@ static struct vfsmount *try_location(struct nfs_clone_mount *mountdata, continue; mountdata->addrlen = nfs_parse_server_name(buf->data, buf->len, - mountdata->addr, addr_bufsize, - NFS_SB(mountdata->sb)); + mountdata->addr, addr_bufsize); if (mountdata->addrlen == 0) continue; diff --git a/trunk/fs/nfs/nfs4proc.c b/trunk/fs/nfs/nfs4proc.c index e809d2305ebf..caf92d05c3a9 100644 --- a/trunk/fs/nfs/nfs4proc.c +++ b/trunk/fs/nfs/nfs4proc.c @@ -72,21 
+72,18 @@ #define NFS4_MAX_LOOP_ON_RECOVER (10) -static unsigned short max_session_slots = NFS4_DEF_SLOT_TABLE_SIZE; - struct nfs4_opendata; static int _nfs4_proc_open(struct nfs4_opendata *data); static int _nfs4_recover_proc_open(struct nfs4_opendata *data); static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *, struct nfs4_state *); -static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr); static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr); static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, struct nfs_fattr *fattr, struct iattr *sattr, struct nfs4_state *state); #ifdef CONFIG_NFS_V4_1 -static int nfs41_test_stateid(struct nfs_server *, nfs4_stateid *); -static int nfs41_free_stateid(struct nfs_server *, nfs4_stateid *); +static int nfs41_test_stateid(struct nfs_server *, struct nfs4_state *); +static int nfs41_free_stateid(struct nfs_server *, struct nfs4_state *); #endif /* Prevent leaks of NFSv4 errors into userland */ static int nfs4_map_errors(int err) @@ -262,28 +259,15 @@ static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struc { struct nfs_client *clp = server->nfs_client; struct nfs4_state *state = exception->state; - struct inode *inode = exception->inode; int ret = errorcode; exception->retry = 0; switch(errorcode) { case 0: return 0; - case -NFS4ERR_OPENMODE: - if (nfs_have_delegation(inode, FMODE_READ)) { - nfs_inode_return_delegation(inode); - exception->retry = 1; - return 0; - } - if (state == NULL) - break; - nfs4_schedule_stateid_recovery(server, state); - goto wait_on_recovery; - case -NFS4ERR_DELEG_REVOKED: case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_BAD_STATEID: - if (state != NULL) - nfs_remove_bad_delegation(state->inode); + case -NFS4ERR_OPENMODE: if (state == NULL) break; nfs4_schedule_stateid_recovery(server, state); @@ 
-376,14 +360,16 @@ static void renew_lease(const struct nfs_server *server, unsigned long timestamp * When updating highest_used_slotid there may be "holes" in the bitmap * so we need to scan down from highest_used_slotid to 0 looking for the now * highest slotid in use. - * If none found, highest_used_slotid is set to NFS4_NO_SLOT. + * If none found, highest_used_slotid is set to -1. * * Must be called while holding tbl->slot_tbl_lock */ static void -nfs4_free_slot(struct nfs4_slot_table *tbl, u32 slotid) +nfs4_free_slot(struct nfs4_slot_table *tbl, u8 free_slotid) { - BUG_ON(slotid >= NFS4_MAX_SLOT_TABLE); + int slotid = free_slotid; + + BUG_ON(slotid < 0 || slotid >= NFS4_MAX_SLOT_TABLE); /* clear used bit in bitmap */ __clear_bit(slotid, tbl->used_slots); @@ -393,16 +379,10 @@ nfs4_free_slot(struct nfs4_slot_table *tbl, u32 slotid) if (slotid < tbl->max_slots) tbl->highest_used_slotid = slotid; else - tbl->highest_used_slotid = NFS4_NO_SLOT; + tbl->highest_used_slotid = -1; } - dprintk("%s: slotid %u highest_used_slotid %d\n", __func__, - slotid, tbl->highest_used_slotid); -} - -bool nfs4_set_task_privileged(struct rpc_task *task, void *dummy) -{ - rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED); - return true; + dprintk("%s: free_slotid %u highest_used_slotid %d\n", __func__, + free_slotid, tbl->highest_used_slotid); } /* @@ -410,13 +390,16 @@ bool nfs4_set_task_privileged(struct rpc_task *task, void *dummy) */ static void nfs4_check_drain_fc_complete(struct nfs4_session *ses) { + struct rpc_task *task; + if (!test_bit(NFS4_SESSION_DRAINING, &ses->session_state)) { - rpc_wake_up_first(&ses->fc_slot_table.slot_tbl_waitq, - nfs4_set_task_privileged, NULL); + task = rpc_wake_up_next(&ses->fc_slot_table.slot_tbl_waitq); + if (task) + rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED); return; } - if (ses->fc_slot_table.highest_used_slotid != NFS4_NO_SLOT) + if (ses->fc_slot_table.highest_used_slotid != -1) return; dprintk("%s COMPLETE: Session Fore Channel 
Drained\n", __func__); @@ -429,7 +412,7 @@ static void nfs4_check_drain_fc_complete(struct nfs4_session *ses) void nfs4_check_drain_bc_complete(struct nfs4_session *ses) { if (!test_bit(NFS4_SESSION_DRAINING, &ses->session_state) || - ses->bc_slot_table.highest_used_slotid != NFS4_NO_SLOT) + ses->bc_slot_table.highest_used_slotid != -1) return; dprintk("%s COMPLETE: Session Back Channel Drained\n", __func__); complete(&ses->bc_slot_table.complete); @@ -524,25 +507,25 @@ static int nfs4_sequence_done(struct rpc_task *task, * nfs4_find_slot looks for an unset bit in the used_slots bitmap. * If found, we mark the slot as used, update the highest_used_slotid, * and respectively set up the sequence operation args. - * The slot number is returned if found, or NFS4_NO_SLOT otherwise. + * The slot number is returned if found, or NFS4_MAX_SLOT_TABLE otherwise. * * Note: must be called with under the slot_tbl_lock. */ -static u32 +static u8 nfs4_find_slot(struct nfs4_slot_table *tbl) { - u32 slotid; - u32 ret_id = NFS4_NO_SLOT; + int slotid; + u8 ret_id = NFS4_MAX_SLOT_TABLE; + BUILD_BUG_ON((u8)NFS4_MAX_SLOT_TABLE != (int)NFS4_MAX_SLOT_TABLE); - dprintk("--> %s used_slots=%04lx highest_used=%u max_slots=%u\n", + dprintk("--> %s used_slots=%04lx highest_used=%d max_slots=%d\n", __func__, tbl->used_slots[0], tbl->highest_used_slotid, tbl->max_slots); slotid = find_first_zero_bit(tbl->used_slots, tbl->max_slots); if (slotid >= tbl->max_slots) goto out; __set_bit(slotid, tbl->used_slots); - if (slotid > tbl->highest_used_slotid || - tbl->highest_used_slotid == NFS4_NO_SLOT) + if (slotid > tbl->highest_used_slotid) tbl->highest_used_slotid = slotid; ret_id = slotid; out: @@ -551,25 +534,15 @@ nfs4_find_slot(struct nfs4_slot_table *tbl) return ret_id; } -static void nfs41_init_sequence(struct nfs4_sequence_args *args, - struct nfs4_sequence_res *res, int cache_reply) -{ - args->sa_session = NULL; - args->sa_cache_this = 0; - if (cache_reply) - args->sa_cache_this = 1; - 
res->sr_session = NULL; - res->sr_slot = NULL; -} - int nfs41_setup_sequence(struct nfs4_session *session, struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, + int cache_reply, struct rpc_task *task) { struct nfs4_slot *slot; struct nfs4_slot_table *tbl; - u32 slotid; + u8 slotid; dprintk("--> %s\n", __func__); /* slot already allocated? */ @@ -597,7 +570,7 @@ int nfs41_setup_sequence(struct nfs4_session *session, } slotid = nfs4_find_slot(tbl); - if (slotid == NFS4_NO_SLOT) { + if (slotid == NFS4_MAX_SLOT_TABLE) { rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL); spin_unlock(&tbl->slot_tbl_lock); dprintk("<-- %s: no free slots\n", __func__); @@ -609,6 +582,7 @@ int nfs41_setup_sequence(struct nfs4_session *session, slot = tbl->slots + slotid; args->sa_session = session; args->sa_slotid = slotid; + args->sa_cache_this = cache_reply; dprintk("<-- %s slotid=%d seqid=%d\n", __func__, slotid, slot->seq_nr); @@ -628,19 +602,24 @@ EXPORT_SYMBOL_GPL(nfs41_setup_sequence); int nfs4_setup_sequence(const struct nfs_server *server, struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, + int cache_reply, struct rpc_task *task) { struct nfs4_session *session = nfs4_get_session(server); int ret = 0; - if (session == NULL) + if (session == NULL) { + args->sa_session = NULL; + res->sr_session = NULL; goto out; + } dprintk("--> %s clp %p session %p sr_slot %td\n", __func__, session->clp, session, res->sr_slot ? 
res->sr_slot - session->fc_slot_table.slots : -1); - ret = nfs41_setup_sequence(session, args, res, task); + ret = nfs41_setup_sequence(session, args, res, cache_reply, + task); out: dprintk("<-- %s status=%d\n", __func__, ret); return ret; @@ -650,6 +629,7 @@ struct nfs41_call_sync_data { const struct nfs_server *seq_server; struct nfs4_sequence_args *seq_args; struct nfs4_sequence_res *seq_res; + int cache_reply; }; static void nfs41_call_sync_prepare(struct rpc_task *task, void *calldata) @@ -659,7 +639,7 @@ static void nfs41_call_sync_prepare(struct rpc_task *task, void *calldata) dprintk("--> %s data->seq_server %p\n", __func__, data->seq_server); if (nfs4_setup_sequence(data->seq_server, data->seq_args, - data->seq_res, task)) + data->seq_res, data->cache_reply, task)) return; rpc_call_start(task); } @@ -677,12 +657,12 @@ static void nfs41_call_sync_done(struct rpc_task *task, void *calldata) nfs41_sequence_done(task, data->seq_res); } -static const struct rpc_call_ops nfs41_call_sync_ops = { +struct rpc_call_ops nfs41_call_sync_ops = { .rpc_call_prepare = nfs41_call_sync_prepare, .rpc_call_done = nfs41_call_sync_done, }; -static const struct rpc_call_ops nfs41_call_priv_sync_ops = { +struct rpc_call_ops nfs41_call_priv_sync_ops = { .rpc_call_prepare = nfs41_call_priv_sync_prepare, .rpc_call_done = nfs41_call_sync_done, }; @@ -692,6 +672,7 @@ static int nfs4_call_sync_sequence(struct rpc_clnt *clnt, struct rpc_message *msg, struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, + int cache_reply, int privileged) { int ret; @@ -700,6 +681,7 @@ static int nfs4_call_sync_sequence(struct rpc_clnt *clnt, .seq_server = server, .seq_args = args, .seq_res = res, + .cache_reply = cache_reply, }; struct rpc_task_setup task_setup = { .rpc_client = clnt, @@ -708,6 +690,7 @@ static int nfs4_call_sync_sequence(struct rpc_clnt *clnt, .callback_data = &data }; + res->sr_slot = NULL; if (privileged) task_setup.callback_ops = &nfs41_call_priv_sync_ops; task = 
rpc_run_task(&task_setup); @@ -727,17 +710,10 @@ int _nfs4_call_sync_session(struct rpc_clnt *clnt, struct nfs4_sequence_res *res, int cache_reply) { - nfs41_init_sequence(args, res, cache_reply); - return nfs4_call_sync_sequence(clnt, server, msg, args, res, 0); + return nfs4_call_sync_sequence(clnt, server, msg, args, res, cache_reply, 0); } #else -static inline -void nfs41_init_sequence(struct nfs4_sequence_args *args, - struct nfs4_sequence_res *res, int cache_reply) -{ -} - static int nfs4_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res) { @@ -752,7 +728,7 @@ int _nfs4_call_sync(struct rpc_clnt *clnt, struct nfs4_sequence_res *res, int cache_reply) { - nfs41_init_sequence(args, res, cache_reply); + args->sa_session = res->sr_session = NULL; return rpc_call_sync(clnt, msg, 0); } @@ -839,22 +815,20 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, p->o_arg.open_flags = flags; p->o_arg.fmode = fmode & (FMODE_READ|FMODE_WRITE); p->o_arg.clientid = server->nfs_client->cl_clientid; - p->o_arg.id = sp->so_seqid.owner_id; + p->o_arg.id = sp->so_owner_id.id; p->o_arg.name = &dentry->d_name; p->o_arg.server = server; p->o_arg.bitmask = server->attr_bitmask; p->o_arg.dir_bitmask = server->cache_consistency_bitmask; p->o_arg.claim = NFS4_OPEN_CLAIM_NULL; - if (attrs != NULL && attrs->ia_valid != 0) { - __be32 verf[2]; + if (flags & O_CREAT) { + u32 *s; p->o_arg.u.attrs = &p->attrs; memcpy(&p->attrs, attrs, sizeof(p->attrs)); - - verf[0] = jiffies; - verf[1] = current->pid; - memcpy(p->o_arg.u.verifier.data, verf, - sizeof(p->o_arg.u.verifier.data)); + s = (u32 *) p->o_arg.u.verifier.data; + s[0] = jiffies; + s[1] = current->pid; } p->c_arg.fh = &p->o_res.fh; p->c_arg.stateid = &p->o_res.stateid; @@ -904,7 +878,7 @@ static int can_open_cached(struct nfs4_state *state, fmode_t mode, int open_mode { int ret = 0; - if (open_mode & (O_EXCL|O_TRUNC)) + if (open_mode & O_EXCL) goto out; switch (mode & (FMODE_READ|FMODE_WRITE)) { case 
FMODE_READ: @@ -953,8 +927,8 @@ static void update_open_stateflags(struct nfs4_state *state, fmode_t fmode) static void nfs_set_open_stateid_locked(struct nfs4_state *state, nfs4_stateid *stateid, fmode_t fmode) { if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0) - nfs4_stateid_copy(&state->stateid, stateid); - nfs4_stateid_copy(&state->open_stateid, stateid); + memcpy(state->stateid.data, stateid->data, sizeof(state->stateid.data)); + memcpy(state->open_stateid.data, stateid->data, sizeof(state->open_stateid.data)); switch (fmode) { case FMODE_READ: set_bit(NFS_O_RDONLY_STATE, &state->flags); @@ -982,7 +956,7 @@ static void __update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_s */ write_seqlock(&state->seqlock); if (deleg_stateid != NULL) { - nfs4_stateid_copy(&state->stateid, deleg_stateid); + memcpy(state->stateid.data, deleg_stateid->data, sizeof(state->stateid.data)); set_bit(NFS_DELEGATED_STATE, &state->flags); } if (open_stateid != NULL) @@ -1013,7 +987,7 @@ static int update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_stat if (delegation == NULL) delegation = &deleg_cur->stateid; - else if (!nfs4_stateid_match(&deleg_cur->stateid, delegation)) + else if (memcmp(deleg_cur->stateid.data, delegation->data, NFS4_STATEID_SIZE) != 0) goto no_delegation_unlock; nfs_mark_delegation_referenced(deleg_cur); @@ -1052,7 +1026,7 @@ static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata) struct nfs4_state *state = opendata->state; struct nfs_inode *nfsi = NFS_I(state->inode); struct nfs_delegation *delegation; - int open_mode = opendata->o_arg.open_flags & (O_EXCL|O_TRUNC); + int open_mode = opendata->o_arg.open_flags & O_EXCL; fmode_t fmode = opendata->o_arg.fmode; nfs4_stateid stateid; int ret = -EAGAIN; @@ -1074,7 +1048,7 @@ static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata) break; } /* Save the delegation */ - nfs4_stateid_copy(&stateid, &delegation->stateid); + memcpy(stateid.data, 
delegation->stateid.data, sizeof(stateid.data)); rcu_read_unlock(); ret = nfs_may_open(state->inode, state->owner->so_cred, open_mode); if (ret != 0) @@ -1116,7 +1090,6 @@ static struct nfs4_state *nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data if (state == NULL) goto err_put_inode; if (data->o_res.delegation_type != 0) { - struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; int delegation_flags = 0; rcu_read_lock(); @@ -1128,7 +1101,7 @@ static struct nfs4_state *nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data pr_err_ratelimited("NFS: Broken NFSv4 server %s is " "returning a delegation for " "OPEN(CLAIM_DELEGATE_CUR)\n", - clp->cl_hostname); + NFS_CLIENT(inode)->cl_server); } else if ((delegation_flags & 1UL<inode, data->owner->so_cred, @@ -1237,10 +1210,10 @@ static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state * * Check if we need to update the current stateid. */ if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0 && - !nfs4_stateid_match(&state->stateid, &state->open_stateid)) { + memcmp(state->stateid.data, state->open_stateid.data, sizeof(state->stateid.data)) != 0) { write_seqlock(&state->seqlock); if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0) - nfs4_stateid_copy(&state->stateid, &state->open_stateid); + memcpy(state->stateid.data, state->open_stateid.data, sizeof(state->stateid.data)); write_sequnlock(&state->seqlock); } return 0; @@ -1309,7 +1282,8 @@ static int _nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs if (IS_ERR(opendata)) return PTR_ERR(opendata); opendata->o_arg.claim = NFS4_OPEN_CLAIM_DELEGATE_CUR; - nfs4_stateid_copy(&opendata->o_arg.u.delegation, stateid); + memcpy(opendata->o_arg.u.delegation.data, stateid->data, + sizeof(opendata->o_arg.u.delegation.data)); ret = nfs4_open_recover(opendata, state); nfs4_opendata_put(opendata); return ret; @@ -1345,11 +1319,8 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state * The show must go on: 
exit, but mark the * stateid as needing recovery. */ - case -NFS4ERR_DELEG_REVOKED: case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_BAD_STATEID: - nfs_inode_find_state_and_recover(state->inode, - stateid); nfs4_schedule_stateid_recovery(server, state); case -EKEYEXPIRED: /* @@ -1374,7 +1345,8 @@ static void nfs4_open_confirm_done(struct rpc_task *task, void *calldata) data->rpc_status = task->tk_status; if (data->rpc_status == 0) { - nfs4_stateid_copy(&data->o_res.stateid, &data->c_res.stateid); + memcpy(data->o_res.stateid.data, data->c_res.stateid.data, + sizeof(data->o_res.stateid.data)); nfs_confirm_seqid(&data->owner->so_seqid, 0); renew_lease(data->o_res.server, data->timestamp); data->rpc_done = 1; @@ -1468,7 +1440,7 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata) rcu_read_unlock(); } /* Update sequence id. */ - data->o_arg.id = sp->so_seqid.owner_id; + data->o_arg.id = sp->so_owner_id.id; data->o_arg.clientid = sp->so_server->nfs_client->cl_clientid; if (data->o_arg.claim == NFS4_OPEN_CLAIM_PREVIOUS) { task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR]; @@ -1477,7 +1449,7 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata) data->timestamp = jiffies; if (nfs4_setup_sequence(data->o_arg.server, &data->o_arg.seq_args, - &data->o_res.seq_res, task)) + &data->o_res.seq_res, 1, task)) return; rpc_call_start(task); return; @@ -1579,7 +1551,6 @@ static int nfs4_run_open_task(struct nfs4_opendata *data, int isrecover) }; int status; - nfs41_init_sequence(&o_arg->seq_args, &o_res->seq_res, 1); kref_get(&data->kref); data->rpc_done = 0; data->rpc_status = 0; @@ -1741,32 +1712,15 @@ static int nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *sta } #if defined(CONFIG_NFS_V4_1) -static int nfs41_check_expired_stateid(struct nfs4_state *state, nfs4_stateid *stateid, unsigned int flags) -{ - int status = NFS_OK; - struct nfs_server *server = NFS_SERVER(state->inode); - - if (state->flags & flags) { - 
status = nfs41_test_stateid(server, stateid); - if (status != NFS_OK) { - nfs41_free_stateid(server, stateid); - state->flags &= ~flags; - } - } - return status; -} - static int nfs41_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state) { - int deleg_status, open_status; - int deleg_flags = 1 << NFS_DELEGATED_STATE; - int open_flags = (1 << NFS_O_RDONLY_STATE) | (1 << NFS_O_WRONLY_STATE) | (1 << NFS_O_RDWR_STATE); - - deleg_status = nfs41_check_expired_stateid(state, &state->stateid, deleg_flags); - open_status = nfs41_check_expired_stateid(state, &state->open_stateid, open_flags); + int status; + struct nfs_server *server = NFS_SERVER(state->inode); - if ((deleg_status == NFS_OK) && (open_status == NFS_OK)) - return NFS_OK; + status = nfs41_test_stateid(server, state); + if (status == NFS_OK) + return 0; + nfs41_free_stateid(server, state); return nfs4_open_expired(sp, state); } #endif @@ -1800,8 +1754,7 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, fmode_t fmode /* Protect against reboot recovery conflicts */ status = -ENOMEM; - sp = nfs4_get_state_owner(server, cred, GFP_KERNEL); - if (sp == NULL) { + if (!(sp = nfs4_get_state_owner(server, cred))) { dprintk("nfs4_do_open: nfs4_get_state_owner failed!\n"); goto out_err; } @@ -1876,7 +1829,7 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry, * the user though... 
*/ if (status == -NFS4ERR_BAD_SEQID) { - pr_warn_ratelimited("NFS: v4 server %s " + printk(KERN_WARNING "NFS: v4 server %s " " returned a bad sequence-id error!\n", NFS_SERVER(dir)->nfs_client->cl_hostname); exception.retry = 1; @@ -1929,14 +1882,12 @@ static int _nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, nfs_fattr_init(fattr); - if (state != NULL) { - nfs4_select_rw_stateid(&arg.stateid, state, FMODE_WRITE, - current->files, current->tgid); - } else if (nfs4_copy_delegation_stateid(&arg.stateid, inode, - FMODE_WRITE)) { + if (nfs4_copy_delegation_stateid(&arg.stateid, inode)) { /* Use that stateid */ + } else if (state != NULL) { + nfs4_copy_stateid(&arg.stateid, state, current->files, current->tgid); } else - nfs4_stateid_copy(&arg.stateid, &zero_stateid); + memcpy(&arg.stateid, &zero_stateid, sizeof(arg.stateid)); status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1); if (status == 0 && state != NULL) @@ -1949,10 +1900,7 @@ static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, struct nfs4_state *state) { struct nfs_server *server = NFS_SERVER(inode); - struct nfs4_exception exception = { - .state = state, - .inode = inode, - }; + struct nfs4_exception exception = { }; int err; do { err = nfs4_handle_exception(server, @@ -2006,7 +1954,6 @@ static void nfs4_close_done(struct rpc_task *task, void *data) struct nfs4_state *state = calldata->state; struct nfs_server *server = NFS_SERVER(calldata->inode); - dprintk("%s: begin!\n", __func__); if (!nfs4_sequence_done(task, &calldata->res.seq_res)) return; /* hmm. 
we are done with the inode, and in the process of freeing @@ -2034,7 +1981,6 @@ static void nfs4_close_done(struct rpc_task *task, void *data) } nfs_release_seqid(calldata->arg.seqid); nfs_refresh_inode(calldata->inode, calldata->res.fattr); - dprintk("%s: done, ret = %d!\n", __func__, task->tk_status); } static void nfs4_close_prepare(struct rpc_task *task, void *data) @@ -2043,7 +1989,6 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) struct nfs4_state *state = calldata->state; int call_close = 0; - dprintk("%s: begin!\n", __func__); if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0) return; @@ -2068,7 +2013,7 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) if (!call_close) { /* Note: exit _without_ calling nfs4_close_done */ task->tk_action = NULL; - goto out; + return; } if (calldata->arg.fmode == 0) { @@ -2077,20 +2022,17 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) pnfs_roc_drain(calldata->inode, &calldata->roc_barrier)) { rpc_sleep_on(&NFS_SERVER(calldata->inode)->roc_rpcwaitq, task, NULL); - goto out; + return; } } nfs_fattr_init(calldata->res.fattr); calldata->timestamp = jiffies; if (nfs4_setup_sequence(NFS_SERVER(calldata->inode), - &calldata->arg.seq_args, - &calldata->res.seq_res, - task)) - goto out; + &calldata->arg.seq_args, &calldata->res.seq_res, + 1, task)) + return; rpc_call_start(task); -out: - dprintk("%s: done!\n", __func__); } static const struct rpc_call_ops nfs4_close_ops = { @@ -2132,7 +2074,6 @@ int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait, bool roc) calldata = kzalloc(sizeof(*calldata), gfp_mask); if (calldata == NULL) goto out; - nfs41_init_sequence(&calldata->arg.seq_args, &calldata->res.seq_res, 1); calldata->inode = state->inode; calldata->state = state; calldata->arg.fh = NFS_FH(state->inode); @@ -2241,7 +2182,6 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f server->cache_consistency_bitmask[0] &= 
FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE; server->cache_consistency_bitmask[1] &= FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY; server->acl_bitmask = res.acl_bitmask; - server->fh_expire_type = res.fh_expire_type; } return status; @@ -2363,6 +2303,7 @@ static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, return nfs4_map_errors(status); } +static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr); /* * Get locations and (maybe) other attributes of a referral. * Note that we'll actually follow the referral later when @@ -2479,10 +2420,6 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, } } - /* Deal with open(O_TRUNC) */ - if (sattr->ia_valid & ATTR_OPEN) - sattr->ia_valid &= ~(ATTR_MTIME|ATTR_CTIME|ATTR_OPEN); - status = nfs4_do_setattr(inode, cred, fattr, sattr, state); if (status == 0) nfs_setattr_update_inode(inode, sattr); @@ -2557,7 +2494,7 @@ static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry struct nfs_server *server = NFS_SERVER(inode); struct nfs4_accessargs args = { .fh = NFS_FH(inode), - .bitmask = server->cache_consistency_bitmask, + .bitmask = server->attr_bitmask, }; struct nfs4_accessres res = { .server = server, @@ -2775,18 +2712,8 @@ static void nfs4_proc_unlink_setup(struct rpc_message *msg, struct inode *dir) args->bitmask = server->cache_consistency_bitmask; res->server = server; + res->seq_res.sr_slot = NULL; msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVE]; - nfs41_init_sequence(&args->seq_args, &res->seq_res, 1); -} - -static void nfs4_proc_unlink_rpc_prepare(struct rpc_task *task, struct nfs_unlinkdata *data) -{ - if (nfs4_setup_sequence(NFS_SERVER(data->dir), - &data->args.seq_args, - &data->res.seq_res, - task)) - return; - rpc_call_start(task); } static int nfs4_proc_unlink_done(struct rpc_task *task, struct inode *dir) @@ -2811,17 +2738,6 @@ static void nfs4_proc_rename_setup(struct rpc_message *msg, struct inode *dir) msg->rpc_proc = 
&nfs4_procedures[NFSPROC4_CLNT_RENAME]; arg->bitmask = server->attr_bitmask; res->server = server; - nfs41_init_sequence(&arg->seq_args, &res->seq_res, 1); -} - -static void nfs4_proc_rename_rpc_prepare(struct rpc_task *task, struct nfs_renamedata *data) -{ - if (nfs4_setup_sequence(NFS_SERVER(data->old_dir), - &data->args.seq_args, - &data->res.seq_res, - task)) - return; - rpc_call_start(task); } static int nfs4_proc_rename_done(struct rpc_task *task, struct inode *old_dir, @@ -3316,17 +3232,6 @@ static void nfs4_proc_read_setup(struct nfs_read_data *data, struct rpc_message data->timestamp = jiffies; data->read_done_cb = nfs4_read_done_cb; msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ]; - nfs41_init_sequence(&data->args.seq_args, &data->res.seq_res, 0); -} - -static void nfs4_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data) -{ - if (nfs4_setup_sequence(NFS_SERVER(data->inode), - &data->args.seq_args, - &data->res.seq_res, - task)) - return; - rpc_call_start(task); } /* Reset the the nfs_read_data to send the read to the MDS. 
*/ @@ -3400,17 +3305,6 @@ static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_messag data->timestamp = jiffies; msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE]; - nfs41_init_sequence(&data->args.seq_args, &data->res.seq_res, 1); -} - -static void nfs4_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data) -{ - if (nfs4_setup_sequence(NFS_SERVER(data->inode), - &data->args.seq_args, - &data->res.seq_res, - task)) - return; - rpc_call_start(task); } static int nfs4_commit_done_cb(struct rpc_task *task, struct nfs_write_data *data) @@ -3445,7 +3339,6 @@ static void nfs4_proc_commit_setup(struct nfs_write_data *data, struct rpc_messa data->write_done_cb = nfs4_commit_done_cb; data->res.server = server; msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT]; - nfs41_init_sequence(&data->args.seq_args, &data->res.seq_res, 1); } struct nfs4_renewdata { @@ -3821,11 +3714,8 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, if (task->tk_status >= 0) return 0; switch(task->tk_status) { - case -NFS4ERR_DELEG_REVOKED: case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_BAD_STATEID: - if (state != NULL) - nfs_remove_bad_delegation(state->inode); case -NFS4ERR_OPENMODE: if (state == NULL) break; @@ -3874,16 +3764,6 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, return -EAGAIN; } -static void nfs4_construct_boot_verifier(struct nfs_client *clp, - nfs4_verifier *bootverf) -{ - __be32 verf[2]; - - verf[0] = htonl((u32)clp->cl_boot_time.tv_sec); - verf[1] = htonl((u32)clp->cl_boot_time.tv_nsec); - memcpy(bootverf->data, verf, sizeof(bootverf->data)); -} - int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, unsigned short port, struct rpc_cred *cred, struct nfs4_setclientid_res *res) @@ -3900,13 +3780,15 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, .rpc_resp = res, .rpc_cred = cred, }; + __be32 *p; int loop = 0; int status; - 
nfs4_construct_boot_verifier(clp, &sc_verifier); + p = (__be32*)sc_verifier.data; + *p++ = htonl((u32)clp->cl_boot_time.tv_sec); + *p = htonl((u32)clp->cl_boot_time.tv_nsec); for(;;) { - rcu_read_lock(); setclientid.sc_name_len = scnprintf(setclientid.sc_name, sizeof(setclientid.sc_name), "%s/%s %s %s %u", clp->cl_ipaddr, @@ -3923,7 +3805,6 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, setclientid.sc_uaddr_len = scnprintf(setclientid.sc_uaddr, sizeof(setclientid.sc_uaddr), "%s.%u.%u", clp->cl_ipaddr, port >> 8, port & 255); - rcu_read_unlock(); status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); if (status != -NFS4ERR_CLID_INUSE) @@ -4010,7 +3891,7 @@ static void nfs4_delegreturn_prepare(struct rpc_task *task, void *data) if (nfs4_setup_sequence(d_data->res.server, &d_data->args.seq_args, - &d_data->res.seq_res, task)) + &d_data->res.seq_res, 1, task)) return; rpc_call_start(task); } @@ -4044,12 +3925,11 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co data = kzalloc(sizeof(*data), GFP_NOFS); if (data == NULL) return -ENOMEM; - nfs41_init_sequence(&data->args.seq_args, &data->res.seq_res, 1); data->args.fhandle = &data->fh; data->args.stateid = &data->stateid; data->args.bitmask = server->attr_bitmask; nfs_copy_fh(&data->fh, NFS_FH(inode)); - nfs4_stateid_copy(&data->stateid, stateid); + memcpy(&data->stateid, stateid, sizeof(data->stateid)); data->res.fattr = &data->fattr; data->res.server = server; nfs_fattr_init(data->res.fattr); @@ -4136,7 +4016,7 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock if (status != 0) goto out; lsp = request->fl_u.nfs4_fl.owner; - arg.lock_owner.id = lsp->ls_seqid.owner_id; + arg.lock_owner.id = lsp->ls_id.id; arg.lock_owner.s_dev = server->s_dev; status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1); switch (status) { @@ -4232,8 +4112,9 @@ static void nfs4_locku_done(struct rpc_task *task, void 
*data) return; switch (task->tk_status) { case 0: - nfs4_stateid_copy(&calldata->lsp->ls_stateid, - &calldata->res.stateid); + memcpy(calldata->lsp->ls_stateid.data, + calldata->res.stateid.data, + sizeof(calldata->lsp->ls_stateid.data)); renew_lease(calldata->server, calldata->timestamp); break; case -NFS4ERR_BAD_STATEID: @@ -4261,7 +4142,7 @@ static void nfs4_locku_prepare(struct rpc_task *task, void *data) calldata->timestamp = jiffies; if (nfs4_setup_sequence(calldata->server, &calldata->arg.seq_args, - &calldata->res.seq_res, task)) + &calldata->res.seq_res, 1, task)) return; rpc_call_start(task); } @@ -4301,7 +4182,6 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl, return ERR_PTR(-ENOMEM); } - nfs41_init_sequence(&data->arg.seq_args, &data->res.seq_res, 1); msg.rpc_argp = &data->arg; msg.rpc_resp = &data->res; task_setup_data.callback_data = data; @@ -4381,7 +4261,7 @@ static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl, goto out_free_seqid; p->arg.lock_stateid = &lsp->ls_stateid; p->arg.lock_owner.clientid = server->nfs_client->cl_clientid; - p->arg.lock_owner.id = lsp->ls_seqid.owner_id; + p->arg.lock_owner.id = lsp->ls_id.id; p->arg.lock_owner.s_dev = server->s_dev; p->res.lock_seqid = p->arg.lock_seqid; p->lsp = lsp; @@ -4417,7 +4297,7 @@ static void nfs4_lock_prepare(struct rpc_task *task, void *calldata) data->timestamp = jiffies; if (nfs4_setup_sequence(data->server, &data->arg.seq_args, - &data->res.seq_res, task)) + &data->res.seq_res, 1, task)) return; rpc_call_start(task); dprintk("%s: done!, ret = %d\n", __func__, data->rpc_status); @@ -4446,7 +4326,8 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata) goto out; } if (data->rpc_status == 0) { - nfs4_stateid_copy(&data->lsp->ls_stateid, &data->res.stateid); + memcpy(data->lsp->ls_stateid.data, data->res.stateid.data, + sizeof(data->lsp->ls_stateid.data)); data->lsp->ls_flags |= NFS_LOCK_INITIALIZED; renew_lease(NFS_SERVER(data->ctx->dentry->d_inode), 
data->timestamp); } @@ -4534,7 +4415,6 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f data->arg.reclaim = NFS_LOCK_RECLAIM; task_setup_data.callback_ops = &nfs4_recover_lock_ops; } - nfs41_init_sequence(&data->arg.seq_args, &data->res.seq_res, 1); msg.rpc_argp = &data->arg; msg.rpc_resp = &data->res; task_setup_data.callback_data = data; @@ -4599,34 +4479,15 @@ static int nfs4_lock_expired(struct nfs4_state *state, struct file_lock *request } #if defined(CONFIG_NFS_V4_1) -static int nfs41_check_expired_locks(struct nfs4_state *state) -{ - int status, ret = NFS_OK; - struct nfs4_lock_state *lsp; - struct nfs_server *server = NFS_SERVER(state->inode); - - list_for_each_entry(lsp, &state->lock_states, ls_locks) { - if (lsp->ls_flags & NFS_LOCK_INITIALIZED) { - status = nfs41_test_stateid(server, &lsp->ls_stateid); - if (status != NFS_OK) { - nfs41_free_stateid(server, &lsp->ls_stateid); - lsp->ls_flags &= ~NFS_LOCK_INITIALIZED; - ret = status; - } - } - }; - - return ret; -} - static int nfs41_lock_expired(struct nfs4_state *state, struct file_lock *request) { - int status = NFS_OK; + int status; + struct nfs_server *server = NFS_SERVER(state->inode); - if (test_bit(LK_STATE_IN_USE, &state->flags)) - status = nfs41_check_expired_locks(state); + status = nfs41_test_stateid(server, state); if (status == NFS_OK) - return status; + return 0; + nfs41_free_stateid(server, state); return nfs4_lock_expired(state, request); } #endif @@ -4662,8 +4523,7 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock /* Note: we always want to sleep here! 
*/ request->fl_flags = fl_flags | FL_SLEEP; if (do_vfs_lock(request->fl_file, request) < 0) - printk(KERN_WARNING "NFS: %s: VFS is out of sync with lock " - "manager!\n", __func__); + printk(KERN_WARNING "%s: VFS is out of sync with lock manager!\n", __func__); out_unlock: up_read(&nfsi->rwsem); out: @@ -4673,9 +4533,7 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock static int nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request) { - struct nfs4_exception exception = { - .state = state, - }; + struct nfs4_exception exception = { }; int err; do { @@ -4745,8 +4603,8 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl) err = _nfs4_do_setlk(state, F_SETLK, fl, NFS_LOCK_NEW); switch (err) { default: - printk(KERN_ERR "NFS: %s: unhandled error " - "%d.\n", __func__, err); + printk(KERN_ERR "%s: unhandled error %d.\n", + __func__, err); case 0: case -ESTALE: goto out; @@ -4768,7 +4626,6 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl) * The show must go on: exit, but mark the * stateid as needing recovery. 
*/ - case -NFS4ERR_DELEG_REVOKED: case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_BAD_STATEID: case -NFS4ERR_OPENMODE: @@ -4798,44 +4655,33 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl) return err; } -struct nfs_release_lockowner_data { - struct nfs4_lock_state *lsp; - struct nfs_server *server; - struct nfs_release_lockowner_args args; -}; - static void nfs4_release_lockowner_release(void *calldata) { - struct nfs_release_lockowner_data *data = calldata; - nfs4_free_lock_state(data->server, data->lsp); kfree(calldata); } -static const struct rpc_call_ops nfs4_release_lockowner_ops = { +const struct rpc_call_ops nfs4_release_lockowner_ops = { .rpc_release = nfs4_release_lockowner_release, }; -int nfs4_release_lockowner(struct nfs4_lock_state *lsp) +void nfs4_release_lockowner(const struct nfs4_lock_state *lsp) { struct nfs_server *server = lsp->ls_state->owner->so_server; - struct nfs_release_lockowner_data *data; + struct nfs_release_lockowner_args *args; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RELEASE_LOCKOWNER], }; if (server->nfs_client->cl_mvops->minor_version != 0) - return -EINVAL; - data = kmalloc(sizeof(*data), GFP_NOFS); - if (!data) - return -ENOMEM; - data->lsp = lsp; - data->server = server; - data->args.lock_owner.clientid = server->nfs_client->cl_clientid; - data->args.lock_owner.id = lsp->ls_seqid.owner_id; - data->args.lock_owner.s_dev = server->s_dev; - msg.rpc_argp = &data->args; - rpc_call_async(server->client, &msg, 0, &nfs4_release_lockowner_ops, data); - return 0; + return; + args = kmalloc(sizeof(*args), GFP_NOFS); + if (!args) + return; + args->lock_owner.clientid = server->nfs_client->cl_clientid; + args->lock_owner.id = lsp->ls_id.id; + args->lock_owner.s_dev = server->s_dev; + msg.rpc_argp = args; + rpc_call_async(server->client, &msg, 0, &nfs4_release_lockowner_ops, args); } #define XATTR_NAME_NFSV4_ACL "system.nfs4_acl" @@ -4881,11 +4727,11 @@ static void 
nfs_fixup_referral_attributes(struct nfs_fattr *fattr) if (!(((fattr->valid & NFS_ATTR_FATTR_MOUNTED_ON_FILEID) || (fattr->valid & NFS_ATTR_FATTR_FILEID)) && (fattr->valid & NFS_ATTR_FATTR_FSID) && - (fattr->valid & NFS_ATTR_FATTR_V4_LOCATIONS))) + (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL))) return; fattr->valid |= NFS_ATTR_FATTR_TYPE | NFS_ATTR_FATTR_MODE | - NFS_ATTR_FATTR_NLINK | NFS_ATTR_FATTR_V4_REFERRAL; + NFS_ATTR_FATTR_NLINK; fattr->mode = S_IFDIR | S_IRUGO | S_IXUGO; fattr->nlink = 2; } @@ -4952,8 +4798,7 @@ static int _nfs4_proc_secinfo(struct inode *dir, const struct qstr *name, struct return status; } -static int nfs4_proc_secinfo(struct inode *dir, const struct qstr *name, - struct nfs4_secinfo_flavors *flavors) +int nfs4_proc_secinfo(struct inode *dir, const struct qstr *name, struct nfs4_secinfo_flavors *flavors) { struct nfs4_exception exception = { }; int err; @@ -5007,7 +4852,6 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred) { nfs4_verifier verifier; struct nfs41_exchange_id_args args = { - .verifier = &verifier, .client = clp, .flags = EXCHGID4_FLAG_SUPP_MOVED_REFER, }; @@ -5021,11 +4865,15 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred) .rpc_resp = &res, .rpc_cred = cred, }; + __be32 *p; dprintk("--> %s\n", __func__); BUG_ON(clp == NULL); - nfs4_construct_boot_verifier(clp, &verifier); + p = (u32 *)verifier.data; + *p++ = htonl((u32)clp->cl_boot_time.tv_sec); + *p = htonl((u32)clp->cl_boot_time.tv_nsec); + args.verifier = &verifier; args.id_len = scnprintf(args.id, sizeof(args.id), "%s/%s.%s/%u", @@ -5040,23 +4888,10 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred) goto out; } - res.impl_id = kzalloc(sizeof(struct nfs41_impl_id), GFP_KERNEL); - if (unlikely(!res.impl_id)) { - status = -ENOMEM; - goto out_server_scope; - } - status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); if (!status) status = 
nfs4_check_cl_exchange_flags(clp->cl_exchange_flags); - if (!status) { - /* use the most recent implementation id */ - kfree(clp->impl_id); - clp->impl_id = res.impl_id; - } else - kfree(res.impl_id); - if (!status) { if (clp->server_scope && !nfs41_same_server_scope(clp->server_scope, @@ -5073,16 +4908,8 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred) goto out; } } - -out_server_scope: kfree(res.server_scope); out: - if (clp->impl_id) - dprintk("%s: Server Implementation ID: " - "domain: %s, name: %s, date: %llu,%u\n", - __func__, clp->impl_id->domain, clp->impl_id->name, - clp->impl_id->date.seconds, - clp->impl_id->date.nseconds); dprintk("<-- %s status= %d\n", __func__, status); return status; } @@ -5106,7 +4933,7 @@ static void nfs4_get_lease_time_prepare(struct rpc_task *task, since we're invoked within one */ ret = nfs41_setup_sequence(data->clp->cl_session, &data->args->la_seq_args, - &data->res->lr_seq_res, task); + &data->res->lr_seq_res, 0, task); BUG_ON(ret == -EAGAIN); rpc_call_start(task); @@ -5139,7 +4966,7 @@ static void nfs4_get_lease_time_done(struct rpc_task *task, void *calldata) dprintk("<-- %s\n", __func__); } -static const struct rpc_call_ops nfs4_get_lease_time_ops = { +struct rpc_call_ops nfs4_get_lease_time_ops = { .rpc_call_prepare = nfs4_get_lease_time_prepare, .rpc_call_done = nfs4_get_lease_time_done, }; @@ -5170,7 +4997,6 @@ int nfs4_proc_get_lease_time(struct nfs_client *clp, struct nfs_fsinfo *fsinfo) }; int status; - nfs41_init_sequence(&args.la_seq_args, &res.lr_seq_res, 0); dprintk("--> %s\n", __func__); task = rpc_run_task(&task_setup); @@ -5287,13 +5113,13 @@ struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp) return NULL; tbl = &session->fc_slot_table; - tbl->highest_used_slotid = NFS4_NO_SLOT; + tbl->highest_used_slotid = -1; spin_lock_init(&tbl->slot_tbl_lock); rpc_init_priority_wait_queue(&tbl->slot_tbl_waitq, "ForeChannel Slot table"); init_completion(&tbl->complete); tbl = 
&session->bc_slot_table; - tbl->highest_used_slotid = NFS4_NO_SLOT; + tbl->highest_used_slotid = -1; spin_lock_init(&tbl->slot_tbl_lock); rpc_init_wait_queue(&tbl->slot_tbl_waitq, "BackChannel Slot table"); init_completion(&tbl->complete); @@ -5306,16 +5132,11 @@ struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp) void nfs4_destroy_session(struct nfs4_session *session) { - struct rpc_xprt *xprt; - nfs4_proc_destroy_session(session); - - rcu_read_lock(); - xprt = rcu_dereference(session->clp->cl_rpcclient->cl_xprt); - rcu_read_unlock(); dprintk("%s Destroy backchannel for xprt %p\n", - __func__, xprt); - xprt_destroy_backchannel(xprt, NFS41_BC_MIN_CALLBACKS); + __func__, session->clp->cl_rpcclient->cl_xprt); + xprt_destroy_backchannel(session->clp->cl_rpcclient->cl_xprt, + NFS41_BC_MIN_CALLBACKS); nfs4_destroy_slot_tables(session); kfree(session); } @@ -5343,7 +5164,7 @@ static void nfs4_init_channel_attrs(struct nfs41_create_session_args *args) args->fc_attrs.max_rqst_sz = mxrqst_sz; args->fc_attrs.max_resp_sz = mxresp_sz; args->fc_attrs.max_ops = NFS4_MAX_OPS; - args->fc_attrs.max_reqs = max_session_slots; + args->fc_attrs.max_reqs = session->clp->cl_rpcclient->cl_xprt->max_reqs; dprintk("%s: Fore Channel : max_rqst_sz=%u max_resp_sz=%u " "max_ops=%u max_reqs=%u\n", @@ -5383,8 +5204,6 @@ static int nfs4_verify_fore_channel_attrs(struct nfs41_create_session_args *args return -EINVAL; if (rcvd->max_reqs == 0) return -EINVAL; - if (rcvd->max_reqs > NFS4_MAX_SLOT_TABLE) - rcvd->max_reqs = NFS4_MAX_SLOT_TABLE; return 0; } @@ -5400,9 +5219,9 @@ static int nfs4_verify_back_channel_attrs(struct nfs41_create_session_args *args if (rcvd->max_resp_sz_cached > sent->max_resp_sz_cached) return -EINVAL; /* These would render the backchannel useless: */ - if (rcvd->max_ops != sent->max_ops) + if (rcvd->max_ops == 0) return -EINVAL; - if (rcvd->max_reqs != sent->max_reqs) + if (rcvd->max_reqs == 0) return -EINVAL; return 0; } @@ -5505,7 +5324,7 @@ int 
nfs4_proc_destroy_session(struct nfs4_session *session) if (status) printk(KERN_WARNING - "NFS: Got error %d from the server on DESTROY_SESSION. " + "Got error %d from the server on DESTROY_SESSION. " "Session has been destroyed regardless...\n", status); dprintk("<-- nfs4_proc_destroy_session\n"); @@ -5628,7 +5447,7 @@ static void nfs41_sequence_prepare(struct rpc_task *task, void *data) args = task->tk_msg.rpc_argp; res = task->tk_msg.rpc_resp; - if (nfs41_setup_sequence(clp->cl_session, args, res, task)) + if (nfs41_setup_sequence(clp->cl_session, args, res, 0, task)) return; rpc_call_start(task); } @@ -5660,7 +5479,6 @@ static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, struct rpc_ nfs_put_client(clp); return ERR_PTR(-ENOMEM); } - nfs41_init_sequence(&calldata->args, &calldata->res, 0); msg.rpc_argp = &calldata->args; msg.rpc_resp = &calldata->res; calldata->clp = clp; @@ -5722,7 +5540,7 @@ static void nfs4_reclaim_complete_prepare(struct rpc_task *task, void *data) rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED); if (nfs41_setup_sequence(calldata->clp->cl_session, &calldata->arg.seq_args, - &calldata->res.seq_res, task)) + &calldata->res.seq_res, 0, task)) return; rpc_call_start(task); @@ -5801,7 +5619,6 @@ static int nfs41_proc_reclaim_complete(struct nfs_client *clp) calldata->clp = clp; calldata->arg.one_fs = 0; - nfs41_init_sequence(&calldata->arg.seq_args, &calldata->res.seq_res, 0); msg.rpc_argp = &calldata->arg; msg.rpc_resp = &calldata->res; task_setup_data.callback_data = calldata; @@ -5833,7 +5650,7 @@ nfs4_layoutget_prepare(struct rpc_task *task, void *calldata) * to be no way to prevent it completely. 
*/ if (nfs4_setup_sequence(server, &lgp->args.seq_args, - &lgp->res.seq_res, task)) + &lgp->res.seq_res, 0, task)) return; if (pnfs_choose_layoutget_stateid(&lgp->args.stateid, NFS_I(lgp->args.inode)->layout, @@ -5908,7 +5725,6 @@ int nfs4_proc_layoutget(struct nfs4_layoutget *lgp) lgp->res.layoutp = &lgp->args.layout; lgp->res.seq_res.sr_slot = NULL; - nfs41_init_sequence(&lgp->args.seq_args, &lgp->res.seq_res, 0); task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) return PTR_ERR(task); @@ -5929,7 +5745,7 @@ nfs4_layoutreturn_prepare(struct rpc_task *task, void *calldata) dprintk("--> %s\n", __func__); if (nfs41_setup_sequence(lrp->clp->cl_session, &lrp->args.seq_args, - &lrp->res.seq_res, task)) + &lrp->res.seq_res, 0, task)) return; rpc_call_start(task); } @@ -5995,7 +5811,6 @@ int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp) int status; dprintk("--> %s\n", __func__); - nfs41_init_sequence(&lrp->args.seq_args, &lrp->res.seq_res, 1); task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) return PTR_ERR(task); @@ -6096,7 +5911,7 @@ static void nfs4_layoutcommit_prepare(struct rpc_task *task, void *calldata) struct nfs_server *server = NFS_SERVER(data->args.inode); if (nfs4_setup_sequence(server, &data->args.seq_args, - &data->res.seq_res, task)) + &data->res.seq_res, 1, task)) return; rpc_call_start(task); } @@ -6183,7 +5998,6 @@ nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data, bool sync) data->args.lastbytewritten, data->args.inode->i_ino); - nfs41_init_sequence(&data->args.seq_args, &data->res.seq_res, 1); task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) return PTR_ERR(task); @@ -6277,12 +6091,11 @@ nfs41_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle, out: return err; } - -static int _nfs41_test_stateid(struct nfs_server *server, nfs4_stateid *stateid) +static int _nfs41_test_stateid(struct nfs_server *server, struct nfs4_state *state) { int status; struct nfs41_test_stateid_args args = { - .stateid = 
stateid, + .stateid = &state->stateid, }; struct nfs41_test_stateid_res res; struct rpc_message msg = { @@ -6290,31 +6103,28 @@ static int _nfs41_test_stateid(struct nfs_server *server, nfs4_stateid *stateid) .rpc_argp = &args, .rpc_resp = &res, }; - - nfs41_init_sequence(&args.seq_args, &res.seq_res, 0); - status = nfs4_call_sync_sequence(server->client, server, &msg, &args.seq_args, &res.seq_res, 1); - - if (status == NFS_OK) - return res.status; + args.seq_args.sa_session = res.seq_res.sr_session = NULL; + status = nfs4_call_sync_sequence(server->client, server, &msg, &args.seq_args, &res.seq_res, 0, 1); return status; } -static int nfs41_test_stateid(struct nfs_server *server, nfs4_stateid *stateid) +static int nfs41_test_stateid(struct nfs_server *server, struct nfs4_state *state) { struct nfs4_exception exception = { }; int err; do { err = nfs4_handle_exception(server, - _nfs41_test_stateid(server, stateid), + _nfs41_test_stateid(server, state), &exception); } while (exception.retry); return err; } -static int _nfs4_free_stateid(struct nfs_server *server, nfs4_stateid *stateid) +static int _nfs4_free_stateid(struct nfs_server *server, struct nfs4_state *state) { + int status; struct nfs41_free_stateid_args args = { - .stateid = stateid, + .stateid = &state->stateid, }; struct nfs41_free_stateid_res res; struct rpc_message msg = { @@ -6323,46 +6133,25 @@ static int _nfs4_free_stateid(struct nfs_server *server, nfs4_stateid *stateid) .rpc_resp = &res, }; - nfs41_init_sequence(&args.seq_args, &res.seq_res, 0); - return nfs4_call_sync_sequence(server->client, server, &msg, &args.seq_args, &res.seq_res, 1); + args.seq_args.sa_session = res.seq_res.sr_session = NULL; + status = nfs4_call_sync_sequence(server->client, server, &msg, &args.seq_args, &res.seq_res, 0, 1); + return status; } -static int nfs41_free_stateid(struct nfs_server *server, nfs4_stateid *stateid) +static int nfs41_free_stateid(struct nfs_server *server, struct nfs4_state *state) { struct 
nfs4_exception exception = { }; int err; do { err = nfs4_handle_exception(server, - _nfs4_free_stateid(server, stateid), + _nfs4_free_stateid(server, state), &exception); } while (exception.retry); return err; } - -static bool nfs41_match_stateid(const nfs4_stateid *s1, - const nfs4_stateid *s2) -{ - if (memcmp(s1->other, s2->other, sizeof(s1->other)) != 0) - return false; - - if (s1->seqid == s2->seqid) - return true; - if (s1->seqid == 0 || s2->seqid == 0) - return true; - - return false; -} - #endif /* CONFIG_NFS_V4_1 */ -static bool nfs4_match_stateid(const nfs4_stateid *s1, - const nfs4_stateid *s2) -{ - return nfs4_stateid_match(s1, s2); -} - - -static const struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = { +struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = { .owner_flag_bit = NFS_OWNER_RECLAIM_REBOOT, .state_flag_bit = NFS_STATE_RECLAIM_REBOOT, .recover_open = nfs4_open_reclaim, @@ -6372,7 +6161,7 @@ static const struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = { }; #if defined(CONFIG_NFS_V4_1) -static const struct nfs4_state_recovery_ops nfs41_reboot_recovery_ops = { +struct nfs4_state_recovery_ops nfs41_reboot_recovery_ops = { .owner_flag_bit = NFS_OWNER_RECLAIM_REBOOT, .state_flag_bit = NFS_STATE_RECLAIM_REBOOT, .recover_open = nfs4_open_reclaim, @@ -6383,7 +6172,7 @@ static const struct nfs4_state_recovery_ops nfs41_reboot_recovery_ops = { }; #endif /* CONFIG_NFS_V4_1 */ -static const struct nfs4_state_recovery_ops nfs40_nograce_recovery_ops = { +struct nfs4_state_recovery_ops nfs40_nograce_recovery_ops = { .owner_flag_bit = NFS_OWNER_RECLAIM_NOGRACE, .state_flag_bit = NFS_STATE_RECLAIM_NOGRACE, .recover_open = nfs4_open_expired, @@ -6393,7 +6182,7 @@ static const struct nfs4_state_recovery_ops nfs40_nograce_recovery_ops = { }; #if defined(CONFIG_NFS_V4_1) -static const struct nfs4_state_recovery_ops nfs41_nograce_recovery_ops = { +struct nfs4_state_recovery_ops nfs41_nograce_recovery_ops = { .owner_flag_bit = 
NFS_OWNER_RECLAIM_NOGRACE, .state_flag_bit = NFS_STATE_RECLAIM_NOGRACE, .recover_open = nfs41_open_expired, @@ -6403,14 +6192,14 @@ static const struct nfs4_state_recovery_ops nfs41_nograce_recovery_ops = { }; #endif /* CONFIG_NFS_V4_1 */ -static const struct nfs4_state_maintenance_ops nfs40_state_renewal_ops = { +struct nfs4_state_maintenance_ops nfs40_state_renewal_ops = { .sched_state_renewal = nfs4_proc_async_renew, .get_state_renewal_cred_locked = nfs4_get_renew_cred_locked, .renew_lease = nfs4_proc_renew, }; #if defined(CONFIG_NFS_V4_1) -static const struct nfs4_state_maintenance_ops nfs41_state_renewal_ops = { +struct nfs4_state_maintenance_ops nfs41_state_renewal_ops = { .sched_state_renewal = nfs41_proc_async_sequence, .get_state_renewal_cred_locked = nfs4_get_machine_cred_locked, .renew_lease = nfs4_proc_sequence, @@ -6420,7 +6209,7 @@ static const struct nfs4_state_maintenance_ops nfs41_state_renewal_ops = { static const struct nfs4_minor_version_ops nfs_v4_0_minor_ops = { .minor_version = 0, .call_sync = _nfs4_call_sync, - .match_stateid = nfs4_match_stateid, + .validate_stateid = nfs4_validate_delegation_stateid, .find_root_sec = nfs4_find_root_sec, .reboot_recovery_ops = &nfs40_reboot_recovery_ops, .nograce_recovery_ops = &nfs40_nograce_recovery_ops, @@ -6431,7 +6220,7 @@ static const struct nfs4_minor_version_ops nfs_v4_0_minor_ops = { static const struct nfs4_minor_version_ops nfs_v4_1_minor_ops = { .minor_version = 1, .call_sync = _nfs4_call_sync_session, - .match_stateid = nfs41_match_stateid, + .validate_stateid = nfs41_validate_delegation_stateid, .find_root_sec = nfs41_find_root_sec, .reboot_recovery_ops = &nfs41_reboot_recovery_ops, .nograce_recovery_ops = &nfs41_nograce_recovery_ops, @@ -6471,11 +6260,9 @@ const struct nfs_rpc_ops nfs_v4_clientops = { .create = nfs4_proc_create, .remove = nfs4_proc_remove, .unlink_setup = nfs4_proc_unlink_setup, - .unlink_rpc_prepare = nfs4_proc_unlink_rpc_prepare, .unlink_done = nfs4_proc_unlink_done, 
.rename = nfs4_proc_rename, .rename_setup = nfs4_proc_rename_setup, - .rename_rpc_prepare = nfs4_proc_rename_rpc_prepare, .rename_done = nfs4_proc_rename_done, .link = nfs4_proc_link, .symlink = nfs4_proc_symlink, @@ -6489,10 +6276,8 @@ const struct nfs_rpc_ops nfs_v4_clientops = { .set_capabilities = nfs4_server_capabilities, .decode_dirent = nfs4_decode_dirent, .read_setup = nfs4_proc_read_setup, - .read_rpc_prepare = nfs4_proc_read_rpc_prepare, .read_done = nfs4_read_done, .write_setup = nfs4_proc_write_setup, - .write_rpc_prepare = nfs4_proc_write_rpc_prepare, .write_done = nfs4_write_done, .commit_setup = nfs4_proc_commit_setup, .commit_done = nfs4_commit_done, @@ -6516,10 +6301,6 @@ const struct xattr_handler *nfs4_xattr_handlers[] = { NULL }; -module_param(max_session_slots, ushort, 0644); -MODULE_PARM_DESC(max_session_slots, "Maximum number of outstanding NFSv4.1 " - "requests the client will negotiate"); - /* * Local variables: * c-basic-offset: 8 diff --git a/trunk/fs/nfs/nfs4state.c b/trunk/fs/nfs/nfs4state.c index 0f43414eb25a..45392032e7bd 100644 --- a/trunk/fs/nfs/nfs4state.c +++ b/trunk/fs/nfs/nfs4state.c @@ -146,11 +146,6 @@ struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp) struct rpc_cred *cred = NULL; struct nfs_server *server; - /* Use machine credentials if available */ - cred = nfs4_get_machine_cred_locked(clp); - if (cred != NULL) - goto out; - rcu_read_lock(); list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { cred = nfs4_get_renew_cred_server_locked(server); @@ -158,8 +153,6 @@ struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp) break; } rcu_read_unlock(); - -out: return cred; } @@ -197,29 +190,30 @@ static int nfs41_setup_state_renewal(struct nfs_client *clp) static void nfs4_end_drain_session(struct nfs_client *clp) { struct nfs4_session *ses = clp->cl_session; - struct nfs4_slot_table *tbl; int max_slots; if (ses == NULL) return; - tbl = &ses->fc_slot_table; if 
(test_and_clear_bit(NFS4_SESSION_DRAINING, &ses->session_state)) { - spin_lock(&tbl->slot_tbl_lock); - max_slots = tbl->max_slots; + spin_lock(&ses->fc_slot_table.slot_tbl_lock); + max_slots = ses->fc_slot_table.max_slots; while (max_slots--) { - if (rpc_wake_up_first(&tbl->slot_tbl_waitq, - nfs4_set_task_privileged, - NULL) == NULL) + struct rpc_task *task; + + task = rpc_wake_up_next(&ses->fc_slot_table. + slot_tbl_waitq); + if (!task) break; + rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED); } - spin_unlock(&tbl->slot_tbl_lock); + spin_unlock(&ses->fc_slot_table.slot_tbl_lock); } } static int nfs4_wait_on_slot_tbl(struct nfs4_slot_table *tbl) { spin_lock(&tbl->slot_tbl_lock); - if (tbl->highest_used_slotid != NFS4_NO_SLOT) { + if (tbl->highest_used_slotid != -1) { INIT_COMPLETION(tbl->complete); spin_unlock(&tbl->slot_tbl_lock); return wait_for_completion_interruptible(&tbl->complete); @@ -323,6 +317,62 @@ struct rpc_cred *nfs4_get_setclientid_cred(struct nfs_client *clp) return cred; } +static void nfs_alloc_unique_id_locked(struct rb_root *root, + struct nfs_unique_id *new, + __u64 minval, int maxbits) +{ + struct rb_node **p, *parent; + struct nfs_unique_id *pos; + __u64 mask = ~0ULL; + + if (maxbits < 64) + mask = (1ULL << maxbits) - 1ULL; + + /* Ensure distribution is more or less flat */ + get_random_bytes(&new->id, sizeof(new->id)); + new->id &= mask; + if (new->id < minval) + new->id += minval; +retry: + p = &root->rb_node; + parent = NULL; + + while (*p != NULL) { + parent = *p; + pos = rb_entry(parent, struct nfs_unique_id, rb_node); + + if (new->id < pos->id) + p = &(*p)->rb_left; + else if (new->id > pos->id) + p = &(*p)->rb_right; + else + goto id_exists; + } + rb_link_node(&new->rb_node, parent, p); + rb_insert_color(&new->rb_node, root); + return; +id_exists: + for (;;) { + new->id++; + if (new->id < minval || (new->id & mask) != new->id) { + new->id = minval; + break; + } + parent = rb_next(parent); + if (parent == NULL) + break; + pos = 
rb_entry(parent, struct nfs_unique_id, rb_node); + if (new->id < pos->id) + break; + } + goto retry; +} + +static void nfs_free_unique_id(struct rb_root *root, struct nfs_unique_id *id) +{ + rb_erase(&id->rb_node, root); +} + static struct nfs4_state_owner * nfs4_find_state_owner_locked(struct nfs_server *server, struct rpc_cred *cred) { @@ -355,7 +405,6 @@ nfs4_insert_state_owner_locked(struct nfs4_state_owner *new) struct rb_node **p = &server->state_owners.rb_node, *parent = NULL; struct nfs4_state_owner *sp; - int err; while (*p != NULL) { parent = *p; @@ -372,9 +421,8 @@ nfs4_insert_state_owner_locked(struct nfs4_state_owner *new) return sp; } } - err = ida_get_new(&server->openowner_id, &new->so_seqid.owner_id); - if (err) - return ERR_PTR(err); + nfs_alloc_unique_id_locked(&server->openowner_id, + &new->so_owner_id, 1, 64); rb_link_node(&new->so_server_node, parent, p); rb_insert_color(&new->so_server_node, &server->state_owners); return new; @@ -387,23 +435,7 @@ nfs4_remove_state_owner_locked(struct nfs4_state_owner *sp) if (!RB_EMPTY_NODE(&sp->so_server_node)) rb_erase(&sp->so_server_node, &server->state_owners); - ida_remove(&server->openowner_id, sp->so_seqid.owner_id); -} - -static void -nfs4_init_seqid_counter(struct nfs_seqid_counter *sc) -{ - sc->flags = 0; - sc->counter = 0; - spin_lock_init(&sc->lock); - INIT_LIST_HEAD(&sc->list); - rpc_init_wait_queue(&sc->wait, "Seqid_waitqueue"); -} - -static void -nfs4_destroy_seqid_counter(struct nfs_seqid_counter *sc) -{ - rpc_destroy_wait_queue(&sc->wait); + nfs_free_unique_id(&server->openowner_id, &sp->so_owner_id); } /* @@ -412,20 +444,19 @@ nfs4_destroy_seqid_counter(struct nfs_seqid_counter *sc) * */ static struct nfs4_state_owner * -nfs4_alloc_state_owner(struct nfs_server *server, - struct rpc_cred *cred, - gfp_t gfp_flags) +nfs4_alloc_state_owner(void) { struct nfs4_state_owner *sp; - sp = kzalloc(sizeof(*sp), gfp_flags); + sp = kzalloc(sizeof(*sp),GFP_NOFS); if (!sp) return NULL; - sp->so_server = 
server; - sp->so_cred = get_rpccred(cred); spin_lock_init(&sp->so_lock); INIT_LIST_HEAD(&sp->so_states); - nfs4_init_seqid_counter(&sp->so_seqid); + rpc_init_wait_queue(&sp->so_sequence.wait, "Seqid_waitqueue"); + sp->so_seqid.sequence = &sp->so_sequence; + spin_lock_init(&sp->so_sequence.lock); + INIT_LIST_HEAD(&sp->so_sequence.list); atomic_set(&sp->so_count, 1); INIT_LIST_HEAD(&sp->so_lru); return sp; @@ -447,7 +478,7 @@ nfs4_drop_state_owner(struct nfs4_state_owner *sp) static void nfs4_free_state_owner(struct nfs4_state_owner *sp) { - nfs4_destroy_seqid_counter(&sp->so_seqid); + rpc_destroy_wait_queue(&sp->so_sequence.wait); put_rpccred(sp->so_cred); kfree(sp); } @@ -485,8 +516,7 @@ static void nfs4_gc_state_owners(struct nfs_server *server) * Returns a pointer to an instantiated nfs4_state_owner struct, or NULL. */ struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, - struct rpc_cred *cred, - gfp_t gfp_flags) + struct rpc_cred *cred) { struct nfs_client *clp = server->nfs_client; struct nfs4_state_owner *sp, *new; @@ -496,18 +526,20 @@ struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, spin_unlock(&clp->cl_lock); if (sp != NULL) goto out; - new = nfs4_alloc_state_owner(server, cred, gfp_flags); + new = nfs4_alloc_state_owner(); if (new == NULL) goto out; - do { - if (ida_pre_get(&server->openowner_id, gfp_flags) == 0) - break; - spin_lock(&clp->cl_lock); - sp = nfs4_insert_state_owner_locked(new); - spin_unlock(&clp->cl_lock); - } while (sp == ERR_PTR(-EAGAIN)); - if (sp != new) - nfs4_free_state_owner(new); + new->so_server = server; + new->so_cred = cred; + spin_lock(&clp->cl_lock); + sp = nfs4_insert_state_owner_locked(new); + spin_unlock(&clp->cl_lock); + if (sp == new) + get_rpccred(cred); + else { + rpc_destroy_wait_queue(&new->so_sequence.wait); + kfree(new); + } out: nfs4_gc_state_owners(server); return sp; @@ -763,11 +795,15 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f 
{ struct nfs4_lock_state *lsp; struct nfs_server *server = state->owner->so_server; + struct nfs_client *clp = server->nfs_client; lsp = kzalloc(sizeof(*lsp), GFP_NOFS); if (lsp == NULL) return NULL; - nfs4_init_seqid_counter(&lsp->ls_seqid); + rpc_init_wait_queue(&lsp->ls_sequence.wait, "lock_seqid_waitqueue"); + spin_lock_init(&lsp->ls_sequence.lock); + INIT_LIST_HEAD(&lsp->ls_sequence.list); + lsp->ls_seqid.sequence = &lsp->ls_sequence; atomic_set(&lsp->ls_count, 1); lsp->ls_state = state; lsp->ls_owner.lo_type = type; @@ -779,22 +815,25 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f lsp->ls_owner.lo_u.posix_owner = fl_owner; break; default: - goto out_free; + kfree(lsp); + return NULL; } - lsp->ls_seqid.owner_id = ida_simple_get(&server->lockowner_id, 0, 0, GFP_NOFS); - if (lsp->ls_seqid.owner_id < 0) - goto out_free; + spin_lock(&clp->cl_lock); + nfs_alloc_unique_id_locked(&server->lockowner_id, &lsp->ls_id, 1, 64); + spin_unlock(&clp->cl_lock); INIT_LIST_HEAD(&lsp->ls_locks); return lsp; -out_free: - kfree(lsp); - return NULL; } -void nfs4_free_lock_state(struct nfs_server *server, struct nfs4_lock_state *lsp) +static void nfs4_free_lock_state(struct nfs4_lock_state *lsp) { - ida_simple_remove(&server->lockowner_id, lsp->ls_seqid.owner_id); - nfs4_destroy_seqid_counter(&lsp->ls_seqid); + struct nfs_server *server = lsp->ls_state->owner->so_server; + struct nfs_client *clp = server->nfs_client; + + spin_lock(&clp->cl_lock); + nfs_free_unique_id(&server->lockowner_id, &lsp->ls_id); + spin_unlock(&clp->cl_lock); + rpc_destroy_wait_queue(&lsp->ls_sequence.wait); kfree(lsp); } @@ -826,7 +865,7 @@ static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_ } spin_unlock(&state->state_lock); if (new != NULL) - nfs4_free_lock_state(state->owner->so_server, new); + nfs4_free_lock_state(new); return lsp; } @@ -847,11 +886,9 @@ void nfs4_put_lock_state(struct nfs4_lock_state *lsp) if 
(list_empty(&state->lock_states)) clear_bit(LK_STATE_IN_USE, &state->flags); spin_unlock(&state->state_lock); - if (lsp->ls_flags & NFS_LOCK_INITIALIZED) { - if (nfs4_release_lockowner(lsp) == 0) - return; - } - nfs4_free_lock_state(lsp->ls_state->owner->so_server, lsp); + if (lsp->ls_flags & NFS_LOCK_INITIALIZED) + nfs4_release_lockowner(lsp); + nfs4_free_lock_state(lsp); } static void nfs4_fl_copy_lock(struct file_lock *dst, struct file_lock *src) @@ -881,8 +918,7 @@ int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl) if (fl->fl_flags & FL_POSIX) lsp = nfs4_get_lock_state(state, fl->fl_owner, 0, NFS4_POSIX_LOCK_TYPE); else if (fl->fl_flags & FL_FLOCK) - lsp = nfs4_get_lock_state(state, NULL, fl->fl_pid, - NFS4_FLOCK_LOCK_TYPE); + lsp = nfs4_get_lock_state(state, 0, fl->fl_pid, NFS4_FLOCK_LOCK_TYPE); else return -EINVAL; if (lsp == NULL) @@ -892,49 +928,28 @@ int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl) return 0; } -static bool nfs4_copy_lock_stateid(nfs4_stateid *dst, struct nfs4_state *state, - fl_owner_t fl_owner, pid_t fl_pid) +/* + * Byte-range lock aware utility to initialize the stateid of read/write + * requests. 
+ */ +void nfs4_copy_stateid(nfs4_stateid *dst, struct nfs4_state *state, fl_owner_t fl_owner, pid_t fl_pid) { struct nfs4_lock_state *lsp; - bool ret = false; + int seq; + do { + seq = read_seqbegin(&state->seqlock); + memcpy(dst, &state->stateid, sizeof(*dst)); + } while (read_seqretry(&state->seqlock, seq)); if (test_bit(LK_STATE_IN_USE, &state->flags) == 0) - goto out; + return; spin_lock(&state->state_lock); lsp = __nfs4_find_lock_state(state, fl_owner, fl_pid, NFS4_ANY_LOCK_TYPE); - if (lsp != NULL && (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0) { - nfs4_stateid_copy(dst, &lsp->ls_stateid); - ret = true; - } + if (lsp != NULL && (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0) + memcpy(dst, &lsp->ls_stateid, sizeof(*dst)); spin_unlock(&state->state_lock); nfs4_put_lock_state(lsp); -out: - return ret; -} - -static void nfs4_copy_open_stateid(nfs4_stateid *dst, struct nfs4_state *state) -{ - int seq; - - do { - seq = read_seqbegin(&state->seqlock); - nfs4_stateid_copy(dst, &state->stateid); - } while (read_seqretry(&state->seqlock, seq)); -} - -/* - * Byte-range lock aware utility to initialize the stateid of read/write - * requests. 
- */ -void nfs4_select_rw_stateid(nfs4_stateid *dst, struct nfs4_state *state, - fmode_t fmode, fl_owner_t fl_owner, pid_t fl_pid) -{ - if (nfs4_copy_delegation_stateid(dst, state->inode, fmode)) - return; - if (nfs4_copy_lock_stateid(dst, state, fl_owner, fl_pid)) - return; - nfs4_copy_open_stateid(dst, state); } struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter, gfp_t gfp_mask) @@ -945,28 +960,20 @@ struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter, gfp_t gfp_m if (new != NULL) { new->sequence = counter; INIT_LIST_HEAD(&new->list); - new->task = NULL; } return new; } void nfs_release_seqid(struct nfs_seqid *seqid) { - struct nfs_seqid_counter *sequence; + if (!list_empty(&seqid->list)) { + struct rpc_sequence *sequence = seqid->sequence->sequence; - if (list_empty(&seqid->list)) - return; - sequence = seqid->sequence; - spin_lock(&sequence->lock); - list_del_init(&seqid->list); - if (!list_empty(&sequence->list)) { - struct nfs_seqid *next; - - next = list_first_entry(&sequence->list, - struct nfs_seqid, list); - rpc_wake_up_queued_task(&sequence->wait, next->task); + spin_lock(&sequence->lock); + list_del_init(&seqid->list); + spin_unlock(&sequence->lock); + rpc_wake_up(&sequence->wait); } - spin_unlock(&sequence->lock); } void nfs_free_seqid(struct nfs_seqid *seqid) @@ -982,14 +989,14 @@ void nfs_free_seqid(struct nfs_seqid *seqid) */ static void nfs_increment_seqid(int status, struct nfs_seqid *seqid) { - BUG_ON(list_first_entry(&seqid->sequence->list, struct nfs_seqid, list) != seqid); + BUG_ON(list_first_entry(&seqid->sequence->sequence->list, struct nfs_seqid, list) != seqid); switch (status) { case 0: break; case -NFS4ERR_BAD_SEQID: if (seqid->sequence->flags & NFS_SEQID_CONFIRMED) return; - pr_warn_ratelimited("NFS: v4 server returned a bad" + printk(KERN_WARNING "NFS: v4 server returned a bad" " sequence-id error on an" " unconfirmed sequence %p!\n", seqid->sequence); @@ -1033,11 +1040,10 @@ void 
nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid) int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task) { - struct nfs_seqid_counter *sequence = seqid->sequence; + struct rpc_sequence *sequence = seqid->sequence->sequence; int status = 0; spin_lock(&sequence->lock); - seqid->task = task; if (list_empty(&seqid->list)) list_add_tail(&seqid->list, &sequence->list); if (list_first_entry(&sequence->list, struct nfs_seqid, list) == seqid) @@ -1066,28 +1072,19 @@ static void nfs4_clear_state_manager_bit(struct nfs_client *clp) void nfs4_schedule_state_manager(struct nfs_client *clp) { struct task_struct *task; - char buf[INET6_ADDRSTRLEN + sizeof("-manager") + 1]; if (test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) != 0) return; __module_get(THIS_MODULE); atomic_inc(&clp->cl_count); - - /* The rcu_read_lock() is not strictly necessary, as the state - * manager is the only thread that ever changes the rpc_xprt - * after it's initialized. At this point, we're single threaded. 
*/ - rcu_read_lock(); - snprintf(buf, sizeof(buf), "%s-manager", - rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR)); - rcu_read_unlock(); - task = kthread_run(nfs4_run_state_manager, clp, buf); - if (IS_ERR(task)) { - printk(KERN_ERR "%s: kthread_run: %ld\n", - __func__, PTR_ERR(task)); - nfs4_clear_state_manager_bit(clp); - nfs_put_client(clp); - module_put(THIS_MODULE); - } + task = kthread_run(nfs4_run_state_manager, clp, "%s-manager", + rpc_peeraddr2str(clp->cl_rpcclient, + RPC_DISPLAY_ADDR)); + if (!IS_ERR(task)) + return; + nfs4_clear_state_manager_bit(clp); + nfs_put_client(clp); + module_put(THIS_MODULE); } /* @@ -1101,25 +1098,10 @@ void nfs4_schedule_lease_recovery(struct nfs_client *clp) set_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state); nfs4_schedule_state_manager(clp); } -EXPORT_SYMBOL_GPL(nfs4_schedule_lease_recovery); - -/* - * nfs40_handle_cb_pathdown - return all delegations after NFS4ERR_CB_PATH_DOWN - * @clp: client to process - * - * Set the NFS4CLNT_LEASE_EXPIRED state in order to force a - * resend of the SETCLIENTID and hence re-establish the - * callback channel. Then return all existing delegations. 
- */ -static void nfs40_handle_cb_pathdown(struct nfs_client *clp) -{ - set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); - nfs_expire_all_delegations(clp); -} void nfs4_schedule_path_down_recovery(struct nfs_client *clp) { - nfs40_handle_cb_pathdown(clp); + nfs_handle_cb_pathdown(clp); nfs4_schedule_state_manager(clp); } @@ -1150,37 +1132,11 @@ void nfs4_schedule_stateid_recovery(const struct nfs_server *server, struct nfs4 { struct nfs_client *clp = server->nfs_client; + if (test_and_clear_bit(NFS_DELEGATED_STATE, &state->flags)) + nfs_async_inode_return_delegation(state->inode, &state->stateid); nfs4_state_mark_reclaim_nograce(clp, state); nfs4_schedule_state_manager(clp); } -EXPORT_SYMBOL_GPL(nfs4_schedule_stateid_recovery); - -void nfs_inode_find_state_and_recover(struct inode *inode, - const nfs4_stateid *stateid) -{ - struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; - struct nfs_inode *nfsi = NFS_I(inode); - struct nfs_open_context *ctx; - struct nfs4_state *state; - bool found = false; - - spin_lock(&inode->i_lock); - list_for_each_entry(ctx, &nfsi->open_files, list) { - state = ctx->state; - if (state == NULL) - continue; - if (!test_bit(NFS_DELEGATED_STATE, &state->flags)) - continue; - if (!nfs4_stateid_match(&state->stateid, stateid)) - continue; - nfs4_state_mark_reclaim_nograce(clp, state); - found = true; - } - spin_unlock(&inode->i_lock); - if (found) - nfs4_schedule_state_manager(clp); -} - static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_recovery_ops *ops) { @@ -1219,8 +1175,8 @@ static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_ case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: goto out; default: - printk(KERN_ERR "NFS: %s: unhandled error %d. " - "Zeroing state\n", __func__, status); + printk(KERN_ERR "%s: unhandled error %d. 
Zeroing state\n", + __func__, status); case -ENOMEM: case -NFS4ERR_DENIED: case -NFS4ERR_RECLAIM_BAD: @@ -1266,9 +1222,8 @@ static int nfs4_reclaim_open_state(struct nfs4_state_owner *sp, const struct nfs spin_lock(&state->state_lock); list_for_each_entry(lock, &state->lock_states, ls_locks) { if (!(lock->ls_flags & NFS_LOCK_INITIALIZED)) - pr_warn_ratelimited("NFS: " - "%s: Lock reclaim " - "failed!\n", __func__); + printk("%s: Lock reclaim failed!\n", + __func__); } spin_unlock(&state->state_lock); nfs4_put_open_state(state); @@ -1277,8 +1232,8 @@ static int nfs4_reclaim_open_state(struct nfs4_state_owner *sp, const struct nfs } switch (status) { default: - printk(KERN_ERR "NFS: %s: unhandled error %d. " - "Zeroing state\n", __func__, status); + printk(KERN_ERR "%s: unhandled error %d. Zeroing state\n", + __func__, status); case -ENOENT: case -ENOMEM: case -ESTALE: @@ -1286,8 +1241,8 @@ static int nfs4_reclaim_open_state(struct nfs4_state_owner *sp, const struct nfs * Open state on this file cannot be recovered * All we can do is revert to using the zero stateid. 
*/ - memset(&state->stateid, 0, - sizeof(state->stateid)); + memset(state->stateid.data, 0, + sizeof(state->stateid.data)); /* Mark the file as being 'closed' */ state->state = 0; break; @@ -1465,7 +1420,7 @@ static int nfs4_recovery_handle_error(struct nfs_client *clp, int error) case 0: break; case -NFS4ERR_CB_PATH_DOWN: - nfs40_handle_cb_pathdown(clp); + nfs_handle_cb_pathdown(clp); break; case -NFS4ERR_NO_GRACE: nfs4_state_end_reclaim_reboot(clp); @@ -1846,7 +1801,7 @@ static void nfs4_state_manager(struct nfs_client *clp) } while (atomic_read(&clp->cl_count) > 1); return; out_error: - pr_warn_ratelimited("NFS: state manager failed on NFSv4 server %s" + printk(KERN_WARNING "Error: state manager failed on NFSv4 server %s" " with error %d\n", clp->cl_hostname, -status); nfs4_end_drain_session(clp); nfs4_clear_state_manager_bit(clp); diff --git a/trunk/fs/nfs/nfs4xdr.c b/trunk/fs/nfs/nfs4xdr.c index c74fdb114b48..33bd8d0f745d 100644 --- a/trunk/fs/nfs/nfs4xdr.c +++ b/trunk/fs/nfs/nfs4xdr.c @@ -44,8 +44,6 @@ #include #include #include -#include -#include #include #include #include @@ -273,12 +271,7 @@ static int nfs4_stat_to_errno(int); 1 /* flags */ + \ 1 /* spa_how */ + \ 0 /* SP4_NONE (for now) */ + \ - 1 /* implementation id array of size 1 */ + \ - 1 /* nii_domain */ + \ - XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + \ - 1 /* nii_name */ + \ - XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + \ - 3 /* nii_date */) + 1 /* zero implemetation id array */) #define decode_exchange_id_maxsz (op_decode_hdr_maxsz + \ 2 /* eir_clientid */ + \ 1 /* eir_sequenceid */ + \ @@ -291,11 +284,7 @@ static int nfs4_stat_to_errno(int); /* eir_server_scope<> */ \ XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + 1 + \ 1 /* eir_server_impl_id array length */ + \ - 1 /* nii_domain */ + \ - XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + \ - 1 /* nii_name */ + \ - XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + \ - 3 /* nii_date */) + 0 /* ignored eir_server_impl_id contents */) #define encode_channel_attrs_maxsz (6 + 1 /* ca_rdma_ird.len (0) */) #define 
decode_channel_attrs_maxsz (6 + \ 1 /* ca_rdma_ird.len */ + \ @@ -849,12 +838,6 @@ const u32 nfs41_maxread_overhead = ((RPC_MAX_HEADER_WITH_AUTH + XDR_UNIT); #endif /* CONFIG_NFS_V4_1 */ -static unsigned short send_implementation_id = 1; - -module_param(send_implementation_id, ushort, 0644); -MODULE_PARM_DESC(send_implementation_id, - "Send implementation ID with NFSv4.1 exchange_id"); - static const umode_t nfs_type2fmt[] = { [NF4BAD] = 0, [NF4REG] = S_IFREG, @@ -885,44 +868,15 @@ static __be32 *reserve_space(struct xdr_stream *xdr, size_t nbytes) return p; } -static void encode_opaque_fixed(struct xdr_stream *xdr, const void *buf, size_t len) -{ - __be32 *p; - - p = xdr_reserve_space(xdr, len); - xdr_encode_opaque_fixed(p, buf, len); -} - static void encode_string(struct xdr_stream *xdr, unsigned int len, const char *str) { __be32 *p; - p = reserve_space(xdr, 4 + len); + p = xdr_reserve_space(xdr, 4 + len); + BUG_ON(p == NULL); xdr_encode_opaque(p, str, len); } -static void encode_uint32(struct xdr_stream *xdr, u32 n) -{ - __be32 *p; - - p = reserve_space(xdr, 4); - *p = cpu_to_be32(n); -} - -static void encode_uint64(struct xdr_stream *xdr, u64 n) -{ - __be32 *p; - - p = reserve_space(xdr, 8); - xdr_encode_hyper(p, n); -} - -static void encode_nfs4_seqid(struct xdr_stream *xdr, - const struct nfs_seqid *seqid) -{ - encode_uint32(xdr, seqid->sequence->counter); -} - static void encode_compound_hdr(struct xdr_stream *xdr, struct rpc_rqst *req, struct compound_hdr *hdr) @@ -935,37 +889,28 @@ static void encode_compound_hdr(struct xdr_stream *xdr, * but this is not required as a MUST for the server to do so. 
*/ hdr->replen = RPC_REPHDRSIZE + auth->au_rslack + 3 + hdr->taglen; + dprintk("encode_compound: tag=%.*s\n", (int)hdr->taglen, hdr->tag); BUG_ON(hdr->taglen > NFS4_MAXTAGLEN); - encode_string(xdr, hdr->taglen, hdr->tag); - p = reserve_space(xdr, 8); + p = reserve_space(xdr, 4 + hdr->taglen + 8); + p = xdr_encode_opaque(p, hdr->tag, hdr->taglen); *p++ = cpu_to_be32(hdr->minorversion); hdr->nops_p = p; *p = cpu_to_be32(hdr->nops); } -static void encode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 op, - uint32_t replen, - struct compound_hdr *hdr) -{ - encode_uint32(xdr, op); - hdr->nops++; - hdr->replen += replen; -} - static void encode_nops(struct compound_hdr *hdr) { BUG_ON(hdr->nops > NFS4_MAX_OPS); *hdr->nops_p = htonl(hdr->nops); } -static void encode_nfs4_stateid(struct xdr_stream *xdr, const nfs4_stateid *stateid) -{ - encode_opaque_fixed(xdr, stateid, NFS4_STATEID_SIZE); -} - static void encode_nfs4_verifier(struct xdr_stream *xdr, const nfs4_verifier *verf) { - encode_opaque_fixed(xdr, verf->data, NFS4_VERIFIER_SIZE); + __be32 *p; + + p = xdr_reserve_space(xdr, NFS4_VERIFIER_SIZE); + BUG_ON(p == NULL); + xdr_encode_opaque_fixed(p, verf->data, NFS4_VERIFIER_SIZE); } static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const struct nfs_server *server) @@ -1078,7 +1023,7 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const * Now we backfill the bitmap and the attribute buffer length. 
*/ if (len != ((char *)p - (char *)q) + 4) { - printk(KERN_ERR "NFS: Attr length error, %u != %Zu\n", + printk(KERN_ERR "nfs: Attr length error, %u != %Zu\n", len, ((char *)p - (char *)q) + 4); BUG(); } @@ -1092,33 +1037,46 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const static void encode_access(struct xdr_stream *xdr, u32 access, struct compound_hdr *hdr) { - encode_op_hdr(xdr, OP_ACCESS, decode_access_maxsz, hdr); - encode_uint32(xdr, access); + __be32 *p; + + p = reserve_space(xdr, 8); + *p++ = cpu_to_be32(OP_ACCESS); + *p = cpu_to_be32(access); + hdr->nops++; + hdr->replen += decode_access_maxsz; } static void encode_close(struct xdr_stream *xdr, const struct nfs_closeargs *arg, struct compound_hdr *hdr) { - encode_op_hdr(xdr, OP_CLOSE, decode_close_maxsz, hdr); - encode_nfs4_seqid(xdr, arg->seqid); - encode_nfs4_stateid(xdr, arg->stateid); + __be32 *p; + + p = reserve_space(xdr, 8+NFS4_STATEID_SIZE); + *p++ = cpu_to_be32(OP_CLOSE); + *p++ = cpu_to_be32(arg->seqid->sequence->counter); + xdr_encode_opaque_fixed(p, arg->stateid->data, NFS4_STATEID_SIZE); + hdr->nops++; + hdr->replen += decode_close_maxsz; } static void encode_commit(struct xdr_stream *xdr, const struct nfs_writeargs *args, struct compound_hdr *hdr) { __be32 *p; - encode_op_hdr(xdr, OP_COMMIT, decode_commit_maxsz, hdr); - p = reserve_space(xdr, 12); + p = reserve_space(xdr, 16); + *p++ = cpu_to_be32(OP_COMMIT); p = xdr_encode_hyper(p, args->offset); *p = cpu_to_be32(args->count); + hdr->nops++; + hdr->replen += decode_commit_maxsz; } static void encode_create(struct xdr_stream *xdr, const struct nfs4_create_arg *create, struct compound_hdr *hdr) { __be32 *p; - encode_op_hdr(xdr, OP_CREATE, decode_create_maxsz, hdr); - encode_uint32(xdr, create->ftype); + p = reserve_space(xdr, 8); + *p++ = cpu_to_be32(OP_CREATE); + *p = cpu_to_be32(create->ftype); switch (create->ftype) { case NF4LNK: @@ -1138,6 +1096,9 @@ static void encode_create(struct xdr_stream *xdr, const 
struct nfs4_create_arg * } encode_string(xdr, create->name->len, create->name->name); + hdr->nops++; + hdr->replen += decode_create_maxsz; + encode_attrs(xdr, create->attrs, create->server); } @@ -1145,21 +1106,25 @@ static void encode_getattr_one(struct xdr_stream *xdr, uint32_t bitmap, struct c { __be32 *p; - encode_op_hdr(xdr, OP_GETATTR, decode_getattr_maxsz, hdr); - p = reserve_space(xdr, 8); + p = reserve_space(xdr, 12); + *p++ = cpu_to_be32(OP_GETATTR); *p++ = cpu_to_be32(1); *p = cpu_to_be32(bitmap); + hdr->nops++; + hdr->replen += decode_getattr_maxsz; } static void encode_getattr_two(struct xdr_stream *xdr, uint32_t bm0, uint32_t bm1, struct compound_hdr *hdr) { __be32 *p; - encode_op_hdr(xdr, OP_GETATTR, decode_getattr_maxsz, hdr); - p = reserve_space(xdr, 12); + p = reserve_space(xdr, 16); + *p++ = cpu_to_be32(OP_GETATTR); *p++ = cpu_to_be32(2); *p++ = cpu_to_be32(bm0); *p = cpu_to_be32(bm1); + hdr->nops++; + hdr->replen += decode_getattr_maxsz; } static void @@ -1169,7 +1134,8 @@ encode_getattr_three(struct xdr_stream *xdr, { __be32 *p; - encode_op_hdr(xdr, OP_GETATTR, decode_getattr_maxsz, hdr); + p = reserve_space(xdr, 4); + *p = cpu_to_be32(OP_GETATTR); if (bm2) { p = reserve_space(xdr, 16); *p++ = cpu_to_be32(3); @@ -1186,6 +1152,8 @@ encode_getattr_three(struct xdr_stream *xdr, *p++ = cpu_to_be32(1); *p = cpu_to_be32(bm0); } + hdr->nops++; + hdr->replen += decode_getattr_maxsz; } static void encode_getfattr(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr) @@ -1211,13 +1179,23 @@ static void encode_fs_locations(struct xdr_stream *xdr, const u32* bitmask, stru static void encode_getfh(struct xdr_stream *xdr, struct compound_hdr *hdr) { - encode_op_hdr(xdr, OP_GETFH, decode_getfh_maxsz, hdr); + __be32 *p; + + p = reserve_space(xdr, 4); + *p = cpu_to_be32(OP_GETFH); + hdr->nops++; + hdr->replen += decode_getfh_maxsz; } static void encode_link(struct xdr_stream *xdr, const struct qstr *name, struct compound_hdr *hdr) { - 
encode_op_hdr(xdr, OP_LINK, decode_link_maxsz, hdr); - encode_string(xdr, name->len, name->name); + __be32 *p; + + p = reserve_space(xdr, 8 + name->len); + *p++ = cpu_to_be32(OP_LINK); + xdr_encode_opaque(p, name->name, name->len); + hdr->nops++; + hdr->replen += decode_link_maxsz; } static inline int nfs4_lock_type(struct file_lock *fl, int block) @@ -1254,60 +1232,79 @@ static void encode_lock(struct xdr_stream *xdr, const struct nfs_lock_args *args { __be32 *p; - encode_op_hdr(xdr, OP_LOCK, decode_lock_maxsz, hdr); - p = reserve_space(xdr, 28); + p = reserve_space(xdr, 32); + *p++ = cpu_to_be32(OP_LOCK); *p++ = cpu_to_be32(nfs4_lock_type(args->fl, args->block)); *p++ = cpu_to_be32(args->reclaim); p = xdr_encode_hyper(p, args->fl->fl_start); p = xdr_encode_hyper(p, nfs4_lock_length(args->fl)); *p = cpu_to_be32(args->new_lock_owner); if (args->new_lock_owner){ - encode_nfs4_seqid(xdr, args->open_seqid); - encode_nfs4_stateid(xdr, args->open_stateid); - encode_nfs4_seqid(xdr, args->lock_seqid); + p = reserve_space(xdr, 4+NFS4_STATEID_SIZE+4); + *p++ = cpu_to_be32(args->open_seqid->sequence->counter); + p = xdr_encode_opaque_fixed(p, args->open_stateid->data, NFS4_STATEID_SIZE); + *p++ = cpu_to_be32(args->lock_seqid->sequence->counter); encode_lockowner(xdr, &args->lock_owner); } else { - encode_nfs4_stateid(xdr, args->lock_stateid); - encode_nfs4_seqid(xdr, args->lock_seqid); + p = reserve_space(xdr, NFS4_STATEID_SIZE+4); + p = xdr_encode_opaque_fixed(p, args->lock_stateid->data, NFS4_STATEID_SIZE); + *p = cpu_to_be32(args->lock_seqid->sequence->counter); } + hdr->nops++; + hdr->replen += decode_lock_maxsz; } static void encode_lockt(struct xdr_stream *xdr, const struct nfs_lockt_args *args, struct compound_hdr *hdr) { __be32 *p; - encode_op_hdr(xdr, OP_LOCKT, decode_lockt_maxsz, hdr); - p = reserve_space(xdr, 20); + p = reserve_space(xdr, 24); + *p++ = cpu_to_be32(OP_LOCKT); *p++ = cpu_to_be32(nfs4_lock_type(args->fl, 0)); p = xdr_encode_hyper(p, 
args->fl->fl_start); p = xdr_encode_hyper(p, nfs4_lock_length(args->fl)); encode_lockowner(xdr, &args->lock_owner); + hdr->nops++; + hdr->replen += decode_lockt_maxsz; } static void encode_locku(struct xdr_stream *xdr, const struct nfs_locku_args *args, struct compound_hdr *hdr) { __be32 *p; - encode_op_hdr(xdr, OP_LOCKU, decode_locku_maxsz, hdr); - encode_uint32(xdr, nfs4_lock_type(args->fl, 0)); - encode_nfs4_seqid(xdr, args->seqid); - encode_nfs4_stateid(xdr, args->stateid); - p = reserve_space(xdr, 16); + p = reserve_space(xdr, 12+NFS4_STATEID_SIZE+16); + *p++ = cpu_to_be32(OP_LOCKU); + *p++ = cpu_to_be32(nfs4_lock_type(args->fl, 0)); + *p++ = cpu_to_be32(args->seqid->sequence->counter); + p = xdr_encode_opaque_fixed(p, args->stateid->data, NFS4_STATEID_SIZE); p = xdr_encode_hyper(p, args->fl->fl_start); xdr_encode_hyper(p, nfs4_lock_length(args->fl)); + hdr->nops++; + hdr->replen += decode_locku_maxsz; } static void encode_release_lockowner(struct xdr_stream *xdr, const struct nfs_lowner *lowner, struct compound_hdr *hdr) { - encode_op_hdr(xdr, OP_RELEASE_LOCKOWNER, decode_release_lockowner_maxsz, hdr); + __be32 *p; + + p = reserve_space(xdr, 4); + *p = cpu_to_be32(OP_RELEASE_LOCKOWNER); encode_lockowner(xdr, lowner); + hdr->nops++; + hdr->replen += decode_release_lockowner_maxsz; } static void encode_lookup(struct xdr_stream *xdr, const struct qstr *name, struct compound_hdr *hdr) { - encode_op_hdr(xdr, OP_LOOKUP, decode_lookup_maxsz, hdr); - encode_string(xdr, name->len, name->name); + int len = name->len; + __be32 *p; + + p = reserve_space(xdr, 8 + len); + *p++ = cpu_to_be32(OP_LOOKUP); + xdr_encode_opaque(p, name->name, len); + hdr->nops++; + hdr->replen += decode_lookup_maxsz; } static void encode_share_access(struct xdr_stream *xdr, fmode_t fmode) @@ -1338,7 +1335,9 @@ static inline void encode_openhdr(struct xdr_stream *xdr, const struct nfs_opena * opcode 4, seqid 4, share_access 4, share_deny 4, clientid 8, ownerlen 4, * owner 4 = 32 */ - 
encode_nfs4_seqid(xdr, arg->seqid); + p = reserve_space(xdr, 8); + *p++ = cpu_to_be32(OP_OPEN); + *p = cpu_to_be32(arg->seqid->sequence->counter); encode_share_access(xdr, arg->fmode); p = reserve_space(xdr, 32); p = xdr_encode_hyper(p, arg->clientid); @@ -1438,15 +1437,14 @@ static inline void encode_claim_delegate_cur(struct xdr_stream *xdr, const struc { __be32 *p; - p = reserve_space(xdr, 4); - *p = cpu_to_be32(NFS4_OPEN_CLAIM_DELEGATE_CUR); - encode_nfs4_stateid(xdr, stateid); + p = reserve_space(xdr, 4+NFS4_STATEID_SIZE); + *p++ = cpu_to_be32(NFS4_OPEN_CLAIM_DELEGATE_CUR); + xdr_encode_opaque_fixed(p, stateid->data, NFS4_STATEID_SIZE); encode_string(xdr, name->len, name->name); } static void encode_open(struct xdr_stream *xdr, const struct nfs_openargs *arg, struct compound_hdr *hdr) { - encode_op_hdr(xdr, OP_OPEN, decode_open_maxsz, hdr); encode_openhdr(xdr, arg); encode_opentype(xdr, arg); switch (arg->claim) { @@ -1462,64 +1460,88 @@ static void encode_open(struct xdr_stream *xdr, const struct nfs_openargs *arg, default: BUG(); } + hdr->nops++; + hdr->replen += decode_open_maxsz; } static void encode_open_confirm(struct xdr_stream *xdr, const struct nfs_open_confirmargs *arg, struct compound_hdr *hdr) { - encode_op_hdr(xdr, OP_OPEN_CONFIRM, decode_open_confirm_maxsz, hdr); - encode_nfs4_stateid(xdr, arg->stateid); - encode_nfs4_seqid(xdr, arg->seqid); + __be32 *p; + + p = reserve_space(xdr, 4+NFS4_STATEID_SIZE+4); + *p++ = cpu_to_be32(OP_OPEN_CONFIRM); + p = xdr_encode_opaque_fixed(p, arg->stateid->data, NFS4_STATEID_SIZE); + *p = cpu_to_be32(arg->seqid->sequence->counter); + hdr->nops++; + hdr->replen += decode_open_confirm_maxsz; } static void encode_open_downgrade(struct xdr_stream *xdr, const struct nfs_closeargs *arg, struct compound_hdr *hdr) { - encode_op_hdr(xdr, OP_OPEN_DOWNGRADE, decode_open_downgrade_maxsz, hdr); - encode_nfs4_stateid(xdr, arg->stateid); - encode_nfs4_seqid(xdr, arg->seqid); + __be32 *p; + + p = reserve_space(xdr, 
4+NFS4_STATEID_SIZE+4); + *p++ = cpu_to_be32(OP_OPEN_DOWNGRADE); + p = xdr_encode_opaque_fixed(p, arg->stateid->data, NFS4_STATEID_SIZE); + *p = cpu_to_be32(arg->seqid->sequence->counter); encode_share_access(xdr, arg->fmode); + hdr->nops++; + hdr->replen += decode_open_downgrade_maxsz; } static void encode_putfh(struct xdr_stream *xdr, const struct nfs_fh *fh, struct compound_hdr *hdr) { - encode_op_hdr(xdr, OP_PUTFH, decode_putfh_maxsz, hdr); - encode_string(xdr, fh->size, fh->data); + int len = fh->size; + __be32 *p; + + p = reserve_space(xdr, 8 + len); + *p++ = cpu_to_be32(OP_PUTFH); + xdr_encode_opaque(p, fh->data, len); + hdr->nops++; + hdr->replen += decode_putfh_maxsz; } static void encode_putrootfh(struct xdr_stream *xdr, struct compound_hdr *hdr) { - encode_op_hdr(xdr, OP_PUTROOTFH, decode_putrootfh_maxsz, hdr); + __be32 *p; + + p = reserve_space(xdr, 4); + *p = cpu_to_be32(OP_PUTROOTFH); + hdr->nops++; + hdr->replen += decode_putrootfh_maxsz; } -static void encode_open_stateid(struct xdr_stream *xdr, - const struct nfs_open_context *ctx, - const struct nfs_lock_context *l_ctx, - fmode_t fmode, - int zero_seqid) +static void encode_stateid(struct xdr_stream *xdr, const struct nfs_open_context *ctx, const struct nfs_lock_context *l_ctx, int zero_seqid) { nfs4_stateid stateid; + __be32 *p; + p = reserve_space(xdr, NFS4_STATEID_SIZE); if (ctx->state != NULL) { - nfs4_select_rw_stateid(&stateid, ctx->state, - fmode, l_ctx->lockowner, l_ctx->pid); + nfs4_copy_stateid(&stateid, ctx->state, l_ctx->lockowner, l_ctx->pid); if (zero_seqid) - stateid.seqid = 0; - encode_nfs4_stateid(xdr, &stateid); + stateid.stateid.seqid = 0; + xdr_encode_opaque_fixed(p, stateid.data, NFS4_STATEID_SIZE); } else - encode_nfs4_stateid(xdr, &zero_stateid); + xdr_encode_opaque_fixed(p, zero_stateid.data, NFS4_STATEID_SIZE); } static void encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args, struct compound_hdr *hdr) { __be32 *p; - encode_op_hdr(xdr, OP_READ, 
decode_read_maxsz, hdr); - encode_open_stateid(xdr, args->context, args->lock_context, - FMODE_READ, hdr->minorversion); + p = reserve_space(xdr, 4); + *p = cpu_to_be32(OP_READ); + + encode_stateid(xdr, args->context, args->lock_context, + hdr->minorversion); p = reserve_space(xdr, 12); p = xdr_encode_hyper(p, args->offset); *p = cpu_to_be32(args->count); + hdr->nops++; + hdr->replen += decode_read_maxsz; } static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg *readdir, struct rpc_rqst *req, struct compound_hdr *hdr) @@ -1529,7 +1551,7 @@ static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg FATTR4_WORD1_MOUNTED_ON_FILEID, }; uint32_t dircount = readdir->count >> 1; - __be32 *p, verf[2]; + __be32 *p; if (readdir->plus) { attrs[0] |= FATTR4_WORD0_TYPE|FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE| @@ -1544,54 +1566,80 @@ static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg if (!(readdir->bitmask[1] & FATTR4_WORD1_MOUNTED_ON_FILEID)) attrs[0] |= FATTR4_WORD0_FILEID; - encode_op_hdr(xdr, OP_READDIR, decode_readdir_maxsz, hdr); - encode_uint64(xdr, readdir->cookie); - encode_nfs4_verifier(xdr, &readdir->verifier); - p = reserve_space(xdr, 20); + p = reserve_space(xdr, 12+NFS4_VERIFIER_SIZE+20); + *p++ = cpu_to_be32(OP_READDIR); + p = xdr_encode_hyper(p, readdir->cookie); + p = xdr_encode_opaque_fixed(p, readdir->verifier.data, NFS4_VERIFIER_SIZE); *p++ = cpu_to_be32(dircount); *p++ = cpu_to_be32(readdir->count); *p++ = cpu_to_be32(2); *p++ = cpu_to_be32(attrs[0] & readdir->bitmask[0]); *p = cpu_to_be32(attrs[1] & readdir->bitmask[1]); - memcpy(verf, readdir->verifier.data, sizeof(verf)); + hdr->nops++; + hdr->replen += decode_readdir_maxsz; dprintk("%s: cookie = %Lu, verifier = %08x:%08x, bitmap = %08x:%08x\n", __func__, (unsigned long long)readdir->cookie, - verf[0], verf[1], + ((u32 *)readdir->verifier.data)[0], + ((u32 *)readdir->verifier.data)[1], attrs[0] & readdir->bitmask[0], attrs[1] & 
readdir->bitmask[1]); } static void encode_readlink(struct xdr_stream *xdr, const struct nfs4_readlink *readlink, struct rpc_rqst *req, struct compound_hdr *hdr) { - encode_op_hdr(xdr, OP_READLINK, decode_readlink_maxsz, hdr); + __be32 *p; + + p = reserve_space(xdr, 4); + *p = cpu_to_be32(OP_READLINK); + hdr->nops++; + hdr->replen += decode_readlink_maxsz; } static void encode_remove(struct xdr_stream *xdr, const struct qstr *name, struct compound_hdr *hdr) { - encode_op_hdr(xdr, OP_REMOVE, decode_remove_maxsz, hdr); - encode_string(xdr, name->len, name->name); + __be32 *p; + + p = reserve_space(xdr, 8 + name->len); + *p++ = cpu_to_be32(OP_REMOVE); + xdr_encode_opaque(p, name->name, name->len); + hdr->nops++; + hdr->replen += decode_remove_maxsz; } static void encode_rename(struct xdr_stream *xdr, const struct qstr *oldname, const struct qstr *newname, struct compound_hdr *hdr) { - encode_op_hdr(xdr, OP_RENAME, decode_rename_maxsz, hdr); + __be32 *p; + + p = reserve_space(xdr, 4); + *p = cpu_to_be32(OP_RENAME); encode_string(xdr, oldname->len, oldname->name); encode_string(xdr, newname->len, newname->name); + hdr->nops++; + hdr->replen += decode_rename_maxsz; } -static void encode_renew(struct xdr_stream *xdr, clientid4 clid, - struct compound_hdr *hdr) +static void encode_renew(struct xdr_stream *xdr, const struct nfs_client *client_stateid, struct compound_hdr *hdr) { - encode_op_hdr(xdr, OP_RENEW, decode_renew_maxsz, hdr); - encode_uint64(xdr, clid); + __be32 *p; + + p = reserve_space(xdr, 12); + *p++ = cpu_to_be32(OP_RENEW); + xdr_encode_hyper(p, client_stateid->cl_clientid); + hdr->nops++; + hdr->replen += decode_renew_maxsz; } static void encode_restorefh(struct xdr_stream *xdr, struct compound_hdr *hdr) { - encode_op_hdr(xdr, OP_RESTOREFH, decode_restorefh_maxsz, hdr); + __be32 *p; + + p = reserve_space(xdr, 4); + *p = cpu_to_be32(OP_RESTOREFH); + hdr->nops++; + hdr->replen += decode_restorefh_maxsz; } static void @@ -1599,8 +1647,9 @@ encode_setacl(struct 
xdr_stream *xdr, struct nfs_setaclargs *arg, struct compoun { __be32 *p; - encode_op_hdr(xdr, OP_SETATTR, decode_setacl_maxsz, hdr); - encode_nfs4_stateid(xdr, &zero_stateid); + p = reserve_space(xdr, 4+NFS4_STATEID_SIZE); + *p++ = cpu_to_be32(OP_SETATTR); + xdr_encode_opaque_fixed(p, zero_stateid.data, NFS4_STATEID_SIZE); p = reserve_space(xdr, 2*4); *p++ = cpu_to_be32(1); *p = cpu_to_be32(FATTR4_WORD0_ACL); @@ -1608,18 +1657,30 @@ encode_setacl(struct xdr_stream *xdr, struct nfs_setaclargs *arg, struct compoun p = reserve_space(xdr, 4); *p = cpu_to_be32(arg->acl_len); xdr_write_pages(xdr, arg->acl_pages, arg->acl_pgbase, arg->acl_len); + hdr->nops++; + hdr->replen += decode_setacl_maxsz; } static void encode_savefh(struct xdr_stream *xdr, struct compound_hdr *hdr) { - encode_op_hdr(xdr, OP_SAVEFH, decode_savefh_maxsz, hdr); + __be32 *p; + + p = reserve_space(xdr, 4); + *p = cpu_to_be32(OP_SAVEFH); + hdr->nops++; + hdr->replen += decode_savefh_maxsz; } static void encode_setattr(struct xdr_stream *xdr, const struct nfs_setattrargs *arg, const struct nfs_server *server, struct compound_hdr *hdr) { - encode_op_hdr(xdr, OP_SETATTR, decode_setattr_maxsz, hdr); - encode_nfs4_stateid(xdr, &arg->stateid); + __be32 *p; + + p = reserve_space(xdr, 4+NFS4_STATEID_SIZE); + *p++ = cpu_to_be32(OP_SETATTR); + xdr_encode_opaque_fixed(p, arg->stateid.data, NFS4_STATEID_SIZE); + hdr->nops++; + hdr->replen += decode_setattr_maxsz; encode_attrs(xdr, arg->iap, server); } @@ -1627,8 +1688,9 @@ static void encode_setclientid(struct xdr_stream *xdr, const struct nfs4_setclie { __be32 *p; - encode_op_hdr(xdr, OP_SETCLIENTID, decode_setclientid_maxsz, hdr); - encode_nfs4_verifier(xdr, setclientid->sc_verifier); + p = reserve_space(xdr, 4 + NFS4_VERIFIER_SIZE); + *p++ = cpu_to_be32(OP_SETCLIENTID); + xdr_encode_opaque_fixed(p, setclientid->sc_verifier->data, NFS4_VERIFIER_SIZE); encode_string(xdr, setclientid->sc_name_len, setclientid->sc_name); p = reserve_space(xdr, 4); @@ -1637,23 
+1699,31 @@ static void encode_setclientid(struct xdr_stream *xdr, const struct nfs4_setclie encode_string(xdr, setclientid->sc_uaddr_len, setclientid->sc_uaddr); p = reserve_space(xdr, 4); *p = cpu_to_be32(setclientid->sc_cb_ident); + hdr->nops++; + hdr->replen += decode_setclientid_maxsz; } static void encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs4_setclientid_res *arg, struct compound_hdr *hdr) { - encode_op_hdr(xdr, OP_SETCLIENTID_CONFIRM, - decode_setclientid_confirm_maxsz, hdr); - encode_uint64(xdr, arg->clientid); - encode_nfs4_verifier(xdr, &arg->confirm); + __be32 *p; + + p = reserve_space(xdr, 12 + NFS4_VERIFIER_SIZE); + *p++ = cpu_to_be32(OP_SETCLIENTID_CONFIRM); + p = xdr_encode_hyper(p, arg->clientid); + xdr_encode_opaque_fixed(p, arg->confirm.data, NFS4_VERIFIER_SIZE); + hdr->nops++; + hdr->replen += decode_setclientid_confirm_maxsz; } static void encode_write(struct xdr_stream *xdr, const struct nfs_writeargs *args, struct compound_hdr *hdr) { __be32 *p; - encode_op_hdr(xdr, OP_WRITE, decode_write_maxsz, hdr); - encode_open_stateid(xdr, args->context, args->lock_context, - FMODE_WRITE, hdr->minorversion); + p = reserve_space(xdr, 4); + *p = cpu_to_be32(OP_WRITE); + + encode_stateid(xdr, args->context, args->lock_context, + hdr->minorversion); p = reserve_space(xdr, 16); p = xdr_encode_hyper(p, args->offset); @@ -1661,18 +1731,32 @@ static void encode_write(struct xdr_stream *xdr, const struct nfs_writeargs *arg *p = cpu_to_be32(args->count); xdr_write_pages(xdr, args->pages, args->pgbase, args->count); + hdr->nops++; + hdr->replen += decode_write_maxsz; } static void encode_delegreturn(struct xdr_stream *xdr, const nfs4_stateid *stateid, struct compound_hdr *hdr) { - encode_op_hdr(xdr, OP_DELEGRETURN, decode_delegreturn_maxsz, hdr); - encode_nfs4_stateid(xdr, stateid); + __be32 *p; + + p = reserve_space(xdr, 4+NFS4_STATEID_SIZE); + + *p++ = cpu_to_be32(OP_DELEGRETURN); + xdr_encode_opaque_fixed(p, stateid->data, 
NFS4_STATEID_SIZE); + hdr->nops++; + hdr->replen += decode_delegreturn_maxsz; } static void encode_secinfo(struct xdr_stream *xdr, const struct qstr *name, struct compound_hdr *hdr) { - encode_op_hdr(xdr, OP_SECINFO, decode_secinfo_maxsz, hdr); - encode_string(xdr, name->len, name->name); + int len = name->len; + __be32 *p; + + p = reserve_space(xdr, 8 + len); + *p++ = cpu_to_be32(OP_SECINFO); + xdr_encode_opaque(p, name->name, len); + hdr->nops++; + hdr->replen += decode_secinfo_maxsz; } #if defined(CONFIG_NFS_V4_1) @@ -1682,39 +1766,19 @@ static void encode_exchange_id(struct xdr_stream *xdr, struct compound_hdr *hdr) { __be32 *p; - char impl_name[NFS4_OPAQUE_LIMIT]; - int len = 0; - encode_op_hdr(xdr, OP_EXCHANGE_ID, decode_exchange_id_maxsz, hdr); - encode_nfs4_verifier(xdr, args->verifier); + p = reserve_space(xdr, 4 + sizeof(args->verifier->data)); + *p++ = cpu_to_be32(OP_EXCHANGE_ID); + xdr_encode_opaque_fixed(p, args->verifier->data, sizeof(args->verifier->data)); encode_string(xdr, args->id_len, args->id); p = reserve_space(xdr, 12); *p++ = cpu_to_be32(args->flags); *p++ = cpu_to_be32(0); /* zero length state_protect4_a */ - - if (send_implementation_id && - sizeof(CONFIG_NFS_V4_1_IMPLEMENTATION_ID_DOMAIN) > 1 && - sizeof(CONFIG_NFS_V4_1_IMPLEMENTATION_ID_DOMAIN) - <= NFS4_OPAQUE_LIMIT + 1) - len = snprintf(impl_name, sizeof(impl_name), "%s %s %s %s", - utsname()->sysname, utsname()->release, - utsname()->version, utsname()->machine); - - if (len > 0) { - *p = cpu_to_be32(1); /* implementation id array length=1 */ - - encode_string(xdr, - sizeof(CONFIG_NFS_V4_1_IMPLEMENTATION_ID_DOMAIN) - 1, - CONFIG_NFS_V4_1_IMPLEMENTATION_ID_DOMAIN); - encode_string(xdr, len, impl_name); - /* just send zeros for nii_date - the date is in nii_name */ - p = reserve_space(xdr, 12); - p = xdr_encode_hyper(p, 0); - *p = cpu_to_be32(0); - } else - *p = cpu_to_be32(0); /* implementation id array length=0 */ + *p = cpu_to_be32(0); /* zero length implementation id array */ + 
hdr->nops++; + hdr->replen += decode_exchange_id_maxsz; } static void encode_create_session(struct xdr_stream *xdr, @@ -1737,8 +1801,8 @@ static void encode_create_session(struct xdr_stream *xdr, len = scnprintf(machine_name, sizeof(machine_name), "%s", clp->cl_ipaddr); - encode_op_hdr(xdr, OP_CREATE_SESSION, decode_create_session_maxsz, hdr); - p = reserve_space(xdr, 16 + 2*28 + 20 + len + 12); + p = reserve_space(xdr, 20 + 2*28 + 20 + len + 12); + *p++ = cpu_to_be32(OP_CREATE_SESSION); p = xdr_encode_hyper(p, clp->cl_clientid); *p++ = cpu_to_be32(clp->cl_seqid); /*Sequence id */ *p++ = cpu_to_be32(args->flags); /*flags */ @@ -1771,22 +1835,33 @@ static void encode_create_session(struct xdr_stream *xdr, *p++ = cpu_to_be32(0); /* UID */ *p++ = cpu_to_be32(0); /* GID */ *p = cpu_to_be32(0); /* No more gids */ + hdr->nops++; + hdr->replen += decode_create_session_maxsz; } static void encode_destroy_session(struct xdr_stream *xdr, struct nfs4_session *session, struct compound_hdr *hdr) { - encode_op_hdr(xdr, OP_DESTROY_SESSION, decode_destroy_session_maxsz, hdr); - encode_opaque_fixed(xdr, session->sess_id.data, NFS4_MAX_SESSIONID_LEN); + __be32 *p; + p = reserve_space(xdr, 4 + NFS4_MAX_SESSIONID_LEN); + *p++ = cpu_to_be32(OP_DESTROY_SESSION); + xdr_encode_opaque_fixed(p, session->sess_id.data, NFS4_MAX_SESSIONID_LEN); + hdr->nops++; + hdr->replen += decode_destroy_session_maxsz; } static void encode_reclaim_complete(struct xdr_stream *xdr, struct nfs41_reclaim_complete_args *args, struct compound_hdr *hdr) { - encode_op_hdr(xdr, OP_RECLAIM_COMPLETE, decode_reclaim_complete_maxsz, hdr); - encode_uint32(xdr, args->one_fs); + __be32 *p; + + p = reserve_space(xdr, 8); + *p++ = cpu_to_be32(OP_RECLAIM_COMPLETE); + *p++ = cpu_to_be32(args->one_fs); + hdr->nops++; + hdr->replen += decode_reclaim_complete_maxsz; } #endif /* CONFIG_NFS_V4_1 */ @@ -1808,7 +1883,8 @@ static void encode_sequence(struct xdr_stream *xdr, WARN_ON(args->sa_slotid == NFS4_MAX_SLOT_TABLE); slot = 
tp->slots + args->sa_slotid; - encode_op_hdr(xdr, OP_SEQUENCE, decode_sequence_maxsz, hdr); + p = reserve_space(xdr, 4 + NFS4_MAX_SESSIONID_LEN + 16); + *p++ = cpu_to_be32(OP_SEQUENCE); /* * Sessionid + seqid + slotid + max slotid + cache_this @@ -1822,12 +1898,13 @@ static void encode_sequence(struct xdr_stream *xdr, ((u32 *)session->sess_id.data)[3], slot->seq_nr, args->sa_slotid, tp->highest_used_slotid, args->sa_cache_this); - p = reserve_space(xdr, NFS4_MAX_SESSIONID_LEN + 16); p = xdr_encode_opaque_fixed(p, session->sess_id.data, NFS4_MAX_SESSIONID_LEN); *p++ = cpu_to_be32(slot->seq_nr); *p++ = cpu_to_be32(args->sa_slotid); *p++ = cpu_to_be32(tp->highest_used_slotid); *p = cpu_to_be32(args->sa_cache_this); + hdr->nops++; + hdr->replen += decode_sequence_maxsz; #endif /* CONFIG_NFS_V4_1 */ } @@ -1842,12 +1919,14 @@ encode_getdevicelist(struct xdr_stream *xdr, .data = "dummmmmy", }; - encode_op_hdr(xdr, OP_GETDEVICELIST, decode_getdevicelist_maxsz, hdr); - p = reserve_space(xdr, 16); + p = reserve_space(xdr, 20); + *p++ = cpu_to_be32(OP_GETDEVICELIST); *p++ = cpu_to_be32(args->layoutclass); *p++ = cpu_to_be32(NFS4_PNFS_GETDEVLIST_MAXNUM); xdr_encode_hyper(p, 0ULL); /* cookie */ encode_nfs4_verifier(xdr, &dummy); + hdr->nops++; + hdr->replen += decode_getdevicelist_maxsz; } static void @@ -1857,13 +1936,15 @@ encode_getdeviceinfo(struct xdr_stream *xdr, { __be32 *p; - encode_op_hdr(xdr, OP_GETDEVICEINFO, decode_getdeviceinfo_maxsz, hdr); - p = reserve_space(xdr, 12 + NFS4_DEVICEID4_SIZE); + p = reserve_space(xdr, 16 + NFS4_DEVICEID4_SIZE); + *p++ = cpu_to_be32(OP_GETDEVICEINFO); p = xdr_encode_opaque_fixed(p, args->pdev->dev_id.data, NFS4_DEVICEID4_SIZE); *p++ = cpu_to_be32(args->pdev->layout_type); *p++ = cpu_to_be32(args->pdev->pglen); /* gdia_maxcount */ *p++ = cpu_to_be32(0); /* bitmap length 0 */ + hdr->nops++; + hdr->replen += decode_getdeviceinfo_maxsz; } static void @@ -1873,16 +1954,16 @@ encode_layoutget(struct xdr_stream *xdr, { __be32 *p; - 
encode_op_hdr(xdr, OP_LAYOUTGET, decode_layoutget_maxsz, hdr); - p = reserve_space(xdr, 36); + p = reserve_space(xdr, 44 + NFS4_STATEID_SIZE); + *p++ = cpu_to_be32(OP_LAYOUTGET); *p++ = cpu_to_be32(0); /* Signal layout available */ *p++ = cpu_to_be32(args->type); *p++ = cpu_to_be32(args->range.iomode); p = xdr_encode_hyper(p, args->range.offset); p = xdr_encode_hyper(p, args->range.length); p = xdr_encode_hyper(p, args->minlength); - encode_nfs4_stateid(xdr, &args->stateid); - encode_uint32(xdr, args->maxcount); + p = xdr_encode_opaque_fixed(p, &args->stateid.data, NFS4_STATEID_SIZE); + *p = cpu_to_be32(args->maxcount); dprintk("%s: 1st type:0x%x iomode:%d off:%lu len:%lu mc:%d\n", __func__, @@ -1891,6 +1972,8 @@ encode_layoutget(struct xdr_stream *xdr, (unsigned long)args->range.offset, (unsigned long)args->range.length, args->maxcount); + hdr->nops++; + hdr->replen += decode_layoutget_maxsz; } static int @@ -1904,14 +1987,13 @@ encode_layoutcommit(struct xdr_stream *xdr, dprintk("%s: lbw: %llu type: %d\n", __func__, args->lastbytewritten, NFS_SERVER(args->inode)->pnfs_curr_ld->id); - encode_op_hdr(xdr, OP_LAYOUTCOMMIT, decode_layoutcommit_maxsz, hdr); - p = reserve_space(xdr, 20); + p = reserve_space(xdr, 44 + NFS4_STATEID_SIZE); + *p++ = cpu_to_be32(OP_LAYOUTCOMMIT); /* Only whole file layouts */ p = xdr_encode_hyper(p, 0); /* offset */ p = xdr_encode_hyper(p, args->lastbytewritten + 1); /* length */ - *p = cpu_to_be32(0); /* reclaim */ - encode_nfs4_stateid(xdr, &args->stateid); - p = reserve_space(xdr, 20); + *p++ = cpu_to_be32(0); /* reclaim */ + p = xdr_encode_opaque_fixed(p, args->stateid.data, NFS4_STATEID_SIZE); *p++ = cpu_to_be32(1); /* newoffset = TRUE */ p = xdr_encode_hyper(p, args->lastbytewritten); *p++ = cpu_to_be32(0); /* Never send time_modify_changed */ @@ -1920,9 +2002,13 @@ encode_layoutcommit(struct xdr_stream *xdr, if (NFS_SERVER(inode)->pnfs_curr_ld->encode_layoutcommit) NFS_SERVER(inode)->pnfs_curr_ld->encode_layoutcommit( 
NFS_I(inode)->layout, xdr, args); - else - encode_uint32(xdr, 0); /* no layout-type payload */ + else { + p = reserve_space(xdr, 4); + *p = cpu_to_be32(0); /* no layout-type payload */ + } + hdr->nops++; + hdr->replen += decode_layoutcommit_maxsz; return 0; } @@ -1933,23 +2019,27 @@ encode_layoutreturn(struct xdr_stream *xdr, { __be32 *p; - encode_op_hdr(xdr, OP_LAYOUTRETURN, decode_layoutreturn_maxsz, hdr); - p = reserve_space(xdr, 16); + p = reserve_space(xdr, 20); + *p++ = cpu_to_be32(OP_LAYOUTRETURN); *p++ = cpu_to_be32(0); /* reclaim. always 0 for now */ *p++ = cpu_to_be32(args->layout_type); *p++ = cpu_to_be32(IOMODE_ANY); *p = cpu_to_be32(RETURN_FILE); - p = reserve_space(xdr, 16); + p = reserve_space(xdr, 16 + NFS4_STATEID_SIZE); p = xdr_encode_hyper(p, 0); p = xdr_encode_hyper(p, NFS4_MAX_UINT64); spin_lock(&args->inode->i_lock); - encode_nfs4_stateid(xdr, &args->stateid); + xdr_encode_opaque_fixed(p, &args->stateid.data, NFS4_STATEID_SIZE); spin_unlock(&args->inode->i_lock); if (NFS_SERVER(args->inode)->pnfs_curr_ld->encode_layoutreturn) { NFS_SERVER(args->inode)->pnfs_curr_ld->encode_layoutreturn( NFS_I(args->inode)->layout, xdr, args); - } else - encode_uint32(xdr, 0); + } else { + p = reserve_space(xdr, 4); + *p = cpu_to_be32(0); + } + hdr->nops++; + hdr->replen += decode_layoutreturn_maxsz; } static int @@ -1957,8 +2047,12 @@ encode_secinfo_no_name(struct xdr_stream *xdr, const struct nfs41_secinfo_no_name_args *args, struct compound_hdr *hdr) { - encode_op_hdr(xdr, OP_SECINFO_NO_NAME, decode_secinfo_no_name_maxsz, hdr); - encode_uint32(xdr, args->style); + __be32 *p; + p = reserve_space(xdr, 8); + *p++ = cpu_to_be32(OP_SECINFO_NO_NAME); + *p++ = cpu_to_be32(args->style); + hdr->nops++; + hdr->replen += decode_secinfo_no_name_maxsz; return 0; } @@ -1966,17 +2060,26 @@ static void encode_test_stateid(struct xdr_stream *xdr, struct nfs41_test_stateid_args *args, struct compound_hdr *hdr) { - encode_op_hdr(xdr, OP_TEST_STATEID, decode_test_stateid_maxsz, 
hdr); - encode_uint32(xdr, 1); - encode_nfs4_stateid(xdr, args->stateid); + __be32 *p; + + p = reserve_space(xdr, 8 + NFS4_STATEID_SIZE); + *p++ = cpu_to_be32(OP_TEST_STATEID); + *p++ = cpu_to_be32(1); + xdr_encode_opaque_fixed(p, args->stateid->data, NFS4_STATEID_SIZE); + hdr->nops++; + hdr->replen += decode_test_stateid_maxsz; } static void encode_free_stateid(struct xdr_stream *xdr, struct nfs41_free_stateid_args *args, struct compound_hdr *hdr) { - encode_op_hdr(xdr, OP_FREE_STATEID, decode_free_stateid_maxsz, hdr); - encode_nfs4_stateid(xdr, args->stateid); + __be32 *p; + p = reserve_space(xdr, 4 + NFS4_STATEID_SIZE); + *p++ = cpu_to_be32(OP_FREE_STATEID); + xdr_encode_opaque_fixed(p, args->stateid->data, NFS4_STATEID_SIZE); + hdr->nops++; + hdr->replen += decode_free_stateid_maxsz; } #endif /* CONFIG_NFS_V4_1 */ @@ -2530,7 +2633,6 @@ static void nfs4_xdr_enc_server_caps(struct rpc_rqst *req, encode_sequence(xdr, &args->seq_args, &hdr); encode_putfh(xdr, args->fhandle, &hdr); encode_getattr_one(xdr, FATTR4_WORD0_SUPPORTED_ATTRS| - FATTR4_WORD0_FH_EXPIRE_TYPE| FATTR4_WORD0_LINK_SUPPORT| FATTR4_WORD0_SYMLINK_SUPPORT| FATTR4_WORD0_ACLSUPPORT, &hdr); @@ -2548,7 +2650,7 @@ static void nfs4_xdr_enc_renew(struct rpc_rqst *req, struct xdr_stream *xdr, }; encode_compound_hdr(xdr, req, &hdr); - encode_renew(xdr, clp->cl_clientid, &hdr); + encode_renew(xdr, clp, &hdr); encode_nops(&hdr); } @@ -3078,28 +3180,6 @@ static int decode_attr_type(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t * return -EIO; } -static int decode_attr_fh_expire_type(struct xdr_stream *xdr, - uint32_t *bitmap, uint32_t *type) -{ - __be32 *p; - - *type = 0; - if (unlikely(bitmap[0] & (FATTR4_WORD0_FH_EXPIRE_TYPE - 1U))) - return -EIO; - if (likely(bitmap[0] & FATTR4_WORD0_FH_EXPIRE_TYPE)) { - p = xdr_inline_decode(xdr, 4); - if (unlikely(!p)) - goto out_overflow; - *type = be32_to_cpup(p); - bitmap[0] &= ~FATTR4_WORD0_FH_EXPIRE_TYPE; - } - dprintk("%s: expire type=0x%x\n", __func__, *type); - 
return 0; -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; -} - static int decode_attr_change(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *change) { __be32 *p; @@ -3433,17 +3513,16 @@ static int decode_pathname(struct xdr_stream *xdr, struct nfs4_pathname *path) n = be32_to_cpup(p); if (n == 0) goto root_path; - dprintk("pathname4: "); + dprintk("path "); path->ncomponents = 0; while (path->ncomponents < n) { struct nfs4_string *component = &path->components[path->ncomponents]; status = decode_opaque_inline(xdr, &component->len, &component->data); if (unlikely(status != 0)) goto out_eio; - ifdebug (XDR) - pr_cont("%s%.*s ", - (path->ncomponents != n ? "/ " : ""), - component->len, component->data); + if (path->ncomponents != n) + dprintk("/"); + dprintk("%s", component->data); if (path->ncomponents < NFS4_PATHNAME_MAXCOMPONENTS) path->ncomponents++; else { @@ -3452,13 +3531,14 @@ static int decode_pathname(struct xdr_stream *xdr, struct nfs4_pathname *path) } } out: + dprintk("\n"); return status; root_path: /* a root pathname is sent as a zero component4 */ path->ncomponents = 1; path->components[0].len=0; path->components[0].data=NULL; - dprintk("pathname4: /\n"); + dprintk("path /\n"); goto out; out_eio: dprintk(" status %d", status); @@ -3480,11 +3560,7 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st status = 0; if (unlikely(!(bitmap[0] & FATTR4_WORD0_FS_LOCATIONS))) goto out; - status = -EIO; - /* Ignore borken servers that return unrequested attrs */ - if (unlikely(res == NULL)) - goto out; - dprintk("%s: fsroot:\n", __func__); + dprintk("%s: fsroot ", __func__); status = decode_pathname(xdr, &res->fs_path); if (unlikely(status != 0)) goto out; @@ -3505,7 +3581,7 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st m = be32_to_cpup(p); loc->nservers = 0; - dprintk("%s: servers:\n", __func__); + dprintk("%s: servers ", __func__); while (loc->nservers < m) { struct 
nfs4_string *server = &loc->servers[loc->nservers]; status = decode_opaque_inline(xdr, &server->len, &server->data); @@ -3537,7 +3613,7 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st res->nlocations++; } if (res->nlocations != 0) - status = NFS_ATTR_FATTR_V4_LOCATIONS; + status = NFS_ATTR_FATTR_V4_REFERRAL; out: dprintk("%s: fs_locations done, error = %d\n", __func__, status); return status; @@ -4081,7 +4157,7 @@ static int decode_opaque_fixed(struct xdr_stream *xdr, void *buf, size_t len) static int decode_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid) { - return decode_opaque_fixed(xdr, stateid, NFS4_STATEID_SIZE); + return decode_opaque_fixed(xdr, stateid->data, NFS4_STATEID_SIZE); } static int decode_close(struct xdr_stream *xdr, struct nfs_closeres *res) @@ -4098,7 +4174,7 @@ static int decode_close(struct xdr_stream *xdr, struct nfs_closeres *res) static int decode_verifier(struct xdr_stream *xdr, void *verifier) { - return decode_opaque_fixed(xdr, verifier, NFS4_VERIFIER_SIZE); + return decode_opaque_fixed(xdr, verifier, 8); } static int decode_commit(struct xdr_stream *xdr, struct nfs_writeres *res) @@ -4148,9 +4224,6 @@ static int decode_server_caps(struct xdr_stream *xdr, struct nfs4_server_caps_re goto xdr_error; if ((status = decode_attr_supported(xdr, bitmap, res->attr_bitmask)) != 0) goto xdr_error; - if ((status = decode_attr_fh_expire_type(xdr, bitmap, - &res->fh_expire_type)) != 0) - goto xdr_error; if ((status = decode_attr_link_support(xdr, bitmap, &res->has_links)) != 0) goto xdr_error; if ((status = decode_attr_symlink_support(xdr, bitmap, &res->has_symlinks)) != 0) @@ -4221,7 +4294,6 @@ static int decode_pathconf(struct xdr_stream *xdr, struct nfs_pathconf *pathconf static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_fattr *fattr, struct nfs_fh *fh, - struct nfs4_fs_locations *fs_loc, const struct nfs_server *server) { int status; @@ -4269,7 +4341,9 @@ static int 
decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap, goto xdr_error; fattr->valid |= status; - status = decode_attr_fs_locations(xdr, bitmap, fs_loc); + status = decode_attr_fs_locations(xdr, bitmap, container_of(fattr, + struct nfs4_fs_locations, + fattr)); if (status < 0) goto xdr_error; fattr->valid |= status; @@ -4333,8 +4407,7 @@ static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap, } static int decode_getfattr_generic(struct xdr_stream *xdr, struct nfs_fattr *fattr, - struct nfs_fh *fh, struct nfs4_fs_locations *fs_loc, - const struct nfs_server *server) + struct nfs_fh *fh, const struct nfs_server *server) { __be32 *savep; uint32_t attrlen, @@ -4353,7 +4426,7 @@ static int decode_getfattr_generic(struct xdr_stream *xdr, struct nfs_fattr *fat if (status < 0) goto xdr_error; - status = decode_getfattr_attrs(xdr, bitmap, fattr, fh, fs_loc, server); + status = decode_getfattr_attrs(xdr, bitmap, fattr, fh, server); if (status < 0) goto xdr_error; @@ -4366,7 +4439,7 @@ static int decode_getfattr_generic(struct xdr_stream *xdr, struct nfs_fattr *fat static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr, const struct nfs_server *server) { - return decode_getfattr_generic(xdr, fattr, NULL, NULL, server); + return decode_getfattr_generic(xdr, fattr, NULL, server); } /* @@ -4390,8 +4463,8 @@ static int decode_first_pnfs_layout_type(struct xdr_stream *xdr, return 0; } if (num > 1) - printk(KERN_INFO "NFS: %s: Warning: Multiple pNFS layout " - "drivers per filesystem not supported\n", __func__); + printk(KERN_INFO "%s: Warning: Multiple pNFS layout drivers " + "per filesystem not supported\n", __func__); /* Decode and set first layout type, move xdr->p past unused types */ p = xdr_inline_decode(xdr, num * 4); @@ -4790,16 +4863,17 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n size_t hdrlen; u32 recvd, pglen = rcvbuf->page_len; int status; - __be32 verf[2]; status = 
decode_op_hdr(xdr, OP_READDIR); if (!status) status = decode_verifier(xdr, readdir->verifier.data); if (unlikely(status)) return status; - memcpy(verf, readdir->verifier.data, sizeof(verf)); dprintk("%s: verifier = %08x:%08x\n", - __func__, verf[0], verf[1]); + __func__, + ((u32 *)readdir->verifier.data)[0], + ((u32 *)readdir->verifier.data)[1]); + hdrlen = (char *) xdr->p - (char *) iov->iov_base; recvd = rcvbuf->len - hdrlen; @@ -5046,7 +5120,7 @@ static int decode_write(struct xdr_stream *xdr, struct nfs_writeres *res) goto out_overflow; res->count = be32_to_cpup(p++); res->verf->committed = be32_to_cpup(p++); - memcpy(res->verf->verifier, p, NFS4_VERIFIER_SIZE); + memcpy(res->verf->verifier, p, 8); return 0; out_overflow: print_overflow_msg(__func__, xdr); @@ -5140,7 +5214,6 @@ static int decode_exchange_id(struct xdr_stream *xdr, char *dummy_str; int status; struct nfs_client *clp = res->client; - uint32_t impl_id_count; status = decode_op_hdr(xdr, OP_EXCHANGE_ID); if (status) @@ -5182,38 +5255,11 @@ static int decode_exchange_id(struct xdr_stream *xdr, memcpy(res->server_scope->server_scope, dummy_str, dummy); res->server_scope->server_scope_sz = dummy; - /* Implementation Id */ - p = xdr_inline_decode(xdr, 4); - if (unlikely(!p)) - goto out_overflow; - impl_id_count = be32_to_cpup(p++); - - if (impl_id_count) { - /* nii_domain */ - status = decode_opaque_inline(xdr, &dummy, &dummy_str); - if (unlikely(status)) - return status; - if (unlikely(dummy > NFS4_OPAQUE_LIMIT)) - return -EIO; - memcpy(res->impl_id->domain, dummy_str, dummy); - - /* nii_name */ - status = decode_opaque_inline(xdr, &dummy, &dummy_str); - if (unlikely(status)) - return status; - if (unlikely(dummy > NFS4_OPAQUE_LIMIT)) - return -EIO; - memcpy(res->impl_id->name, dummy_str, dummy); - - /* nii_date */ - p = xdr_inline_decode(xdr, 12); - if (unlikely(!p)) - goto out_overflow; - p = xdr_decode_hyper(p, &res->impl_id->date.seconds); - res->impl_id->date.nseconds = be32_to_cpup(p); + /* Throw 
away Implementation id array */ + status = decode_opaque_inline(xdr, &dummy, &dummy_str); + if (unlikely(status)) + return status; - /* if there's more than one entry, ignore the rest */ - } return 0; out_overflow: print_overflow_msg(__func__, xdr); @@ -5239,8 +5285,8 @@ static int decode_chan_attrs(struct xdr_stream *xdr, attrs->max_reqs = be32_to_cpup(p++); nr_attrs = be32_to_cpup(p); if (unlikely(nr_attrs > 1)) { - printk(KERN_WARNING "NFS: %s: Invalid rdma channel attrs " - "count %u\n", __func__, nr_attrs); + printk(KERN_WARNING "%s: Invalid rdma channel attrs count %u\n", + __func__, nr_attrs); return -EINVAL; } if (nr_attrs == 1) { @@ -5390,14 +5436,14 @@ static int decode_getdevicelist(struct xdr_stream *xdr, p += 2; /* Read verifier */ - p = xdr_decode_opaque_fixed(p, verftemp.verifier, NFS4_VERIFIER_SIZE); + p = xdr_decode_opaque_fixed(p, verftemp.verifier, 8); res->num_devs = be32_to_cpup(p); dprintk("%s: num_dev %d\n", __func__, res->num_devs); if (res->num_devs > NFS4_PNFS_GETDEVLIST_MAXNUM) { - printk(KERN_ERR "NFS: %s too many result dev_num %u\n", + printk(KERN_ERR "%s too many result dev_num %u\n", __func__, res->num_devs); return -EIO; } @@ -5491,14 +5537,11 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req, status = decode_op_hdr(xdr, OP_LAYOUTGET); if (status) return status; - p = xdr_inline_decode(xdr, 4); - if (unlikely(!p)) - goto out_overflow; - res->return_on_close = be32_to_cpup(p); - decode_stateid(xdr, &res->stateid); - p = xdr_inline_decode(xdr, 4); + p = xdr_inline_decode(xdr, 8 + NFS4_STATEID_SIZE); if (unlikely(!p)) goto out_overflow; + res->return_on_close = be32_to_cpup(p++); + p = xdr_decode_opaque_fixed(p, res->stateid.data, NFS4_STATEID_SIZE); layout_count = be32_to_cpup(p); if (!layout_count) { dprintk("%s: server responded with empty layout array\n", @@ -5623,8 +5666,7 @@ static int decode_test_stateid(struct xdr_stream *xdr, if (unlikely(!p)) goto out_overflow; res->status = be32_to_cpup(p++); - - 
return status; + return res->status; out_overflow: print_overflow_msg(__func__, xdr); out: @@ -6541,9 +6583,8 @@ static int nfs4_xdr_dec_fs_locations(struct rpc_rqst *req, if (status) goto out; xdr_enter_page(xdr, PAGE_SIZE); - status = decode_getfattr_generic(xdr, &res->fs_locations->fattr, - NULL, res->fs_locations, - res->fs_locations->server); + status = decode_getfattr(xdr, &res->fs_locations->fattr, + res->fs_locations->server); out: return status; } @@ -6923,7 +6964,7 @@ int nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, goto out_overflow; if (decode_getfattr_attrs(xdr, bitmap, entry->fattr, entry->fh, - NULL, entry->server) < 0) + entry->server) < 0) goto out_overflow; if (entry->fattr->valid & NFS_ATTR_FATTR_MOUNTED_ON_FILEID) entry->ino = entry->fattr->mounted_on_fileid; @@ -7071,7 +7112,7 @@ struct rpc_procinfo nfs4_procedures[] = { #endif /* CONFIG_NFS_V4_1 */ }; -const struct rpc_version nfs_version4 = { +struct rpc_version nfs_version4 = { .number = 4, .nrprocs = ARRAY_SIZE(nfs4_procedures), .procs = nfs4_procedures diff --git a/trunk/fs/nfs/nfsroot.c b/trunk/fs/nfs/nfsroot.c index cd3c910d2d12..c4744e1d513c 100644 --- a/trunk/fs/nfs/nfsroot.c +++ b/trunk/fs/nfs/nfsroot.c @@ -104,7 +104,7 @@ static char nfs_export_path[NFS_MAXPATHLEN + 1] __initdata = ""; /* server:export path string passed to super.c */ static char nfs_root_device[NFS_MAXPATHLEN + 1] __initdata = ""; -#ifdef NFS_DEBUG +#ifdef RPC_DEBUG /* * When the "nfsrootdebug" kernel command line option is specified, * enable debugging messages for NFSROOT. 
diff --git a/trunk/fs/nfs/objlayout/objio_osd.c b/trunk/fs/nfs/objlayout/objio_osd.c index 4bff4a3dab46..55d01280a609 100644 --- a/trunk/fs/nfs/objlayout/objio_osd.c +++ b/trunk/fs/nfs/objlayout/objio_osd.c @@ -137,7 +137,6 @@ static int objio_devices_lookup(struct pnfs_layout_hdr *pnfslay, struct objio_dev_ent *ode; struct osd_dev *od; struct osd_dev_info odi; - bool retry_flag = true; int err; ode = _dev_list_find(NFS_SERVER(pnfslay->plh_inode), d_id); @@ -172,18 +171,10 @@ static int objio_devices_lookup(struct pnfs_layout_hdr *pnfslay, goto out; } -retry_lookup: od = osduld_info_lookup(&odi); if (unlikely(IS_ERR(od))) { err = PTR_ERR(od); dprintk("%s: osduld_info_lookup => %d\n", __func__, err); - if (err == -ENODEV && retry_flag) { - err = objlayout_autologin(deviceaddr); - if (likely(!err)) { - retry_flag = false; - goto retry_lookup; - } - } goto out; } @@ -214,36 +205,25 @@ static void copy_single_comp(struct ore_components *oc, unsigned c, int __alloc_objio_seg(unsigned numdevs, gfp_t gfp_flags, struct objio_segment **pseg) { -/* This is the in memory structure of the objio_segment - * - * struct __alloc_objio_segment { - * struct objio_segment olseg; - * struct ore_dev *ods[numdevs]; - * struct ore_comp comps[numdevs]; - * } *aolseg; - * NOTE: The code as above compiles and runs perfectly. It is elegant, - * type safe and compact. At some Past time Linus has decided he does not - * like variable length arrays, For the sake of this principal we uglify - * the code as below. 
- */ - struct objio_segment *lseg; - size_t lseg_size = sizeof(*lseg) + - numdevs * sizeof(lseg->oc.ods[0]) + - numdevs * sizeof(*lseg->oc.comps); - - lseg = kzalloc(lseg_size, gfp_flags); - if (unlikely(!lseg)) { + struct __alloc_objio_segment { + struct objio_segment olseg; + struct ore_dev *ods[numdevs]; + struct ore_comp comps[numdevs]; + } *aolseg; + + aolseg = kzalloc(sizeof(*aolseg), gfp_flags); + if (unlikely(!aolseg)) { dprintk("%s: Faild allocation numdevs=%d size=%zd\n", __func__, - numdevs, lseg_size); + numdevs, sizeof(*aolseg)); return -ENOMEM; } - lseg->oc.numdevs = numdevs; - lseg->oc.single_comp = EC_MULTPLE_COMPS; - lseg->oc.ods = (void *)(lseg + 1); - lseg->oc.comps = (void *)(lseg->oc.ods + numdevs); + aolseg->olseg.oc.numdevs = numdevs; + aolseg->olseg.oc.single_comp = EC_MULTPLE_COMPS; + aolseg->olseg.oc.comps = aolseg->comps; + aolseg->olseg.oc.ods = aolseg->ods; - *pseg = lseg; + *pseg = &aolseg->olseg; return 0; } @@ -602,10 +582,10 @@ objlayout_init(void) if (ret) printk(KERN_INFO - "NFS: %s: Registering OSD pNFS Layout Driver failed: error=%d\n", + "%s: Registering OSD pNFS Layout Driver failed: error=%d\n", __func__, ret); else - printk(KERN_INFO "NFS: %s: Registered OSD pNFS Layout Driver\n", + printk(KERN_INFO "%s: Registered OSD pNFS Layout Driver\n", __func__); return ret; } @@ -614,7 +594,7 @@ static void __exit objlayout_exit(void) { pnfs_unregister_layoutdriver(&objlayout_type); - printk(KERN_INFO "NFS: %s: Unregistered OSD pNFS Layout Driver\n", + printk(KERN_INFO "%s: Unregistered OSD pNFS Layout Driver\n", __func__); } diff --git a/trunk/fs/nfs/objlayout/objlayout.c b/trunk/fs/nfs/objlayout/objlayout.c index 8d45f1c318ce..b3c29039f5b8 100644 --- a/trunk/fs/nfs/objlayout/objlayout.c +++ b/trunk/fs/nfs/objlayout/objlayout.c @@ -37,9 +37,6 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ -#include -#include -#include #include #include "objlayout.h" @@ -159,7 +156,7 @@ last_byte_offset(u64 start, u64 len) return end > start ? end - 1 : NFS4_MAX_UINT64; } -static void _fix_verify_io_params(struct pnfs_layout_segment *lseg, +void _fix_verify_io_params(struct pnfs_layout_segment *lseg, struct page ***p_pages, unsigned *p_pgbase, u64 offset, unsigned long count) { @@ -493,9 +490,9 @@ encode_accumulated_error(struct objlayout *objlay, __be32 *p) if (!ioerr->oer_errno) continue; - printk(KERN_ERR "NFS: %s: err[%d]: errno=%d " - "is_write=%d dev(%llx:%llx) par=0x%llx " - "obj=0x%llx offset=0x%llx length=0x%llx\n", + printk(KERN_ERR "%s: err[%d]: errno=%d is_write=%d " + "dev(%llx:%llx) par=0x%llx obj=0x%llx " + "offset=0x%llx length=0x%llx\n", __func__, i, ioerr->oer_errno, ioerr->oer_iswrite, _DEVID_LO(&ioerr->oer_component.oid_device_id), @@ -654,134 +651,3 @@ void objlayout_put_deviceinfo(struct pnfs_osd_deviceaddr *deviceaddr) __free_page(odi->page); kfree(odi); } - -enum { - OBJLAYOUT_MAX_URI_LEN = 256, OBJLAYOUT_MAX_OSDNAME_LEN = 64, - OBJLAYOUT_MAX_SYSID_HEX_LEN = OSD_SYSTEMID_LEN * 2 + 1, - OSD_LOGIN_UPCALL_PATHLEN = 256 -}; - -static char osd_login_prog[OSD_LOGIN_UPCALL_PATHLEN] = "/sbin/osd_login"; - -module_param_string(osd_login_prog, osd_login_prog, sizeof(osd_login_prog), - 0600); -MODULE_PARM_DESC(osd_login_prog, "Path to the osd_login upcall program"); - -struct __auto_login { - char uri[OBJLAYOUT_MAX_URI_LEN]; - char osdname[OBJLAYOUT_MAX_OSDNAME_LEN]; - char systemid_hex[OBJLAYOUT_MAX_SYSID_HEX_LEN]; -}; - -static int __objlayout_upcall(struct __auto_login *login) -{ - static char *envp[] = { "HOME=/", - "TERM=linux", - "PATH=/sbin:/usr/sbin:/bin:/usr/bin", - NULL - }; - char *argv[8]; - int ret; - - if (unlikely(!osd_login_prog[0])) { - dprintk("%s: osd_login_prog is disabled\n", __func__); - return -EACCES; - } - - dprintk("%s uri: %s\n", __func__, login->uri); - dprintk("%s osdname %s\n", __func__, login->osdname); - dprintk("%s 
systemid_hex %s\n", __func__, login->systemid_hex); - - argv[0] = (char *)osd_login_prog; - argv[1] = "-u"; - argv[2] = login->uri; - argv[3] = "-o"; - argv[4] = login->osdname; - argv[5] = "-s"; - argv[6] = login->systemid_hex; - argv[7] = NULL; - - ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC); - /* - * Disable the upcall mechanism if we're getting an ENOENT or - * EACCES error. The admin can re-enable it on the fly by using - * sysfs to set the objlayoutdriver.osd_login_prog module parameter once - * the problem has been fixed. - */ - if (ret == -ENOENT || ret == -EACCES) { - printk(KERN_ERR "PNFS-OBJ: %s was not found please set " - "objlayoutdriver.osd_login_prog kernel parameter!\n", - osd_login_prog); - osd_login_prog[0] = '\0'; - } - dprintk("%s %s return value: %d\n", __func__, osd_login_prog, ret); - - return ret; -} - -/* Assume dest is all zeros */ -static void __copy_nfsS_and_zero_terminate(struct nfs4_string s, - char *dest, int max_len, - const char *var_name) -{ - if (!s.len) - return; - - if (s.len >= max_len) { - pr_warn_ratelimited( - "objlayout_autologin: %s: s.len(%d) >= max_len(%d)", - var_name, s.len, max_len); - s.len = max_len - 1; /* space for null terminator */ - } - - memcpy(dest, s.data, s.len); -} - -/* Assume sysid is all zeros */ -static void _sysid_2_hex(struct nfs4_string s, - char sysid[OBJLAYOUT_MAX_SYSID_HEX_LEN]) -{ - int i; - char *cur; - - if (!s.len) - return; - - if (s.len != OSD_SYSTEMID_LEN) { - pr_warn_ratelimited( - "objlayout_autologin: systemid_len(%d) != OSD_SYSTEMID_LEN", - s.len); - if (s.len > OSD_SYSTEMID_LEN) - s.len = OSD_SYSTEMID_LEN; - } - - cur = sysid; - for (i = 0; i < s.len; i++) - cur = hex_byte_pack(cur, s.data[i]); -} - -int objlayout_autologin(struct pnfs_osd_deviceaddr *deviceaddr) -{ - int rc; - struct __auto_login login; - - if (!deviceaddr->oda_targetaddr.ota_netaddr.r_addr.len) - return -ENODEV; - - memset(&login, 0, sizeof(login)); - __copy_nfsS_and_zero_terminate( - 
deviceaddr->oda_targetaddr.ota_netaddr.r_addr, - login.uri, sizeof(login.uri), "URI"); - - __copy_nfsS_and_zero_terminate( - deviceaddr->oda_osdname, - login.osdname, sizeof(login.osdname), "OSDNAME"); - - _sysid_2_hex(deviceaddr->oda_systemid, login.systemid_hex); - - rc = __objlayout_upcall(&login); - if (rc > 0) /* script returns positive values */ - rc = -ENODEV; - - return rc; -} diff --git a/trunk/fs/nfs/objlayout/objlayout.h b/trunk/fs/nfs/objlayout/objlayout.h index 880ba086be94..8ec34727ed21 100644 --- a/trunk/fs/nfs/objlayout/objlayout.h +++ b/trunk/fs/nfs/objlayout/objlayout.h @@ -184,6 +184,4 @@ extern void objlayout_encode_layoutreturn( struct xdr_stream *, const struct nfs4_layoutreturn_args *); -extern int objlayout_autologin(struct pnfs_osd_deviceaddr *deviceaddr); - #endif /* _OBJLAYOUT_H */ diff --git a/trunk/fs/nfs/pagelist.c b/trunk/fs/nfs/pagelist.c index d21fceaa9f62..5668f7c54c41 100644 --- a/trunk/fs/nfs/pagelist.c +++ b/trunk/fs/nfs/pagelist.c @@ -13,7 +13,6 @@ #include #include #include -#include #include #include #include @@ -107,6 +106,36 @@ void nfs_unlock_request(struct nfs_page *req) nfs_release_request(req); } +/** + * nfs_set_page_tag_locked - Tag a request as locked + * @req: + */ +int nfs_set_page_tag_locked(struct nfs_page *req) +{ + if (!nfs_lock_request_dontget(req)) + return 0; + if (test_bit(PG_MAPPED, &req->wb_flags)) + radix_tree_tag_set(&NFS_I(req->wb_context->dentry->d_inode)->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED); + return 1; +} + +/** + * nfs_clear_page_tag_locked - Clear request tag and wake up sleepers + */ +void nfs_clear_page_tag_locked(struct nfs_page *req) +{ + if (test_bit(PG_MAPPED, &req->wb_flags)) { + struct inode *inode = req->wb_context->dentry->d_inode; + struct nfs_inode *nfsi = NFS_I(inode); + + spin_lock(&inode->i_lock); + radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED); + nfs_unlock_request(req); + spin_unlock(&inode->i_lock); + } else + 
nfs_unlock_request(req); +} + /* * nfs_clear_request - Free up all resources allocated to the request * @req: @@ -396,6 +425,67 @@ void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *desc, pgoff_t index) } } +#define NFS_SCAN_MAXENTRIES 16 +/** + * nfs_scan_list - Scan a list for matching requests + * @nfsi: NFS inode + * @dst: Destination list + * @idx_start: lower bound of page->index to scan + * @npages: idx_start + npages sets the upper bound to scan. + * @tag: tag to scan for + * + * Moves elements from one of the inode request lists. + * If the number of requests is set to 0, the entire address_space + * starting at index idx_start, is scanned. + * The requests are *not* checked to ensure that they form a contiguous set. + * You must be holding the inode's i_lock when calling this function + */ +int nfs_scan_list(struct nfs_inode *nfsi, + struct list_head *dst, pgoff_t idx_start, + unsigned int npages, int tag) +{ + struct nfs_page *pgvec[NFS_SCAN_MAXENTRIES]; + struct nfs_page *req; + pgoff_t idx_end; + int found, i; + int res; + struct list_head *list; + + res = 0; + if (npages == 0) + idx_end = ~0; + else + idx_end = idx_start + npages - 1; + + for (;;) { + found = radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree, + (void **)&pgvec[0], idx_start, + NFS_SCAN_MAXENTRIES, tag); + if (found <= 0) + break; + for (i = 0; i < found; i++) { + req = pgvec[i]; + if (req->wb_index > idx_end) + goto out; + idx_start = req->wb_index + 1; + if (nfs_set_page_tag_locked(req)) { + kref_get(&req->wb_kref); + radix_tree_tag_clear(&nfsi->nfs_page_tree, + req->wb_index, tag); + list = pnfs_choose_commit_list(req, dst); + nfs_list_add_request(req, list); + res++; + if (res == INT_MAX) + goto out; + } + } + /* for latency reduction */ + cond_resched_lock(&nfsi->vfs_inode.i_lock); + } +out: + return res; +} + int __init nfs_init_nfspagecache(void) { nfs_page_cachep = kmem_cache_create("nfs_page", diff --git a/trunk/fs/nfs/pnfs.c b/trunk/fs/nfs/pnfs.c index 
b5d451586943..17149a490065 100644 --- a/trunk/fs/nfs/pnfs.c +++ b/trunk/fs/nfs/pnfs.c @@ -101,8 +101,8 @@ set_pnfs_layoutdriver(struct nfs_server *server, const struct nfs_fh *mntfh, goto out_no_driver; if (!(server->nfs_client->cl_exchange_flags & (EXCHGID4_FLAG_USE_NON_PNFS | EXCHGID4_FLAG_USE_PNFS_MDS))) { - printk(KERN_ERR "NFS: %s: id %u cl_exchange_flags 0x%x\n", - __func__, id, server->nfs_client->cl_exchange_flags); + printk(KERN_ERR "%s: id %u cl_exchange_flags 0x%x\n", __func__, + id, server->nfs_client->cl_exchange_flags); goto out_no_driver; } ld_type = find_pnfs_driver(id); @@ -122,8 +122,8 @@ set_pnfs_layoutdriver(struct nfs_server *server, const struct nfs_fh *mntfh, server->pnfs_curr_ld = ld_type; if (ld_type->set_layoutdriver && ld_type->set_layoutdriver(server, mntfh)) { - printk(KERN_ERR "NFS: %s: Error initializing pNFS layout " - "driver %u.\n", __func__, id); + printk(KERN_ERR "%s: Error initializing pNFS layout driver %u.\n", + __func__, id); module_put(ld_type->owner); goto out_no_driver; } @@ -143,11 +143,11 @@ pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *ld_type) struct pnfs_layoutdriver_type *tmp; if (ld_type->id == 0) { - printk(KERN_ERR "NFS: %s id 0 is reserved\n", __func__); + printk(KERN_ERR "%s id 0 is reserved\n", __func__); return status; } if (!ld_type->alloc_lseg || !ld_type->free_lseg) { - printk(KERN_ERR "NFS: %s Layout driver must provide " + printk(KERN_ERR "%s Layout driver must provide " "alloc_lseg and free_lseg.\n", __func__); return status; } @@ -160,7 +160,7 @@ pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *ld_type) dprintk("%s Registering id:%u name:%s\n", __func__, ld_type->id, ld_type->name); } else { - printk(KERN_ERR "NFS: %s Module with id %d already loaded!\n", + printk(KERN_ERR "%s Module with id %d already loaded!\n", __func__, ld_type->id); } spin_unlock(&pnfs_spinlock); @@ -496,12 +496,12 @@ pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new, { u32 oldseq, 
newseq; - oldseq = be32_to_cpu(lo->plh_stateid.seqid); - newseq = be32_to_cpu(new->seqid); + oldseq = be32_to_cpu(lo->plh_stateid.stateid.seqid); + newseq = be32_to_cpu(new->stateid.seqid); if ((int)(newseq - oldseq) > 0) { - nfs4_stateid_copy(&lo->plh_stateid, new); + memcpy(&lo->plh_stateid, &new->stateid, sizeof(new->stateid)); if (update_barrier) { - u32 new_barrier = be32_to_cpu(new->seqid); + u32 new_barrier = be32_to_cpu(new->stateid.seqid); if ((int)(new_barrier - lo->plh_barrier)) lo->plh_barrier = new_barrier; @@ -525,7 +525,7 @@ pnfs_layoutgets_blocked(struct pnfs_layout_hdr *lo, nfs4_stateid *stateid, int lget) { if ((stateid) && - (int)(lo->plh_barrier - be32_to_cpu(stateid->seqid)) >= 0) + (int)(lo->plh_barrier - be32_to_cpu(stateid->stateid.seqid)) >= 0) return true; return lo->plh_block_lgets || test_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags) || @@ -549,10 +549,11 @@ pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo, do { seq = read_seqbegin(&open_state->seqlock); - nfs4_stateid_copy(dst, &open_state->stateid); + memcpy(dst->data, open_state->stateid.data, + sizeof(open_state->stateid.data)); } while (read_seqretry(&open_state->seqlock, seq)); } else - nfs4_stateid_copy(dst, &lo->plh_stateid); + memcpy(dst->data, lo->plh_stateid.data, sizeof(lo->plh_stateid.data)); spin_unlock(&lo->plh_inode->i_lock); dprintk("<-- %s\n", __func__); return status; @@ -589,7 +590,7 @@ send_layoutget(struct pnfs_layout_hdr *lo, max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz; max_pages = max_resp_sz >> PAGE_SHIFT; - pages = kcalloc(max_pages, sizeof(struct page *), gfp_flags); + pages = kzalloc(max_pages * sizeof(struct page *), gfp_flags); if (!pages) goto out_err_free; @@ -759,7 +760,7 @@ bool pnfs_roc_drain(struct inode *ino, u32 *barrier) } if (!found) { struct pnfs_layout_hdr *lo = nfsi->layout; - u32 current_seqid = be32_to_cpu(lo->plh_stateid.seqid); + u32 current_seqid = be32_to_cpu(lo->plh_stateid.stateid.seqid); /* 
Since close does not return a layout stateid for use as * a barrier, we choose the worst-case barrier. @@ -965,7 +966,8 @@ pnfs_update_layout(struct inode *ino, } /* Do we even need to bother with this? */ - if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) { + if (test_bit(NFS4CLNT_LAYOUTRECALL, &clp->cl_state) || + test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) { dprintk("%s matches recall, use MDS\n", __func__); goto out_unlock; } @@ -1030,6 +1032,7 @@ pnfs_layout_process(struct nfs4_layoutget *lgp) struct nfs4_layoutget_res *res = &lgp->res; struct pnfs_layout_segment *lseg; struct inode *ino = lo->plh_inode; + struct nfs_client *clp = NFS_SERVER(ino)->nfs_client; int status = 0; /* Inject layout blob into I/O device driver */ @@ -1045,7 +1048,8 @@ pnfs_layout_process(struct nfs4_layoutget *lgp) } spin_lock(&ino->i_lock); - if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) { + if (test_bit(NFS4CLNT_LAYOUTRECALL, &clp->cl_state) || + test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) { dprintk("%s forget reply due to recall\n", __func__); goto out_forget_reply; } @@ -1210,7 +1214,6 @@ void pnfs_ld_write_done(struct nfs_write_data *data) } data->task.tk_status = pnfs_write_done_resend_to_mds(data->inode, &data->pages); } - put_lseg(data->lseg); data->mds_ops->rpc_release(data); } EXPORT_SYMBOL_GPL(pnfs_ld_write_done); @@ -1224,7 +1227,6 @@ pnfs_write_through_mds(struct nfs_pageio_descriptor *desc, nfs_list_add_request(data->req, &desc->pg_list); nfs_pageio_reset_write_mds(desc); desc->pg_recoalesce = 1; - put_lseg(data->lseg); nfs_writedata_release(data); } @@ -1325,7 +1327,6 @@ void pnfs_ld_read_done(struct nfs_read_data *data) data->mds_ops->rpc_call_done(&data->task, data); } else pnfs_ld_handle_read_error(data); - put_lseg(data->lseg); data->mds_ops->rpc_release(data); } EXPORT_SYMBOL_GPL(pnfs_ld_read_done); @@ -1529,7 +1530,8 @@ pnfs_layoutcommit_inode(struct inode *inode, bool sync) end_pos = nfsi->layout->plh_lwb; nfsi->layout->plh_lwb = 0; - 
nfs4_stateid_copy(&data->args.stateid, &nfsi->layout->plh_stateid); + memcpy(&data->args.stateid.data, nfsi->layout->plh_stateid.data, + sizeof(nfsi->layout->plh_stateid.data)); spin_unlock(&inode->i_lock); data->args.inode = inode; diff --git a/trunk/fs/nfs/pnfs.h b/trunk/fs/nfs/pnfs.h index 442ebf68eeec..53d593a0a4f2 100644 --- a/trunk/fs/nfs/pnfs.h +++ b/trunk/fs/nfs/pnfs.h @@ -94,10 +94,11 @@ struct pnfs_layoutdriver_type { const struct nfs_pageio_ops *pg_read_ops; const struct nfs_pageio_ops *pg_write_ops; - void (*mark_request_commit) (struct nfs_page *req, - struct pnfs_layout_segment *lseg); - void (*clear_request_commit) (struct nfs_page *req); - int (*scan_commit_lists) (struct inode *inode, int max, spinlock_t *lock); + /* Returns true if layoutdriver wants to divert this request to + * driver's commit routine. + */ + bool (*mark_pnfs_commit)(struct pnfs_layout_segment *lseg); + struct list_head * (*choose_commit_list) (struct nfs_page *req); int (*commit_pagelist)(struct inode *inode, struct list_head *mds_pages, int how); /* @@ -228,6 +229,7 @@ struct nfs4_deviceid_node { atomic_t ref; }; +void nfs4_print_deviceid(const struct nfs4_deviceid *dev_id); struct nfs4_deviceid_node *nfs4_find_get_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *); void nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *); void nfs4_init_deviceid_node(struct nfs4_deviceid_node *, @@ -260,6 +262,20 @@ static inline int pnfs_enabled_sb(struct nfs_server *nfss) return nfss->pnfs_curr_ld != NULL; } +static inline void +pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg) +{ + if (lseg) { + struct pnfs_layoutdriver_type *ld; + + ld = NFS_SERVER(req->wb_page->mapping->host)->pnfs_curr_ld; + if (ld->mark_pnfs_commit && ld->mark_pnfs_commit(lseg)) { + set_bit(PG_PNFS_COMMIT, &req->wb_flags); + req->wb_commit_lseg = get_lseg(lseg); + } + } 
+} + static inline int pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how) { @@ -268,42 +284,27 @@ pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how) return NFS_SERVER(inode)->pnfs_curr_ld->commit_pagelist(inode, mds_pages, how); } -static inline bool -pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg) +static inline struct list_head * +pnfs_choose_commit_list(struct nfs_page *req, struct list_head *mds) { - struct inode *inode = req->wb_context->dentry->d_inode; - struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld; - - if (lseg == NULL || ld->mark_request_commit == NULL) - return false; - ld->mark_request_commit(req, lseg); - return true; -} + struct list_head *rv; -static inline bool -pnfs_clear_request_commit(struct nfs_page *req) -{ - struct inode *inode = req->wb_context->dentry->d_inode; - struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld; + if (test_and_clear_bit(PG_PNFS_COMMIT, &req->wb_flags)) { + struct inode *inode = req->wb_commit_lseg->pls_layout->plh_inode; - if (ld == NULL || ld->clear_request_commit == NULL) - return false; - ld->clear_request_commit(req); - return true; + set_bit(NFS_INO_PNFS_COMMIT, &NFS_I(inode)->flags); + rv = NFS_SERVER(inode)->pnfs_curr_ld->choose_commit_list(req); + /* matched by ref taken when PG_PNFS_COMMIT is set */ + put_lseg(req->wb_commit_lseg); + } else + rv = mds; + return rv; } -static inline int -pnfs_scan_commit_lists(struct inode *inode, int max, spinlock_t *lock) +static inline void pnfs_clear_request_commit(struct nfs_page *req) { - struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld; - int ret; - - if (ld == NULL || ld->scan_commit_lists == NULL) - return 0; - ret = ld->scan_commit_lists(inode, max, lock); - if (ret != 0) - set_bit(NFS_INO_PNFS_COMMIT, &NFS_I(inode)->flags); - return ret; + if (test_and_clear_bit(PG_PNFS_COMMIT, &req->wb_flags)) + put_lseg(req->wb_commit_lseg); } /* 
Should the pNFS client commit and return the layout upon a setattr */ @@ -327,13 +328,6 @@ static inline int pnfs_return_layout(struct inode *ino) return 0; } -#ifdef NFS_DEBUG -void nfs4_print_deviceid(const struct nfs4_deviceid *dev_id); -#else -static inline void nfs4_print_deviceid(const struct nfs4_deviceid *dev_id) -{ -} -#endif /* NFS_DEBUG */ #else /* CONFIG_NFS_V4_1 */ static inline void pnfs_destroy_all_layouts(struct nfs_client *clp) @@ -406,35 +400,35 @@ static inline bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, st return false; } +static inline void +pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg) +{ +} + static inline int pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how) { return PNFS_NOT_ATTEMPTED; } -static inline bool -pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg) +static inline struct list_head * +pnfs_choose_commit_list(struct nfs_page *req, struct list_head *mds) { - return false; + return mds; } -static inline bool -pnfs_clear_request_commit(struct nfs_page *req) +static inline void pnfs_clear_request_commit(struct nfs_page *req) { - return false; } -static inline int -pnfs_scan_commit_lists(struct inode *inode, int max, spinlock_t *lock) +static inline int pnfs_layoutcommit_inode(struct inode *inode, bool sync) { return 0; } -static inline int pnfs_layoutcommit_inode(struct inode *inode, bool sync) +static inline void nfs4_deviceid_purge_client(struct nfs_client *ncl) { - return 0; } - #endif /* CONFIG_NFS_V4_1 */ #endif /* FS_NFS_PNFS_H */ diff --git a/trunk/fs/nfs/pnfs_dev.c b/trunk/fs/nfs/pnfs_dev.c index 73f701f1f4d3..4f359d2a26eb 100644 --- a/trunk/fs/nfs/pnfs_dev.c +++ b/trunk/fs/nfs/pnfs_dev.c @@ -43,7 +43,6 @@ static struct hlist_head nfs4_deviceid_cache[NFS4_DEVICE_ID_HASH_SIZE]; static DEFINE_SPINLOCK(nfs4_deviceid_lock); -#ifdef NFS_DEBUG void nfs4_print_deviceid(const struct nfs4_deviceid *id) { @@ -53,7 +52,6 @@ 
nfs4_print_deviceid(const struct nfs4_deviceid *id) p[0], p[1], p[2], p[3]); } EXPORT_SYMBOL_GPL(nfs4_print_deviceid); -#endif static inline u32 nfs4_deviceid_hash(const struct nfs4_deviceid *id) @@ -94,7 +92,7 @@ _lookup_deviceid(const struct pnfs_layoutdriver_type *ld, * @clp nfs_client associated with deviceid * @id deviceid to look up */ -static struct nfs4_deviceid_node * +struct nfs4_deviceid_node * _find_get_deviceid(const struct pnfs_layoutdriver_type *ld, const struct nfs_client *clp, const struct nfs4_deviceid *id, long hash) diff --git a/trunk/fs/nfs/proc.c b/trunk/fs/nfs/proc.c index b63b6f4d14fb..0c672588fe5a 100644 --- a/trunk/fs/nfs/proc.c +++ b/trunk/fs/nfs/proc.c @@ -358,11 +358,6 @@ nfs_proc_unlink_setup(struct rpc_message *msg, struct inode *dir) msg->rpc_proc = &nfs_procedures[NFSPROC_REMOVE]; } -static void nfs_proc_unlink_rpc_prepare(struct rpc_task *task, struct nfs_unlinkdata *data) -{ - rpc_call_start(task); -} - static int nfs_proc_unlink_done(struct rpc_task *task, struct inode *dir) { if (nfs_async_handle_expired_key(task)) @@ -377,11 +372,6 @@ nfs_proc_rename_setup(struct rpc_message *msg, struct inode *dir) msg->rpc_proc = &nfs_procedures[NFSPROC_RENAME]; } -static void nfs_proc_rename_rpc_prepare(struct rpc_task *task, struct nfs_renamedata *data) -{ - rpc_call_start(task); -} - static int nfs_proc_rename_done(struct rpc_task *task, struct inode *old_dir, struct inode *new_dir) @@ -661,11 +651,6 @@ static void nfs_proc_read_setup(struct nfs_read_data *data, struct rpc_message * msg->rpc_proc = &nfs_procedures[NFSPROC_READ]; } -static void nfs_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data) -{ - rpc_call_start(task); -} - static int nfs_write_done(struct rpc_task *task, struct nfs_write_data *data) { if (nfs_async_handle_expired_key(task)) @@ -683,11 +668,6 @@ static void nfs_proc_write_setup(struct nfs_write_data *data, struct rpc_message msg->rpc_proc = &nfs_procedures[NFSPROC_WRITE]; } -static void 
nfs_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data) -{ - rpc_call_start(task); -} - static void nfs_proc_commit_setup(struct nfs_write_data *data, struct rpc_message *msg) { @@ -741,11 +721,9 @@ const struct nfs_rpc_ops nfs_v2_clientops = { .create = nfs_proc_create, .remove = nfs_proc_remove, .unlink_setup = nfs_proc_unlink_setup, - .unlink_rpc_prepare = nfs_proc_unlink_rpc_prepare, .unlink_done = nfs_proc_unlink_done, .rename = nfs_proc_rename, .rename_setup = nfs_proc_rename_setup, - .rename_rpc_prepare = nfs_proc_rename_rpc_prepare, .rename_done = nfs_proc_rename_done, .link = nfs_proc_link, .symlink = nfs_proc_symlink, @@ -758,10 +736,8 @@ const struct nfs_rpc_ops nfs_v2_clientops = { .pathconf = nfs_proc_pathconf, .decode_dirent = nfs2_decode_dirent, .read_setup = nfs_proc_read_setup, - .read_rpc_prepare = nfs_proc_read_rpc_prepare, .read_done = nfs_read_done, .write_setup = nfs_proc_write_setup, - .write_rpc_prepare = nfs_proc_write_rpc_prepare, .write_done = nfs_write_done, .commit_setup = nfs_proc_commit_setup, .lock = nfs_proc_lock, diff --git a/trunk/fs/nfs/read.c b/trunk/fs/nfs/read.c index cc1f758a7ee1..cfa175c223dc 100644 --- a/trunk/fs/nfs/read.c +++ b/trunk/fs/nfs/read.c @@ -66,6 +66,7 @@ void nfs_readdata_free(struct nfs_read_data *p) void nfs_readdata_release(struct nfs_read_data *rdata) { + put_lseg(rdata->lseg); put_nfs_open_context(rdata->args.context); nfs_readdata_free(rdata); } @@ -464,14 +465,23 @@ static void nfs_readpage_release_partial(void *calldata) nfs_readdata_release(calldata); } +#if defined(CONFIG_NFS_V4_1) void nfs_read_prepare(struct rpc_task *task, void *calldata) { struct nfs_read_data *data = calldata; - NFS_PROTO(data->inode)->read_rpc_prepare(task, data); + + if (nfs4_setup_sequence(NFS_SERVER(data->inode), + &data->args.seq_args, &data->res.seq_res, + 0, task)) + return; + rpc_call_start(task); } +#endif /* CONFIG_NFS_V4_1 */ static const struct rpc_call_ops nfs_read_partial_ops = { +#if 
defined(CONFIG_NFS_V4_1) .rpc_call_prepare = nfs_read_prepare, +#endif /* CONFIG_NFS_V4_1 */ .rpc_call_done = nfs_readpage_result_partial, .rpc_release = nfs_readpage_release_partial, }; @@ -535,7 +545,9 @@ static void nfs_readpage_release_full(void *calldata) } static const struct rpc_call_ops nfs_read_full_ops = { +#if defined(CONFIG_NFS_V4_1) .rpc_call_prepare = nfs_read_prepare, +#endif /* CONFIG_NFS_V4_1 */ .rpc_call_done = nfs_readpage_result_full, .rpc_release = nfs_readpage_release_full, }; diff --git a/trunk/fs/nfs/super.c b/trunk/fs/nfs/super.c index ccc4cdb1efe9..3dfa4f112c0a 100644 --- a/trunk/fs/nfs/super.c +++ b/trunk/fs/nfs/super.c @@ -52,8 +52,6 @@ #include #include #include -#include -#include #include #include @@ -81,6 +79,7 @@ enum { Opt_cto, Opt_nocto, Opt_ac, Opt_noac, Opt_lock, Opt_nolock, + Opt_v2, Opt_v3, Opt_v4, Opt_udp, Opt_tcp, Opt_rdma, Opt_acl, Opt_noacl, Opt_rdirplus, Opt_nordirplus, @@ -98,10 +97,10 @@ enum { Opt_namelen, Opt_mountport, Opt_mountvers, + Opt_nfsvers, Opt_minorversion, /* Mount options that take string arguments */ - Opt_nfsvers, Opt_sec, Opt_proto, Opt_mountproto, Opt_mounthost, Opt_addr, Opt_mountaddr, Opt_clientaddr, Opt_lookupcache, @@ -133,6 +132,9 @@ static const match_table_t nfs_mount_option_tokens = { { Opt_noac, "noac" }, { Opt_lock, "lock" }, { Opt_nolock, "nolock" }, + { Opt_v2, "v2" }, + { Opt_v3, "v3" }, + { Opt_v4, "v4" }, { Opt_udp, "udp" }, { Opt_tcp, "tcp" }, { Opt_rdma, "rdma" }, @@ -161,10 +163,9 @@ static const match_table_t nfs_mount_option_tokens = { { Opt_namelen, "namlen=%s" }, { Opt_mountport, "mountport=%s" }, { Opt_mountvers, "mountvers=%s" }, - { Opt_minorversion, "minorversion=%s" }, - { Opt_nfsvers, "nfsvers=%s" }, { Opt_nfsvers, "vers=%s" }, + { Opt_minorversion, "minorversion=%s" }, { Opt_sec, "sec=%s" }, { Opt_proto, "proto=%s" }, @@ -178,9 +179,6 @@ static const match_table_t nfs_mount_option_tokens = { { Opt_fscache_uniq, "fsc=%s" }, { Opt_local_lock, "local_lock=%s" }, - /* The 
following needs to be listed after all other options */ - { Opt_nfsvers, "v%s" }, - { Opt_err, NULL } }; @@ -261,22 +259,6 @@ static match_table_t nfs_local_lock_tokens = { { Opt_local_lock_err, NULL } }; -enum { - Opt_vers_2, Opt_vers_3, Opt_vers_4, Opt_vers_4_0, - Opt_vers_4_1, - - Opt_vers_err -}; - -static match_table_t nfs_vers_tokens = { - { Opt_vers_2, "2" }, - { Opt_vers_3, "3" }, - { Opt_vers_4, "4" }, - { Opt_vers_4_0, "4.0" }, - { Opt_vers_4_1, "4.1" }, - - { Opt_vers_err, NULL } -}; static void nfs_umount_begin(struct super_block *); static int nfs_statfs(struct dentry *, struct kstatfs *); @@ -638,6 +620,7 @@ static void nfs_show_nfsv4_options(struct seq_file *m, struct nfs_server *nfss, struct nfs_client *clp = nfss->nfs_client; seq_printf(m, ",clientaddr=%s", clp->cl_ipaddr); + seq_printf(m, ",minorversion=%u", clp->cl_minorversion); } #else static void nfs_show_nfsv4_options(struct seq_file *m, struct nfs_server *nfss, @@ -646,15 +629,6 @@ static void nfs_show_nfsv4_options(struct seq_file *m, struct nfs_server *nfss, } #endif -static void nfs_show_nfs_version(struct seq_file *m, - unsigned int version, - unsigned int minorversion) -{ - seq_printf(m, ",vers=%u", version); - if (version == 4) - seq_printf(m, ".%u", minorversion); -} - /* * Describe the mount options in force on this server representation */ @@ -682,7 +656,7 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, u32 version = clp->rpc_ops->version; int local_flock, local_fcntl; - nfs_show_nfs_version(m, version, clp->cl_minorversion); + seq_printf(m, ",vers=%u", version); seq_printf(m, ",rsize=%u", nfss->rsize); seq_printf(m, ",wsize=%u", nfss->wsize); if (nfss->bsize != 0) @@ -702,10 +676,8 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, else seq_puts(m, nfs_infop->nostr); } - rcu_read_lock(); seq_printf(m, ",proto=%s", rpc_peeraddr2str(nfss->client, RPC_DISPLAY_NETID)); - rcu_read_unlock(); if (version == 4) { if 
(nfss->port != NFS_PORT) seq_printf(m, ",port=%u", nfss->port); @@ -754,11 +726,9 @@ static int nfs_show_options(struct seq_file *m, struct dentry *root) nfs_show_mount_options(m, nfss, 0); - rcu_read_lock(); seq_printf(m, ",addr=%s", rpc_peeraddr2str(nfss->nfs_client->cl_rpcclient, RPC_DISPLAY_ADDR)); - rcu_read_unlock(); return 0; } @@ -775,6 +745,7 @@ static void show_sessions(struct seq_file *m, struct nfs_server *server) {} #endif #endif +#ifdef CONFIG_NFS_V4 #ifdef CONFIG_NFS_V4_1 static void show_pnfs(struct seq_file *m, struct nfs_server *server) { @@ -784,26 +755,9 @@ static void show_pnfs(struct seq_file *m, struct nfs_server *server) else seq_printf(m, "not configured"); } - -static void show_implementation_id(struct seq_file *m, struct nfs_server *nfss) -{ - if (nfss->nfs_client && nfss->nfs_client->impl_id) { - struct nfs41_impl_id *impl_id = nfss->nfs_client->impl_id; - seq_printf(m, "\n\timpl_id:\tname='%s',domain='%s'," - "date='%llu,%u'", - impl_id->name, impl_id->domain, - impl_id->date.seconds, impl_id->date.nseconds); - } -} #else -#ifdef CONFIG_NFS_V4 -static void show_pnfs(struct seq_file *m, struct nfs_server *server) -{ -} +static void show_pnfs(struct seq_file *m, struct nfs_server *server) {} #endif -static void show_implementation_id(struct seq_file *m, struct nfs_server *nfss) -{ -} #endif static int nfs_show_devname(struct seq_file *m, struct dentry *root) @@ -852,8 +806,6 @@ static int nfs_show_stats(struct seq_file *m, struct dentry *root) seq_printf(m, "\n\tage:\t%lu", (jiffies - nfss->mount_time) / HZ); - show_implementation_id(m, nfss); - seq_printf(m, "\n\tcaps:\t"); seq_printf(m, "caps=0x%x", nfss->caps); seq_printf(m, ",wtmult=%u", nfss->wtmult); @@ -956,7 +908,6 @@ static struct nfs_parsed_mount_data *nfs_alloc_parsed_mount_data(unsigned int ve data->auth_flavor_len = 1; data->version = version; data->minorversion = 0; - data->net = current->nsproxy->net_ns; security_init_mnt_opts(&data->lsm_opts); } return data; @@ -1101,40 
+1052,6 @@ static int nfs_parse_security_flavors(char *value, return 1; } -static int nfs_parse_version_string(char *string, - struct nfs_parsed_mount_data *mnt, - substring_t *args) -{ - mnt->flags &= ~NFS_MOUNT_VER3; - switch (match_token(string, nfs_vers_tokens, args)) { - case Opt_vers_2: - mnt->version = 2; - break; - case Opt_vers_3: - mnt->flags |= NFS_MOUNT_VER3; - mnt->version = 3; - break; - case Opt_vers_4: - /* Backward compatibility option. In future, - * the mount program should always supply - * a NFSv4 minor version number. - */ - mnt->version = 4; - break; - case Opt_vers_4_0: - mnt->version = 4; - mnt->minorversion = 0; - break; - case Opt_vers_4_1: - mnt->version = 4; - mnt->minorversion = 1; - break; - default: - return 0; - } - return 1; -} - static int nfs_get_option_str(substring_t args[], char **option) { kfree(*option); @@ -1240,6 +1157,18 @@ static int nfs_parse_mount_options(char *raw, mnt->flags |= (NFS_MOUNT_LOCAL_FLOCK | NFS_MOUNT_LOCAL_FCNTL); break; + case Opt_v2: + mnt->flags &= ~NFS_MOUNT_VER3; + mnt->version = 2; + break; + case Opt_v3: + mnt->flags |= NFS_MOUNT_VER3; + mnt->version = 3; + break; + case Opt_v4: + mnt->flags &= ~NFS_MOUNT_VER3; + mnt->version = 4; + break; case Opt_udp: mnt->flags &= ~NFS_MOUNT_TCP; mnt->nfs_server.protocol = XPRT_TRANSPORT_UDP; @@ -1366,6 +1295,26 @@ static int nfs_parse_mount_options(char *raw, goto out_invalid_value; mnt->mount_server.version = option; break; + case Opt_nfsvers: + if (nfs_get_option_ul(args, &option)) + goto out_invalid_value; + switch (option) { + case NFS2_VERSION: + mnt->flags &= ~NFS_MOUNT_VER3; + mnt->version = 2; + break; + case NFS3_VERSION: + mnt->flags |= NFS_MOUNT_VER3; + mnt->version = 3; + break; + case NFS4_VERSION: + mnt->flags &= ~NFS_MOUNT_VER3; + mnt->version = 4; + break; + default: + goto out_invalid_value; + } + break; case Opt_minorversion: if (nfs_get_option_ul(args, &option)) goto out_invalid_value; @@ -1377,15 +1326,6 @@ static int 
nfs_parse_mount_options(char *raw, /* * options that take text values */ - case Opt_nfsvers: - string = match_strdup(args); - if (string == NULL) - goto out_nomem; - rc = nfs_parse_version_string(string, mnt, args); - kfree(string); - if (!rc) - goto out_invalid_value; - break; case Opt_sec: string = match_strdup(args); if (string == NULL) @@ -1465,7 +1405,7 @@ static int nfs_parse_mount_options(char *raw, if (string == NULL) goto out_nomem; mnt->nfs_server.addrlen = - rpc_pton(mnt->net, string, strlen(string), + rpc_pton(string, strlen(string), (struct sockaddr *) &mnt->nfs_server.address, sizeof(mnt->nfs_server.address)); @@ -1487,7 +1427,7 @@ static int nfs_parse_mount_options(char *raw, if (string == NULL) goto out_nomem; mnt->mount_server.addrlen = - rpc_pton(mnt->net, string, strlen(string), + rpc_pton(string, strlen(string), (struct sockaddr *) &mnt->mount_server.address, sizeof(mnt->mount_server.address)); @@ -1576,9 +1516,6 @@ static int nfs_parse_mount_options(char *raw, if (!sloppy && invalid_option) return 0; - if (mnt->minorversion && mnt->version != 4) - goto out_minorversion_mismatch; - /* * verify that any proto=/mountproto= options match the address * familiies in the addr=/mountaddr= options. 
@@ -1612,10 +1549,6 @@ static int nfs_parse_mount_options(char *raw, out_invalid_value: printk(KERN_INFO "NFS: bad mount option value specified: %s\n", p); return 0; -out_minorversion_mismatch: - printk(KERN_INFO "NFS: mount option vers=%u does not support " - "minorversion=%u\n", mnt->version, mnt->minorversion); - return 0; out_nomem: printk(KERN_INFO "NFS: not enough memory to parse option\n"); return 0; @@ -1689,7 +1622,6 @@ static int nfs_try_mount(struct nfs_parsed_mount_data *args, .noresvport = args->flags & NFS_MOUNT_NORESVPORT, .auth_flav_len = &server_authlist_len, .auth_flavs = server_authlist, - .net = args->net, }; int status; @@ -2115,7 +2047,7 @@ static inline void nfs_initialise_sb(struct super_block *sb) /* We probably want something more informative here */ snprintf(sb->s_id, sizeof(sb->s_id), - "%u:%u", MAJOR(sb->s_dev), MINOR(sb->s_dev)); + "%x:%x", MAJOR(sb->s_dev), MINOR(sb->s_dev)); if (sb->s_blocksize == 0) sb->s_blocksize = nfs_block_bits(server->wsize, @@ -2567,6 +2499,12 @@ static int nfs4_validate_text_mount_data(void *options, return -EINVAL; } + if (args->client_address == NULL) { + dfprintk(MOUNT, + "NFS4: mount program didn't pass callback address\n"); + return -EINVAL; + } + return nfs_parse_devname(dev_name, &args->nfs_server.hostname, NFS4_MAXNAMLEN, @@ -2725,7 +2663,8 @@ nfs4_remote_mount(struct file_system_type *fs_type, int flags, if (!s->s_root) { /* initial superblock/root creation */ nfs4_fill_super(s); - nfs_fscache_get_super_cookie(s, data->fscache_uniq, NULL); + nfs_fscache_get_super_cookie( + s, data ? 
data->fscache_uniq : NULL, NULL); } mntroot = nfs4_get_root(s, mntfh, dev_name); diff --git a/trunk/fs/nfs/sysctl.c b/trunk/fs/nfs/sysctl.c index ad4d2e787b20..978aaeb8a093 100644 --- a/trunk/fs/nfs/sysctl.c +++ b/trunk/fs/nfs/sysctl.c @@ -32,6 +32,7 @@ static ctl_table nfs_cb_sysctls[] = { .extra1 = (int *)&nfs_set_port_min, .extra2 = (int *)&nfs_set_port_max, }, +#ifndef CONFIG_NFS_USE_NEW_IDMAPPER { .procname = "idmap_cache_timeout", .data = &nfs_idmap_cache_timeout, @@ -39,6 +40,7 @@ static ctl_table nfs_cb_sysctls[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, +#endif /* CONFIG_NFS_USE_NEW_IDMAPPER */ #endif { .procname = "nfs_mountpoint_timeout", diff --git a/trunk/fs/nfs/unlink.c b/trunk/fs/nfs/unlink.c index 3210a03342f9..4f9319a2e567 100644 --- a/trunk/fs/nfs/unlink.c +++ b/trunk/fs/nfs/unlink.c @@ -20,6 +20,15 @@ #include "iostat.h" #include "delegation.h" +struct nfs_unlinkdata { + struct hlist_node list; + struct nfs_removeargs args; + struct nfs_removeres res; + struct inode *dir; + struct rpc_cred *cred; + struct nfs_fattr dir_attr; +}; + /** * nfs_free_unlinkdata - release data from a sillydelete operation. * @data: pointer to unlink structure. 
@@ -98,16 +107,25 @@ static void nfs_async_unlink_release(void *calldata) nfs_sb_deactive(sb); } -static void nfs_unlink_prepare(struct rpc_task *task, void *calldata) +#if defined(CONFIG_NFS_V4_1) +void nfs_unlink_prepare(struct rpc_task *task, void *calldata) { struct nfs_unlinkdata *data = calldata; - NFS_PROTO(data->dir)->unlink_rpc_prepare(task, data); + struct nfs_server *server = NFS_SERVER(data->dir); + + if (nfs4_setup_sequence(server, &data->args.seq_args, + &data->res.seq_res, 1, task)) + return; + rpc_call_start(task); } +#endif /* CONFIG_NFS_V4_1 */ static const struct rpc_call_ops nfs_unlink_ops = { .rpc_call_done = nfs_async_unlink_done, .rpc_release = nfs_async_unlink_release, +#if defined(CONFIG_NFS_V4_1) .rpc_call_prepare = nfs_unlink_prepare, +#endif /* CONFIG_NFS_V4_1 */ }; static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct nfs_unlinkdata *data) @@ -323,6 +341,18 @@ nfs_cancel_async_unlink(struct dentry *dentry) spin_unlock(&dentry->d_lock); } +struct nfs_renamedata { + struct nfs_renameargs args; + struct nfs_renameres res; + struct rpc_cred *cred; + struct inode *old_dir; + struct dentry *old_dentry; + struct nfs_fattr old_fattr; + struct inode *new_dir; + struct dentry *new_dentry; + struct nfs_fattr new_fattr; +}; + /** * nfs_async_rename_done - Sillyrename post-processing * @task: rpc_task of the sillyrename @@ -373,16 +403,25 @@ static void nfs_async_rename_release(void *calldata) kfree(data); } +#if defined(CONFIG_NFS_V4_1) static void nfs_rename_prepare(struct rpc_task *task, void *calldata) { struct nfs_renamedata *data = calldata; - NFS_PROTO(data->old_dir)->rename_rpc_prepare(task, data); + struct nfs_server *server = NFS_SERVER(data->old_dir); + + if (nfs4_setup_sequence(server, &data->args.seq_args, + &data->res.seq_res, 1, task)) + return; + rpc_call_start(task); } +#endif /* CONFIG_NFS_V4_1 */ static const struct rpc_call_ops nfs_rename_ops = { .rpc_call_done = nfs_async_rename_done, .rpc_release = 
nfs_async_rename_release, +#if defined(CONFIG_NFS_V4_1) .rpc_call_prepare = nfs_rename_prepare, +#endif /* CONFIG_NFS_V4_1 */ }; /** diff --git a/trunk/fs/nfs/write.c b/trunk/fs/nfs/write.c index 2c68818f68ac..834f0fe96f89 100644 --- a/trunk/fs/nfs/write.c +++ b/trunk/fs/nfs/write.c @@ -100,6 +100,7 @@ void nfs_writedata_free(struct nfs_write_data *p) void nfs_writedata_release(struct nfs_write_data *wdata) { + put_lseg(wdata->lseg); put_nfs_open_context(wdata->args.context); nfs_writedata_free(wdata); } @@ -235,10 +236,10 @@ static struct nfs_page *nfs_find_and_lock_request(struct page *page, bool nonblo req = nfs_page_find_request_locked(page); if (req == NULL) break; - if (nfs_lock_request_dontget(req)) + if (nfs_set_page_tag_locked(req)) break; /* Note: If we hold the page lock, as is the case in nfs_writepage, - * then the call to nfs_lock_request_dontget() will always + * then the call to nfs_set_page_tag_locked() will always * succeed provided that someone hasn't already marked the * request as dirty (in which case we don't care). */ @@ -374,14 +375,21 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) /* * Insert a write request into an inode */ -static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req) +static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req) { struct nfs_inode *nfsi = NFS_I(inode); + int error; + + error = radix_tree_preload(GFP_NOFS); + if (error != 0) + goto out; /* Lock the request! 
*/ nfs_lock_request_dontget(req); spin_lock(&inode->i_lock); + error = radix_tree_insert(&nfsi->nfs_page_tree, req->wb_index, req); + BUG_ON(error); if (!nfsi->npages && nfs_have_delegation(inode, FMODE_WRITE)) inode->i_version++; set_bit(PG_MAPPED, &req->wb_flags); @@ -389,7 +397,12 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req) set_page_private(req->wb_page, (unsigned long)req); nfsi->npages++; kref_get(&req->wb_kref); + radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, + NFS_PAGE_TAG_LOCKED); spin_unlock(&inode->i_lock); + radix_tree_preload_end(); +out: + return error; } /* @@ -406,6 +419,7 @@ static void nfs_inode_remove_request(struct nfs_page *req) set_page_private(req->wb_page, 0); ClearPagePrivate(req->wb_page); clear_bit(PG_MAPPED, &req->wb_flags); + radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index); nfsi->npages--; spin_unlock(&inode->i_lock); nfs_release_request(req); @@ -418,90 +432,39 @@ nfs_mark_request_dirty(struct nfs_page *req) } #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) -/** - * nfs_request_add_commit_list - add request to a commit list - * @req: pointer to a struct nfs_page - * @head: commit list head - * - * This sets the PG_CLEAN bit, updates the inode global count of - * number of outstanding requests requiring a commit as well as - * the MM page stats. - * - * The caller must _not_ hold the inode->i_lock, but must be - * holding the nfs_page lock. +/* + * Add a request to the inode's commit list. 
*/ -void -nfs_request_add_commit_list(struct nfs_page *req, struct list_head *head) +static void +nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg) { struct inode *inode = req->wb_context->dentry->d_inode; + struct nfs_inode *nfsi = NFS_I(inode); - set_bit(PG_CLEAN, &(req)->wb_flags); spin_lock(&inode->i_lock); - nfs_list_add_request(req, head); - NFS_I(inode)->ncommit++; + set_bit(PG_CLEAN, &(req)->wb_flags); + radix_tree_tag_set(&nfsi->nfs_page_tree, + req->wb_index, + NFS_PAGE_TAG_COMMIT); + nfsi->ncommit++; spin_unlock(&inode->i_lock); + pnfs_mark_request_commit(req, lseg); inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); inc_bdi_stat(req->wb_page->mapping->backing_dev_info, BDI_RECLAIMABLE); __mark_inode_dirty(inode, I_DIRTY_DATASYNC); } -EXPORT_SYMBOL_GPL(nfs_request_add_commit_list); - -/** - * nfs_request_remove_commit_list - Remove request from a commit list - * @req: pointer to a nfs_page - * - * This clears the PG_CLEAN bit, and updates the inode global count of - * number of outstanding requests requiring a commit - * It does not update the MM page stats. - * - * The caller _must_ hold the inode->i_lock and the nfs_page lock. - */ -void -nfs_request_remove_commit_list(struct nfs_page *req) -{ - struct inode *inode = req->wb_context->dentry->d_inode; - - if (!test_and_clear_bit(PG_CLEAN, &(req)->wb_flags)) - return; - nfs_list_remove_request(req); - NFS_I(inode)->ncommit--; -} -EXPORT_SYMBOL_GPL(nfs_request_remove_commit_list); - - -/* - * Add a request to the inode's commit list. 
- */ -static void -nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg) -{ - struct inode *inode = req->wb_context->dentry->d_inode; - - if (pnfs_mark_request_commit(req, lseg)) - return; - nfs_request_add_commit_list(req, &NFS_I(inode)->commit_list); -} - -static void -nfs_clear_page_commit(struct page *page) -{ - dec_zone_page_state(page, NR_UNSTABLE_NFS); - dec_bdi_stat(page->mapping->backing_dev_info, BDI_RECLAIMABLE); -} -static void +static int nfs_clear_request_commit(struct nfs_page *req) { - if (test_bit(PG_CLEAN, &req->wb_flags)) { - struct inode *inode = req->wb_context->dentry->d_inode; + struct page *page = req->wb_page; - if (!pnfs_clear_request_commit(req)) { - spin_lock(&inode->i_lock); - nfs_request_remove_commit_list(req); - spin_unlock(&inode->i_lock); - } - nfs_clear_page_commit(req->wb_page); + if (test_and_clear_bit(PG_CLEAN, &(req)->wb_flags)) { + dec_zone_page_state(page, NR_UNSTABLE_NFS); + dec_bdi_stat(page->mapping->backing_dev_info, BDI_RECLAIMABLE); + return 1; } + return 0; } static inline @@ -528,14 +491,15 @@ int nfs_reschedule_unstable_write(struct nfs_page *req, return 0; } #else -static void +static inline void nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg) { } -static void +static inline int nfs_clear_request_commit(struct nfs_page *req) { + return 0; } static inline @@ -556,65 +520,46 @@ int nfs_reschedule_unstable_write(struct nfs_page *req, static int nfs_need_commit(struct nfs_inode *nfsi) { - return nfsi->ncommit > 0; -} - -/* i_lock held by caller */ -static int -nfs_scan_commit_list(struct list_head *src, struct list_head *dst, int max, - spinlock_t *lock) -{ - struct nfs_page *req, *tmp; - int ret = 0; - - list_for_each_entry_safe(req, tmp, src, wb_list) { - if (!nfs_lock_request(req)) - continue; - if (cond_resched_lock(lock)) - list_safe_reset_next(req, tmp, wb_list); - nfs_request_remove_commit_list(req); - nfs_list_add_request(req, dst); - ret++; - if (ret == 
max) - break; - } - return ret; + return radix_tree_tagged(&nfsi->nfs_page_tree, NFS_PAGE_TAG_COMMIT); } /* * nfs_scan_commit - Scan an inode for commit requests * @inode: NFS inode to scan * @dst: destination list + * @idx_start: lower bound of page->index to scan. + * @npages: idx_start + npages sets the upper bound to scan. * * Moves requests from the inode's 'commit' request list. * The requests are *not* checked to ensure that they form a contiguous set. */ static int -nfs_scan_commit(struct inode *inode, struct list_head *dst) +nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, unsigned int npages) { struct nfs_inode *nfsi = NFS_I(inode); - int ret = 0; + int ret; - spin_lock(&inode->i_lock); - if (nfsi->ncommit > 0) { - const int max = INT_MAX; + if (!nfs_need_commit(nfsi)) + return 0; - ret = nfs_scan_commit_list(&nfsi->commit_list, dst, max, - &inode->i_lock); - ret += pnfs_scan_commit_lists(inode, max - ret, - &inode->i_lock); - } + spin_lock(&inode->i_lock); + ret = nfs_scan_list(nfsi, dst, idx_start, npages, NFS_PAGE_TAG_COMMIT); + if (ret > 0) + nfsi->ncommit -= ret; spin_unlock(&inode->i_lock); + + if (nfs_need_commit(NFS_I(inode))) + __mark_inode_dirty(inode, I_DIRTY_DATASYNC); + return ret; } - #else static inline int nfs_need_commit(struct nfs_inode *nfsi) { return 0; } -static inline int nfs_scan_commit(struct inode *inode, struct list_head *dst) +static inline int nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, unsigned int npages) { return 0; } @@ -659,7 +604,7 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode, || end < req->wb_offset) goto out_flushme; - if (nfs_lock_request_dontget(req)) + if (nfs_set_page_tag_locked(req)) break; /* The request is locked, so wait and then retry */ @@ -671,6 +616,13 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode, spin_lock(&inode->i_lock); } + if (nfs_clear_request_commit(req) && + 
radix_tree_tag_clear(&NFS_I(inode)->nfs_page_tree, + req->wb_index, NFS_PAGE_TAG_COMMIT) != NULL) { + NFS_I(inode)->ncommit--; + pnfs_clear_request_commit(req); + } + /* Okay, the request matches. Update the region */ if (offset < req->wb_offset) { req->wb_offset = offset; @@ -682,7 +634,6 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode, req->wb_bytes = rqend - req->wb_offset; out_unlock: spin_unlock(&inode->i_lock); - nfs_clear_request_commit(req); return req; out_flushme: spin_unlock(&inode->i_lock); @@ -704,6 +655,7 @@ static struct nfs_page * nfs_setup_write_request(struct nfs_open_context* ctx, { struct inode *inode = page->mapping->host; struct nfs_page *req; + int error; req = nfs_try_to_update_request(inode, page, offset, bytes); if (req != NULL) @@ -711,7 +663,11 @@ static struct nfs_page * nfs_setup_write_request(struct nfs_open_context* ctx, req = nfs_create_request(ctx, inode, page, offset, bytes); if (IS_ERR(req)) goto out; - nfs_inode_add_request(inode, req); + error = nfs_inode_add_request(inode, req); + if (error != 0) { + nfs_release_request(req); + req = ERR_PTR(error); + } out: return req; } @@ -728,7 +684,7 @@ static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page, nfs_grow_file(page, offset, count); nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes); nfs_mark_request_dirty(req); - nfs_unlock_request(req); + nfs_clear_page_tag_locked(req); return 0; } @@ -821,7 +777,7 @@ static void nfs_writepage_release(struct nfs_page *req, if (PageError(req->wb_page) || !nfs_reschedule_unstable_write(req, data)) nfs_inode_remove_request(req); - nfs_unlock_request(req); + nfs_clear_page_tag_locked(req); nfs_end_page_writeback(page); } @@ -969,7 +925,7 @@ static void nfs_redirty_request(struct nfs_page *req) struct page *page = req->wb_page; nfs_mark_request_dirty(req); - nfs_unlock_request(req); + nfs_clear_page_tag_locked(req); nfs_end_page_writeback(page); } @@ -1172,14 +1128,23 @@ static void 
nfs_writeback_release_partial(void *calldata) nfs_writedata_release(calldata); } +#if defined(CONFIG_NFS_V4_1) void nfs_write_prepare(struct rpc_task *task, void *calldata) { struct nfs_write_data *data = calldata; - NFS_PROTO(data->inode)->write_rpc_prepare(task, data); + + if (nfs4_setup_sequence(NFS_SERVER(data->inode), + &data->args.seq_args, + &data->res.seq_res, 1, task)) + return; + rpc_call_start(task); } +#endif /* CONFIG_NFS_V4_1 */ static const struct rpc_call_ops nfs_write_partial_ops = { +#if defined(CONFIG_NFS_V4_1) .rpc_call_prepare = nfs_write_prepare, +#endif /* CONFIG_NFS_V4_1 */ .rpc_call_done = nfs_writeback_done_partial, .rpc_release = nfs_writeback_release_partial, }; @@ -1234,14 +1199,16 @@ static void nfs_writeback_release_full(void *calldata) remove_request: nfs_inode_remove_request(req); next: - nfs_unlock_request(req); + nfs_clear_page_tag_locked(req); nfs_end_page_writeback(page); } nfs_writedata_release(calldata); } static const struct rpc_call_ops nfs_write_full_ops = { +#if defined(CONFIG_NFS_V4_1) .rpc_call_prepare = nfs_write_prepare, +#endif /* CONFIG_NFS_V4_1 */ .rpc_call_done = nfs_writeback_done_full, .rpc_release = nfs_writeback_release_full, }; @@ -1358,6 +1325,7 @@ void nfs_commitdata_release(void *data) { struct nfs_write_data *wdata = data; + put_lseg(wdata->lseg); put_nfs_open_context(wdata->args.context); nfs_commit_free(wdata); } @@ -1443,7 +1411,7 @@ void nfs_retry_commit(struct list_head *page_list, dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); dec_bdi_stat(req->wb_page->mapping->backing_dev_info, BDI_RECLAIMABLE); - nfs_unlock_request(req); + nfs_clear_page_tag_locked(req); } } EXPORT_SYMBOL_GPL(nfs_retry_commit); @@ -1492,7 +1460,7 @@ void nfs_commit_release_pages(struct nfs_write_data *data) while (!list_empty(&data->pages)) { req = nfs_list_entry(data->pages.next); nfs_list_remove_request(req); - nfs_clear_page_commit(req->wb_page); + nfs_clear_request_commit(req); dprintk("NFS: commit (%s/%lld %d@%lld)", 
req->wb_context->dentry->d_sb->s_id, @@ -1518,7 +1486,7 @@ void nfs_commit_release_pages(struct nfs_write_data *data) dprintk(" mismatch\n"); nfs_mark_request_dirty(req); next: - nfs_unlock_request(req); + nfs_clear_page_tag_locked(req); } } EXPORT_SYMBOL_GPL(nfs_commit_release_pages); @@ -1533,7 +1501,9 @@ static void nfs_commit_release(void *calldata) } static const struct rpc_call_ops nfs_commit_ops = { +#if defined(CONFIG_NFS_V4_1) .rpc_call_prepare = nfs_write_prepare, +#endif /* CONFIG_NFS_V4_1 */ .rpc_call_done = nfs_commit_done, .rpc_release = nfs_commit_release, }; @@ -1547,7 +1517,7 @@ int nfs_commit_inode(struct inode *inode, int how) res = nfs_commit_set_lock(NFS_I(inode), may_wait); if (res <= 0) goto out_mark_dirty; - res = nfs_scan_commit(inode, &head); + res = nfs_scan_commit(inode, &head, 0, 0); if (res) { int error; @@ -1665,7 +1635,6 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page) if (req == NULL) break; if (nfs_lock_request_dontget(req)) { - nfs_clear_request_commit(req); nfs_inode_remove_request(req); /* * In case nfs_inode_remove_request has marked the diff --git a/trunk/fs/nfsd/nfs4callback.c b/trunk/fs/nfsd/nfs4callback.c index 0e262f32ac41..6f3ebb48b12f 100644 --- a/trunk/fs/nfsd/nfs4callback.c +++ b/trunk/fs/nfsd/nfs4callback.c @@ -605,24 +605,24 @@ static struct rpc_version nfs_cb_version4 = { .procs = nfs4_cb_procedures }; -static const struct rpc_version *nfs_cb_version[] = { +static struct rpc_version *nfs_cb_version[] = { &nfs_cb_version4, }; -static const struct rpc_program cb_program; +static struct rpc_program cb_program; static struct rpc_stat cb_stats = { .program = &cb_program }; #define NFS4_CALLBACK 0x40000000 -static const struct rpc_program cb_program = { +static struct rpc_program cb_program = { .name = "nfs4_cb", .number = NFS4_CALLBACK, .nrvers = ARRAY_SIZE(nfs_cb_version), .version = nfs_cb_version, .stats = &cb_stats, - .pipe_dir_name = "nfsd4_cb", + .pipe_dir_name = "/nfsd4_cb", }; static int 
max_cb_time(void) diff --git a/trunk/fs/nfsd/nfs4state.c b/trunk/fs/nfsd/nfs4state.c index c5cddd659429..e8c98f009670 100644 --- a/trunk/fs/nfsd/nfs4state.c +++ b/trunk/fs/nfsd/nfs4state.c @@ -1308,7 +1308,7 @@ gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se, struct svc_r else goto out_err; - conn->cb_addrlen = rpc_uaddr2sockaddr(&init_net, se->se_callback_addr_val, + conn->cb_addrlen = rpc_uaddr2sockaddr(se->se_callback_addr_val, se->se_callback_addr_len, (struct sockaddr *)&conn->cb_addr, sizeof(conn->cb_addr)); diff --git a/trunk/fs/nfsd/nfsctl.c b/trunk/fs/nfsd/nfsctl.c index 64c24af8d7ea..748eda93ce59 100644 --- a/trunk/fs/nfsd/nfsctl.c +++ b/trunk/fs/nfsd/nfsctl.c @@ -223,7 +223,7 @@ static ssize_t write_unlock_ip(struct file *file, char *buf, size_t size) if (qword_get(&buf, fo_path, size) < 0) return -EINVAL; - if (rpc_pton(&init_net, fo_path, size, sap, salen) == 0) + if (rpc_pton(fo_path, size, sap, salen) == 0) return -EINVAL; return nlmsvc_unlock_all_by_ip(sap); @@ -722,7 +722,7 @@ static ssize_t __write_ports_addxprt(char *buf) nfsd_serv->sv_nrthreads--; return 0; out_close: - xprt = svc_find_xprt(nfsd_serv, transport, &init_net, PF_INET, port); + xprt = svc_find_xprt(nfsd_serv, transport, PF_INET, port); if (xprt != NULL) { svc_close_xprt(xprt); svc_xprt_put(xprt); @@ -748,7 +748,7 @@ static ssize_t __write_ports_delxprt(char *buf) if (port < 1 || port > USHRT_MAX || nfsd_serv == NULL) return -EINVAL; - xprt = svc_find_xprt(nfsd_serv, transport, &init_net, AF_UNSPEC, port); + xprt = svc_find_xprt(nfsd_serv, transport, AF_UNSPEC, port); if (xprt == NULL) return -ENOTCONN; diff --git a/trunk/fs/nfsd/nfssvc.c b/trunk/fs/nfsd/nfssvc.c index fce472f5f39e..eda7d7e55e05 100644 --- a/trunk/fs/nfsd/nfssvc.c +++ b/trunk/fs/nfsd/nfssvc.c @@ -251,13 +251,13 @@ static void nfsd_shutdown(void) nfsd_up = false; } -static void nfsd_last_thread(struct svc_serv *serv, struct net *net) +static void nfsd_last_thread(struct svc_serv *serv) { /* When 
last nfsd thread exits we need to do some clean-up */ nfsd_serv = NULL; nfsd_shutdown(); - svc_rpcb_cleanup(serv, net); + svc_rpcb_cleanup(serv); printk(KERN_WARNING "nfsd: last server has exited, flushing export " "cache\n"); diff --git a/trunk/fs/nfsd/stats.c b/trunk/fs/nfsd/stats.c index 6d4521feb6e3..a2e2402b2afb 100644 --- a/trunk/fs/nfsd/stats.c +++ b/trunk/fs/nfsd/stats.c @@ -25,7 +25,6 @@ #include #include #include -#include #include "nfsd.h" @@ -95,11 +94,11 @@ static const struct file_operations nfsd_proc_fops = { void nfsd_stat_init(void) { - svc_proc_register(&init_net, &nfsd_svcstats, &nfsd_proc_fops); + svc_proc_register(&nfsd_svcstats, &nfsd_proc_fops); } void nfsd_stat_shutdown(void) { - svc_proc_unregister(&init_net, "nfsd"); + svc_proc_unregister("nfsd"); } diff --git a/trunk/fs/pstore/platform.c b/trunk/fs/pstore/platform.c index 82c585f715e3..9ec22d3b4293 100644 --- a/trunk/fs/pstore/platform.c +++ b/trunk/fs/pstore/platform.c @@ -68,25 +68,9 @@ void pstore_set_kmsg_bytes(int bytes) /* Tag each group of saved records with a sequence number */ static int oopscount; -static const char *get_reason_str(enum kmsg_dump_reason reason) -{ - switch (reason) { - case KMSG_DUMP_PANIC: - return "Panic"; - case KMSG_DUMP_OOPS: - return "Oops"; - case KMSG_DUMP_EMERG: - return "Emergency"; - case KMSG_DUMP_RESTART: - return "Restart"; - case KMSG_DUMP_HALT: - return "Halt"; - case KMSG_DUMP_POWEROFF: - return "Poweroff"; - default: - return "Unknown"; - } -} +static char *reason_str[] = { + "Oops", "Panic", "Kexec", "Restart", "Halt", "Poweroff", "Emergency" +}; /* * callback from kmsg_dump. 
(s2,l2) has the most recently @@ -101,15 +85,17 @@ static void pstore_dump(struct kmsg_dumper *dumper, unsigned long s1_start, s2_start; unsigned long l1_cpy, l2_cpy; unsigned long size, total = 0; - char *dst; - const char *why; + char *dst, *why; u64 id; int hsize, ret; unsigned int part = 1; unsigned long flags = 0; int is_locked = 0; - why = get_reason_str(reason); + if (reason < ARRAY_SIZE(reason_str)) + why = reason_str[reason]; + else + why = "Unknown"; if (in_nmi()) { is_locked = spin_trylock(&psinfo->buf_lock); diff --git a/trunk/fs/quota/quota.c b/trunk/fs/quota/quota.c index 9a391204ca27..fc2c4388d126 100644 --- a/trunk/fs/quota/quota.c +++ b/trunk/fs/quota/quota.c @@ -282,9 +282,10 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id, case Q_XGETQUOTA: return quota_getxquota(sb, type, id, addr); case Q_XQUOTASYNC: + /* caller already holds s_umount */ if (sb->s_flags & MS_RDONLY) return -EROFS; - /* XFS quotas are fully coherent now, making this call a noop */ + writeback_inodes_sb(sb, WB_REASON_SYNC); return 0; default: return -EINVAL; diff --git a/trunk/fs/ubifs/debug.c b/trunk/fs/ubifs/debug.c index 1934084e2088..f922cbacdb96 100644 --- a/trunk/fs/ubifs/debug.c +++ b/trunk/fs/ubifs/debug.c @@ -36,7 +36,7 @@ #ifdef CONFIG_UBIFS_FS_DEBUG -static DEFINE_SPINLOCK(dbg_lock); +DEFINE_SPINLOCK(dbg_lock); static const char *get_key_fmt(int fmt) { @@ -221,15 +221,15 @@ const char *dbg_jhead(int jhead) static void dump_ch(const struct ubifs_ch *ch) { - printk(KERN_ERR "\tmagic %#x\n", le32_to_cpu(ch->magic)); - printk(KERN_ERR "\tcrc %#x\n", le32_to_cpu(ch->crc)); - printk(KERN_ERR "\tnode_type %d (%s)\n", ch->node_type, + printk(KERN_DEBUG "\tmagic %#x\n", le32_to_cpu(ch->magic)); + printk(KERN_DEBUG "\tcrc %#x\n", le32_to_cpu(ch->crc)); + printk(KERN_DEBUG "\tnode_type %d (%s)\n", ch->node_type, dbg_ntype(ch->node_type)); - printk(KERN_ERR "\tgroup_type %d (%s)\n", ch->group_type, + printk(KERN_DEBUG "\tgroup_type %d (%s)\n", 
ch->group_type, dbg_gtype(ch->group_type)); - printk(KERN_ERR "\tsqnum %llu\n", + printk(KERN_DEBUG "\tsqnum %llu\n", (unsigned long long)le64_to_cpu(ch->sqnum)); - printk(KERN_ERR "\tlen %u\n", le32_to_cpu(ch->len)); + printk(KERN_DEBUG "\tlen %u\n", le32_to_cpu(ch->len)); } void dbg_dump_inode(struct ubifs_info *c, const struct inode *inode) @@ -240,43 +240,43 @@ void dbg_dump_inode(struct ubifs_info *c, const struct inode *inode) struct ubifs_dent_node *dent, *pdent = NULL; int count = 2; - printk(KERN_ERR "Dump in-memory inode:"); - printk(KERN_ERR "\tinode %lu\n", inode->i_ino); - printk(KERN_ERR "\tsize %llu\n", + printk(KERN_DEBUG "Dump in-memory inode:"); + printk(KERN_DEBUG "\tinode %lu\n", inode->i_ino); + printk(KERN_DEBUG "\tsize %llu\n", (unsigned long long)i_size_read(inode)); - printk(KERN_ERR "\tnlink %u\n", inode->i_nlink); - printk(KERN_ERR "\tuid %u\n", (unsigned int)inode->i_uid); - printk(KERN_ERR "\tgid %u\n", (unsigned int)inode->i_gid); - printk(KERN_ERR "\tatime %u.%u\n", + printk(KERN_DEBUG "\tnlink %u\n", inode->i_nlink); + printk(KERN_DEBUG "\tuid %u\n", (unsigned int)inode->i_uid); + printk(KERN_DEBUG "\tgid %u\n", (unsigned int)inode->i_gid); + printk(KERN_DEBUG "\tatime %u.%u\n", (unsigned int)inode->i_atime.tv_sec, (unsigned int)inode->i_atime.tv_nsec); - printk(KERN_ERR "\tmtime %u.%u\n", + printk(KERN_DEBUG "\tmtime %u.%u\n", (unsigned int)inode->i_mtime.tv_sec, (unsigned int)inode->i_mtime.tv_nsec); - printk(KERN_ERR "\tctime %u.%u\n", + printk(KERN_DEBUG "\tctime %u.%u\n", (unsigned int)inode->i_ctime.tv_sec, (unsigned int)inode->i_ctime.tv_nsec); - printk(KERN_ERR "\tcreat_sqnum %llu\n", ui->creat_sqnum); - printk(KERN_ERR "\txattr_size %u\n", ui->xattr_size); - printk(KERN_ERR "\txattr_cnt %u\n", ui->xattr_cnt); - printk(KERN_ERR "\txattr_names %u\n", ui->xattr_names); - printk(KERN_ERR "\tdirty %u\n", ui->dirty); - printk(KERN_ERR "\txattr %u\n", ui->xattr); - printk(KERN_ERR "\tbulk_read %u\n", ui->xattr); - printk(KERN_ERR 
"\tsynced_i_size %llu\n", + printk(KERN_DEBUG "\tcreat_sqnum %llu\n", ui->creat_sqnum); + printk(KERN_DEBUG "\txattr_size %u\n", ui->xattr_size); + printk(KERN_DEBUG "\txattr_cnt %u\n", ui->xattr_cnt); + printk(KERN_DEBUG "\txattr_names %u\n", ui->xattr_names); + printk(KERN_DEBUG "\tdirty %u\n", ui->dirty); + printk(KERN_DEBUG "\txattr %u\n", ui->xattr); + printk(KERN_DEBUG "\tbulk_read %u\n", ui->xattr); + printk(KERN_DEBUG "\tsynced_i_size %llu\n", (unsigned long long)ui->synced_i_size); - printk(KERN_ERR "\tui_size %llu\n", + printk(KERN_DEBUG "\tui_size %llu\n", (unsigned long long)ui->ui_size); - printk(KERN_ERR "\tflags %d\n", ui->flags); - printk(KERN_ERR "\tcompr_type %d\n", ui->compr_type); - printk(KERN_ERR "\tlast_page_read %lu\n", ui->last_page_read); - printk(KERN_ERR "\tread_in_a_row %lu\n", ui->read_in_a_row); - printk(KERN_ERR "\tdata_len %d\n", ui->data_len); + printk(KERN_DEBUG "\tflags %d\n", ui->flags); + printk(KERN_DEBUG "\tcompr_type %d\n", ui->compr_type); + printk(KERN_DEBUG "\tlast_page_read %lu\n", ui->last_page_read); + printk(KERN_DEBUG "\tread_in_a_row %lu\n", ui->read_in_a_row); + printk(KERN_DEBUG "\tdata_len %d\n", ui->data_len); if (!S_ISDIR(inode->i_mode)) return; - printk(KERN_ERR "List of directory entries:\n"); + printk(KERN_DEBUG "List of directory entries:\n"); ubifs_assert(!mutex_is_locked(&c->tnc_mutex)); lowest_dent_key(c, &key, inode->i_ino); @@ -284,11 +284,11 @@ void dbg_dump_inode(struct ubifs_info *c, const struct inode *inode) dent = ubifs_tnc_next_ent(c, &key, &nm); if (IS_ERR(dent)) { if (PTR_ERR(dent) != -ENOENT) - printk(KERN_ERR "error %ld\n", PTR_ERR(dent)); + printk(KERN_DEBUG "error %ld\n", PTR_ERR(dent)); break; } - printk(KERN_ERR "\t%d: %s (%s)\n", + printk(KERN_DEBUG "\t%d: %s (%s)\n", count++, dent->name, get_dent_type(dent->type)); nm.name = dent->name; @@ -312,8 +312,8 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node) /* If the magic is incorrect, just hexdump the first bytes */ if 
(le32_to_cpu(ch->magic) != UBIFS_NODE_MAGIC) { - printk(KERN_ERR "Not a node, first %zu bytes:", UBIFS_CH_SZ); - print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 32, 1, + printk(KERN_DEBUG "Not a node, first %zu bytes:", UBIFS_CH_SZ); + print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1, (void *)node, UBIFS_CH_SZ, 1); return; } @@ -326,7 +326,7 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node) { const struct ubifs_pad_node *pad = node; - printk(KERN_ERR "\tpad_len %u\n", + printk(KERN_DEBUG "\tpad_len %u\n", le32_to_cpu(pad->pad_len)); break; } @@ -335,50 +335,50 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node) const struct ubifs_sb_node *sup = node; unsigned int sup_flags = le32_to_cpu(sup->flags); - printk(KERN_ERR "\tkey_hash %d (%s)\n", + printk(KERN_DEBUG "\tkey_hash %d (%s)\n", (int)sup->key_hash, get_key_hash(sup->key_hash)); - printk(KERN_ERR "\tkey_fmt %d (%s)\n", + printk(KERN_DEBUG "\tkey_fmt %d (%s)\n", (int)sup->key_fmt, get_key_fmt(sup->key_fmt)); - printk(KERN_ERR "\tflags %#x\n", sup_flags); - printk(KERN_ERR "\t big_lpt %u\n", + printk(KERN_DEBUG "\tflags %#x\n", sup_flags); + printk(KERN_DEBUG "\t big_lpt %u\n", !!(sup_flags & UBIFS_FLG_BIGLPT)); - printk(KERN_ERR "\t space_fixup %u\n", + printk(KERN_DEBUG "\t space_fixup %u\n", !!(sup_flags & UBIFS_FLG_SPACE_FIXUP)); - printk(KERN_ERR "\tmin_io_size %u\n", + printk(KERN_DEBUG "\tmin_io_size %u\n", le32_to_cpu(sup->min_io_size)); - printk(KERN_ERR "\tleb_size %u\n", + printk(KERN_DEBUG "\tleb_size %u\n", le32_to_cpu(sup->leb_size)); - printk(KERN_ERR "\tleb_cnt %u\n", + printk(KERN_DEBUG "\tleb_cnt %u\n", le32_to_cpu(sup->leb_cnt)); - printk(KERN_ERR "\tmax_leb_cnt %u\n", + printk(KERN_DEBUG "\tmax_leb_cnt %u\n", le32_to_cpu(sup->max_leb_cnt)); - printk(KERN_ERR "\tmax_bud_bytes %llu\n", + printk(KERN_DEBUG "\tmax_bud_bytes %llu\n", (unsigned long long)le64_to_cpu(sup->max_bud_bytes)); - printk(KERN_ERR "\tlog_lebs %u\n", + printk(KERN_DEBUG "\tlog_lebs 
%u\n", le32_to_cpu(sup->log_lebs)); - printk(KERN_ERR "\tlpt_lebs %u\n", + printk(KERN_DEBUG "\tlpt_lebs %u\n", le32_to_cpu(sup->lpt_lebs)); - printk(KERN_ERR "\torph_lebs %u\n", + printk(KERN_DEBUG "\torph_lebs %u\n", le32_to_cpu(sup->orph_lebs)); - printk(KERN_ERR "\tjhead_cnt %u\n", + printk(KERN_DEBUG "\tjhead_cnt %u\n", le32_to_cpu(sup->jhead_cnt)); - printk(KERN_ERR "\tfanout %u\n", + printk(KERN_DEBUG "\tfanout %u\n", le32_to_cpu(sup->fanout)); - printk(KERN_ERR "\tlsave_cnt %u\n", + printk(KERN_DEBUG "\tlsave_cnt %u\n", le32_to_cpu(sup->lsave_cnt)); - printk(KERN_ERR "\tdefault_compr %u\n", + printk(KERN_DEBUG "\tdefault_compr %u\n", (int)le16_to_cpu(sup->default_compr)); - printk(KERN_ERR "\trp_size %llu\n", + printk(KERN_DEBUG "\trp_size %llu\n", (unsigned long long)le64_to_cpu(sup->rp_size)); - printk(KERN_ERR "\trp_uid %u\n", + printk(KERN_DEBUG "\trp_uid %u\n", le32_to_cpu(sup->rp_uid)); - printk(KERN_ERR "\trp_gid %u\n", + printk(KERN_DEBUG "\trp_gid %u\n", le32_to_cpu(sup->rp_gid)); - printk(KERN_ERR "\tfmt_version %u\n", + printk(KERN_DEBUG "\tfmt_version %u\n", le32_to_cpu(sup->fmt_version)); - printk(KERN_ERR "\ttime_gran %u\n", + printk(KERN_DEBUG "\ttime_gran %u\n", le32_to_cpu(sup->time_gran)); - printk(KERN_ERR "\tUUID %pUB\n", + printk(KERN_DEBUG "\tUUID %pUB\n", sup->uuid); break; } @@ -386,61 +386,61 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node) { const struct ubifs_mst_node *mst = node; - printk(KERN_ERR "\thighest_inum %llu\n", + printk(KERN_DEBUG "\thighest_inum %llu\n", (unsigned long long)le64_to_cpu(mst->highest_inum)); - printk(KERN_ERR "\tcommit number %llu\n", + printk(KERN_DEBUG "\tcommit number %llu\n", (unsigned long long)le64_to_cpu(mst->cmt_no)); - printk(KERN_ERR "\tflags %#x\n", + printk(KERN_DEBUG "\tflags %#x\n", le32_to_cpu(mst->flags)); - printk(KERN_ERR "\tlog_lnum %u\n", + printk(KERN_DEBUG "\tlog_lnum %u\n", le32_to_cpu(mst->log_lnum)); - printk(KERN_ERR "\troot_lnum %u\n", + printk(KERN_DEBUG 
"\troot_lnum %u\n", le32_to_cpu(mst->root_lnum)); - printk(KERN_ERR "\troot_offs %u\n", + printk(KERN_DEBUG "\troot_offs %u\n", le32_to_cpu(mst->root_offs)); - printk(KERN_ERR "\troot_len %u\n", + printk(KERN_DEBUG "\troot_len %u\n", le32_to_cpu(mst->root_len)); - printk(KERN_ERR "\tgc_lnum %u\n", + printk(KERN_DEBUG "\tgc_lnum %u\n", le32_to_cpu(mst->gc_lnum)); - printk(KERN_ERR "\tihead_lnum %u\n", + printk(KERN_DEBUG "\tihead_lnum %u\n", le32_to_cpu(mst->ihead_lnum)); - printk(KERN_ERR "\tihead_offs %u\n", + printk(KERN_DEBUG "\tihead_offs %u\n", le32_to_cpu(mst->ihead_offs)); - printk(KERN_ERR "\tindex_size %llu\n", + printk(KERN_DEBUG "\tindex_size %llu\n", (unsigned long long)le64_to_cpu(mst->index_size)); - printk(KERN_ERR "\tlpt_lnum %u\n", + printk(KERN_DEBUG "\tlpt_lnum %u\n", le32_to_cpu(mst->lpt_lnum)); - printk(KERN_ERR "\tlpt_offs %u\n", + printk(KERN_DEBUG "\tlpt_offs %u\n", le32_to_cpu(mst->lpt_offs)); - printk(KERN_ERR "\tnhead_lnum %u\n", + printk(KERN_DEBUG "\tnhead_lnum %u\n", le32_to_cpu(mst->nhead_lnum)); - printk(KERN_ERR "\tnhead_offs %u\n", + printk(KERN_DEBUG "\tnhead_offs %u\n", le32_to_cpu(mst->nhead_offs)); - printk(KERN_ERR "\tltab_lnum %u\n", + printk(KERN_DEBUG "\tltab_lnum %u\n", le32_to_cpu(mst->ltab_lnum)); - printk(KERN_ERR "\tltab_offs %u\n", + printk(KERN_DEBUG "\tltab_offs %u\n", le32_to_cpu(mst->ltab_offs)); - printk(KERN_ERR "\tlsave_lnum %u\n", + printk(KERN_DEBUG "\tlsave_lnum %u\n", le32_to_cpu(mst->lsave_lnum)); - printk(KERN_ERR "\tlsave_offs %u\n", + printk(KERN_DEBUG "\tlsave_offs %u\n", le32_to_cpu(mst->lsave_offs)); - printk(KERN_ERR "\tlscan_lnum %u\n", + printk(KERN_DEBUG "\tlscan_lnum %u\n", le32_to_cpu(mst->lscan_lnum)); - printk(KERN_ERR "\tleb_cnt %u\n", + printk(KERN_DEBUG "\tleb_cnt %u\n", le32_to_cpu(mst->leb_cnt)); - printk(KERN_ERR "\tempty_lebs %u\n", + printk(KERN_DEBUG "\tempty_lebs %u\n", le32_to_cpu(mst->empty_lebs)); - printk(KERN_ERR "\tidx_lebs %u\n", + printk(KERN_DEBUG "\tidx_lebs %u\n", 
le32_to_cpu(mst->idx_lebs)); - printk(KERN_ERR "\ttotal_free %llu\n", + printk(KERN_DEBUG "\ttotal_free %llu\n", (unsigned long long)le64_to_cpu(mst->total_free)); - printk(KERN_ERR "\ttotal_dirty %llu\n", + printk(KERN_DEBUG "\ttotal_dirty %llu\n", (unsigned long long)le64_to_cpu(mst->total_dirty)); - printk(KERN_ERR "\ttotal_used %llu\n", + printk(KERN_DEBUG "\ttotal_used %llu\n", (unsigned long long)le64_to_cpu(mst->total_used)); - printk(KERN_ERR "\ttotal_dead %llu\n", + printk(KERN_DEBUG "\ttotal_dead %llu\n", (unsigned long long)le64_to_cpu(mst->total_dead)); - printk(KERN_ERR "\ttotal_dark %llu\n", + printk(KERN_DEBUG "\ttotal_dark %llu\n", (unsigned long long)le64_to_cpu(mst->total_dark)); break; } @@ -448,11 +448,11 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node) { const struct ubifs_ref_node *ref = node; - printk(KERN_ERR "\tlnum %u\n", + printk(KERN_DEBUG "\tlnum %u\n", le32_to_cpu(ref->lnum)); - printk(KERN_ERR "\toffs %u\n", + printk(KERN_DEBUG "\toffs %u\n", le32_to_cpu(ref->offs)); - printk(KERN_ERR "\tjhead %u\n", + printk(KERN_DEBUG "\tjhead %u\n", le32_to_cpu(ref->jhead)); break; } @@ -461,40 +461,40 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node) const struct ubifs_ino_node *ino = node; key_read(c, &ino->key, &key); - printk(KERN_ERR "\tkey %s\n", + printk(KERN_DEBUG "\tkey %s\n", dbg_snprintf_key(c, &key, key_buf, DBG_KEY_BUF_LEN)); - printk(KERN_ERR "\tcreat_sqnum %llu\n", + printk(KERN_DEBUG "\tcreat_sqnum %llu\n", (unsigned long long)le64_to_cpu(ino->creat_sqnum)); - printk(KERN_ERR "\tsize %llu\n", + printk(KERN_DEBUG "\tsize %llu\n", (unsigned long long)le64_to_cpu(ino->size)); - printk(KERN_ERR "\tnlink %u\n", + printk(KERN_DEBUG "\tnlink %u\n", le32_to_cpu(ino->nlink)); - printk(KERN_ERR "\tatime %lld.%u\n", + printk(KERN_DEBUG "\tatime %lld.%u\n", (long long)le64_to_cpu(ino->atime_sec), le32_to_cpu(ino->atime_nsec)); - printk(KERN_ERR "\tmtime %lld.%u\n", + printk(KERN_DEBUG "\tmtime %lld.%u\n", 
(long long)le64_to_cpu(ino->mtime_sec), le32_to_cpu(ino->mtime_nsec)); - printk(KERN_ERR "\tctime %lld.%u\n", + printk(KERN_DEBUG "\tctime %lld.%u\n", (long long)le64_to_cpu(ino->ctime_sec), le32_to_cpu(ino->ctime_nsec)); - printk(KERN_ERR "\tuid %u\n", + printk(KERN_DEBUG "\tuid %u\n", le32_to_cpu(ino->uid)); - printk(KERN_ERR "\tgid %u\n", + printk(KERN_DEBUG "\tgid %u\n", le32_to_cpu(ino->gid)); - printk(KERN_ERR "\tmode %u\n", + printk(KERN_DEBUG "\tmode %u\n", le32_to_cpu(ino->mode)); - printk(KERN_ERR "\tflags %#x\n", + printk(KERN_DEBUG "\tflags %#x\n", le32_to_cpu(ino->flags)); - printk(KERN_ERR "\txattr_cnt %u\n", + printk(KERN_DEBUG "\txattr_cnt %u\n", le32_to_cpu(ino->xattr_cnt)); - printk(KERN_ERR "\txattr_size %u\n", + printk(KERN_DEBUG "\txattr_size %u\n", le32_to_cpu(ino->xattr_size)); - printk(KERN_ERR "\txattr_names %u\n", + printk(KERN_DEBUG "\txattr_names %u\n", le32_to_cpu(ino->xattr_names)); - printk(KERN_ERR "\tcompr_type %#x\n", + printk(KERN_DEBUG "\tcompr_type %#x\n", (int)le16_to_cpu(ino->compr_type)); - printk(KERN_ERR "\tdata len %u\n", + printk(KERN_DEBUG "\tdata len %u\n", le32_to_cpu(ino->data_len)); break; } @@ -505,16 +505,16 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node) int nlen = le16_to_cpu(dent->nlen); key_read(c, &dent->key, &key); - printk(KERN_ERR "\tkey %s\n", + printk(KERN_DEBUG "\tkey %s\n", dbg_snprintf_key(c, &key, key_buf, DBG_KEY_BUF_LEN)); - printk(KERN_ERR "\tinum %llu\n", + printk(KERN_DEBUG "\tinum %llu\n", (unsigned long long)le64_to_cpu(dent->inum)); - printk(KERN_ERR "\ttype %d\n", (int)dent->type); - printk(KERN_ERR "\tnlen %d\n", nlen); - printk(KERN_ERR "\tname "); + printk(KERN_DEBUG "\ttype %d\n", (int)dent->type); + printk(KERN_DEBUG "\tnlen %d\n", nlen); + printk(KERN_DEBUG "\tname "); if (nlen > UBIFS_MAX_NLEN) - printk(KERN_ERR "(bad name length, not printing, " + printk(KERN_DEBUG "(bad name length, not printing, " "bad or corrupted node)"); else { for (i = 0; i < nlen && 
dent->name[i]; i++) @@ -530,16 +530,16 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node) int dlen = le32_to_cpu(ch->len) - UBIFS_DATA_NODE_SZ; key_read(c, &dn->key, &key); - printk(KERN_ERR "\tkey %s\n", + printk(KERN_DEBUG "\tkey %s\n", dbg_snprintf_key(c, &key, key_buf, DBG_KEY_BUF_LEN)); - printk(KERN_ERR "\tsize %u\n", + printk(KERN_DEBUG "\tsize %u\n", le32_to_cpu(dn->size)); - printk(KERN_ERR "\tcompr_typ %d\n", + printk(KERN_DEBUG "\tcompr_typ %d\n", (int)le16_to_cpu(dn->compr_type)); - printk(KERN_ERR "\tdata size %d\n", + printk(KERN_DEBUG "\tdata size %d\n", dlen); - printk(KERN_ERR "\tdata:\n"); - print_hex_dump(KERN_ERR, "\t", DUMP_PREFIX_OFFSET, 32, 1, + printk(KERN_DEBUG "\tdata:\n"); + print_hex_dump(KERN_DEBUG, "\t", DUMP_PREFIX_OFFSET, 32, 1, (void *)&dn->data, dlen, 0); break; } @@ -547,11 +547,11 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node) { const struct ubifs_trun_node *trun = node; - printk(KERN_ERR "\tinum %u\n", + printk(KERN_DEBUG "\tinum %u\n", le32_to_cpu(trun->inum)); - printk(KERN_ERR "\told_size %llu\n", + printk(KERN_DEBUG "\told_size %llu\n", (unsigned long long)le64_to_cpu(trun->old_size)); - printk(KERN_ERR "\tnew_size %llu\n", + printk(KERN_DEBUG "\tnew_size %llu\n", (unsigned long long)le64_to_cpu(trun->new_size)); break; } @@ -560,17 +560,17 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node) const struct ubifs_idx_node *idx = node; n = le16_to_cpu(idx->child_cnt); - printk(KERN_ERR "\tchild_cnt %d\n", n); - printk(KERN_ERR "\tlevel %d\n", + printk(KERN_DEBUG "\tchild_cnt %d\n", n); + printk(KERN_DEBUG "\tlevel %d\n", (int)le16_to_cpu(idx->level)); - printk(KERN_ERR "\tBranches:\n"); + printk(KERN_DEBUG "\tBranches:\n"); for (i = 0; i < n && i < c->fanout - 1; i++) { const struct ubifs_branch *br; br = ubifs_idx_branch(c, idx, i); key_read(c, &br->key, &key); - printk(KERN_ERR "\t%d: LEB %d:%d len %d key %s\n", + printk(KERN_DEBUG "\t%d: LEB %d:%d len %d key %s\n", i, 
le32_to_cpu(br->lnum), le32_to_cpu(br->offs), le32_to_cpu(br->len), dbg_snprintf_key(c, &key, key_buf, @@ -584,20 +584,20 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node) { const struct ubifs_orph_node *orph = node; - printk(KERN_ERR "\tcommit number %llu\n", + printk(KERN_DEBUG "\tcommit number %llu\n", (unsigned long long) le64_to_cpu(orph->cmt_no) & LLONG_MAX); - printk(KERN_ERR "\tlast node flag %llu\n", + printk(KERN_DEBUG "\tlast node flag %llu\n", (unsigned long long)(le64_to_cpu(orph->cmt_no)) >> 63); n = (le32_to_cpu(ch->len) - UBIFS_ORPH_NODE_SZ) >> 3; - printk(KERN_ERR "\t%d orphan inode numbers:\n", n); + printk(KERN_DEBUG "\t%d orphan inode numbers:\n", n); for (i = 0; i < n; i++) - printk(KERN_ERR "\t ino %llu\n", + printk(KERN_DEBUG "\t ino %llu\n", (unsigned long long)le64_to_cpu(orph->inos[i])); break; } default: - printk(KERN_ERR "node type %d was not recognized\n", + printk(KERN_DEBUG "node type %d was not recognized\n", (int)ch->node_type); } spin_unlock(&dbg_lock); @@ -606,16 +606,16 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node) void dbg_dump_budget_req(const struct ubifs_budget_req *req) { spin_lock(&dbg_lock); - printk(KERN_ERR "Budgeting request: new_ino %d, dirtied_ino %d\n", + printk(KERN_DEBUG "Budgeting request: new_ino %d, dirtied_ino %d\n", req->new_ino, req->dirtied_ino); - printk(KERN_ERR "\tnew_ino_d %d, dirtied_ino_d %d\n", + printk(KERN_DEBUG "\tnew_ino_d %d, dirtied_ino_d %d\n", req->new_ino_d, req->dirtied_ino_d); - printk(KERN_ERR "\tnew_page %d, dirtied_page %d\n", + printk(KERN_DEBUG "\tnew_page %d, dirtied_page %d\n", req->new_page, req->dirtied_page); - printk(KERN_ERR "\tnew_dent %d, mod_dent %d\n", + printk(KERN_DEBUG "\tnew_dent %d, mod_dent %d\n", req->new_dent, req->mod_dent); - printk(KERN_ERR "\tidx_growth %d\n", req->idx_growth); - printk(KERN_ERR "\tdata_growth %d dd_growth %d\n", + printk(KERN_DEBUG "\tidx_growth %d\n", req->idx_growth); + printk(KERN_DEBUG "\tdata_growth 
%d dd_growth %d\n", req->data_growth, req->dd_growth); spin_unlock(&dbg_lock); } @@ -623,12 +623,12 @@ void dbg_dump_budget_req(const struct ubifs_budget_req *req) void dbg_dump_lstats(const struct ubifs_lp_stats *lst) { spin_lock(&dbg_lock); - printk(KERN_ERR "(pid %d) Lprops statistics: empty_lebs %d, " + printk(KERN_DEBUG "(pid %d) Lprops statistics: empty_lebs %d, " "idx_lebs %d\n", current->pid, lst->empty_lebs, lst->idx_lebs); - printk(KERN_ERR "\ttaken_empty_lebs %d, total_free %lld, " + printk(KERN_DEBUG "\ttaken_empty_lebs %d, total_free %lld, " "total_dirty %lld\n", lst->taken_empty_lebs, lst->total_free, lst->total_dirty); - printk(KERN_ERR "\ttotal_used %lld, total_dark %lld, " + printk(KERN_DEBUG "\ttotal_used %lld, total_dark %lld, " "total_dead %lld\n", lst->total_used, lst->total_dark, lst->total_dead); spin_unlock(&dbg_lock); @@ -644,21 +644,21 @@ void dbg_dump_budg(struct ubifs_info *c, const struct ubifs_budg_info *bi) spin_lock(&c->space_lock); spin_lock(&dbg_lock); - printk(KERN_ERR "(pid %d) Budgeting info: data budget sum %lld, " + printk(KERN_DEBUG "(pid %d) Budgeting info: data budget sum %lld, " "total budget sum %lld\n", current->pid, bi->data_growth + bi->dd_growth, bi->data_growth + bi->dd_growth + bi->idx_growth); - printk(KERN_ERR "\tbudg_data_growth %lld, budg_dd_growth %lld, " + printk(KERN_DEBUG "\tbudg_data_growth %lld, budg_dd_growth %lld, " "budg_idx_growth %lld\n", bi->data_growth, bi->dd_growth, bi->idx_growth); - printk(KERN_ERR "\tmin_idx_lebs %d, old_idx_sz %llu, " + printk(KERN_DEBUG "\tmin_idx_lebs %d, old_idx_sz %llu, " "uncommitted_idx %lld\n", bi->min_idx_lebs, bi->old_idx_sz, bi->uncommitted_idx); - printk(KERN_ERR "\tpage_budget %d, inode_budget %d, dent_budget %d\n", + printk(KERN_DEBUG "\tpage_budget %d, inode_budget %d, dent_budget %d\n", bi->page_budget, bi->inode_budget, bi->dent_budget); - printk(KERN_ERR "\tnospace %u, nospace_rp %u\n", + printk(KERN_DEBUG "\tnospace %u, nospace_rp %u\n", bi->nospace, 
bi->nospace_rp); - printk(KERN_ERR "\tdark_wm %d, dead_wm %d, max_idx_node_sz %d\n", + printk(KERN_DEBUG "\tdark_wm %d, dead_wm %d, max_idx_node_sz %d\n", c->dark_wm, c->dead_wm, c->max_idx_node_sz); if (bi != &c->bi) @@ -669,38 +669,38 @@ void dbg_dump_budg(struct ubifs_info *c, const struct ubifs_budg_info *bi) */ goto out_unlock; - printk(KERN_ERR "\tfreeable_cnt %d, calc_idx_sz %lld, idx_gc_cnt %d\n", + printk(KERN_DEBUG "\tfreeable_cnt %d, calc_idx_sz %lld, idx_gc_cnt %d\n", c->freeable_cnt, c->calc_idx_sz, c->idx_gc_cnt); - printk(KERN_ERR "\tdirty_pg_cnt %ld, dirty_zn_cnt %ld, " + printk(KERN_DEBUG "\tdirty_pg_cnt %ld, dirty_zn_cnt %ld, " "clean_zn_cnt %ld\n", atomic_long_read(&c->dirty_pg_cnt), atomic_long_read(&c->dirty_zn_cnt), atomic_long_read(&c->clean_zn_cnt)); - printk(KERN_ERR "\tgc_lnum %d, ihead_lnum %d\n", + printk(KERN_DEBUG "\tgc_lnum %d, ihead_lnum %d\n", c->gc_lnum, c->ihead_lnum); /* If we are in R/O mode, journal heads do not exist */ if (c->jheads) for (i = 0; i < c->jhead_cnt; i++) - printk(KERN_ERR "\tjhead %s\t LEB %d\n", + printk(KERN_DEBUG "\tjhead %s\t LEB %d\n", dbg_jhead(c->jheads[i].wbuf.jhead), c->jheads[i].wbuf.lnum); for (rb = rb_first(&c->buds); rb; rb = rb_next(rb)) { bud = rb_entry(rb, struct ubifs_bud, rb); - printk(KERN_ERR "\tbud LEB %d\n", bud->lnum); + printk(KERN_DEBUG "\tbud LEB %d\n", bud->lnum); } list_for_each_entry(bud, &c->old_buds, list) - printk(KERN_ERR "\told bud LEB %d\n", bud->lnum); + printk(KERN_DEBUG "\told bud LEB %d\n", bud->lnum); list_for_each_entry(idx_gc, &c->idx_gc, list) - printk(KERN_ERR "\tGC'ed idx LEB %d unmap %d\n", + printk(KERN_DEBUG "\tGC'ed idx LEB %d unmap %d\n", idx_gc->lnum, idx_gc->unmap); - printk(KERN_ERR "\tcommit state %d\n", c->cmt_state); + printk(KERN_DEBUG "\tcommit state %d\n", c->cmt_state); /* Print budgeting predictions */ available = ubifs_calc_available(c, c->bi.min_idx_lebs); outstanding = c->bi.data_growth + c->bi.dd_growth; free = ubifs_get_free_space_nolock(c); - 
printk(KERN_ERR "Budgeting predictions:\n"); - printk(KERN_ERR "\tavailable: %lld, outstanding %lld, free %lld\n", + printk(KERN_DEBUG "Budgeting predictions:\n"); + printk(KERN_DEBUG "\tavailable: %lld, outstanding %lld, free %lld\n", available, outstanding, free); out_unlock: spin_unlock(&dbg_lock); @@ -720,11 +720,11 @@ void dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp) dark = ubifs_calc_dark(c, spc); if (lp->flags & LPROPS_INDEX) - printk(KERN_ERR "LEB %-7d free %-8d dirty %-8d used %-8d " + printk(KERN_DEBUG "LEB %-7d free %-8d dirty %-8d used %-8d " "free + dirty %-8d flags %#x (", lp->lnum, lp->free, lp->dirty, c->leb_size - spc, spc, lp->flags); else - printk(KERN_ERR "LEB %-7d free %-8d dirty %-8d used %-8d " + printk(KERN_DEBUG "LEB %-7d free %-8d dirty %-8d used %-8d " "free + dirty %-8d dark %-4d dead %-4d nodes fit %-3d " "flags %#-4x (", lp->lnum, lp->free, lp->dirty, c->leb_size - spc, spc, dark, dead, @@ -807,7 +807,7 @@ void dbg_dump_lprops(struct ubifs_info *c) struct ubifs_lprops lp; struct ubifs_lp_stats lst; - printk(KERN_ERR "(pid %d) start dumping LEB properties\n", + printk(KERN_DEBUG "(pid %d) start dumping LEB properties\n", current->pid); ubifs_get_lp_stats(c, &lst); dbg_dump_lstats(&lst); @@ -819,7 +819,7 @@ void dbg_dump_lprops(struct ubifs_info *c) dbg_dump_lprop(c, &lp); } - printk(KERN_ERR "(pid %d) finish dumping LEB properties\n", + printk(KERN_DEBUG "(pid %d) finish dumping LEB properties\n", current->pid); } @@ -828,35 +828,35 @@ void dbg_dump_lpt_info(struct ubifs_info *c) int i; spin_lock(&dbg_lock); - printk(KERN_ERR "(pid %d) dumping LPT information\n", current->pid); - printk(KERN_ERR "\tlpt_sz: %lld\n", c->lpt_sz); - printk(KERN_ERR "\tpnode_sz: %d\n", c->pnode_sz); - printk(KERN_ERR "\tnnode_sz: %d\n", c->nnode_sz); - printk(KERN_ERR "\tltab_sz: %d\n", c->ltab_sz); - printk(KERN_ERR "\tlsave_sz: %d\n", c->lsave_sz); - printk(KERN_ERR "\tbig_lpt: %d\n", c->big_lpt); - printk(KERN_ERR "\tlpt_hght: 
%d\n", c->lpt_hght); - printk(KERN_ERR "\tpnode_cnt: %d\n", c->pnode_cnt); - printk(KERN_ERR "\tnnode_cnt: %d\n", c->nnode_cnt); - printk(KERN_ERR "\tdirty_pn_cnt: %d\n", c->dirty_pn_cnt); - printk(KERN_ERR "\tdirty_nn_cnt: %d\n", c->dirty_nn_cnt); - printk(KERN_ERR "\tlsave_cnt: %d\n", c->lsave_cnt); - printk(KERN_ERR "\tspace_bits: %d\n", c->space_bits); - printk(KERN_ERR "\tlpt_lnum_bits: %d\n", c->lpt_lnum_bits); - printk(KERN_ERR "\tlpt_offs_bits: %d\n", c->lpt_offs_bits); - printk(KERN_ERR "\tlpt_spc_bits: %d\n", c->lpt_spc_bits); - printk(KERN_ERR "\tpcnt_bits: %d\n", c->pcnt_bits); - printk(KERN_ERR "\tlnum_bits: %d\n", c->lnum_bits); - printk(KERN_ERR "\tLPT root is at %d:%d\n", c->lpt_lnum, c->lpt_offs); - printk(KERN_ERR "\tLPT head is at %d:%d\n", + printk(KERN_DEBUG "(pid %d) dumping LPT information\n", current->pid); + printk(KERN_DEBUG "\tlpt_sz: %lld\n", c->lpt_sz); + printk(KERN_DEBUG "\tpnode_sz: %d\n", c->pnode_sz); + printk(KERN_DEBUG "\tnnode_sz: %d\n", c->nnode_sz); + printk(KERN_DEBUG "\tltab_sz: %d\n", c->ltab_sz); + printk(KERN_DEBUG "\tlsave_sz: %d\n", c->lsave_sz); + printk(KERN_DEBUG "\tbig_lpt: %d\n", c->big_lpt); + printk(KERN_DEBUG "\tlpt_hght: %d\n", c->lpt_hght); + printk(KERN_DEBUG "\tpnode_cnt: %d\n", c->pnode_cnt); + printk(KERN_DEBUG "\tnnode_cnt: %d\n", c->nnode_cnt); + printk(KERN_DEBUG "\tdirty_pn_cnt: %d\n", c->dirty_pn_cnt); + printk(KERN_DEBUG "\tdirty_nn_cnt: %d\n", c->dirty_nn_cnt); + printk(KERN_DEBUG "\tlsave_cnt: %d\n", c->lsave_cnt); + printk(KERN_DEBUG "\tspace_bits: %d\n", c->space_bits); + printk(KERN_DEBUG "\tlpt_lnum_bits: %d\n", c->lpt_lnum_bits); + printk(KERN_DEBUG "\tlpt_offs_bits: %d\n", c->lpt_offs_bits); + printk(KERN_DEBUG "\tlpt_spc_bits: %d\n", c->lpt_spc_bits); + printk(KERN_DEBUG "\tpcnt_bits: %d\n", c->pcnt_bits); + printk(KERN_DEBUG "\tlnum_bits: %d\n", c->lnum_bits); + printk(KERN_DEBUG "\tLPT root is at %d:%d\n", c->lpt_lnum, c->lpt_offs); + printk(KERN_DEBUG "\tLPT head is at %d:%d\n", 
c->nhead_lnum, c->nhead_offs); - printk(KERN_ERR "\tLPT ltab is at %d:%d\n", + printk(KERN_DEBUG "\tLPT ltab is at %d:%d\n", c->ltab_lnum, c->ltab_offs); if (c->big_lpt) - printk(KERN_ERR "\tLPT lsave is at %d:%d\n", + printk(KERN_DEBUG "\tLPT lsave is at %d:%d\n", c->lsave_lnum, c->lsave_offs); for (i = 0; i < c->lpt_lebs; i++) - printk(KERN_ERR "\tLPT LEB %d free %d dirty %d tgc %d " + printk(KERN_DEBUG "\tLPT LEB %d free %d dirty %d tgc %d " "cmt %d\n", i + c->lpt_first, c->ltab[i].free, c->ltab[i].dirty, c->ltab[i].tgc, c->ltab[i].cmt); spin_unlock(&dbg_lock); @@ -867,12 +867,12 @@ void dbg_dump_sleb(const struct ubifs_info *c, { struct ubifs_scan_node *snod; - printk(KERN_ERR "(pid %d) start dumping scanned data from LEB %d:%d\n", + printk(KERN_DEBUG "(pid %d) start dumping scanned data from LEB %d:%d\n", current->pid, sleb->lnum, offs); list_for_each_entry(snod, &sleb->nodes, list) { cond_resched(); - printk(KERN_ERR "Dumping node at LEB %d:%d len %d\n", sleb->lnum, + printk(KERN_DEBUG "Dumping node at LEB %d:%d len %d\n", sleb->lnum, snod->offs, snod->len); dbg_dump_node(c, snod->node); } @@ -887,7 +887,7 @@ void dbg_dump_leb(const struct ubifs_info *c, int lnum) if (dbg_is_tst_rcvry(c)) return; - printk(KERN_ERR "(pid %d) start dumping LEB %d\n", + printk(KERN_DEBUG "(pid %d) start dumping LEB %d\n", current->pid, lnum); buf = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL); @@ -902,17 +902,17 @@ void dbg_dump_leb(const struct ubifs_info *c, int lnum) goto out; } - printk(KERN_ERR "LEB %d has %d nodes ending at %d\n", lnum, + printk(KERN_DEBUG "LEB %d has %d nodes ending at %d\n", lnum, sleb->nodes_cnt, sleb->endpt); list_for_each_entry(snod, &sleb->nodes, list) { cond_resched(); - printk(KERN_ERR "Dumping node at LEB %d:%d len %d\n", lnum, + printk(KERN_DEBUG "Dumping node at LEB %d:%d len %d\n", lnum, snod->offs, snod->len); dbg_dump_node(c, snod->node); } - printk(KERN_ERR "(pid %d) finish dumping LEB %d\n", + printk(KERN_DEBUG "(pid %d) finish dumping LEB 
%d\n", current->pid, lnum); ubifs_scan_destroy(sleb); @@ -934,7 +934,7 @@ void dbg_dump_znode(const struct ubifs_info *c, else zbr = &c->zroot; - printk(KERN_ERR "znode %p, LEB %d:%d len %d parent %p iip %d level %d" + printk(KERN_DEBUG "znode %p, LEB %d:%d len %d parent %p iip %d level %d" " child_cnt %d flags %lx\n", znode, zbr->lnum, zbr->offs, zbr->len, znode->parent, znode->iip, znode->level, znode->child_cnt, znode->flags); @@ -944,18 +944,18 @@ void dbg_dump_znode(const struct ubifs_info *c, return; } - printk(KERN_ERR "zbranches:\n"); + printk(KERN_DEBUG "zbranches:\n"); for (n = 0; n < znode->child_cnt; n++) { zbr = &znode->zbranch[n]; if (znode->level > 0) - printk(KERN_ERR "\t%d: znode %p LEB %d:%d len %d key " + printk(KERN_DEBUG "\t%d: znode %p LEB %d:%d len %d key " "%s\n", n, zbr->znode, zbr->lnum, zbr->offs, zbr->len, dbg_snprintf_key(c, &zbr->key, key_buf, DBG_KEY_BUF_LEN)); else - printk(KERN_ERR "\t%d: LNC %p LEB %d:%d len %d key " + printk(KERN_DEBUG "\t%d: LNC %p LEB %d:%d len %d key " "%s\n", n, zbr->znode, zbr->lnum, zbr->offs, zbr->len, dbg_snprintf_key(c, &zbr->key, @@ -969,16 +969,16 @@ void dbg_dump_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat) { int i; - printk(KERN_ERR "(pid %d) start dumping heap cat %d (%d elements)\n", + printk(KERN_DEBUG "(pid %d) start dumping heap cat %d (%d elements)\n", current->pid, cat, heap->cnt); for (i = 0; i < heap->cnt; i++) { struct ubifs_lprops *lprops = heap->arr[i]; - printk(KERN_ERR "\t%d. LEB %d hpos %d free %d dirty %d " + printk(KERN_DEBUG "\t%d. 
LEB %d hpos %d free %d dirty %d " "flags %d\n", i, lprops->lnum, lprops->hpos, lprops->free, lprops->dirty, lprops->flags); } - printk(KERN_ERR "(pid %d) finish dumping heap\n", current->pid); + printk(KERN_DEBUG "(pid %d) finish dumping heap\n", current->pid); } void dbg_dump_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode, @@ -986,15 +986,15 @@ void dbg_dump_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode, { int i; - printk(KERN_ERR "(pid %d) dumping pnode:\n", current->pid); - printk(KERN_ERR "\taddress %zx parent %zx cnext %zx\n", + printk(KERN_DEBUG "(pid %d) dumping pnode:\n", current->pid); + printk(KERN_DEBUG "\taddress %zx parent %zx cnext %zx\n", (size_t)pnode, (size_t)parent, (size_t)pnode->cnext); - printk(KERN_ERR "\tflags %lu iip %d level %d num %d\n", + printk(KERN_DEBUG "\tflags %lu iip %d level %d num %d\n", pnode->flags, iip, pnode->level, pnode->num); for (i = 0; i < UBIFS_LPT_FANOUT; i++) { struct ubifs_lprops *lp = &pnode->lprops[i]; - printk(KERN_ERR "\t%d: free %d dirty %d flags %d lnum %d\n", + printk(KERN_DEBUG "\t%d: free %d dirty %d flags %d lnum %d\n", i, lp->free, lp->dirty, lp->flags, lp->lnum); } } @@ -1004,20 +1004,20 @@ void dbg_dump_tnc(struct ubifs_info *c) struct ubifs_znode *znode; int level; - printk(KERN_ERR "\n"); - printk(KERN_ERR "(pid %d) start dumping TNC tree\n", current->pid); + printk(KERN_DEBUG "\n"); + printk(KERN_DEBUG "(pid %d) start dumping TNC tree\n", current->pid); znode = ubifs_tnc_levelorder_next(c->zroot.znode, NULL); level = znode->level; - printk(KERN_ERR "== Level %d ==\n", level); + printk(KERN_DEBUG "== Level %d ==\n", level); while (znode) { if (level != znode->level) { level = znode->level; - printk(KERN_ERR "== Level %d ==\n", level); + printk(KERN_DEBUG "== Level %d ==\n", level); } dbg_dump_znode(c, znode); znode = ubifs_tnc_levelorder_next(c->zroot.znode, znode); } - printk(KERN_ERR "(pid %d) finish dumping TNC tree\n", current->pid); + printk(KERN_DEBUG "(pid %d) finish dumping TNC 
tree\n", current->pid); } static int dump_znode(struct ubifs_info *c, struct ubifs_znode *znode, diff --git a/trunk/fs/ubifs/debug.h b/trunk/fs/ubifs/debug.h index 9f717655df18..ad1a6fee6010 100644 --- a/trunk/fs/ubifs/debug.h +++ b/trunk/fs/ubifs/debug.h @@ -164,7 +164,9 @@ struct ubifs_global_debug_info { #define dbg_dump_stack() dump_stack() #define dbg_err(fmt, ...) do { \ + spin_lock(&dbg_lock); \ ubifs_err(fmt, ##__VA_ARGS__); \ + spin_unlock(&dbg_lock); \ } while (0) #define ubifs_dbg_msg(type, fmt, ...) \ @@ -215,6 +217,7 @@ struct ubifs_global_debug_info { /* Additional recovery messages */ #define dbg_rcvry(fmt, ...) ubifs_dbg_msg("rcvry", fmt, ##__VA_ARGS__) +extern spinlock_t dbg_lock; extern struct ubifs_global_debug_info ubifs_dbg; static inline int dbg_is_chk_gen(const struct ubifs_info *c) diff --git a/trunk/fs/ubifs/dir.c b/trunk/fs/ubifs/dir.c index ec9f1870ab7f..d6fe1c79f18b 100644 --- a/trunk/fs/ubifs/dir.c +++ b/trunk/fs/ubifs/dir.c @@ -566,7 +566,6 @@ static int ubifs_unlink(struct inode *dir, struct dentry *dentry) int sz_change = CALC_DENT_SIZE(dentry->d_name.len); int err, budgeted = 1; struct ubifs_budget_req req = { .mod_dent = 1, .dirtied_ino = 2 }; - unsigned int saved_nlink = inode->i_nlink; /* * Budget request settings: deletion direntry, deletion inode (+1 for @@ -614,7 +613,7 @@ static int ubifs_unlink(struct inode *dir, struct dentry *dentry) out_cancel: dir->i_size += sz_change; dir_ui->ui_size = dir->i_size; - set_nlink(inode, saved_nlink); + inc_nlink(inode); unlock_2_inodes(dir, inode); if (budgeted) ubifs_release_budget(c, &req); @@ -705,7 +704,8 @@ static int ubifs_rmdir(struct inode *dir, struct dentry *dentry) dir->i_size += sz_change; dir_ui->ui_size = dir->i_size; inc_nlink(dir); - set_nlink(inode, 2); + inc_nlink(inode); + inc_nlink(inode); unlock_2_inodes(dir, inode); if (budgeted) ubifs_release_budget(c, &req); @@ -977,7 +977,6 @@ static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry, struct 
ubifs_budget_req ino_req = { .dirtied_ino = 1, .dirtied_ino_d = ALIGN(old_inode_ui->data_len, 8) }; struct timespec time; - unsigned int saved_nlink; /* * Budget request settings: deletion direntry, new direntry, removing @@ -1060,14 +1059,13 @@ static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry, if (unlink) { /* * Directories cannot have hard-links, so if this is a - * directory, just clear @i_nlink. + * directory, decrement its @i_nlink twice because an empty + * directory has @i_nlink 2. */ - saved_nlink = new_inode->i_nlink; if (is_dir) - clear_nlink(new_inode); - else drop_nlink(new_inode); new_inode->i_ctime = time; + drop_nlink(new_inode); } else { new_dir->i_size += new_sz; ubifs_inode(new_dir)->ui_size = new_dir->i_size; @@ -1104,7 +1102,9 @@ static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry, out_cancel: if (unlink) { - set_nlink(new_inode, saved_nlink); + if (is_dir) + inc_nlink(new_inode); + inc_nlink(new_inode); } else { new_dir->i_size -= new_sz; ubifs_inode(new_dir)->ui_size = new_dir->i_size; diff --git a/trunk/fs/ubifs/recovery.c b/trunk/fs/ubifs/recovery.c index 2a935b317232..ee4f43f4bb99 100644 --- a/trunk/fs/ubifs/recovery.c +++ b/trunk/fs/ubifs/recovery.c @@ -679,8 +679,7 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, ret == SCANNED_GARBAGE || ret == SCANNED_A_BAD_PAD_NODE || ret == SCANNED_A_CORRUPT_NODE) { - dbg_rcvry("found corruption (%d) at %d:%d", - ret, lnum, offs); + dbg_rcvry("found corruption - %d", ret); break; } else { dbg_err("unexpected return value %d", ret); diff --git a/trunk/fs/ubifs/sb.c b/trunk/fs/ubifs/sb.c index 771f7fb6ce92..6094c5a5d7a8 100644 --- a/trunk/fs/ubifs/sb.c +++ b/trunk/fs/ubifs/sb.c @@ -410,23 +410,13 @@ static int validate_sb(struct ubifs_info *c, struct ubifs_sb_node *sup) } if (c->main_lebs < UBIFS_MIN_MAIN_LEBS) { - ubifs_err("too few main LEBs count %d, must be at least %d", - c->main_lebs, UBIFS_MIN_MAIN_LEBS); + err = 7; goto 
failed; } - max_bytes = (long long)c->leb_size * UBIFS_MIN_BUD_LEBS; - if (c->max_bud_bytes < max_bytes) { - ubifs_err("too small journal (%lld bytes), must be at least " - "%lld bytes", c->max_bud_bytes, max_bytes); - goto failed; - } - - max_bytes = (long long)c->leb_size * c->main_lebs; - if (c->max_bud_bytes > max_bytes) { - ubifs_err("too large journal size (%lld bytes), only %lld bytes" - "available in the main area", - c->max_bud_bytes, max_bytes); + if (c->max_bud_bytes < (long long)c->leb_size * UBIFS_MIN_BUD_LEBS || + c->max_bud_bytes > (long long)c->leb_size * c->main_lebs) { + err = 8; goto failed; } @@ -460,6 +450,7 @@ static int validate_sb(struct ubifs_info *c, struct ubifs_sb_node *sup) goto failed; } + max_bytes = c->main_lebs * (long long)c->leb_size; if (c->rp_size < 0 || max_bytes < c->rp_size) { err = 14; goto failed; diff --git a/trunk/fs/ubifs/ubifs.h b/trunk/fs/ubifs/ubifs.h index 93d59aceaaef..12e94774aa88 100644 --- a/trunk/fs/ubifs/ubifs.h +++ b/trunk/fs/ubifs/ubifs.h @@ -84,6 +84,9 @@ #define INUM_WARN_WATERMARK 0xFFF00000 #define INUM_WATERMARK 0xFFFFFF00 +/* Largest key size supported in this implementation */ +#define CUR_MAX_KEY_LEN UBIFS_SK_LEN + /* Maximum number of entries in each LPT (LEB category) heap */ #define LPT_HEAP_SZ 256 @@ -274,10 +277,10 @@ struct ubifs_old_idx { /* The below union makes it easier to deal with keys */ union ubifs_key { - uint8_t u8[UBIFS_SK_LEN]; - uint32_t u32[UBIFS_SK_LEN/4]; - uint64_t u64[UBIFS_SK_LEN/8]; - __le32 j32[UBIFS_SK_LEN/4]; + uint8_t u8[CUR_MAX_KEY_LEN]; + uint32_t u32[CUR_MAX_KEY_LEN/4]; + uint64_t u64[CUR_MAX_KEY_LEN/8]; + __le32 j32[CUR_MAX_KEY_LEN/4]; }; /** diff --git a/trunk/fs/xfs/Makefile b/trunk/fs/xfs/Makefile index 0a9977983f92..427a4e82a588 100644 --- a/trunk/fs/xfs/Makefile +++ b/trunk/fs/xfs/Makefile @@ -96,6 +96,9 @@ xfs-$(CONFIG_XFS_QUOTA) += xfs_dquot.o \ xfs_qm_bhv.o \ xfs_qm.o \ xfs_quotaops.o +ifeq ($(CONFIG_XFS_QUOTA),y) +xfs-$(CONFIG_PROC_FS) += xfs_qm_stats.o 
+endif xfs-$(CONFIG_XFS_RT) += xfs_rtalloc.o xfs-$(CONFIG_XFS_POSIX_ACL) += xfs_acl.o xfs-$(CONFIG_PROC_FS) += xfs_stats.o diff --git a/trunk/fs/xfs/xfs_aops.c b/trunk/fs/xfs/xfs_aops.c index 0dbb9e70fe21..74b9baf36ac3 100644 --- a/trunk/fs/xfs/xfs_aops.c +++ b/trunk/fs/xfs/xfs_aops.c @@ -26,7 +26,6 @@ #include "xfs_bmap_btree.h" #include "xfs_dinode.h" #include "xfs_inode.h" -#include "xfs_inode_item.h" #include "xfs_alloc.h" #include "xfs_error.h" #include "xfs_rw.h" @@ -99,6 +98,23 @@ xfs_destroy_ioend( mempool_free(ioend, xfs_ioend_pool); } +/* + * If the end of the current ioend is beyond the current EOF, + * return the new EOF value, otherwise zero. + */ +STATIC xfs_fsize_t +xfs_ioend_new_eof( + xfs_ioend_t *ioend) +{ + xfs_inode_t *ip = XFS_I(ioend->io_inode); + xfs_fsize_t isize; + xfs_fsize_t bsize; + + bsize = ioend->io_offset + ioend->io_size; + isize = MIN(i_size_read(VFS_I(ip)), bsize); + return isize > ip->i_d.di_size ? isize : 0; +} + /* * Fast and loose check if this write could update the on-disk inode size. */ @@ -108,65 +124,32 @@ static inline bool xfs_ioend_is_append(struct xfs_ioend *ioend) XFS_I(ioend->io_inode)->i_d.di_size; } -STATIC int -xfs_setfilesize_trans_alloc( - struct xfs_ioend *ioend) -{ - struct xfs_mount *mp = XFS_I(ioend->io_inode)->i_mount; - struct xfs_trans *tp; - int error; - - tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS); - - error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0); - if (error) { - xfs_trans_cancel(tp, 0); - return error; - } - - ioend->io_append_trans = tp; - - /* - * We hand off the transaction to the completion thread now, so - * clear the flag here. - */ - current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); - return 0; -} - /* * Update on-disk file size now that data has been written to disk. + * + * This function does not block as blocking on the inode lock in IO completion + * can lead to IO completion order dependency deadlocks.. 
If it can't get the + * inode ilock it will return EAGAIN. Callers must handle this. */ STATIC int xfs_setfilesize( - struct xfs_ioend *ioend) + xfs_ioend_t *ioend) { - struct xfs_inode *ip = XFS_I(ioend->io_inode); - struct xfs_trans *tp = ioend->io_append_trans; + xfs_inode_t *ip = XFS_I(ioend->io_inode); xfs_fsize_t isize; - /* - * The transaction was allocated in the I/O submission thread, - * thus we need to mark ourselves as beeing in a transaction - * manually. - */ - current_set_flags_nested(&tp->t_pflags, PF_FSTRANS); + if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) + return EAGAIN; - xfs_ilock(ip, XFS_ILOCK_EXCL); - isize = xfs_new_eof(ip, ioend->io_offset + ioend->io_size); - if (!isize) { - xfs_iunlock(ip, XFS_ILOCK_EXCL); - xfs_trans_cancel(tp, 0); - return 0; + isize = xfs_ioend_new_eof(ioend); + if (isize) { + trace_xfs_setfilesize(ip, ioend->io_offset, ioend->io_size); + ip->i_d.di_size = isize; + xfs_mark_inode_dirty(ip); } - trace_xfs_setfilesize(ip, ioend->io_offset, ioend->io_size); - - ip->i_d.di_size = isize; - xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); - xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - - return xfs_trans_commit(tp, 0); + xfs_iunlock(ip, XFS_ILOCK_EXCL); + return 0; } /* @@ -180,12 +163,10 @@ xfs_finish_ioend( struct xfs_ioend *ioend) { if (atomic_dec_and_test(&ioend->io_remaining)) { - struct xfs_mount *mp = XFS_I(ioend->io_inode)->i_mount; - if (ioend->io_type == IO_UNWRITTEN) - queue_work(mp->m_unwritten_workqueue, &ioend->io_work); - else if (ioend->io_append_trans) - queue_work(mp->m_data_workqueue, &ioend->io_work); + queue_work(xfsconvertd_workqueue, &ioend->io_work); + else if (xfs_ioend_is_append(ioend)) + queue_work(xfsdatad_workqueue, &ioend->io_work); else xfs_destroy_ioend(ioend); } @@ -214,36 +195,35 @@ xfs_end_io( * range to normal written extens after the data I/O has finished. 
*/ if (ioend->io_type == IO_UNWRITTEN) { - /* - * For buffered I/O we never preallocate a transaction when - * doing the unwritten extent conversion, but for direct I/O - * we do not know if we are converting an unwritten extent - * or not at the point where we preallocate the transaction. - */ - if (ioend->io_append_trans) { - ASSERT(ioend->io_isdirect); - - current_set_flags_nested( - &ioend->io_append_trans->t_pflags, PF_FSTRANS); - xfs_trans_cancel(ioend->io_append_trans, 0); - } - error = xfs_iomap_write_unwritten(ip, ioend->io_offset, ioend->io_size); if (error) { ioend->io_error = -error; goto done; } - } else if (ioend->io_append_trans) { - error = xfs_setfilesize(ioend); - if (error) - ioend->io_error = -error; - } else { - ASSERT(!xfs_ioend_is_append(ioend)); } + /* + * We might have to update the on-disk file size after extending + * writes. + */ + error = xfs_setfilesize(ioend); + ASSERT(!error || error == EAGAIN); + done: - xfs_destroy_ioend(ioend); + /* + * If we didn't complete processing of the ioend, requeue it to the + * tail of the workqueue for another attempt later. Otherwise destroy + * it. + */ + if (error == EAGAIN) { + atomic_inc(&ioend->io_remaining); + xfs_finish_ioend(ioend); + /* ensure we don't spin on blocked ioends */ + delay(1); + } else { + xfs_destroy_ioend(ioend); + } } /* @@ -279,7 +259,6 @@ xfs_alloc_ioend( */ atomic_set(&ioend->io_remaining, 1); ioend->io_isasync = 0; - ioend->io_isdirect = 0; ioend->io_error = 0; ioend->io_list = NULL; ioend->io_type = type; @@ -290,7 +269,6 @@ xfs_alloc_ioend( ioend->io_size = 0; ioend->io_iocb = NULL; ioend->io_result = 0; - ioend->io_append_trans = NULL; INIT_WORK(&ioend->io_work, xfs_end_io); return ioend; @@ -401,6 +379,14 @@ xfs_submit_ioend_bio( atomic_inc(&ioend->io_remaining); bio->bi_private = ioend; bio->bi_end_io = xfs_end_bio; + + /* + * If the I/O is beyond EOF we mark the inode dirty immediately + * but don't update the inode size until I/O completion. 
+ */ + if (xfs_ioend_new_eof(ioend)) + xfs_mark_inode_dirty(XFS_I(ioend->io_inode)); + submit_bio(wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE, bio); } @@ -1047,20 +1033,8 @@ xfs_vm_writepage( wbc, end_index); } - if (iohead) { - /* - * Reserve log space if we might write beyond the on-disk - * inode size. - */ - if (ioend->io_type != IO_UNWRITTEN && - xfs_ioend_is_append(ioend)) { - err = xfs_setfilesize_trans_alloc(ioend); - if (err) - goto error; - } - + if (iohead) xfs_submit_ioend(wbc, iohead); - } return 0; @@ -1340,32 +1314,17 @@ xfs_vm_direct_IO( { struct inode *inode = iocb->ki_filp->f_mapping->host; struct block_device *bdev = xfs_find_bdev_for_inode(inode); - struct xfs_ioend *ioend = NULL; ssize_t ret; if (rw & WRITE) { - size_t size = iov_length(iov, nr_segs); - - /* - * We need to preallocate a transaction for a size update - * here. In the case that this write both updates the size - * and converts at least on unwritten extent we will cancel - * the still clean transaction after the I/O has finished. 
- */ - iocb->private = ioend = xfs_alloc_ioend(inode, IO_DIRECT); - if (offset + size > XFS_I(inode)->i_d.di_size) { - ret = xfs_setfilesize_trans_alloc(ioend); - if (ret) - goto out_destroy_ioend; - ioend->io_isdirect = 1; - } + iocb->private = xfs_alloc_ioend(inode, IO_DIRECT); ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset, nr_segs, xfs_get_blocks_direct, xfs_end_io_direct_write, NULL, 0); if (ret != -EIOCBQUEUED && iocb->private) - goto out_trans_cancel; + xfs_destroy_ioend(iocb->private); } else { ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset, nr_segs, @@ -1374,16 +1333,6 @@ xfs_vm_direct_IO( } return ret; - -out_trans_cancel: - if (ioend->io_append_trans) { - current_set_flags_nested(&ioend->io_append_trans->t_pflags, - PF_FSTRANS); - xfs_trans_cancel(ioend->io_append_trans, 0); - } -out_destroy_ioend: - xfs_destroy_ioend(ioend); - return ret; } STATIC void diff --git a/trunk/fs/xfs/xfs_aops.h b/trunk/fs/xfs/xfs_aops.h index 84eafbcb0d9d..116dd5c37034 100644 --- a/trunk/fs/xfs/xfs_aops.h +++ b/trunk/fs/xfs/xfs_aops.h @@ -18,6 +18,8 @@ #ifndef __XFS_AOPS_H__ #define __XFS_AOPS_H__ +extern struct workqueue_struct *xfsdatad_workqueue; +extern struct workqueue_struct *xfsconvertd_workqueue; extern mempool_t *xfs_ioend_pool; /* @@ -46,14 +48,12 @@ typedef struct xfs_ioend { int io_error; /* I/O error code */ atomic_t io_remaining; /* hold count */ unsigned int io_isasync : 1; /* needs aio_complete */ - unsigned int io_isdirect : 1;/* direct I/O */ struct inode *io_inode; /* file being written to */ struct buffer_head *io_buffer_head;/* buffer linked list head */ struct buffer_head *io_buffer_tail;/* buffer linked list tail */ size_t io_size; /* size of the extent */ xfs_off_t io_offset; /* offset in the file */ struct work_struct io_work; /* xfsdatad work queue */ - struct xfs_trans *io_append_trans;/* xact. 
for size update */ struct kiocb *io_iocb; int io_result; } xfs_ioend_t; diff --git a/trunk/fs/xfs/xfs_bmap.c b/trunk/fs/xfs/xfs_bmap.c index 3548c6f75593..188ef2fbd628 100644 --- a/trunk/fs/xfs/xfs_bmap.c +++ b/trunk/fs/xfs/xfs_bmap.c @@ -5536,12 +5536,8 @@ xfs_getbmap( if (bmv->bmv_count > ULONG_MAX / sizeof(struct getbmapx)) return XFS_ERROR(ENOMEM); out = kmem_zalloc(bmv->bmv_count * sizeof(struct getbmapx), KM_MAYFAIL); - if (!out) { - out = kmem_zalloc_large(bmv->bmv_count * - sizeof(struct getbmapx)); - if (!out) - return XFS_ERROR(ENOMEM); - } + if (!out) + return XFS_ERROR(ENOMEM); xfs_ilock(ip, XFS_IOLOCK_SHARED); if (whichfork == XFS_DATA_FORK && !(iflags & BMV_IF_DELALLOC)) { @@ -5665,10 +5661,7 @@ xfs_getbmap( break; } - if (is_vmalloc_addr(out)) - kmem_free_large(out); - else - kmem_free(out); + kmem_free(out); return error; } diff --git a/trunk/fs/xfs/xfs_buf.c b/trunk/fs/xfs/xfs_buf.c index 6819b5163e33..4dff85c7d7eb 100644 --- a/trunk/fs/xfs/xfs_buf.c +++ b/trunk/fs/xfs/xfs_buf.c @@ -45,6 +45,8 @@ static kmem_zone_t *xfs_buf_zone; STATIC int xfsbufd(void *); static struct workqueue_struct *xfslogd_workqueue; +struct workqueue_struct *xfsdatad_workqueue; +struct workqueue_struct *xfsconvertd_workqueue; #ifdef XFS_BUF_LOCK_TRACKING # define XB_SET_OWNER(bp) ((bp)->b_last_holder = current->pid) @@ -1791,8 +1793,21 @@ xfs_buf_init(void) if (!xfslogd_workqueue) goto out_free_buf_zone; + xfsdatad_workqueue = alloc_workqueue("xfsdatad", WQ_MEM_RECLAIM, 1); + if (!xfsdatad_workqueue) + goto out_destroy_xfslogd_workqueue; + + xfsconvertd_workqueue = alloc_workqueue("xfsconvertd", + WQ_MEM_RECLAIM, 1); + if (!xfsconvertd_workqueue) + goto out_destroy_xfsdatad_workqueue; + return 0; + out_destroy_xfsdatad_workqueue: + destroy_workqueue(xfsdatad_workqueue); + out_destroy_xfslogd_workqueue: + destroy_workqueue(xfslogd_workqueue); out_free_buf_zone: kmem_zone_destroy(xfs_buf_zone); out: @@ -1802,6 +1817,8 @@ xfs_buf_init(void) void xfs_buf_terminate(void) { + 
destroy_workqueue(xfsconvertd_workqueue); + destroy_workqueue(xfsdatad_workqueue); destroy_workqueue(xfslogd_workqueue); kmem_zone_destroy(xfs_buf_zone); } diff --git a/trunk/fs/xfs/xfs_dfrag.c b/trunk/fs/xfs/xfs_dfrag.c index 1137bbc5eccb..dd974a55c77d 100644 --- a/trunk/fs/xfs/xfs_dfrag.c +++ b/trunk/fs/xfs/xfs_dfrag.c @@ -215,7 +215,7 @@ xfs_swap_extents( xfs_trans_t *tp; xfs_bstat_t *sbp = &sxp->sx_stat; xfs_ifork_t *tempifp, *ifp, *tifp; - int src_log_flags, target_log_flags; + int ilf_fields, tilf_fields; int error = 0; int aforkblks = 0; int taforkblks = 0; @@ -385,8 +385,9 @@ xfs_swap_extents( tip->i_delayed_blks = ip->i_delayed_blks; ip->i_delayed_blks = 0; - src_log_flags = XFS_ILOG_CORE; - switch (ip->i_d.di_format) { + ilf_fields = XFS_ILOG_CORE; + + switch(ip->i_d.di_format) { case XFS_DINODE_FMT_EXTENTS: /* If the extents fit in the inode, fix the * pointer. Otherwise it's already NULL or @@ -396,15 +397,16 @@ xfs_swap_extents( ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext; } - src_log_flags |= XFS_ILOG_DEXT; + ilf_fields |= XFS_ILOG_DEXT; break; case XFS_DINODE_FMT_BTREE: - src_log_flags |= XFS_ILOG_DBROOT; + ilf_fields |= XFS_ILOG_DBROOT; break; } - target_log_flags = XFS_ILOG_CORE; - switch (tip->i_d.di_format) { + tilf_fields = XFS_ILOG_CORE; + + switch(tip->i_d.di_format) { case XFS_DINODE_FMT_EXTENTS: /* If the extents fit in the inode, fix the * pointer. 
Otherwise it's already NULL or @@ -414,10 +416,10 @@ xfs_swap_extents( tifp->if_u1.if_extents = tifp->if_u2.if_inline_ext; } - target_log_flags |= XFS_ILOG_DEXT; + tilf_fields |= XFS_ILOG_DEXT; break; case XFS_DINODE_FMT_BTREE: - target_log_flags |= XFS_ILOG_DBROOT; + tilf_fields |= XFS_ILOG_DBROOT; break; } @@ -425,8 +427,8 @@ xfs_swap_extents( xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); xfs_trans_ijoin(tp, tip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); - xfs_trans_log_inode(tp, ip, src_log_flags); - xfs_trans_log_inode(tp, tip, target_log_flags); + xfs_trans_log_inode(tp, ip, ilf_fields); + xfs_trans_log_inode(tp, tip, tilf_fields); /* * If this is a synchronous mount, make sure that the diff --git a/trunk/fs/xfs/xfs_dir2_block.c b/trunk/fs/xfs/xfs_dir2_block.c index d3b63aefd01d..9245e029b8ea 100644 --- a/trunk/fs/xfs/xfs_dir2_block.c +++ b/trunk/fs/xfs/xfs_dir2_block.c @@ -29,7 +29,6 @@ #include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_inode_item.h" -#include "xfs_dir2.h" #include "xfs_dir2_format.h" #include "xfs_dir2_priv.h" #include "xfs_error.h" diff --git a/trunk/fs/xfs/xfs_dquot.c b/trunk/fs/xfs/xfs_dquot.c index 4be16a0cbe5a..53db20ee3e77 100644 --- a/trunk/fs/xfs/xfs_dquot.c +++ b/trunk/fs/xfs/xfs_dquot.c @@ -43,10 +43,11 @@ * Lock order: * * ip->i_lock - * qi->qi_tree_lock - * dquot->q_qlock (xfs_dqlock() and friends) - * dquot->q_flush (xfs_dqflock() and friends) - * qi->qi_lru_lock + * qh->qh_lock + * qi->qi_dqlist_lock + * dquot->q_qlock (xfs_dqlock() and friends) + * dquot->q_flush (xfs_dqflock() and friends) + * xfs_Gqm->qm_dqfrlist_lock * * If two dquots need to be locked the order is user before group/project, * otherwise by the lowest id first, see xfs_dqlock2. 
@@ -59,9 +60,6 @@ int xfs_dqreq_num; int xfs_dqerror_mod = 33; #endif -struct kmem_zone *xfs_qm_dqtrxzone; -static struct kmem_zone *xfs_qm_dqzone; - static struct lock_class_key xfs_dquot_other_class; /* @@ -71,12 +69,12 @@ void xfs_qm_dqdestroy( xfs_dquot_t *dqp) { - ASSERT(list_empty(&dqp->q_lru)); + ASSERT(list_empty(&dqp->q_freelist)); mutex_destroy(&dqp->q_qlock); - kmem_zone_free(xfs_qm_dqzone, dqp); + kmem_zone_free(xfs_Gqm->qm_dqzone, dqp); - XFS_STATS_DEC(xs_qm_dquot); + atomic_dec(&xfs_Gqm->qm_totaldquots); } /* @@ -284,7 +282,7 @@ xfs_qm_dqalloc( * Return if this type of quotas is turned off while we didn't * have an inode lock */ - if (!xfs_this_quota_on(dqp->q_mount, dqp->dq_flags)) { + if (XFS_IS_THIS_QUOTA_OFF(dqp)) { xfs_iunlock(quotip, XFS_ILOCK_EXCL); return (ESRCH); } @@ -386,7 +384,7 @@ xfs_qm_dqtobp( dqp->q_fileoffset = (xfs_fileoff_t)id / mp->m_quotainfo->qi_dqperchunk; xfs_ilock(quotip, XFS_ILOCK_SHARED); - if (!xfs_this_quota_on(dqp->q_mount, dqp->dq_flags)) { + if (XFS_IS_THIS_QUOTA_OFF(dqp)) { /* * Return if this type of quotas is turned off while we * didn't have the quota inode lock. @@ -494,12 +492,12 @@ xfs_qm_dqread( int cancelflags = 0; - dqp = kmem_zone_zalloc(xfs_qm_dqzone, KM_SLEEP); + dqp = kmem_zone_zalloc(xfs_Gqm->qm_dqzone, KM_SLEEP); dqp->dq_flags = type; dqp->q_core.d_id = cpu_to_be32(id); dqp->q_mount = mp; - INIT_LIST_HEAD(&dqp->q_lru); + INIT_LIST_HEAD(&dqp->q_freelist); mutex_init(&dqp->q_qlock); init_waitqueue_head(&dqp->q_pinwait); @@ -518,7 +516,7 @@ xfs_qm_dqread( if (!(type & XFS_DQ_USER)) lockdep_set_class(&dqp->q_qlock, &xfs_dquot_other_class); - XFS_STATS_INC(xs_qm_dquot); + atomic_inc(&xfs_Gqm->qm_totaldquots); trace_xfs_dqread(dqp); @@ -603,6 +601,60 @@ xfs_qm_dqread( return error; } +/* + * Lookup a dquot in the incore dquot hashtable. We keep two separate + * hashtables for user and group dquots; and, these are global tables + * inside the XQM, not per-filesystem tables. 
+ * The hash chain must be locked by caller, and it is left locked + * on return. Returning dquot is locked. + */ +STATIC int +xfs_qm_dqlookup( + xfs_mount_t *mp, + xfs_dqid_t id, + xfs_dqhash_t *qh, + xfs_dquot_t **O_dqpp) +{ + xfs_dquot_t *dqp; + + ASSERT(mutex_is_locked(&qh->qh_lock)); + + /* + * Traverse the hashchain looking for a match + */ + list_for_each_entry(dqp, &qh->qh_list, q_hashlist) { + /* + * We already have the hashlock. We don't need the + * dqlock to look at the id field of the dquot, since the + * id can't be modified without the hashlock anyway. + */ + if (be32_to_cpu(dqp->q_core.d_id) != id || dqp->q_mount != mp) + continue; + + trace_xfs_dqlookup_found(dqp); + + xfs_dqlock(dqp); + if (dqp->dq_flags & XFS_DQ_FREEING) { + *O_dqpp = NULL; + xfs_dqunlock(dqp); + return -1; + } + + dqp->q_nrefs++; + + /* + * move the dquot to the front of the hashchain + */ + list_move(&dqp->q_hashlist, &qh->qh_list); + trace_xfs_dqlookup_done(dqp); + *O_dqpp = dqp; + return 0; + } + + *O_dqpp = NULL; + return 1; +} + /* * Given the file system, inode OR id, and type (UDQUOT/GDQUOT), return a * a locked dquot, doing an allocation (if requested) as needed. @@ -620,10 +672,10 @@ xfs_qm_dqget( uint flags, /* DQALLOC, DQSUSER, DQREPAIR, DOWARN */ xfs_dquot_t **O_dqpp) /* OUT : locked incore dquot */ { - struct xfs_quotainfo *qi = mp->m_quotainfo; - struct radix_tree_root *tree = XFS_DQUOT_TREE(qi, type); - struct xfs_dquot *dqp; - int error; + xfs_dquot_t *dqp; + xfs_dqhash_t *h; + uint version; + int error; ASSERT(XFS_IS_QUOTA_RUNNING(mp)); if ((! XFS_IS_UQUOTA_ON(mp) && type == XFS_DQ_USER) || @@ -631,6 +683,7 @@ xfs_qm_dqget( (! 
XFS_IS_GQUOTA_ON(mp) && type == XFS_DQ_GROUP)) { return (ESRCH); } + h = XFS_DQ_HASH(mp, id, type); #ifdef DEBUG if (xfs_do_dqerror) { @@ -646,33 +699,42 @@ xfs_qm_dqget( type == XFS_DQ_GROUP); if (ip) { ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); - ASSERT(xfs_inode_dquot(ip, type) == NULL); + if (type == XFS_DQ_USER) + ASSERT(ip->i_udquot == NULL); + else + ASSERT(ip->i_gdquot == NULL); } #endif restart: - mutex_lock(&qi->qi_tree_lock); - dqp = radix_tree_lookup(tree, id); - if (dqp) { - xfs_dqlock(dqp); - if (dqp->dq_flags & XFS_DQ_FREEING) { - xfs_dqunlock(dqp); - mutex_unlock(&qi->qi_tree_lock); - trace_xfs_dqget_freeing(dqp); - delay(1); - goto restart; - } + mutex_lock(&h->qh_lock); - dqp->q_nrefs++; - mutex_unlock(&qi->qi_tree_lock); - - trace_xfs_dqget_hit(dqp); - XFS_STATS_INC(xs_qm_dqcachehits); - *O_dqpp = dqp; - return 0; + /* + * Look in the cache (hashtable). + * The chain is kept locked during lookup. + */ + switch (xfs_qm_dqlookup(mp, id, h, O_dqpp)) { + case -1: + XQM_STATS_INC(xqmstats.xs_qm_dquot_dups); + mutex_unlock(&h->qh_lock); + delay(1); + goto restart; + case 0: + XQM_STATS_INC(xqmstats.xs_qm_dqcachehits); + /* + * The dquot was found, moved to the front of the chain, + * taken off the freelist if it was on it, and locked + * at this point. Just unlock the hashchain and return. + */ + ASSERT(*O_dqpp); + ASSERT(XFS_DQ_IS_LOCKED(*O_dqpp)); + mutex_unlock(&h->qh_lock); + trace_xfs_dqget_hit(*O_dqpp); + return 0; /* success */ + default: + XQM_STATS_INC(xqmstats.xs_qm_dqcachemisses); + break; } - mutex_unlock(&qi->qi_tree_lock); - XFS_STATS_INC(xs_qm_dqcachemisses); /* * Dquot cache miss. 
We don't want to keep the inode lock across @@ -683,6 +745,12 @@ xfs_qm_dqget( */ if (ip) xfs_iunlock(ip, XFS_ILOCK_EXCL); + /* + * Save the hashchain version stamp, and unlock the chain, so that + * we don't keep the lock across a disk read + */ + version = h->qh_version; + mutex_unlock(&h->qh_lock); error = xfs_qm_dqread(mp, id, type, flags, &dqp); @@ -692,53 +760,97 @@ xfs_qm_dqget( if (error) return error; + /* + * Dquot lock comes after hashlock in the lock ordering + */ if (ip) { /* * A dquot could be attached to this inode by now, since * we had dropped the ilock. */ - if (xfs_this_quota_on(mp, type)) { - struct xfs_dquot *dqp1; - - dqp1 = xfs_inode_dquot(ip, type); - if (dqp1) { + if (type == XFS_DQ_USER) { + if (!XFS_IS_UQUOTA_ON(mp)) { + /* inode stays locked on return */ + xfs_qm_dqdestroy(dqp); + return XFS_ERROR(ESRCH); + } + if (ip->i_udquot) { xfs_qm_dqdestroy(dqp); - dqp = dqp1; + dqp = ip->i_udquot; xfs_dqlock(dqp); goto dqret; } } else { - /* inode stays locked on return */ - xfs_qm_dqdestroy(dqp); - return XFS_ERROR(ESRCH); + if (!XFS_IS_OQUOTA_ON(mp)) { + /* inode stays locked on return */ + xfs_qm_dqdestroy(dqp); + return XFS_ERROR(ESRCH); + } + if (ip->i_gdquot) { + xfs_qm_dqdestroy(dqp); + dqp = ip->i_gdquot; + xfs_dqlock(dqp); + goto dqret; + } } } - mutex_lock(&qi->qi_tree_lock); - error = -radix_tree_insert(tree, id, dqp); - if (unlikely(error)) { - WARN_ON(error != EEXIST); - + /* + * Hashlock comes after ilock in lock order + */ + mutex_lock(&h->qh_lock); + if (version != h->qh_version) { + xfs_dquot_t *tmpdqp; /* - * Duplicate found. Just throw away the new dquot and start - * over. + * Now, see if somebody else put the dquot in the + * hashtable before us. This can happen because we didn't + * keep the hashchain lock. We don't have to worry about + * lock order between the two dquots here since dqp isn't + * on any findable lists yet. 
*/ - mutex_unlock(&qi->qi_tree_lock); - trace_xfs_dqget_dup(dqp); - xfs_qm_dqdestroy(dqp); - XFS_STATS_INC(xs_qm_dquot_dups); - goto restart; + switch (xfs_qm_dqlookup(mp, id, h, &tmpdqp)) { + case 0: + case -1: + /* + * Duplicate found, either in cache or on its way out. + * Just throw away the new dquot and start over. + */ + if (tmpdqp) + xfs_qm_dqput(tmpdqp); + mutex_unlock(&h->qh_lock); + xfs_qm_dqdestroy(dqp); + XQM_STATS_INC(xqmstats.xs_qm_dquot_dups); + goto restart; + default: + break; + } } + /* + * Put the dquot at the beginning of the hash-chain and mp's list + * LOCK ORDER: hashlock, freelistlock, mplistlock, udqlock, gdqlock .. + */ + ASSERT(mutex_is_locked(&h->qh_lock)); + dqp->q_hash = h; + list_add(&dqp->q_hashlist, &h->qh_list); + h->qh_version++; + + /* + * Attach this dquot to this filesystem's list of all dquots, + * kept inside the mount structure in m_quotainfo field + */ + mutex_lock(&mp->m_quotainfo->qi_dqlist_lock); + /* * We return a locked dquot to the caller, with a reference taken */ xfs_dqlock(dqp); dqp->q_nrefs = 1; - qi->qi_dquots++; - mutex_unlock(&qi->qi_tree_lock); - + list_add(&dqp->q_mplist, &mp->m_quotainfo->qi_dqlist); + mp->m_quotainfo->qi_dquots++; + mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock); + mutex_unlock(&h->qh_lock); dqret: ASSERT((ip == NULL) || xfs_isilocked(ip, XFS_ILOCK_EXCL)); trace_xfs_dqget_miss(dqp); @@ -747,22 +859,37 @@ xfs_qm_dqget( } -STATIC void -xfs_qm_dqput_final( +/* + * Release a reference to the dquot (decrement ref-count) + * and unlock it. If there is a group quota attached to this + * dquot, carefully release that too without tripping over + * deadlocks'n'stuff. 
+ */ +void +xfs_qm_dqput( struct xfs_dquot *dqp) { - struct xfs_quotainfo *qi = dqp->q_mount->m_quotainfo; struct xfs_dquot *gdqp; + ASSERT(dqp->q_nrefs > 0); + ASSERT(XFS_DQ_IS_LOCKED(dqp)); + + trace_xfs_dqput(dqp); + +recurse: + if (--dqp->q_nrefs > 0) { + xfs_dqunlock(dqp); + return; + } + trace_xfs_dqput_free(dqp); - mutex_lock(&qi->qi_lru_lock); - if (list_empty(&dqp->q_lru)) { - list_add_tail(&dqp->q_lru, &qi->qi_lru_list); - qi->qi_lru_count++; - XFS_STATS_INC(xs_qm_dquot_unused); + mutex_lock(&xfs_Gqm->qm_dqfrlist_lock); + if (list_empty(&dqp->q_freelist)) { + list_add_tail(&dqp->q_freelist, &xfs_Gqm->qm_dqfrlist); + xfs_Gqm->qm_dqfrlist_cnt++; } - mutex_unlock(&qi->qi_lru_lock); + mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock); /* * If we just added a udquot to the freelist, then we want to release @@ -779,29 +906,10 @@ xfs_qm_dqput_final( /* * If we had a group quota hint, release it now. */ - if (gdqp) - xfs_qm_dqput(gdqp); -} - -/* - * Release a reference to the dquot (decrement ref-count) and unlock it. - * - * If there is a group quota attached to this dquot, carefully release that - * too without tripping over deadlocks'n'stuff. - */ -void -xfs_qm_dqput( - struct xfs_dquot *dqp) -{ - ASSERT(dqp->q_nrefs > 0); - ASSERT(XFS_DQ_IS_LOCKED(dqp)); - - trace_xfs_dqput(dqp); - - if (--dqp->q_nrefs > 0) - xfs_dqunlock(dqp); - else - xfs_qm_dqput_final(dqp); + if (gdqp) { + dqp = gdqp; + goto recurse; + } } /* @@ -983,6 +1091,17 @@ xfs_qm_dqflush( } +void +xfs_dqunlock( + xfs_dquot_t *dqp) +{ + xfs_dqunlock_nonotify(dqp); + if (dqp->q_logitem.qli_dquot == dqp) { + xfs_trans_unlocked_item(dqp->q_logitem.qli_item.li_ailp, + &dqp->q_logitem.qli_item); + } +} + /* * Lock two xfs_dquot structures. * @@ -1011,6 +1130,85 @@ xfs_dqlock2( } } +/* + * Take a dquot out of the mount's dqlist as well as the hashlist. This is + * called via unmount as well as quotaoff, and the purge will always succeed. 
+ */ +void +xfs_qm_dqpurge( + struct xfs_dquot *dqp) +{ + struct xfs_mount *mp = dqp->q_mount; + struct xfs_dqhash *qh = dqp->q_hash; + + xfs_dqlock(dqp); + + /* + * If we're turning off quotas, we have to make sure that, for + * example, we don't delete quota disk blocks while dquots are + * in the process of getting written to those disk blocks. + * This dquot might well be on AIL, and we can't leave it there + * if we're turning off quotas. Basically, we need this flush + * lock, and are willing to block on it. + */ + if (!xfs_dqflock_nowait(dqp)) { + /* + * Block on the flush lock after nudging dquot buffer, + * if it is incore. + */ + xfs_dqflock_pushbuf_wait(dqp); + } + + /* + * If we are turning this type of quotas off, we don't care + * about the dirty metadata sitting in this dquot. OTOH, if + * we're unmounting, we do care, so we flush it and wait. + */ + if (XFS_DQ_IS_DIRTY(dqp)) { + int error; + + /* + * We don't care about getting disk errors here. We need + * to purge this dquot anyway, so we go ahead regardless. + */ + error = xfs_qm_dqflush(dqp, SYNC_WAIT); + if (error) + xfs_warn(mp, "%s: dquot %p flush failed", + __func__, dqp); + xfs_dqflock(dqp); + } + + ASSERT(atomic_read(&dqp->q_pincount) == 0); + ASSERT(XFS_FORCED_SHUTDOWN(mp) || + !(dqp->q_logitem.qli_item.li_flags & XFS_LI_IN_AIL)); + + xfs_dqfunlock(dqp); + xfs_dqunlock(dqp); + + mutex_lock(&qh->qh_lock); + list_del_init(&dqp->q_hashlist); + qh->qh_version++; + mutex_unlock(&qh->qh_lock); + + mutex_lock(&mp->m_quotainfo->qi_dqlist_lock); + list_del_init(&dqp->q_mplist); + mp->m_quotainfo->qi_dqreclaims++; + mp->m_quotainfo->qi_dquots--; + mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock); + + /* + * We move dquots to the freelist as soon as their reference count + * hits zero, so it really should be on the freelist here. 
+ */ + mutex_lock(&xfs_Gqm->qm_dqfrlist_lock); + ASSERT(!list_empty(&dqp->q_freelist)); + list_del_init(&dqp->q_freelist); + xfs_Gqm->qm_dqfrlist_cnt--; + mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock); + + xfs_qm_dqdestroy(dqp); +} + /* * Give the buffer a little push if it is incore and * wait on the flush lock. @@ -1043,31 +1241,3 @@ xfs_dqflock_pushbuf_wait( out_lock: xfs_dqflock(dqp); } - -int __init -xfs_qm_init(void) -{ - xfs_qm_dqzone = - kmem_zone_init(sizeof(struct xfs_dquot), "xfs_dquot"); - if (!xfs_qm_dqzone) - goto out; - - xfs_qm_dqtrxzone = - kmem_zone_init(sizeof(struct xfs_dquot_acct), "xfs_dqtrx"); - if (!xfs_qm_dqtrxzone) - goto out_free_dqzone; - - return 0; - -out_free_dqzone: - kmem_zone_destroy(xfs_qm_dqzone); -out: - return -ENOMEM; -} - -void __exit -xfs_qm_exit(void) -{ - kmem_zone_destroy(xfs_qm_dqtrxzone); - kmem_zone_destroy(xfs_qm_dqzone); -} diff --git a/trunk/fs/xfs/xfs_dquot.h b/trunk/fs/xfs/xfs_dquot.h index ef9190bd8b30..a1d91d8f1802 100644 --- a/trunk/fs/xfs/xfs_dquot.h +++ b/trunk/fs/xfs/xfs_dquot.h @@ -29,6 +29,16 @@ * when quotas are off. 
*/ +/* + * The hash chain headers (hash buckets) + */ +typedef struct xfs_dqhash { + struct list_head qh_list; + struct mutex qh_lock; + uint qh_version; /* ever increasing version */ + uint qh_nelems; /* number of dquots on the list */ +} xfs_dqhash_t; + struct xfs_mount; struct xfs_trans; @@ -37,7 +47,10 @@ struct xfs_trans; */ typedef struct xfs_dquot { uint dq_flags; /* various flags (XFS_DQ_*) */ - struct list_head q_lru; /* global free list of dquots */ + struct list_head q_freelist; /* global free list of dquots */ + struct list_head q_mplist; /* mount's list of dquots */ + struct list_head q_hashlist; /* gloabl hash list of dquots */ + xfs_dqhash_t *q_hash; /* the hashchain header */ struct xfs_mount*q_mount; /* filesystem this relates to */ struct xfs_trans*q_transp; /* trans this belongs to currently */ uint q_nrefs; /* # active refs from inodes */ @@ -97,37 +110,11 @@ static inline void xfs_dqlock(struct xfs_dquot *dqp) mutex_lock(&dqp->q_qlock); } -static inline void xfs_dqunlock(struct xfs_dquot *dqp) +static inline void xfs_dqunlock_nonotify(struct xfs_dquot *dqp) { mutex_unlock(&dqp->q_qlock); } -static inline int xfs_this_quota_on(struct xfs_mount *mp, int type) -{ - switch (type & XFS_DQ_ALLTYPES) { - case XFS_DQ_USER: - return XFS_IS_UQUOTA_ON(mp); - case XFS_DQ_GROUP: - case XFS_DQ_PROJ: - return XFS_IS_OQUOTA_ON(mp); - default: - return 0; - } -} - -static inline xfs_dquot_t *xfs_inode_dquot(struct xfs_inode *ip, int type) -{ - switch (type & XFS_DQ_ALLTYPES) { - case XFS_DQ_USER: - return ip->i_udquot; - case XFS_DQ_GROUP: - case XFS_DQ_PROJ: - return ip->i_gdquot; - default: - return NULL; - } -} - #define XFS_DQ_IS_LOCKED(dqp) (mutex_is_locked(&((dqp)->q_qlock))) #define XFS_DQ_IS_DIRTY(dqp) ((dqp)->dq_flags & XFS_DQ_DIRTY) #define XFS_QM_ISUDQ(dqp) ((dqp)->dq_flags & XFS_DQ_USER) @@ -138,10 +125,15 @@ static inline xfs_dquot_t *xfs_inode_dquot(struct xfs_inode *ip, int type) XFS_DQ_TO_QINF(dqp)->qi_uquotaip : \ 
XFS_DQ_TO_QINF(dqp)->qi_gquotaip) +#define XFS_IS_THIS_QUOTA_OFF(d) (! (XFS_QM_ISUDQ(d) ? \ + (XFS_IS_UQUOTA_ON((d)->q_mount)) : \ + (XFS_IS_OQUOTA_ON((d)->q_mount)))) + extern int xfs_qm_dqread(struct xfs_mount *, xfs_dqid_t, uint, uint, struct xfs_dquot **); extern void xfs_qm_dqdestroy(xfs_dquot_t *); extern int xfs_qm_dqflush(xfs_dquot_t *, uint); +extern void xfs_qm_dqpurge(xfs_dquot_t *); extern void xfs_qm_dqunpin_wait(xfs_dquot_t *); extern void xfs_qm_adjust_dqtimers(xfs_mount_t *, xfs_disk_dquot_t *); @@ -152,6 +144,7 @@ extern int xfs_qm_dqget(xfs_mount_t *, xfs_inode_t *, extern void xfs_qm_dqput(xfs_dquot_t *); extern void xfs_dqlock2(struct xfs_dquot *, struct xfs_dquot *); +extern void xfs_dqunlock(struct xfs_dquot *); extern void xfs_dqflock_pushbuf_wait(struct xfs_dquot *dqp); static inline struct xfs_dquot *xfs_qm_dqhold(struct xfs_dquot *dqp) diff --git a/trunk/fs/xfs/xfs_file.c b/trunk/fs/xfs/xfs_file.c index 54a67dd9ac0a..7e5bc872f2b4 100644 --- a/trunk/fs/xfs/xfs_file.c +++ b/trunk/fs/xfs/xfs_file.c @@ -163,6 +163,7 @@ xfs_file_fsync( struct inode *inode = file->f_mapping->host; struct xfs_inode *ip = XFS_I(inode); struct xfs_mount *mp = ip->i_mount; + struct xfs_trans *tp; int error = 0; int log_flushed = 0; xfs_lsn_t lsn = 0; @@ -193,18 +194,75 @@ xfs_file_fsync( } /* - * All metadata updates are logged, which means that we just have - * to flush the log up to the latest LSN that touched the inode. + * We always need to make sure that the required inode state is safe on + * disk. The inode might be clean but we still might need to force the + * log because of committed transactions that haven't hit the disk yet. + * Likewise, there could be unflushed non-transactional changes to the + * inode core that have to go to disk and this requires us to issue + * a synchronous transaction to capture these changes correctly. 
+ * + * This code relies on the assumption that if the i_update_core field + * of the inode is clear and the inode is unpinned then it is clean + * and no action is required. */ xfs_ilock(ip, XFS_ILOCK_SHARED); - if (xfs_ipincount(ip)) { - if (!datasync || - (ip->i_itemp->ili_fields & ~XFS_ILOG_TIMESTAMP)) + + /* + * First check if the VFS inode is marked dirty. All the dirtying + * of non-transactional updates do not go through mark_inode_dirty*, + * which allows us to distinguish between pure timestamp updates + * and i_size updates which need to be caught for fdatasync. + * After that also check for the dirty state in the XFS inode, which + * might gets cleared when the inode gets written out via the AIL + * or xfs_iflush_cluster. + */ + if (((inode->i_state & I_DIRTY_DATASYNC) || + ((inode->i_state & I_DIRTY_SYNC) && !datasync)) && + ip->i_update_core) { + /* + * Kick off a transaction to log the inode core to get the + * updates. The sync transaction will also force the log. + */ + xfs_iunlock(ip, XFS_ILOCK_SHARED); + tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS); + error = xfs_trans_reserve(tp, 0, + XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0); + if (error) { + xfs_trans_cancel(tp, 0); + return -error; + } + xfs_ilock(ip, XFS_ILOCK_EXCL); + + /* + * Note - it's possible that we might have pushed ourselves out + * of the way during trans_reserve which would flush the inode. + * But there's no guarantee that the inode buffer has actually + * gone out yet (it's delwri). Plus the buffer could be pinned + * anyway if it's part of an inode in another recent + * transaction. So we play it safe and fire off the + * transaction anyway. + */ + xfs_trans_ijoin(tp, ip, 0); + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); + error = xfs_trans_commit(tp, 0); + + lsn = ip->i_itemp->ili_last_lsn; + xfs_iunlock(ip, XFS_ILOCK_EXCL); + } else { + /* + * Timestamps/size haven't changed since last inode flush or + * inode transaction commit. 
That means either nothing got + * written or a transaction committed which caught the updates. + * If the latter happened and the transaction hasn't hit the + * disk yet, the inode will be still be pinned. If it is, + * force the log. + */ + if (xfs_ipincount(ip)) lsn = ip->i_itemp->ili_last_lsn; + xfs_iunlock(ip, XFS_ILOCK_SHARED); } - xfs_iunlock(ip, XFS_ILOCK_SHARED); - if (lsn) + if (!error && lsn) error = _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, &log_flushed); /* @@ -601,6 +659,9 @@ xfs_file_aio_write_checks( return error; } + if (likely(!(file->f_mode & FMODE_NOCMTIME))) + file_update_time(file); + /* * If the offset is beyond the size of the file, we need to zero any * blocks that fall between the existing EOF and the start of this @@ -623,15 +684,6 @@ xfs_file_aio_write_checks( if (error) return error; - /* - * Updating the timestamps will grab the ilock again from - * xfs_fs_dirty_inode, so we have to call it after dropping the - * lock above. Eventually we should look into a way to avoid - * the pointless lock roundtrip. - */ - if (likely(!(file->f_mode & FMODE_NOCMTIME))) - file_update_time(file); - /* * If we're writing the file then make sure to clear the setuid and * setgid bits if the process is not being run by root. This keeps diff --git a/trunk/fs/xfs/xfs_iget.c b/trunk/fs/xfs/xfs_iget.c index a98cb4524e6c..8c3e46394d48 100644 --- a/trunk/fs/xfs/xfs_iget.c +++ b/trunk/fs/xfs/xfs_iget.c @@ -91,6 +91,7 @@ xfs_inode_alloc( ip->i_afp = NULL; memset(&ip->i_df, 0, sizeof(xfs_ifork_t)); ip->i_flags = 0; + ip->i_update_core = 0; ip->i_delayed_blks = 0; memset(&ip->i_d, 0, sizeof(xfs_icdinode_t)); @@ -349,20 +350,9 @@ xfs_iget_cache_miss( BUG(); } - /* - * These values must be set before inserting the inode into the radix - * tree as the moment it is inserted a concurrent lookup (allowed by the - * RCU locking mechanism) can find it and that lookup must see that this - * is an inode currently under construction (i.e. that XFS_INEW is set). 
- * The ip->i_flags_lock that protects the XFS_INEW flag forms the - * memory barrier that ensures this detection works correctly at lookup - * time. - */ - ip->i_udquot = ip->i_gdquot = NULL; - xfs_iflags_set(ip, XFS_INEW); + spin_lock(&pag->pag_ici_lock); /* insert the new inode */ - spin_lock(&pag->pag_ici_lock); error = radix_tree_insert(&pag->pag_ici_root, agino, ip); if (unlikely(error)) { WARN_ON(error != -EEXIST); @@ -370,6 +360,11 @@ xfs_iget_cache_miss( error = EAGAIN; goto out_preload_end; } + + /* These values _must_ be set before releasing the radix tree lock! */ + ip->i_udquot = ip->i_gdquot = NULL; + xfs_iflags_set(ip, XFS_INEW); + spin_unlock(&pag->pag_ici_lock); radix_tree_preload_end(); @@ -423,15 +418,6 @@ xfs_iget( xfs_perag_t *pag; xfs_agino_t agino; - /* - * xfs_reclaim_inode() uses the ILOCK to ensure an inode - * doesn't get freed while it's being referenced during a - * radix tree traversal here. It assumes this function - * aqcuires only the ILOCK (and therefore it has no need to - * involve the IOLOCK in this synchronization). - */ - ASSERT((lock_flags & (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED)) == 0); - /* reject inode numbers outside existing AGs */ if (!ino || XFS_INO_TO_AGNO(mp, ino) >= mp->m_sb.sb_agcount) return EINVAL; @@ -656,7 +642,8 @@ xfs_iunlock( (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)); ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) != (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); - ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0); + ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_IUNLOCK_NONOTIFY | + XFS_LOCK_DEP_MASK)) == 0); ASSERT(lock_flags != 0); if (lock_flags & XFS_IOLOCK_EXCL) @@ -669,6 +656,16 @@ xfs_iunlock( else if (lock_flags & XFS_ILOCK_SHARED) mrunlock_shared(&ip->i_lock); + if ((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) && + !(lock_flags & XFS_IUNLOCK_NONOTIFY) && ip->i_itemp) { + /* + * Let the AIL know that this item has been unlocked in case + * it is in the AIL and anyone is waiting on it. 
Don't do + * this if the caller has asked us not to. + */ + xfs_trans_unlocked_item(ip->i_itemp->ili_item.li_ailp, + (xfs_log_item_t*)(ip->i_itemp)); + } trace_xfs_iunlock(ip, lock_flags, _RET_IP_); } diff --git a/trunk/fs/xfs/xfs_inode.c b/trunk/fs/xfs/xfs_inode.c index bc46c0a133d3..b21022499c2e 100644 --- a/trunk/fs/xfs/xfs_inode.c +++ b/trunk/fs/xfs/xfs_inode.c @@ -1656,13 +1656,14 @@ xfs_ifree_cluster( iip = ip->i_itemp; if (!iip || xfs_inode_clean(ip)) { ASSERT(ip != free_ip); + ip->i_update_core = 0; xfs_ifunlock(ip); xfs_iunlock(ip, XFS_ILOCK_EXCL); continue; } - iip->ili_last_fields = iip->ili_fields; - iip->ili_fields = 0; + iip->ili_last_fields = iip->ili_format.ilf_fields; + iip->ili_format.ilf_fields = 0; iip->ili_logged = 1; xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn, &iip->ili_item.li_lsn); @@ -2176,7 +2177,7 @@ xfs_iflush_fork( mp = ip->i_mount; switch (XFS_IFORK_FORMAT(ip, whichfork)) { case XFS_DINODE_FMT_LOCAL: - if ((iip->ili_fields & dataflag[whichfork]) && + if ((iip->ili_format.ilf_fields & dataflag[whichfork]) && (ifp->if_bytes > 0)) { ASSERT(ifp->if_u1.if_data != NULL); ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork)); @@ -2186,8 +2187,8 @@ xfs_iflush_fork( case XFS_DINODE_FMT_EXTENTS: ASSERT((ifp->if_flags & XFS_IFEXTENTS) || - !(iip->ili_fields & extflag[whichfork])); - if ((iip->ili_fields & extflag[whichfork]) && + !(iip->ili_format.ilf_fields & extflag[whichfork])); + if ((iip->ili_format.ilf_fields & extflag[whichfork]) && (ifp->if_bytes > 0)) { ASSERT(xfs_iext_get_ext(ifp, 0)); ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) > 0); @@ -2197,7 +2198,7 @@ xfs_iflush_fork( break; case XFS_DINODE_FMT_BTREE: - if ((iip->ili_fields & brootflag[whichfork]) && + if ((iip->ili_format.ilf_fields & brootflag[whichfork]) && (ifp->if_broot_bytes > 0)) { ASSERT(ifp->if_broot != NULL); ASSERT(ifp->if_broot_bytes <= @@ -2210,14 +2211,14 @@ xfs_iflush_fork( break; case XFS_DINODE_FMT_DEV: - if (iip->ili_fields & XFS_ILOG_DEV) { + if 
(iip->ili_format.ilf_fields & XFS_ILOG_DEV) { ASSERT(whichfork == XFS_DATA_FORK); xfs_dinode_put_rdev(dip, ip->i_df.if_u2.if_rdev); } break; case XFS_DINODE_FMT_UUID: - if (iip->ili_fields & XFS_ILOG_UUID) { + if (iip->ili_format.ilf_fields & XFS_ILOG_UUID) { ASSERT(whichfork == XFS_DATA_FORK); memcpy(XFS_DFORK_DPTR(dip), &ip->i_df.if_u2.if_uuid, @@ -2450,8 +2451,9 @@ xfs_iflush( * to disk, because the log record didn't make it to disk! */ if (XFS_FORCED_SHUTDOWN(mp)) { + ip->i_update_core = 0; if (iip) - iip->ili_fields = 0; + iip->ili_format.ilf_fields = 0; xfs_ifunlock(ip); return XFS_ERROR(EIO); } @@ -2531,6 +2533,26 @@ xfs_iflush_int( /* set *dip = inode's place in the buffer */ dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset); + /* + * Clear i_update_core before copying out the data. + * This is for coordination with our timestamp updates + * that don't hold the inode lock. They will always + * update the timestamps BEFORE setting i_update_core, + * so if we clear i_update_core after they set it we + * are guaranteed to see their updates to the timestamps. + * I believe that this depends on strongly ordered memory + * semantics, but we have that. We use the SYNCHRONIZE + * macro to make sure that the compiler does not reorder + * the i_update_core access below the data copy below. + */ + ip->i_update_core = 0; + SYNCHRONIZE(); + + /* + * Make sure to get the latest timestamps from the Linux inode. + */ + xfs_synchronize_times(ip); + if (XFS_TEST_ERROR(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC), mp, XFS_ERRTAG_IFLUSH_1, XFS_RANDOM_IFLUSH_1)) { xfs_alert_tag(mp, XFS_PTAG_IFLUSH, @@ -2641,33 +2663,36 @@ xfs_iflush_int( xfs_inobp_check(mp, bp); /* - * We've recorded everything logged in the inode, so we'd like to clear - * the ili_fields bits so we don't log and flush things unnecessarily. - * However, we can't stop logging all this information until the data - * we've copied into the disk buffer is written to disk. 
If we did we - * might overwrite the copy of the inode in the log with all the data - * after re-logging only part of it, and in the face of a crash we - * wouldn't have all the data we need to recover. + * We've recorded everything logged in the inode, so we'd + * like to clear the ilf_fields bits so we don't log and + * flush things unnecessarily. However, we can't stop + * logging all this information until the data we've copied + * into the disk buffer is written to disk. If we did we might + * overwrite the copy of the inode in the log with all the + * data after re-logging only part of it, and in the face of + * a crash we wouldn't have all the data we need to recover. * - * What we do is move the bits to the ili_last_fields field. When - * logging the inode, these bits are moved back to the ili_fields field. - * In the xfs_iflush_done() routine we clear ili_last_fields, since we - * know that the information those bits represent is permanently on - * disk. As long as the flush completes before the inode is logged - * again, then both ili_fields and ili_last_fields will be cleared. + * What we do is move the bits to the ili_last_fields field. + * When logging the inode, these bits are moved back to the + * ilf_fields field. In the xfs_iflush_done() routine we + * clear ili_last_fields, since we know that the information + * those bits represent is permanently on disk. As long as + * the flush completes before the inode is logged again, then + * both ilf_fields and ili_last_fields will be cleared. * - * We can play with the ili_fields bits here, because the inode lock - * must be held exclusively in order to set bits there and the flush - * lock protects the ili_last_fields bits. Set ili_logged so the flush - * done routine can tell whether or not to look in the AIL. Also, store - * the current LSN of the inode so that we can tell whether the item has - * moved in the AIL from xfs_iflush_done(). 
In order to read the lsn we - * need the AIL lock, because it is a 64 bit value that cannot be read - * atomically. + * We can play with the ilf_fields bits here, because the inode + * lock must be held exclusively in order to set bits there + * and the flush lock protects the ili_last_fields bits. + * Set ili_logged so the flush done + * routine can tell whether or not to look in the AIL. + * Also, store the current LSN of the inode so that we can tell + * whether the item has moved in the AIL from xfs_iflush_done(). + * In order to read the lsn we need the AIL lock, because + * it is a 64 bit value that cannot be read atomically. */ - if (iip != NULL && iip->ili_fields != 0) { - iip->ili_last_fields = iip->ili_fields; - iip->ili_fields = 0; + if (iip != NULL && iip->ili_format.ilf_fields != 0) { + iip->ili_last_fields = iip->ili_format.ilf_fields; + iip->ili_format.ilf_fields = 0; iip->ili_logged = 1; xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn, @@ -2686,7 +2711,8 @@ xfs_iflush_int( } else { /* * We're flushing an inode which is not in the AIL and has - * not been logged. For this case we can immediately drop + * not been logged but has i_update_core set. For this + * case we can use a B_DELWRI flush and immediately drop * the inode flush lock because we can avoid the whole * AIL state thing. It's OK to drop the flush lock now, * because we've already locked the buffer and to do anything diff --git a/trunk/fs/xfs/xfs_inode.h b/trunk/fs/xfs/xfs_inode.h index f123dbe6d42a..2f27b7454085 100644 --- a/trunk/fs/xfs/xfs_inode.h +++ b/trunk/fs/xfs/xfs_inode.h @@ -241,6 +241,7 @@ typedef struct xfs_inode { spinlock_t i_flags_lock; /* inode i_flags lock */ /* Miscellaneous state. 
*/ unsigned long i_flags; /* see defined flags below */ + unsigned char i_update_core; /* timestamps/size is dirty */ unsigned int i_delayed_blks; /* count of delay alloc blks */ xfs_icdinode_t i_d; /* most of ondisk inode */ @@ -273,20 +274,6 @@ static inline xfs_fsize_t XFS_ISIZE(struct xfs_inode *ip) return ip->i_d.di_size; } -/* - * If this I/O goes past the on-disk inode size update it unless it would - * be past the current in-core inode size. - */ -static inline xfs_fsize_t -xfs_new_eof(struct xfs_inode *ip, xfs_fsize_t new_size) -{ - xfs_fsize_t i_size = i_size_read(VFS_I(ip)); - - if (new_size > i_size) - new_size = i_size; - return new_size > ip->i_d.di_size ? new_size : 0; -} - /* * i_flags helper functions */ @@ -435,6 +422,7 @@ static inline int xfs_isiflocked(struct xfs_inode *ip) #define XFS_IOLOCK_SHARED (1<<1) #define XFS_ILOCK_EXCL (1<<2) #define XFS_ILOCK_SHARED (1<<3) +#define XFS_IUNLOCK_NONOTIFY (1<<4) #define XFS_LOCK_MASK (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED \ | XFS_ILOCK_EXCL | XFS_ILOCK_SHARED) @@ -443,7 +431,8 @@ static inline int xfs_isiflocked(struct xfs_inode *ip) { XFS_IOLOCK_EXCL, "IOLOCK_EXCL" }, \ { XFS_IOLOCK_SHARED, "IOLOCK_SHARED" }, \ { XFS_ILOCK_EXCL, "ILOCK_EXCL" }, \ - { XFS_ILOCK_SHARED, "ILOCK_SHARED" } + { XFS_ILOCK_SHARED, "ILOCK_SHARED" }, \ + { XFS_IUNLOCK_NONOTIFY, "IUNLOCK_NONOTIFY" } /* @@ -533,6 +522,10 @@ void xfs_promote_inode(struct xfs_inode *); void xfs_lock_inodes(xfs_inode_t **, int, uint); void xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint); +void xfs_synchronize_times(xfs_inode_t *); +void xfs_mark_inode_dirty(xfs_inode_t *); +void xfs_mark_inode_dirty_sync(xfs_inode_t *); + #define IHOLD(ip) \ do { \ ASSERT(atomic_read(&VFS_I(ip)->i_count) > 0) ; \ diff --git a/trunk/fs/xfs/xfs_inode_item.c b/trunk/fs/xfs/xfs_inode_item.c index 05d924efceaf..91d71dcd4852 100644 --- a/trunk/fs/xfs/xfs_inode_item.c +++ b/trunk/fs/xfs/xfs_inode_item.c @@ -57,28 +57,77 @@ xfs_inode_item_size( struct xfs_inode *ip = 
iip->ili_inode; uint nvecs = 2; + /* + * Only log the data/extents/b-tree root if there is something + * left to log. + */ + iip->ili_format.ilf_fields |= XFS_ILOG_CORE; + switch (ip->i_d.di_format) { case XFS_DINODE_FMT_EXTENTS: - if ((iip->ili_fields & XFS_ILOG_DEXT) && - ip->i_d.di_nextents > 0 && - ip->i_df.if_bytes > 0) + iip->ili_format.ilf_fields &= + ~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT | + XFS_ILOG_DEV | XFS_ILOG_UUID); + if ((iip->ili_format.ilf_fields & XFS_ILOG_DEXT) && + (ip->i_d.di_nextents > 0) && + (ip->i_df.if_bytes > 0)) { + ASSERT(ip->i_df.if_u1.if_extents != NULL); nvecs++; + } else { + iip->ili_format.ilf_fields &= ~XFS_ILOG_DEXT; + } break; case XFS_DINODE_FMT_BTREE: - if ((iip->ili_fields & XFS_ILOG_DBROOT) && - ip->i_df.if_broot_bytes > 0) + iip->ili_format.ilf_fields &= + ~(XFS_ILOG_DDATA | XFS_ILOG_DEXT | + XFS_ILOG_DEV | XFS_ILOG_UUID); + if ((iip->ili_format.ilf_fields & XFS_ILOG_DBROOT) && + (ip->i_df.if_broot_bytes > 0)) { + ASSERT(ip->i_df.if_broot != NULL); nvecs++; + } else { + ASSERT(!(iip->ili_format.ilf_fields & + XFS_ILOG_DBROOT)); +#ifdef XFS_TRANS_DEBUG + if (iip->ili_root_size > 0) { + ASSERT(iip->ili_root_size == + ip->i_df.if_broot_bytes); + ASSERT(memcmp(iip->ili_orig_root, + ip->i_df.if_broot, + iip->ili_root_size) == 0); + } else { + ASSERT(ip->i_df.if_broot_bytes == 0); + } +#endif + iip->ili_format.ilf_fields &= ~XFS_ILOG_DBROOT; + } break; case XFS_DINODE_FMT_LOCAL: - if ((iip->ili_fields & XFS_ILOG_DDATA) && - ip->i_df.if_bytes > 0) + iip->ili_format.ilf_fields &= + ~(XFS_ILOG_DEXT | XFS_ILOG_DBROOT | + XFS_ILOG_DEV | XFS_ILOG_UUID); + if ((iip->ili_format.ilf_fields & XFS_ILOG_DDATA) && + (ip->i_df.if_bytes > 0)) { + ASSERT(ip->i_df.if_u1.if_data != NULL); + ASSERT(ip->i_d.di_size > 0); nvecs++; + } else { + iip->ili_format.ilf_fields &= ~XFS_ILOG_DDATA; + } break; case XFS_DINODE_FMT_DEV: + iip->ili_format.ilf_fields &= + ~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT | + XFS_ILOG_DEXT | XFS_ILOG_UUID); + break; + case 
XFS_DINODE_FMT_UUID: + iip->ili_format.ilf_fields &= + ~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT | + XFS_ILOG_DEXT | XFS_ILOG_DEV); break; default: @@ -86,31 +135,56 @@ xfs_inode_item_size( break; } - if (!XFS_IFORK_Q(ip)) + /* + * If there are no attributes associated with this file, + * then there cannot be anything more to log. + * Clear all attribute-related log flags. + */ + if (!XFS_IFORK_Q(ip)) { + iip->ili_format.ilf_fields &= + ~(XFS_ILOG_ADATA | XFS_ILOG_ABROOT | XFS_ILOG_AEXT); return nvecs; - + } /* * Log any necessary attribute data. */ switch (ip->i_d.di_aformat) { case XFS_DINODE_FMT_EXTENTS: - if ((iip->ili_fields & XFS_ILOG_AEXT) && - ip->i_d.di_anextents > 0 && - ip->i_afp->if_bytes > 0) + iip->ili_format.ilf_fields &= + ~(XFS_ILOG_ADATA | XFS_ILOG_ABROOT); + if ((iip->ili_format.ilf_fields & XFS_ILOG_AEXT) && + (ip->i_d.di_anextents > 0) && + (ip->i_afp->if_bytes > 0)) { + ASSERT(ip->i_afp->if_u1.if_extents != NULL); nvecs++; + } else { + iip->ili_format.ilf_fields &= ~XFS_ILOG_AEXT; + } break; case XFS_DINODE_FMT_BTREE: - if ((iip->ili_fields & XFS_ILOG_ABROOT) && - ip->i_afp->if_broot_bytes > 0) + iip->ili_format.ilf_fields &= + ~(XFS_ILOG_ADATA | XFS_ILOG_AEXT); + if ((iip->ili_format.ilf_fields & XFS_ILOG_ABROOT) && + (ip->i_afp->if_broot_bytes > 0)) { + ASSERT(ip->i_afp->if_broot != NULL); nvecs++; + } else { + iip->ili_format.ilf_fields &= ~XFS_ILOG_ABROOT; + } break; case XFS_DINODE_FMT_LOCAL: - if ((iip->ili_fields & XFS_ILOG_ADATA) && - ip->i_afp->if_bytes > 0) + iip->ili_format.ilf_fields &= + ~(XFS_ILOG_AEXT | XFS_ILOG_ABROOT); + if ((iip->ili_format.ilf_fields & XFS_ILOG_ADATA) && + (ip->i_afp->if_bytes > 0)) { + ASSERT(ip->i_afp->if_u1.if_data != NULL); nvecs++; + } else { + iip->ili_format.ilf_fields &= ~XFS_ILOG_ADATA; + } break; default: @@ -180,11 +254,48 @@ xfs_inode_item_format( vecp++; nvecs = 1; + /* + * Clear i_update_core if the timestamps (or any other + * non-transactional modification) need flushing/logging + * and we're about 
to log them with the rest of the core. + * + * This is the same logic as xfs_iflush() but this code can't + * run at the same time as xfs_iflush because we're in commit + * processing here and so we have the inode lock held in + * exclusive mode. Although it doesn't really matter + * for the timestamps if both routines were to grab the + * timestamps or not. That would be ok. + * + * We clear i_update_core before copying out the data. + * This is for coordination with our timestamp updates + * that don't hold the inode lock. They will always + * update the timestamps BEFORE setting i_update_core, + * so if we clear i_update_core after they set it we + * are guaranteed to see their updates to the timestamps + * either here. Likewise, if they set it after we clear it + * here, we'll see it either on the next commit of this + * inode or the next time the inode gets flushed via + * xfs_iflush(). This depends on strongly ordered memory + * semantics, but we have that. We use the SYNCHRONIZE + * macro to make sure that the compiler does not reorder + * the i_update_core access below the data copy below. + */ + if (ip->i_update_core) { + ip->i_update_core = 0; + SYNCHRONIZE(); + } + + /* + * Make sure to get the latest timestamps from the Linux inode. 
+ */ + xfs_synchronize_times(ip); + vecp->i_addr = &ip->i_d; vecp->i_len = sizeof(struct xfs_icdinode); vecp->i_type = XLOG_REG_TYPE_ICORE; vecp++; nvecs++; + iip->ili_format.ilf_fields |= XFS_ILOG_CORE; /* * If this is really an old format inode, then we need to @@ -217,17 +328,16 @@ xfs_inode_item_format( switch (ip->i_d.di_format) { case XFS_DINODE_FMT_EXTENTS: - iip->ili_fields &= - ~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT | - XFS_ILOG_DEV | XFS_ILOG_UUID); - - if ((iip->ili_fields & XFS_ILOG_DEXT) && - ip->i_d.di_nextents > 0 && - ip->i_df.if_bytes > 0) { + ASSERT(!(iip->ili_format.ilf_fields & + (XFS_ILOG_DDATA | XFS_ILOG_DBROOT | + XFS_ILOG_DEV | XFS_ILOG_UUID))); + if (iip->ili_format.ilf_fields & XFS_ILOG_DEXT) { + ASSERT(ip->i_df.if_bytes > 0); ASSERT(ip->i_df.if_u1.if_extents != NULL); - ASSERT(ip->i_df.if_bytes / sizeof(xfs_bmbt_rec_t) > 0); + ASSERT(ip->i_d.di_nextents > 0); ASSERT(iip->ili_extents_buf == NULL); - + ASSERT((ip->i_df.if_bytes / + (uint)sizeof(xfs_bmbt_rec_t)) > 0); #ifdef XFS_NATIVE_HOST if (ip->i_d.di_nextents == ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t)) { @@ -249,18 +359,15 @@ xfs_inode_item_format( iip->ili_format.ilf_dsize = vecp->i_len; vecp++; nvecs++; - } else { - iip->ili_fields &= ~XFS_ILOG_DEXT; } break; case XFS_DINODE_FMT_BTREE: - iip->ili_fields &= - ~(XFS_ILOG_DDATA | XFS_ILOG_DEXT | - XFS_ILOG_DEV | XFS_ILOG_UUID); - - if ((iip->ili_fields & XFS_ILOG_DBROOT) && - ip->i_df.if_broot_bytes > 0) { + ASSERT(!(iip->ili_format.ilf_fields & + (XFS_ILOG_DDATA | XFS_ILOG_DEXT | + XFS_ILOG_DEV | XFS_ILOG_UUID))); + if (iip->ili_format.ilf_fields & XFS_ILOG_DBROOT) { + ASSERT(ip->i_df.if_broot_bytes > 0); ASSERT(ip->i_df.if_broot != NULL); vecp->i_addr = ip->i_df.if_broot; vecp->i_len = ip->i_df.if_broot_bytes; @@ -268,30 +375,15 @@ xfs_inode_item_format( vecp++; nvecs++; iip->ili_format.ilf_dsize = ip->i_df.if_broot_bytes; - } else { - ASSERT(!(iip->ili_fields & - XFS_ILOG_DBROOT)); -#ifdef XFS_TRANS_DEBUG - if (iip->ili_root_size 
> 0) { - ASSERT(iip->ili_root_size == - ip->i_df.if_broot_bytes); - ASSERT(memcmp(iip->ili_orig_root, - ip->i_df.if_broot, - iip->ili_root_size) == 0); - } else { - ASSERT(ip->i_df.if_broot_bytes == 0); - } -#endif - iip->ili_fields &= ~XFS_ILOG_DBROOT; } break; case XFS_DINODE_FMT_LOCAL: - iip->ili_fields &= - ~(XFS_ILOG_DEXT | XFS_ILOG_DBROOT | - XFS_ILOG_DEV | XFS_ILOG_UUID); - if ((iip->ili_fields & XFS_ILOG_DDATA) && - ip->i_df.if_bytes > 0) { + ASSERT(!(iip->ili_format.ilf_fields & + (XFS_ILOG_DBROOT | XFS_ILOG_DEXT | + XFS_ILOG_DEV | XFS_ILOG_UUID))); + if (iip->ili_format.ilf_fields & XFS_ILOG_DDATA) { + ASSERT(ip->i_df.if_bytes > 0); ASSERT(ip->i_df.if_u1.if_data != NULL); ASSERT(ip->i_d.di_size > 0); @@ -309,26 +401,24 @@ xfs_inode_item_format( vecp++; nvecs++; iip->ili_format.ilf_dsize = (unsigned)data_bytes; - } else { - iip->ili_fields &= ~XFS_ILOG_DDATA; } break; case XFS_DINODE_FMT_DEV: - iip->ili_fields &= - ~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT | - XFS_ILOG_DEXT | XFS_ILOG_UUID); - if (iip->ili_fields & XFS_ILOG_DEV) { + ASSERT(!(iip->ili_format.ilf_fields & + (XFS_ILOG_DBROOT | XFS_ILOG_DEXT | + XFS_ILOG_DDATA | XFS_ILOG_UUID))); + if (iip->ili_format.ilf_fields & XFS_ILOG_DEV) { iip->ili_format.ilf_u.ilfu_rdev = ip->i_df.if_u2.if_rdev; } break; case XFS_DINODE_FMT_UUID: - iip->ili_fields &= - ~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT | - XFS_ILOG_DEXT | XFS_ILOG_DEV); - if (iip->ili_fields & XFS_ILOG_UUID) { + ASSERT(!(iip->ili_format.ilf_fields & + (XFS_ILOG_DBROOT | XFS_ILOG_DEXT | + XFS_ILOG_DDATA | XFS_ILOG_DEV))); + if (iip->ili_format.ilf_fields & XFS_ILOG_UUID) { iip->ili_format.ilf_u.ilfu_uuid = ip->i_df.if_u2.if_uuid; } @@ -340,25 +430,31 @@ xfs_inode_item_format( } /* - * If there are no attributes associated with the file, then we're done. + * If there are no attributes associated with the file, + * then we're done. + * Assert that no attribute-related log flags are set. 
*/ if (!XFS_IFORK_Q(ip)) { - iip->ili_fields &= - ~(XFS_ILOG_ADATA | XFS_ILOG_ABROOT | XFS_ILOG_AEXT); - goto out; + iip->ili_format.ilf_size = nvecs; + ASSERT(!(iip->ili_format.ilf_fields & + (XFS_ILOG_ADATA | XFS_ILOG_ABROOT | XFS_ILOG_AEXT))); + return; } switch (ip->i_d.di_aformat) { case XFS_DINODE_FMT_EXTENTS: - iip->ili_fields &= - ~(XFS_ILOG_ADATA | XFS_ILOG_ABROOT); - - if ((iip->ili_fields & XFS_ILOG_AEXT) && - ip->i_d.di_anextents > 0 && - ip->i_afp->if_bytes > 0) { - ASSERT(ip->i_afp->if_bytes / sizeof(xfs_bmbt_rec_t) == - ip->i_d.di_anextents); + ASSERT(!(iip->ili_format.ilf_fields & + (XFS_ILOG_ADATA | XFS_ILOG_ABROOT))); + if (iip->ili_format.ilf_fields & XFS_ILOG_AEXT) { +#ifdef DEBUG + int nrecs = ip->i_afp->if_bytes / + (uint)sizeof(xfs_bmbt_rec_t); + ASSERT(nrecs > 0); + ASSERT(nrecs == ip->i_d.di_anextents); + ASSERT(ip->i_afp->if_bytes > 0); ASSERT(ip->i_afp->if_u1.if_extents != NULL); + ASSERT(ip->i_d.di_anextents > 0); +#endif #ifdef XFS_NATIVE_HOST /* * There are not delayed allocation extents @@ -375,36 +471,29 @@ xfs_inode_item_format( iip->ili_format.ilf_asize = vecp->i_len; vecp++; nvecs++; - } else { - iip->ili_fields &= ~XFS_ILOG_AEXT; } break; case XFS_DINODE_FMT_BTREE: - iip->ili_fields &= - ~(XFS_ILOG_ADATA | XFS_ILOG_AEXT); - - if ((iip->ili_fields & XFS_ILOG_ABROOT) && - ip->i_afp->if_broot_bytes > 0) { + ASSERT(!(iip->ili_format.ilf_fields & + (XFS_ILOG_ADATA | XFS_ILOG_AEXT))); + if (iip->ili_format.ilf_fields & XFS_ILOG_ABROOT) { + ASSERT(ip->i_afp->if_broot_bytes > 0); ASSERT(ip->i_afp->if_broot != NULL); - vecp->i_addr = ip->i_afp->if_broot; vecp->i_len = ip->i_afp->if_broot_bytes; vecp->i_type = XLOG_REG_TYPE_IATTR_BROOT; vecp++; nvecs++; iip->ili_format.ilf_asize = ip->i_afp->if_broot_bytes; - } else { - iip->ili_fields &= ~XFS_ILOG_ABROOT; } break; case XFS_DINODE_FMT_LOCAL: - iip->ili_fields &= - ~(XFS_ILOG_AEXT | XFS_ILOG_ABROOT); - - if ((iip->ili_fields & XFS_ILOG_ADATA) && - ip->i_afp->if_bytes > 0) { + 
ASSERT(!(iip->ili_format.ilf_fields & + (XFS_ILOG_ABROOT | XFS_ILOG_AEXT))); + if (iip->ili_format.ilf_fields & XFS_ILOG_ADATA) { + ASSERT(ip->i_afp->if_bytes > 0); ASSERT(ip->i_afp->if_u1.if_data != NULL); vecp->i_addr = ip->i_afp->if_u1.if_data; @@ -421,8 +510,6 @@ xfs_inode_item_format( vecp++; nvecs++; iip->ili_format.ilf_asize = (unsigned)data_bytes; - } else { - iip->ili_fields &= ~XFS_ILOG_ADATA; } break; @@ -431,15 +518,6 @@ xfs_inode_item_format( break; } -out: - /* - * Now update the log format that goes out to disk from the in-core - * values. We always write the inode core to make the arithmetic - * games in recovery easier, which isn't a big deal as just about any - * transaction would dirty it anyway. - */ - iip->ili_format.ilf_fields = XFS_ILOG_CORE | - (iip->ili_fields & ~XFS_ILOG_TIMESTAMP); iip->ili_format.ilf_size = nvecs; } @@ -518,13 +596,17 @@ xfs_inode_item_trylock( /* Stale items should force out the iclog */ if (ip->i_flags & XFS_ISTALE) { xfs_ifunlock(ip); - xfs_iunlock(ip, XFS_ILOCK_SHARED); + /* + * we hold the AIL lock - notify the unlock routine of this + * so it doesn't try to get the lock again. 
+ */ + xfs_iunlock(ip, XFS_ILOCK_SHARED|XFS_IUNLOCK_NONOTIFY); return XFS_ITEM_PINNED; } #ifdef DEBUG if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { - ASSERT(iip->ili_fields != 0); + ASSERT(iip->ili_format.ilf_fields != 0); ASSERT(iip->ili_logged == 0); ASSERT(lip->li_flags & XFS_LI_IN_AIL); } @@ -556,7 +638,7 @@ xfs_inode_item_unlock( if (iip->ili_extents_buf != NULL) { ASSERT(ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS); ASSERT(ip->i_d.di_nextents > 0); - ASSERT(iip->ili_fields & XFS_ILOG_DEXT); + ASSERT(iip->ili_format.ilf_fields & XFS_ILOG_DEXT); ASSERT(ip->i_df.if_bytes > 0); kmem_free(iip->ili_extents_buf); iip->ili_extents_buf = NULL; @@ -564,7 +646,7 @@ xfs_inode_item_unlock( if (iip->ili_aextents_buf != NULL) { ASSERT(ip->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS); ASSERT(ip->i_d.di_anextents > 0); - ASSERT(iip->ili_fields & XFS_ILOG_AEXT); + ASSERT(iip->ili_format.ilf_fields & XFS_ILOG_AEXT); ASSERT(ip->i_afp->if_bytes > 0); kmem_free(iip->ili_aextents_buf); iip->ili_aextents_buf = NULL; @@ -679,7 +761,8 @@ xfs_inode_item_push( * lock without sleeping, then there must not have been * anyone in the process of flushing the inode. */ - ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || iip->ili_fields != 0); + ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || + iip->ili_format.ilf_fields != 0); /* * Push the inode to it's backing buffer. This will not remove the @@ -902,7 +985,7 @@ xfs_iflush_abort( * Clear the inode logging fields so no more flushes are * attempted. */ - iip->ili_fields = 0; + iip->ili_format.ilf_fields = 0; } /* * Release the inode's flush lock since we're done with it. 
diff --git a/trunk/fs/xfs/xfs_inode_item.h b/trunk/fs/xfs/xfs_inode_item.h index 41d61c3b7a36..d3dee61e6d91 100644 --- a/trunk/fs/xfs/xfs_inode_item.h +++ b/trunk/fs/xfs/xfs_inode_item.h @@ -86,15 +86,6 @@ typedef struct xfs_inode_log_format_64 { #define XFS_ILOG_AEXT 0x080 /* log i_af.if_extents */ #define XFS_ILOG_ABROOT 0x100 /* log i_af.i_broot */ - -/* - * The timestamps are dirty, but not necessarily anything else in the inode - * core. Unlike the other fields above this one must never make it to disk - * in the ilf_fields of the inode_log_format, but is purely store in-memory in - * ili_fields in the inode_log_item. - */ -#define XFS_ILOG_TIMESTAMP 0x4000 - #define XFS_ILOG_NONCORE (XFS_ILOG_DDATA | XFS_ILOG_DEXT | \ XFS_ILOG_DBROOT | XFS_ILOG_DEV | \ XFS_ILOG_UUID | XFS_ILOG_ADATA | \ @@ -110,7 +101,7 @@ typedef struct xfs_inode_log_format_64 { XFS_ILOG_DEXT | XFS_ILOG_DBROOT | \ XFS_ILOG_DEV | XFS_ILOG_UUID | \ XFS_ILOG_ADATA | XFS_ILOG_AEXT | \ - XFS_ILOG_ABROOT | XFS_ILOG_TIMESTAMP) + XFS_ILOG_ABROOT) static inline int xfs_ilog_fbroot(int w) { @@ -143,7 +134,6 @@ typedef struct xfs_inode_log_item { unsigned short ili_lock_flags; /* lock flags */ unsigned short ili_logged; /* flushed logged data */ unsigned int ili_last_fields; /* fields when flushed */ - unsigned int ili_fields; /* fields to be logged */ struct xfs_bmbt_rec *ili_extents_buf; /* array of logged data exts */ struct xfs_bmbt_rec *ili_aextents_buf; /* array of logged @@ -158,7 +148,9 @@ typedef struct xfs_inode_log_item { static inline int xfs_inode_clean(xfs_inode_t *ip) { - return !ip->i_itemp || !(ip->i_itemp->ili_fields & XFS_ILOG_ALL); + return (!ip->i_itemp || + !(ip->i_itemp->ili_format.ilf_fields & XFS_ILOG_ALL)) && + !ip->i_update_core; } extern void xfs_inode_item_init(struct xfs_inode *, struct xfs_mount *); diff --git a/trunk/fs/xfs/xfs_ioctl.c b/trunk/fs/xfs/xfs_ioctl.c index f588320dc4b9..76f3ca5cfc36 100644 --- a/trunk/fs/xfs/xfs_ioctl.c +++ b/trunk/fs/xfs/xfs_ioctl.c @@ 
-450,12 +450,9 @@ xfs_attrmulti_attr_get( if (*len > XATTR_SIZE_MAX) return EINVAL; - kbuf = kmem_zalloc(*len, KM_SLEEP | KM_MAYFAIL); - if (!kbuf) { - kbuf = kmem_zalloc_large(*len); - if (!kbuf) - return ENOMEM; - } + kbuf = kmalloc(*len, GFP_KERNEL); + if (!kbuf) + return ENOMEM; error = xfs_attr_get(XFS_I(inode), name, kbuf, (int *)len, flags); if (error) @@ -465,10 +462,7 @@ xfs_attrmulti_attr_get( error = EFAULT; out_kfree: - if (is_vmalloc_addr(kbuf)) - kmem_free_large(kbuf); - else - kmem_free(kbuf); + kfree(kbuf); return error; } diff --git a/trunk/fs/xfs/xfs_ioctl32.c b/trunk/fs/xfs/xfs_ioctl32.c index a849a5473aff..f9ccb7b7c043 100644 --- a/trunk/fs/xfs/xfs_ioctl32.c +++ b/trunk/fs/xfs/xfs_ioctl32.c @@ -293,7 +293,7 @@ xfs_compat_ioc_bulkstat( int res; error = xfs_bulkstat_one_compat(mp, inlast, bulkreq.ubuffer, - sizeof(compat_xfs_bstat_t), NULL, &res); + sizeof(compat_xfs_bstat_t), 0, &res); } else if (cmd == XFS_IOC_FSBULKSTAT_32) { error = xfs_bulkstat(mp, &inlast, &count, xfs_bulkstat_one_compat, sizeof(compat_xfs_bstat_t), diff --git a/trunk/fs/xfs/xfs_iomap.c b/trunk/fs/xfs/xfs_iomap.c index 71a464503c43..246c7d57c6f9 100644 --- a/trunk/fs/xfs/xfs_iomap.c +++ b/trunk/fs/xfs/xfs_iomap.c @@ -31,7 +31,6 @@ #include "xfs_ialloc_btree.h" #include "xfs_dinode.h" #include "xfs_inode.h" -#include "xfs_inode_item.h" #include "xfs_btree.h" #include "xfs_bmap.h" #include "xfs_rtalloc.h" @@ -646,7 +645,6 @@ xfs_iomap_write_unwritten( xfs_trans_t *tp; xfs_bmbt_irec_t imap; xfs_bmap_free_t free_list; - xfs_fsize_t i_size; uint resblks; int committed; int error; @@ -707,22 +705,7 @@ xfs_iomap_write_unwritten( if (error) goto error_on_bmapi_transaction; - /* - * Log the updated inode size as we go. We have to be careful - * to only log it up to the actual write offset if it is - * halfway into a block. 
- */ - i_size = XFS_FSB_TO_B(mp, offset_fsb + count_fsb); - if (i_size > offset + count) - i_size = offset + count; - - i_size = xfs_new_eof(ip, i_size); - if (i_size) { - ip->i_d.di_size = i_size; - xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - } - - error = xfs_bmap_finish(&tp, &free_list, &committed); + error = xfs_bmap_finish(&(tp), &(free_list), &committed); if (error) goto error_on_bmapi_transaction; diff --git a/trunk/fs/xfs/xfs_iops.c b/trunk/fs/xfs/xfs_iops.c index 3011b879f850..ab302539e5b9 100644 --- a/trunk/fs/xfs/xfs_iops.c +++ b/trunk/fs/xfs/xfs_iops.c @@ -50,15 +50,65 @@ #include #include -static int -xfs_initxattrs( - struct inode *inode, - const struct xattr *xattr_array, - void *fs_info) +/* + * Bring the timestamps in the XFS inode uptodate. + * + * Used before writing the inode to disk. + */ +void +xfs_synchronize_times( + xfs_inode_t *ip) { - const struct xattr *xattr; - struct xfs_inode *ip = XFS_I(inode); - int error = 0; + struct inode *inode = VFS_I(ip); + + ip->i_d.di_atime.t_sec = (__int32_t)inode->i_atime.tv_sec; + ip->i_d.di_atime.t_nsec = (__int32_t)inode->i_atime.tv_nsec; + ip->i_d.di_ctime.t_sec = (__int32_t)inode->i_ctime.tv_sec; + ip->i_d.di_ctime.t_nsec = (__int32_t)inode->i_ctime.tv_nsec; + ip->i_d.di_mtime.t_sec = (__int32_t)inode->i_mtime.tv_sec; + ip->i_d.di_mtime.t_nsec = (__int32_t)inode->i_mtime.tv_nsec; +} + +/* + * If the linux inode is valid, mark it dirty, else mark the dirty state + * in the XFS inode to make sure we pick it up when reclaiming the inode. 
+ */ +void +xfs_mark_inode_dirty_sync( + xfs_inode_t *ip) +{ + struct inode *inode = VFS_I(ip); + + if (!(inode->i_state & (I_WILL_FREE|I_FREEING))) + mark_inode_dirty_sync(inode); + else { + barrier(); + ip->i_update_core = 1; + } +} + +void +xfs_mark_inode_dirty( + xfs_inode_t *ip) +{ + struct inode *inode = VFS_I(ip); + + if (!(inode->i_state & (I_WILL_FREE|I_FREEING))) + mark_inode_dirty(inode); + else { + barrier(); + ip->i_update_core = 1; + } + +} + + +int xfs_initxattrs(struct inode *inode, const struct xattr *xattr_array, + void *fs_info) +{ + const struct xattr *xattr; + struct xfs_inode *ip = XFS_I(inode); + int error = 0; for (xattr = xattr_array; xattr->name != NULL; xattr++) { error = xfs_attr_set(ip, xattr->name, xattr->value, @@ -628,16 +678,19 @@ xfs_setattr_nonsize( inode->i_atime = iattr->ia_atime; ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec; ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec; + ip->i_update_core = 1; } if (mask & ATTR_CTIME) { inode->i_ctime = iattr->ia_ctime; ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec; ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec; + ip->i_update_core = 1; } if (mask & ATTR_MTIME) { inode->i_mtime = iattr->ia_mtime; ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec; ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec; + ip->i_update_core = 1; } xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); @@ -865,11 +918,13 @@ xfs_setattr_size( inode->i_ctime = iattr->ia_ctime; ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec; ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec; + ip->i_update_core = 1; } if (mask & ATTR_MTIME) { inode->i_mtime = iattr->ia_mtime; ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec; ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec; + ip->i_update_core = 1; } xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); diff --git a/trunk/fs/xfs/xfs_itable.c b/trunk/fs/xfs/xfs_itable.c index 9720c54bbed0..751e94fe1f77 100644 --- a/trunk/fs/xfs/xfs_itable.c +++ b/trunk/fs/xfs/xfs_itable.c @@ -62,6 +62,7 @@ 
xfs_bulkstat_one_int( { struct xfs_icdinode *dic; /* dinode core info pointer */ struct xfs_inode *ip; /* incore inode pointer */ + struct inode *inode; struct xfs_bstat *buf; /* return buffer */ int error = 0; /* error value */ @@ -85,6 +86,7 @@ xfs_bulkstat_one_int( ASSERT(ip->i_imap.im_blkno != 0); dic = &ip->i_d; + inode = VFS_I(ip); /* xfs_iget returns the following without needing * further change. @@ -97,12 +99,19 @@ xfs_bulkstat_one_int( buf->bs_uid = dic->di_uid; buf->bs_gid = dic->di_gid; buf->bs_size = dic->di_size; - buf->bs_atime.tv_sec = dic->di_atime.t_sec; - buf->bs_atime.tv_nsec = dic->di_atime.t_nsec; - buf->bs_mtime.tv_sec = dic->di_mtime.t_sec; - buf->bs_mtime.tv_nsec = dic->di_mtime.t_nsec; - buf->bs_ctime.tv_sec = dic->di_ctime.t_sec; - buf->bs_ctime.tv_nsec = dic->di_ctime.t_nsec; + + /* + * We need to read the timestamps from the Linux inode because + * the VFS keeps writing directly into the inode structure instead + * of telling us about the updates. + */ + buf->bs_atime.tv_sec = inode->i_atime.tv_sec; + buf->bs_atime.tv_nsec = inode->i_atime.tv_nsec; + buf->bs_mtime.tv_sec = inode->i_mtime.tv_sec; + buf->bs_mtime.tv_nsec = inode->i_mtime.tv_nsec; + buf->bs_ctime.tv_sec = inode->i_ctime.tv_sec; + buf->bs_ctime.tv_nsec = inode->i_ctime.tv_nsec; + buf->bs_xflags = xfs_ip2xflags(ip); buf->bs_extsize = dic->di_extsize << mp->m_sb.sb_blocklog; buf->bs_extents = dic->di_nextents; diff --git a/trunk/fs/xfs/xfs_log.c b/trunk/fs/xfs/xfs_log.c index 98a9cb5ffd17..e2cc3568c299 100644 --- a/trunk/fs/xfs/xfs_log.c +++ b/trunk/fs/xfs/xfs_log.c @@ -67,10 +67,15 @@ STATIC void xlog_state_switch_iclogs(xlog_t *log, int eventual_size); STATIC void xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog); +/* local functions to manipulate grant head */ +STATIC int xlog_grant_log_space(xlog_t *log, + xlog_ticket_t *xtic); STATIC void xlog_grant_push_ail(struct log *log, int need_bytes); STATIC void xlog_regrant_reserve_log_space(xlog_t *log, xlog_ticket_t 
*ticket); +STATIC int xlog_regrant_write_log_space(xlog_t *log, + xlog_ticket_t *ticket); STATIC void xlog_ungrant_log_space(xlog_t *log, xlog_ticket_t *ticket); @@ -145,93 +150,78 @@ xlog_grant_add_space( } while (head_val != old); } -STATIC void -xlog_grant_head_init( - struct xlog_grant_head *head) -{ - xlog_assign_grant_head(&head->grant, 1, 0); - INIT_LIST_HEAD(&head->waiters); - spin_lock_init(&head->lock); -} - -STATIC void -xlog_grant_head_wake_all( - struct xlog_grant_head *head) +STATIC bool +xlog_reserveq_wake( + struct log *log, + int *free_bytes) { struct xlog_ticket *tic; + int need_bytes; - spin_lock(&head->lock); - list_for_each_entry(tic, &head->waiters, t_queue) - wake_up_process(tic->t_task); - spin_unlock(&head->lock); -} - -static inline int -xlog_ticket_reservation( - struct log *log, - struct xlog_grant_head *head, - struct xlog_ticket *tic) -{ - if (head == &log->l_write_head) { - ASSERT(tic->t_flags & XLOG_TIC_PERM_RESERV); - return tic->t_unit_res; - } else { + list_for_each_entry(tic, &log->l_reserveq, t_queue) { if (tic->t_flags & XLOG_TIC_PERM_RESERV) - return tic->t_unit_res * tic->t_cnt; + need_bytes = tic->t_unit_res * tic->t_cnt; else - return tic->t_unit_res; + need_bytes = tic->t_unit_res; + + if (*free_bytes < need_bytes) + return false; + *free_bytes -= need_bytes; + + trace_xfs_log_grant_wake_up(log, tic); + wake_up(&tic->t_wait); } + + return true; } STATIC bool -xlog_grant_head_wake( +xlog_writeq_wake( struct log *log, - struct xlog_grant_head *head, int *free_bytes) { struct xlog_ticket *tic; int need_bytes; - list_for_each_entry(tic, &head->waiters, t_queue) { - need_bytes = xlog_ticket_reservation(log, head, tic); + list_for_each_entry(tic, &log->l_writeq, t_queue) { + ASSERT(tic->t_flags & XLOG_TIC_PERM_RESERV); + + need_bytes = tic->t_unit_res; + if (*free_bytes < need_bytes) return false; - *free_bytes -= need_bytes; - trace_xfs_log_grant_wake_up(log, tic); - wake_up_process(tic->t_task); + + 
trace_xfs_log_regrant_write_wake_up(log, tic); + wake_up(&tic->t_wait); } return true; } STATIC int -xlog_grant_head_wait( +xlog_reserveq_wait( struct log *log, - struct xlog_grant_head *head, struct xlog_ticket *tic, int need_bytes) { - list_add_tail(&tic->t_queue, &head->waiters); + list_add_tail(&tic->t_queue, &log->l_reserveq); do { if (XLOG_FORCED_SHUTDOWN(log)) goto shutdown; xlog_grant_push_ail(log, need_bytes); - __set_current_state(TASK_UNINTERRUPTIBLE); - spin_unlock(&head->lock); - XFS_STATS_INC(xs_sleep_logspace); - trace_xfs_log_grant_sleep(log, tic); - schedule(); + + xlog_wait(&tic->t_wait, &log->l_grant_reserve_lock); trace_xfs_log_grant_wake(log, tic); - spin_lock(&head->lock); + spin_lock(&log->l_grant_reserve_lock); if (XLOG_FORCED_SHUTDOWN(log)) goto shutdown; - } while (xlog_space_left(log, &head->grant) < need_bytes); + } while (xlog_space_left(log, &log->l_grant_reserve_head) < need_bytes); list_del_init(&tic->t_queue); return 0; @@ -240,58 +230,35 @@ xlog_grant_head_wait( return XFS_ERROR(EIO); } -/* - * Atomically get the log space required for a log ticket. - * - * Once a ticket gets put onto head->waiters, it will only return after the - * needed reservation is satisfied. - * - * This function is structured so that it has a lock free fast path. This is - * necessary because every new transaction reservation will come through this - * path. Hence any lock will be globally hot if we take it unconditionally on - * every pass. - * - * As tickets are only ever moved on and off head->waiters under head->lock, we - * only need to take that lock if we are going to add the ticket to the queue - * and sleep. We can avoid taking the lock if the ticket was never added to - * head->waiters because the t_queue list head will be empty and we hold the - * only reference to it so it can safely be checked unlocked. 
- */ STATIC int -xlog_grant_head_check( +xlog_writeq_wait( struct log *log, - struct xlog_grant_head *head, struct xlog_ticket *tic, - int *need_bytes) + int need_bytes) { - int free_bytes; - int error = 0; + list_add_tail(&tic->t_queue, &log->l_writeq); - ASSERT(!(log->l_flags & XLOG_ACTIVE_RECOVERY)); + do { + if (XLOG_FORCED_SHUTDOWN(log)) + goto shutdown; + xlog_grant_push_ail(log, need_bytes); - /* - * If there are other waiters on the queue then give them a chance at - * logspace before us. Wake up the first waiters, if we do not wake - * up all the waiters then go to sleep waiting for more free space, - * otherwise try to get some space for this transaction. - */ - *need_bytes = xlog_ticket_reservation(log, head, tic); - free_bytes = xlog_space_left(log, &head->grant); - if (!list_empty_careful(&head->waiters)) { - spin_lock(&head->lock); - if (!xlog_grant_head_wake(log, head, &free_bytes) || - free_bytes < *need_bytes) { - error = xlog_grant_head_wait(log, head, tic, - *need_bytes); - } - spin_unlock(&head->lock); - } else if (free_bytes < *need_bytes) { - spin_lock(&head->lock); - error = xlog_grant_head_wait(log, head, tic, *need_bytes); - spin_unlock(&head->lock); - } + XFS_STATS_INC(xs_sleep_logspace); + trace_xfs_log_regrant_write_sleep(log, tic); - return error; + xlog_wait(&tic->t_wait, &log->l_grant_write_lock); + trace_xfs_log_regrant_write_wake(log, tic); + + spin_lock(&log->l_grant_write_lock); + if (XLOG_FORCED_SHUTDOWN(log)) + goto shutdown; + } while (xlog_space_left(log, &log->l_grant_write_head) < need_bytes); + + list_del_init(&tic->t_queue); + return 0; +shutdown: + list_del_init(&tic->t_queue); + return XFS_ERROR(EIO); } static void @@ -318,128 +285,6 @@ xlog_tic_add_region(xlog_ticket_t *tic, uint len, uint type) tic->t_res_num++; } -/* - * Replenish the byte reservation required by moving the grant write head. 
- */ -int -xfs_log_regrant( - struct xfs_mount *mp, - struct xlog_ticket *tic) -{ - struct log *log = mp->m_log; - int need_bytes; - int error = 0; - - if (XLOG_FORCED_SHUTDOWN(log)) - return XFS_ERROR(EIO); - - XFS_STATS_INC(xs_try_logspace); - - /* - * This is a new transaction on the ticket, so we need to change the - * transaction ID so that the next transaction has a different TID in - * the log. Just add one to the existing tid so that we can see chains - * of rolling transactions in the log easily. - */ - tic->t_tid++; - - xlog_grant_push_ail(log, tic->t_unit_res); - - tic->t_curr_res = tic->t_unit_res; - xlog_tic_reset_res(tic); - - if (tic->t_cnt > 0) - return 0; - - trace_xfs_log_regrant(log, tic); - - error = xlog_grant_head_check(log, &log->l_write_head, tic, - &need_bytes); - if (error) - goto out_error; - - xlog_grant_add_space(log, &log->l_write_head.grant, need_bytes); - trace_xfs_log_regrant_exit(log, tic); - xlog_verify_grant_tail(log); - return 0; - -out_error: - /* - * If we are failing, make sure the ticket doesn't have any current - * reservations. We don't want to add this back when the ticket/ - * transaction gets cancelled. - */ - tic->t_curr_res = 0; - tic->t_cnt = 0; /* ungrant will give back unit_res * t_cnt. */ - return error; -} - -/* - * Reserve log space and return a ticket corresponding the reservation. - * - * Each reservation is going to reserve extra space for a log record header. - * When writes happen to the on-disk log, we don't subtract the length of the - * log record header from any reservation. By wasting space in each - * reservation, we prevent over allocation problems. 
- */ -int -xfs_log_reserve( - struct xfs_mount *mp, - int unit_bytes, - int cnt, - struct xlog_ticket **ticp, - __uint8_t client, - bool permanent, - uint t_type) -{ - struct log *log = mp->m_log; - struct xlog_ticket *tic; - int need_bytes; - int error = 0; - - ASSERT(client == XFS_TRANSACTION || client == XFS_LOG); - - if (XLOG_FORCED_SHUTDOWN(log)) - return XFS_ERROR(EIO); - - XFS_STATS_INC(xs_try_logspace); - - ASSERT(*ticp == NULL); - tic = xlog_ticket_alloc(log, unit_bytes, cnt, client, permanent, - KM_SLEEP | KM_MAYFAIL); - if (!tic) - return XFS_ERROR(ENOMEM); - - tic->t_trans_type = t_type; - *ticp = tic; - - xlog_grant_push_ail(log, tic->t_unit_res * tic->t_cnt); - - trace_xfs_log_reserve(log, tic); - - error = xlog_grant_head_check(log, &log->l_reserve_head, tic, - &need_bytes); - if (error) - goto out_error; - - xlog_grant_add_space(log, &log->l_reserve_head.grant, need_bytes); - xlog_grant_add_space(log, &log->l_write_head.grant, need_bytes); - trace_xfs_log_reserve_exit(log, tic); - xlog_verify_grant_tail(log); - return 0; - -out_error: - /* - * If we are failing, make sure the ticket doesn't have any current - * reservations. We don't want to add this back when the ticket/ - * transaction gets cancelled. - */ - tic->t_curr_res = 0; - tic->t_cnt = 0; /* ungrant will give back unit_res * t_cnt. */ - return error; -} - - /* * NOTES: * @@ -549,6 +394,88 @@ xfs_log_release_iclog( return 0; } +/* + * 1. Reserve an amount of on-disk log space and return a ticket corresponding + * to the reservation. + * 2. Potentially, push buffers at tail of log to disk. + * + * Each reservation is going to reserve extra space for a log record header. + * When writes happen to the on-disk log, we don't subtract the length of the + * log record header from any reservation. By wasting space in each + * reservation, we prevent over allocation problems. 
+ */ +int +xfs_log_reserve( + struct xfs_mount *mp, + int unit_bytes, + int cnt, + struct xlog_ticket **ticket, + __uint8_t client, + uint flags, + uint t_type) +{ + struct log *log = mp->m_log; + struct xlog_ticket *internal_ticket; + int retval = 0; + + ASSERT(client == XFS_TRANSACTION || client == XFS_LOG); + + if (XLOG_FORCED_SHUTDOWN(log)) + return XFS_ERROR(EIO); + + XFS_STATS_INC(xs_try_logspace); + + + if (*ticket != NULL) { + ASSERT(flags & XFS_LOG_PERM_RESERV); + internal_ticket = *ticket; + + /* + * this is a new transaction on the ticket, so we need to + * change the transaction ID so that the next transaction has a + * different TID in the log. Just add one to the existing tid + * so that we can see chains of rolling transactions in the log + * easily. + */ + internal_ticket->t_tid++; + + trace_xfs_log_reserve(log, internal_ticket); + + xlog_grant_push_ail(log, internal_ticket->t_unit_res); + retval = xlog_regrant_write_log_space(log, internal_ticket); + } else { + /* may sleep if need to allocate more tickets */ + internal_ticket = xlog_ticket_alloc(log, unit_bytes, cnt, + client, flags, + KM_SLEEP|KM_MAYFAIL); + if (!internal_ticket) + return XFS_ERROR(ENOMEM); + internal_ticket->t_trans_type = t_type; + *ticket = internal_ticket; + + trace_xfs_log_reserve(log, internal_ticket); + + xlog_grant_push_ail(log, + (internal_ticket->t_unit_res * + internal_ticket->t_cnt)); + retval = xlog_grant_log_space(log, internal_ticket); + } + + if (unlikely(retval)) { + /* + * If we are failing, make sure the ticket doesn't have any + * current reservations. We don't want to add this back + * when the ticket/ transaction gets cancelled. + */ + internal_ticket->t_curr_res = 0; + /* ungrant will give back unit_res * t_cnt. */ + internal_ticket->t_cnt = 0; + } + + return retval; +} + + /* * Mount a log filesystem * @@ -833,35 +760,64 @@ xfs_log_item_init( INIT_LIST_HEAD(&item->li_cil); } -/* - * Wake up processes waiting for log space after we have moved the log tail. 
- */ void -xfs_log_space_wake( - struct xfs_mount *mp) +xfs_log_move_tail(xfs_mount_t *mp, + xfs_lsn_t tail_lsn) { - struct log *log = mp->m_log; - int free_bytes; + xlog_ticket_t *tic; + xlog_t *log = mp->m_log; + int need_bytes, free_bytes; if (XLOG_FORCED_SHUTDOWN(log)) return; - if (!list_empty_careful(&log->l_write_head.waiters)) { - ASSERT(!(log->l_flags & XLOG_ACTIVE_RECOVERY)); + if (tail_lsn == 0) + tail_lsn = atomic64_read(&log->l_last_sync_lsn); - spin_lock(&log->l_write_head.lock); - free_bytes = xlog_space_left(log, &log->l_write_head.grant); - xlog_grant_head_wake(log, &log->l_write_head, &free_bytes); - spin_unlock(&log->l_write_head.lock); - } + /* tail_lsn == 1 implies that we weren't passed a valid value. */ + if (tail_lsn != 1) + atomic64_set(&log->l_tail_lsn, tail_lsn); - if (!list_empty_careful(&log->l_reserve_head.waiters)) { - ASSERT(!(log->l_flags & XLOG_ACTIVE_RECOVERY)); + if (!list_empty_careful(&log->l_writeq)) { +#ifdef DEBUG + if (log->l_flags & XLOG_ACTIVE_RECOVERY) + panic("Recovery problem"); +#endif + spin_lock(&log->l_grant_write_lock); + free_bytes = xlog_space_left(log, &log->l_grant_write_head); + list_for_each_entry(tic, &log->l_writeq, t_queue) { + ASSERT(tic->t_flags & XLOG_TIC_PERM_RESERV); - spin_lock(&log->l_reserve_head.lock); - free_bytes = xlog_space_left(log, &log->l_reserve_head.grant); - xlog_grant_head_wake(log, &log->l_reserve_head, &free_bytes); - spin_unlock(&log->l_reserve_head.lock); + if (free_bytes < tic->t_unit_res && tail_lsn != 1) + break; + tail_lsn = 0; + free_bytes -= tic->t_unit_res; + trace_xfs_log_regrant_write_wake_up(log, tic); + wake_up(&tic->t_wait); + } + spin_unlock(&log->l_grant_write_lock); + } + + if (!list_empty_careful(&log->l_reserveq)) { +#ifdef DEBUG + if (log->l_flags & XLOG_ACTIVE_RECOVERY) + panic("Recovery problem"); +#endif + spin_lock(&log->l_grant_reserve_lock); + free_bytes = xlog_space_left(log, &log->l_grant_reserve_head); + list_for_each_entry(tic, &log->l_reserveq, t_queue) 
{ + if (tic->t_flags & XLOG_TIC_PERM_RESERV) + need_bytes = tic->t_unit_res*tic->t_cnt; + else + need_bytes = tic->t_unit_res; + if (free_bytes < need_bytes && tail_lsn != 1) + break; + tail_lsn = 0; + free_bytes -= need_bytes; + trace_xfs_log_grant_wake_up(log, tic); + wake_up(&tic->t_wait); + } + spin_unlock(&log->l_grant_reserve_lock); } } @@ -911,7 +867,21 @@ xfs_log_need_covered(xfs_mount_t *mp) return needed; } -/* +/****************************************************************************** + * + * local routines + * + ****************************************************************************** + */ + +/* xfs_trans_tail_ail returns 0 when there is nothing in the list. + * The log manager must keep track of the last LR which was committed + * to disk. The lsn of this LR will become the new tail_lsn whenever + * xfs_trans_tail_ail returns 0. If we don't do this, we run into + * the situation where stuff could be written into the log but nothing + * was ever in the AIL when asked. Eventually, we panic since the + * tail hits the head. + * * We may be holding the log iclog lock upon entering this routine. */ xfs_lsn_t @@ -921,17 +891,10 @@ xlog_assign_tail_lsn( xfs_lsn_t tail_lsn; struct log *log = mp->m_log; - /* - * To make sure we always have a valid LSN for the log tail we keep - * track of the last LSN which was committed in log->l_last_sync_lsn, - * and use that when the AIL was empty and xfs_ail_min_lsn returns 0. - * - * If the AIL has been emptied we also need to wake any process - * waiting for this condition. 
- */ tail_lsn = xfs_ail_min_lsn(mp->m_ail); if (!tail_lsn) tail_lsn = atomic64_read(&log->l_last_sync_lsn); + atomic64_set(&log->l_tail_lsn, tail_lsn); return tail_lsn; } @@ -1137,9 +1100,12 @@ xlog_alloc_log(xfs_mount_t *mp, xlog_assign_atomic_lsn(&log->l_tail_lsn, 1, 0); xlog_assign_atomic_lsn(&log->l_last_sync_lsn, 1, 0); log->l_curr_cycle = 1; /* 0 is bad since this is initial value */ - - xlog_grant_head_init(&log->l_reserve_head); - xlog_grant_head_init(&log->l_write_head); + xlog_assign_grant_head(&log->l_grant_reserve_head, 1, 0); + xlog_assign_grant_head(&log->l_grant_write_head, 1, 0); + INIT_LIST_HEAD(&log->l_reserveq); + INIT_LIST_HEAD(&log->l_writeq); + spin_lock_init(&log->l_grant_reserve_lock); + spin_lock_init(&log->l_grant_write_lock); error = EFSCORRUPTED; if (xfs_sb_version_hassector(&mp->m_sb)) { @@ -1314,7 +1280,7 @@ xlog_grant_push_ail( ASSERT(BTOBB(need_bytes) < log->l_logBBsize); - free_bytes = xlog_space_left(log, &log->l_reserve_head.grant); + free_bytes = xlog_space_left(log, &log->l_grant_reserve_head); free_blocks = BTOBBT(free_bytes); /* @@ -1446,8 +1412,8 @@ xlog_sync(xlog_t *log, roundoff < BBTOB(1))); /* move grant heads by roundoff in sync */ - xlog_grant_add_space(log, &log->l_reserve_head.grant, roundoff); - xlog_grant_add_space(log, &log->l_write_head.grant, roundoff); + xlog_grant_add_space(log, &log->l_grant_reserve_head, roundoff); + xlog_grant_add_space(log, &log->l_grant_write_head, roundoff); /* put cycle number in every block */ xlog_pack_data(log, iclog, roundoff); @@ -2600,6 +2566,119 @@ xlog_state_get_iclog_space(xlog_t *log, return 0; } /* xlog_state_get_iclog_space */ +/* + * Atomically get the log space required for a log ticket. + * + * Once a ticket gets put onto the reserveq, it will only return after the + * needed reservation is satisfied. + * + * This function is structured so that it has a lock free fast path. This is + * necessary because every new transaction reservation will come through this + * path. 
Hence any lock will be globally hot if we take it unconditionally on + * every pass. + * + * As tickets are only ever moved on and off the reserveq under the + * l_grant_reserve_lock, we only need to take that lock if we are going to add + * the ticket to the queue and sleep. We can avoid taking the lock if the ticket + * was never added to the reserveq because the t_queue list head will be empty + * and we hold the only reference to it so it can safely be checked unlocked. + */ +STATIC int +xlog_grant_log_space( + struct log *log, + struct xlog_ticket *tic) +{ + int free_bytes, need_bytes; + int error = 0; + + ASSERT(!(log->l_flags & XLOG_ACTIVE_RECOVERY)); + + trace_xfs_log_grant_enter(log, tic); + + /* + * If there are other waiters on the queue then give them a chance at + * logspace before us. Wake up the first waiters, if we do not wake + * up all the waiters then go to sleep waiting for more free space, + * otherwise try to get some space for this transaction. + */ + need_bytes = tic->t_unit_res; + if (tic->t_flags & XFS_LOG_PERM_RESERV) + need_bytes *= tic->t_ocnt; + free_bytes = xlog_space_left(log, &log->l_grant_reserve_head); + if (!list_empty_careful(&log->l_reserveq)) { + spin_lock(&log->l_grant_reserve_lock); + if (!xlog_reserveq_wake(log, &free_bytes) || + free_bytes < need_bytes) + error = xlog_reserveq_wait(log, tic, need_bytes); + spin_unlock(&log->l_grant_reserve_lock); + } else if (free_bytes < need_bytes) { + spin_lock(&log->l_grant_reserve_lock); + error = xlog_reserveq_wait(log, tic, need_bytes); + spin_unlock(&log->l_grant_reserve_lock); + } + if (error) + return error; + + xlog_grant_add_space(log, &log->l_grant_reserve_head, need_bytes); + xlog_grant_add_space(log, &log->l_grant_write_head, need_bytes); + trace_xfs_log_grant_exit(log, tic); + xlog_verify_grant_tail(log); + return 0; +} + +/* + * Replenish the byte reservation required by moving the grant write head. 
+ * + * Similar to xlog_grant_log_space, the function is structured to have a lock + * free fast path. + */ +STATIC int +xlog_regrant_write_log_space( + struct log *log, + struct xlog_ticket *tic) +{ + int free_bytes, need_bytes; + int error = 0; + + tic->t_curr_res = tic->t_unit_res; + xlog_tic_reset_res(tic); + + if (tic->t_cnt > 0) + return 0; + + ASSERT(!(log->l_flags & XLOG_ACTIVE_RECOVERY)); + + trace_xfs_log_regrant_write_enter(log, tic); + + /* + * If there are other waiters on the queue then give them a chance at + * logspace before us. Wake up the first waiters, if we do not wake + * up all the waiters then go to sleep waiting for more free space, + * otherwise try to get some space for this transaction. + */ + need_bytes = tic->t_unit_res; + free_bytes = xlog_space_left(log, &log->l_grant_write_head); + if (!list_empty_careful(&log->l_writeq)) { + spin_lock(&log->l_grant_write_lock); + if (!xlog_writeq_wake(log, &free_bytes) || + free_bytes < need_bytes) + error = xlog_writeq_wait(log, tic, need_bytes); + spin_unlock(&log->l_grant_write_lock); + } else if (free_bytes < need_bytes) { + spin_lock(&log->l_grant_write_lock); + error = xlog_writeq_wait(log, tic, need_bytes); + spin_unlock(&log->l_grant_write_lock); + } + + if (error) + return error; + + xlog_grant_add_space(log, &log->l_grant_write_head, need_bytes); + trace_xfs_log_regrant_write_exit(log, tic); + xlog_verify_grant_tail(log); + return 0; +} + /* The first cnt-1 times through here we don't need to * move the grant write head because the permanent * reservation has reserved cnt times the unit amount. 
@@ -2616,9 +2695,9 @@ xlog_regrant_reserve_log_space(xlog_t *log, if (ticket->t_cnt > 0) ticket->t_cnt--; - xlog_grant_sub_space(log, &log->l_reserve_head.grant, + xlog_grant_sub_space(log, &log->l_grant_reserve_head, ticket->t_curr_res); - xlog_grant_sub_space(log, &log->l_write_head.grant, + xlog_grant_sub_space(log, &log->l_grant_write_head, ticket->t_curr_res); ticket->t_curr_res = ticket->t_unit_res; xlog_tic_reset_res(ticket); @@ -2629,7 +2708,7 @@ xlog_regrant_reserve_log_space(xlog_t *log, if (ticket->t_cnt > 0) return; - xlog_grant_add_space(log, &log->l_reserve_head.grant, + xlog_grant_add_space(log, &log->l_grant_reserve_head, ticket->t_unit_res); trace_xfs_log_regrant_reserve_exit(log, ticket); @@ -2675,13 +2754,14 @@ xlog_ungrant_log_space(xlog_t *log, bytes += ticket->t_unit_res*ticket->t_cnt; } - xlog_grant_sub_space(log, &log->l_reserve_head.grant, bytes); - xlog_grant_sub_space(log, &log->l_write_head.grant, bytes); + xlog_grant_sub_space(log, &log->l_grant_reserve_head, bytes); + xlog_grant_sub_space(log, &log->l_grant_write_head, bytes); trace_xfs_log_ungrant_exit(log, ticket); - xfs_log_space_wake(log->l_mp); -} + xfs_log_move_tail(log->l_mp, 1); +} /* xlog_ungrant_log_space */ + /* * Flush iclog to disk if this is the last reference to the given iclog and @@ -3139,7 +3219,7 @@ xlog_ticket_alloc( int unit_bytes, int cnt, char client, - bool permanent, + uint xflags, int alloc_flags) { struct xlog_ticket *tic; @@ -3233,7 +3313,6 @@ xlog_ticket_alloc( } atomic_set(&tic->t_ref, 1); - tic->t_task = current; INIT_LIST_HEAD(&tic->t_queue); tic->t_unit_res = unit_bytes; tic->t_curr_res = unit_bytes; @@ -3243,8 +3322,9 @@ xlog_ticket_alloc( tic->t_clientid = client; tic->t_flags = XLOG_TIC_INITED; tic->t_trans_type = 0; - if (permanent) + if (xflags & XFS_LOG_PERM_RESERV) tic->t_flags |= XLOG_TIC_PERM_RESERV; + init_waitqueue_head(&tic->t_wait); xlog_tic_reset_res(tic); @@ -3300,7 +3380,7 @@ xlog_verify_grant_tail( int tail_cycle, tail_blocks; int 
cycle, space; - xlog_crack_grant_head(&log->l_write_head.grant, &cycle, &space); + xlog_crack_grant_head(&log->l_grant_write_head, &cycle, &space); xlog_crack_atomic_lsn(&log->l_tail_lsn, &tail_cycle, &tail_blocks); if (tail_cycle != cycle) { if (cycle - 1 != tail_cycle && @@ -3502,6 +3582,7 @@ xfs_log_force_umount( struct xfs_mount *mp, int logerror) { + xlog_ticket_t *tic; xlog_t *log; int retval; @@ -3569,8 +3650,15 @@ xfs_log_force_umount( * we don't enqueue anything once the SHUTDOWN flag is set, and this * action is protected by the grant locks. */ - xlog_grant_head_wake_all(&log->l_reserve_head); - xlog_grant_head_wake_all(&log->l_write_head); + spin_lock(&log->l_grant_reserve_lock); + list_for_each_entry(tic, &log->l_reserveq, t_queue) + wake_up(&tic->t_wait); + spin_unlock(&log->l_grant_reserve_lock); + + spin_lock(&log->l_grant_write_lock); + list_for_each_entry(tic, &log->l_writeq, t_queue) + wake_up(&tic->t_wait); + spin_unlock(&log->l_grant_write_lock); if (!(log->l_iclog->ic_state & XLOG_STATE_IOERROR)) { ASSERT(!logerror); diff --git a/trunk/fs/xfs/xfs_log.h b/trunk/fs/xfs/xfs_log.h index 2c622bedb302..2aee3b22d29c 100644 --- a/trunk/fs/xfs/xfs_log.h +++ b/trunk/fs/xfs/xfs_log.h @@ -52,6 +52,15 @@ static inline xfs_lsn_t _lsn_cmp(xfs_lsn_t lsn1, xfs_lsn_t lsn2) */ #define XFS_LOG_REL_PERM_RESERV 0x1 +/* + * Flags to xfs_log_reserve() + * + * XFS_LOG_PERM_RESERV: Permanent reservation. When writes are + * performed against this type of reservation, the reservation + * is not decreased. Long running transactions should use this. 
+ */ +#define XFS_LOG_PERM_RESERV 0x2 + /* * Flags to xfs_log_force() * @@ -151,8 +160,8 @@ int xfs_log_mount(struct xfs_mount *mp, xfs_daddr_t start_block, int num_bblocks); int xfs_log_mount_finish(struct xfs_mount *mp); -xfs_lsn_t xlog_assign_tail_lsn(struct xfs_mount *mp); -void xfs_log_space_wake(struct xfs_mount *mp); +void xfs_log_move_tail(struct xfs_mount *mp, + xfs_lsn_t tail_lsn); int xfs_log_notify(struct xfs_mount *mp, struct xlog_in_core *iclog, xfs_log_callback_t *callback_entry); @@ -163,9 +172,8 @@ int xfs_log_reserve(struct xfs_mount *mp, int count, struct xlog_ticket **ticket, __uint8_t clientid, - bool permanent, + uint flags, uint t_type); -int xfs_log_regrant(struct xfs_mount *mp, struct xlog_ticket *tic); int xfs_log_unmount_write(struct xfs_mount *mp); void xfs_log_unmount(struct xfs_mount *mp); int xfs_log_force_umount(struct xfs_mount *mp, int logerror); diff --git a/trunk/fs/xfs/xfs_log_priv.h b/trunk/fs/xfs/xfs_log_priv.h index 2152900b79d4..2d3b6a498d63 100644 --- a/trunk/fs/xfs/xfs_log_priv.h +++ b/trunk/fs/xfs/xfs_log_priv.h @@ -239,8 +239,8 @@ typedef struct xlog_res { } xlog_res_t; typedef struct xlog_ticket { + wait_queue_head_t t_wait; /* ticket wait queue */ struct list_head t_queue; /* reserve/write queue */ - struct task_struct *t_task; /* task that owns this ticket */ xlog_tid_t t_tid; /* transaction identifier : 4 */ atomic_t t_ref; /* ticket reference count : 4 */ int t_curr_res; /* current reservation in bytes : 4 */ @@ -469,16 +469,6 @@ struct xfs_cil { #define XLOG_CIL_SPACE_LIMIT(log) (log->l_logsize >> 3) #define XLOG_CIL_HARD_SPACE_LIMIT(log) (3 * (log->l_logsize >> 4)) -/* - * ticket grant locks, queues and accounting have their own cachlines - * as these are quite hot and can be operated on concurrently. - */ -struct xlog_grant_head { - spinlock_t lock ____cacheline_aligned_in_smp; - struct list_head waiters; - atomic64_t grant; -}; - /* * The reservation head lsn is not made up of a cycle number and block number. 
* Instead, it uses a cycle number and byte number. Logs don't expect to @@ -530,8 +520,17 @@ typedef struct log { /* lsn of 1st LR with unflushed * buffers */ atomic64_t l_tail_lsn ____cacheline_aligned_in_smp; - struct xlog_grant_head l_reserve_head; - struct xlog_grant_head l_write_head; + /* + * ticket grant locks, queues and accounting have their own cachlines + * as these are quite hot and can be operated on concurrently. + */ + spinlock_t l_grant_reserve_lock ____cacheline_aligned_in_smp; + struct list_head l_reserveq; + atomic64_t l_grant_reserve_head; + + spinlock_t l_grant_write_lock ____cacheline_aligned_in_smp; + struct list_head l_writeq; + atomic64_t l_grant_write_head; /* The following field are used for debugging; need to hold icloglock */ #ifdef DEBUG @@ -546,13 +545,14 @@ typedef struct log { #define XLOG_FORCED_SHUTDOWN(log) ((log)->l_flags & XLOG_IO_ERROR) /* common routines */ +extern xfs_lsn_t xlog_assign_tail_lsn(struct xfs_mount *mp); extern int xlog_recover(xlog_t *log); extern int xlog_recover_finish(xlog_t *log); extern void xlog_pack_data(xlog_t *log, xlog_in_core_t *iclog, int); extern kmem_zone_t *xfs_log_ticket_zone; struct xlog_ticket *xlog_ticket_alloc(struct log *log, int unit_bytes, - int count, char client, bool permanent, + int count, char client, uint xflags, int alloc_flags); diff --git a/trunk/fs/xfs/xfs_log_recover.c b/trunk/fs/xfs/xfs_log_recover.c index 7c75c7374d5a..0ed9ee77937c 100644 --- a/trunk/fs/xfs/xfs_log_recover.c +++ b/trunk/fs/xfs/xfs_log_recover.c @@ -965,9 +965,9 @@ xlog_find_tail( log->l_curr_cycle++; atomic64_set(&log->l_tail_lsn, be64_to_cpu(rhead->h_tail_lsn)); atomic64_set(&log->l_last_sync_lsn, be64_to_cpu(rhead->h_lsn)); - xlog_assign_grant_head(&log->l_reserve_head.grant, log->l_curr_cycle, + xlog_assign_grant_head(&log->l_grant_reserve_head, log->l_curr_cycle, BBTOB(log->l_curr_block)); - xlog_assign_grant_head(&log->l_write_head.grant, log->l_curr_cycle, + 
xlog_assign_grant_head(&log->l_grant_write_head, log->l_curr_cycle, BBTOB(log->l_curr_block)); /* @@ -3695,7 +3695,7 @@ xlog_do_recover( /* Convert superblock from on-disk format */ sbp = &log->l_mp->m_sb; - xfs_sb_from_disk(log->l_mp, XFS_BUF_TO_SBP(bp)); + xfs_sb_from_disk(sbp, XFS_BUF_TO_SBP(bp)); ASSERT(sbp->sb_magicnum == XFS_SB_MAGIC); ASSERT(xfs_sb_good_version(sbp)); xfs_buf_relse(bp); diff --git a/trunk/fs/xfs/xfs_mount.c b/trunk/fs/xfs/xfs_mount.c index 1ffead4b2296..d06afbc3540d 100644 --- a/trunk/fs/xfs/xfs_mount.c +++ b/trunk/fs/xfs/xfs_mount.c @@ -158,7 +158,7 @@ xfs_uuid_mount( out_duplicate: mutex_unlock(&xfs_uuid_table_mutex); - xfs_warn(mp, "Filesystem has duplicate UUID %pU - can't mount", uuid); + xfs_warn(mp, "Filesystem has duplicate UUID - can't mount"); return XFS_ERROR(EINVAL); } @@ -553,11 +553,9 @@ xfs_initialize_perag( void xfs_sb_from_disk( - struct xfs_mount *mp, + xfs_sb_t *to, xfs_dsb_t *from) { - struct xfs_sb *to = &mp->m_sb; - to->sb_magicnum = be32_to_cpu(from->sb_magicnum); to->sb_blocksize = be32_to_cpu(from->sb_blocksize); to->sb_dblocks = be64_to_cpu(from->sb_dblocks); @@ -695,7 +693,7 @@ xfs_readsb(xfs_mount_t *mp, int flags) * Initialize the mount structure from the superblock. * But first do some basic consistency checking. 
*/ - xfs_sb_from_disk(mp, XFS_BUF_TO_SBP(bp)); + xfs_sb_from_disk(&mp->m_sb, XFS_BUF_TO_SBP(bp)); error = xfs_mount_validate_sb(mp, &(mp->m_sb), flags); if (error) { if (loud) diff --git a/trunk/fs/xfs/xfs_mount.h b/trunk/fs/xfs/xfs_mount.h index 9eba73887829..19f69e232509 100644 --- a/trunk/fs/xfs/xfs_mount.h +++ b/trunk/fs/xfs/xfs_mount.h @@ -211,9 +211,6 @@ typedef struct xfs_mount { struct shrinker m_inode_shrink; /* inode reclaim shrinker */ int64_t m_low_space[XFS_LOWSP_MAX]; /* low free space thresholds */ - - struct workqueue_struct *m_data_workqueue; - struct workqueue_struct *m_unwritten_workqueue; } xfs_mount_t; /* @@ -398,7 +395,7 @@ extern void xfs_set_low_space_thresholds(struct xfs_mount *); extern void xfs_mod_sb(struct xfs_trans *, __int64_t); extern int xfs_initialize_perag(struct xfs_mount *, xfs_agnumber_t, xfs_agnumber_t *); -extern void xfs_sb_from_disk(struct xfs_mount *, struct xfs_dsb *); +extern void xfs_sb_from_disk(struct xfs_sb *, struct xfs_dsb *); extern void xfs_sb_to_disk(struct xfs_dsb *, struct xfs_sb *, __int64_t); #endif /* __XFS_MOUNT_H__ */ diff --git a/trunk/fs/xfs/xfs_qm.c b/trunk/fs/xfs/xfs_qm.c index 55c6afedc879..c436def733bf 100644 --- a/trunk/fs/xfs/xfs_qm.c +++ b/trunk/fs/xfs/xfs_qm.c @@ -48,189 +48,194 @@ * quota functionality, including maintaining the freelist and hash * tables of dquots. 
*/ +struct mutex xfs_Gqm_lock; +struct xfs_qm *xfs_Gqm; + +kmem_zone_t *qm_dqzone; +kmem_zone_t *qm_dqtrxzone; + +STATIC void xfs_qm_list_init(xfs_dqlist_t *, char *, int); +STATIC void xfs_qm_list_destroy(xfs_dqlist_t *); + STATIC int xfs_qm_init_quotainos(xfs_mount_t *); STATIC int xfs_qm_init_quotainfo(xfs_mount_t *); STATIC int xfs_qm_shake(struct shrinker *, struct shrink_control *); +static struct shrinker xfs_qm_shaker = { + .shrink = xfs_qm_shake, + .seeks = DEFAULT_SEEKS, +}; + /* - * We use the batch lookup interface to iterate over the dquots as it - * currently is the only interface into the radix tree code that allows - * fuzzy lookups instead of exact matches. Holding the lock over multiple - * operations is fine as all callers are used either during mount/umount - * or quotaoff. + * Initialize the XQM structure. + * Note that there is not one quota manager per file system. */ -#define XFS_DQ_LOOKUP_BATCH 32 - -STATIC int -xfs_qm_dquot_walk( - struct xfs_mount *mp, - int type, - int (*execute)(struct xfs_dquot *dqp)) +STATIC struct xfs_qm * +xfs_Gqm_init(void) { - struct xfs_quotainfo *qi = mp->m_quotainfo; - struct radix_tree_root *tree = XFS_DQUOT_TREE(qi, type); - uint32_t next_index; - int last_error = 0; - int skipped; - int nr_found; - -restart: - skipped = 0; - next_index = 0; - nr_found = 0; - - while (1) { - struct xfs_dquot *batch[XFS_DQ_LOOKUP_BATCH]; - int error = 0; - int i; - - mutex_lock(&qi->qi_tree_lock); - nr_found = radix_tree_gang_lookup(tree, (void **)batch, - next_index, XFS_DQ_LOOKUP_BATCH); - if (!nr_found) { - mutex_unlock(&qi->qi_tree_lock); - break; - } + xfs_dqhash_t *udqhash, *gdqhash; + xfs_qm_t *xqm; + size_t hsize; + uint i; - for (i = 0; i < nr_found; i++) { - struct xfs_dquot *dqp = batch[i]; + /* + * Initialize the dquot hash tables. 
+ */ + udqhash = kmem_zalloc_greedy(&hsize, + XFS_QM_HASHSIZE_LOW * sizeof(xfs_dqhash_t), + XFS_QM_HASHSIZE_HIGH * sizeof(xfs_dqhash_t)); + if (!udqhash) + goto out; - next_index = be32_to_cpu(dqp->q_core.d_id) + 1; + gdqhash = kmem_zalloc_large(hsize); + if (!gdqhash) + goto out_free_udqhash; - error = execute(batch[i]); - if (error == EAGAIN) { - skipped++; - continue; - } - if (error && last_error != EFSCORRUPTED) - last_error = error; - } + hsize /= sizeof(xfs_dqhash_t); - mutex_unlock(&qi->qi_tree_lock); + xqm = kmem_zalloc(sizeof(xfs_qm_t), KM_SLEEP); + xqm->qm_dqhashmask = hsize - 1; + xqm->qm_usr_dqhtable = udqhash; + xqm->qm_grp_dqhtable = gdqhash; + ASSERT(xqm->qm_usr_dqhtable != NULL); + ASSERT(xqm->qm_grp_dqhtable != NULL); - /* bail out if the filesystem is corrupted. */ - if (last_error == EFSCORRUPTED) { - skipped = 0; - break; - } + for (i = 0; i < hsize; i++) { + xfs_qm_list_init(&(xqm->qm_usr_dqhtable[i]), "uxdqh", i); + xfs_qm_list_init(&(xqm->qm_grp_dqhtable[i]), "gxdqh", i); } - if (skipped) { - delay(1); - goto restart; - } + /* + * Freelist of all dquots of all file systems + */ + INIT_LIST_HEAD(&xqm->qm_dqfrlist); + xqm->qm_dqfrlist_cnt = 0; + mutex_init(&xqm->qm_dqfrlist_lock); - return last_error; -} + /* + * dquot zone. we register our own low-memory callback. + */ + if (!qm_dqzone) { + xqm->qm_dqzone = kmem_zone_init(sizeof(xfs_dquot_t), + "xfs_dquots"); + qm_dqzone = xqm->qm_dqzone; + } else + xqm->qm_dqzone = qm_dqzone; + register_shrinker(&xfs_qm_shaker); + + /* + * The t_dqinfo portion of transactions. + */ + if (!qm_dqtrxzone) { + xqm->qm_dqtrxzone = kmem_zone_init(sizeof(xfs_dquot_acct_t), + "xfs_dqtrx"); + qm_dqtrxzone = xqm->qm_dqtrxzone; + } else + xqm->qm_dqtrxzone = qm_dqtrxzone; + + atomic_set(&xqm->qm_totaldquots, 0); + xqm->qm_nrefs = 0; + return xqm; + + out_free_udqhash: + kmem_free_large(udqhash); + out: + return NULL; +} /* - * Purge a dquot from all tracking data structures and free it. 
+ * Destroy the global quota manager when its reference count goes to zero. */ -STATIC int -xfs_qm_dqpurge( - struct xfs_dquot *dqp) +STATIC void +xfs_qm_destroy( + struct xfs_qm *xqm) { - struct xfs_mount *mp = dqp->q_mount; - struct xfs_quotainfo *qi = mp->m_quotainfo; - struct xfs_dquot *gdqp = NULL; + int hsize, i; - xfs_dqlock(dqp); - if ((dqp->dq_flags & XFS_DQ_FREEING) || dqp->q_nrefs != 0) { - xfs_dqunlock(dqp); - return EAGAIN; - } + ASSERT(xqm != NULL); + ASSERT(xqm->qm_nrefs == 0); - /* - * If this quota has a group hint attached, prepare for releasing it - * now. - */ - gdqp = dqp->q_gdquot; - if (gdqp) { - xfs_dqlock(gdqp); - dqp->q_gdquot = NULL; - } + unregister_shrinker(&xfs_qm_shaker); - dqp->dq_flags |= XFS_DQ_FREEING; + mutex_lock(&xqm->qm_dqfrlist_lock); + ASSERT(list_empty(&xqm->qm_dqfrlist)); + mutex_unlock(&xqm->qm_dqfrlist_lock); - /* - * If we're turning off quotas, we have to make sure that, for - * example, we don't delete quota disk blocks while dquots are - * in the process of getting written to those disk blocks. - * This dquot might well be on AIL, and we can't leave it there - * if we're turning off quotas. Basically, we need this flush - * lock, and are willing to block on it. - */ - if (!xfs_dqflock_nowait(dqp)) { - /* - * Block on the flush lock after nudging dquot buffer, - * if it is incore. - */ - xfs_dqflock_pushbuf_wait(dqp); + hsize = xqm->qm_dqhashmask + 1; + for (i = 0; i < hsize; i++) { + xfs_qm_list_destroy(&(xqm->qm_usr_dqhtable[i])); + xfs_qm_list_destroy(&(xqm->qm_grp_dqhtable[i])); } + kmem_free_large(xqm->qm_usr_dqhtable); + kmem_free_large(xqm->qm_grp_dqhtable); + xqm->qm_usr_dqhtable = NULL; + xqm->qm_grp_dqhtable = NULL; + xqm->qm_dqhashmask = 0; + kmem_free(xqm); +} + +/* + * Called at mount time to let XQM know that another file system is + * starting quotas. 
This isn't crucial information as the individual mount + * structures are pretty independent, but it helps the XQM keep a + * global view of what's going on. + */ +/* ARGSUSED */ +STATIC int +xfs_qm_hold_quotafs_ref( + struct xfs_mount *mp) +{ /* - * If we are turning this type of quotas off, we don't care - * about the dirty metadata sitting in this dquot. OTOH, if - * we're unmounting, we do care, so we flush it and wait. + * Need to lock the xfs_Gqm structure for things like this. For example, + * the structure could disappear between the entry to this routine and + * a HOLD operation if not locked. */ - if (XFS_DQ_IS_DIRTY(dqp)) { - int error; + mutex_lock(&xfs_Gqm_lock); - /* - * We don't care about getting disk errors here. We need - * to purge this dquot anyway, so we go ahead regardless. - */ - error = xfs_qm_dqflush(dqp, SYNC_WAIT); - if (error) - xfs_warn(mp, "%s: dquot %p flush failed", - __func__, dqp); - xfs_dqflock(dqp); + if (!xfs_Gqm) { + xfs_Gqm = xfs_Gqm_init(); + if (!xfs_Gqm) { + mutex_unlock(&xfs_Gqm_lock); + return ENOMEM; + } } - ASSERT(atomic_read(&dqp->q_pincount) == 0); - ASSERT(XFS_FORCED_SHUTDOWN(mp) || - !(dqp->q_logitem.qli_item.li_flags & XFS_LI_IN_AIL)); - - xfs_dqfunlock(dqp); - xfs_dqunlock(dqp); - - radix_tree_delete(XFS_DQUOT_TREE(qi, dqp->q_core.d_flags), - be32_to_cpu(dqp->q_core.d_id)); - qi->qi_dquots--; - /* - * We move dquots to the freelist as soon as their reference count - * hits zero, so it really should be on the freelist here. + * We can keep a list of all filesystems with quotas mounted for + * debugging and statistical purposes, but ... + * Just take a reference and get out. 
*/ - mutex_lock(&qi->qi_lru_lock); - ASSERT(!list_empty(&dqp->q_lru)); - list_del_init(&dqp->q_lru); - qi->qi_lru_count--; - XFS_STATS_DEC(xs_qm_dquot_unused); - mutex_unlock(&qi->qi_lru_lock); + xfs_Gqm->qm_nrefs++; + mutex_unlock(&xfs_Gqm_lock); - xfs_qm_dqdestroy(dqp); - - if (gdqp) - xfs_qm_dqput(gdqp); return 0; } + /* - * Purge the dquot cache. + * Release the reference that a filesystem took at mount time, + * so that we know when we need to destroy the entire quota manager. */ -void -xfs_qm_dqpurge_all( - struct xfs_mount *mp, - uint flags) +/* ARGSUSED */ +STATIC void +xfs_qm_rele_quotafs_ref( + struct xfs_mount *mp) { - if (flags & XFS_QMOPT_UQUOTA) - xfs_qm_dquot_walk(mp, XFS_DQ_USER, xfs_qm_dqpurge); - if (flags & XFS_QMOPT_GQUOTA) - xfs_qm_dquot_walk(mp, XFS_DQ_GROUP, xfs_qm_dqpurge); - if (flags & XFS_QMOPT_PQUOTA) - xfs_qm_dquot_walk(mp, XFS_DQ_PROJ, xfs_qm_dqpurge); + ASSERT(xfs_Gqm); + ASSERT(xfs_Gqm->qm_nrefs > 0); + + /* + * Destroy the entire XQM. If somebody mounts with quotaon, this'll + * be restarted. + */ + mutex_lock(&xfs_Gqm_lock); + if (--xfs_Gqm->qm_nrefs == 0) { + xfs_qm_destroy(xfs_Gqm); + xfs_Gqm = NULL; + } + mutex_unlock(&xfs_Gqm_lock); } /* @@ -371,6 +376,175 @@ xfs_qm_unmount_quotas( } } +/* + * Flush all dquots of the given file system to disk. The dquots are + * _not_ purged from memory here, just their data written to disk. 
+ */ +STATIC int +xfs_qm_dqflush_all( + struct xfs_mount *mp) +{ + struct xfs_quotainfo *q = mp->m_quotainfo; + int recl; + struct xfs_dquot *dqp; + int error; + + if (!q) + return 0; +again: + mutex_lock(&q->qi_dqlist_lock); + list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) { + xfs_dqlock(dqp); + if ((dqp->dq_flags & XFS_DQ_FREEING) || + !XFS_DQ_IS_DIRTY(dqp)) { + xfs_dqunlock(dqp); + continue; + } + + /* XXX a sentinel would be better */ + recl = q->qi_dqreclaims; + if (!xfs_dqflock_nowait(dqp)) { + /* + * If we can't grab the flush lock then check + * to see if the dquot has been flushed delayed + * write. If so, grab its buffer and send it + * out immediately. We'll be able to acquire + * the flush lock when the I/O completes. + */ + xfs_dqflock_pushbuf_wait(dqp); + } + /* + * Let go of the mplist lock. We don't want to hold it + * across a disk write. + */ + mutex_unlock(&q->qi_dqlist_lock); + error = xfs_qm_dqflush(dqp, 0); + xfs_dqunlock(dqp); + if (error) + return error; + + mutex_lock(&q->qi_dqlist_lock); + if (recl != q->qi_dqreclaims) { + mutex_unlock(&q->qi_dqlist_lock); + /* XXX restart limit */ + goto again; + } + } + + mutex_unlock(&q->qi_dqlist_lock); + /* return ! busy */ + return 0; +} + +/* + * Release the group dquot pointers the user dquots may be + * carrying around as a hint. mplist is locked on entry and exit. 
+ */ +STATIC void +xfs_qm_detach_gdquots( + struct xfs_mount *mp) +{ + struct xfs_quotainfo *q = mp->m_quotainfo; + struct xfs_dquot *dqp, *gdqp; + + again: + ASSERT(mutex_is_locked(&q->qi_dqlist_lock)); + list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) { + xfs_dqlock(dqp); + if (dqp->dq_flags & XFS_DQ_FREEING) { + xfs_dqunlock(dqp); + mutex_unlock(&q->qi_dqlist_lock); + delay(1); + mutex_lock(&q->qi_dqlist_lock); + goto again; + } + + gdqp = dqp->q_gdquot; + if (gdqp) + dqp->q_gdquot = NULL; + xfs_dqunlock(dqp); + + if (gdqp) + xfs_qm_dqrele(gdqp); + } +} + +/* + * Go through all the incore dquots of this file system and take them + * off the mplist and hashlist, if the dquot type matches the dqtype + * parameter. This is used when turning off quota accounting for + * users and/or groups, as well as when the filesystem is unmounting. + */ +STATIC int +xfs_qm_dqpurge_int( + struct xfs_mount *mp, + uint flags) +{ + struct xfs_quotainfo *q = mp->m_quotainfo; + struct xfs_dquot *dqp, *n; + uint dqtype; + int nmisses = 0; + LIST_HEAD (dispose_list); + + if (!q) + return 0; + + dqtype = (flags & XFS_QMOPT_UQUOTA) ? XFS_DQ_USER : 0; + dqtype |= (flags & XFS_QMOPT_PQUOTA) ? XFS_DQ_PROJ : 0; + dqtype |= (flags & XFS_QMOPT_GQUOTA) ? XFS_DQ_GROUP : 0; + + mutex_lock(&q->qi_dqlist_lock); + + /* + * In the first pass through all incore dquots of this filesystem, + * we release the group dquot pointers the user dquots may be + * carrying around as a hint. We need to do this irrespective of + * what's being turned off. + */ + xfs_qm_detach_gdquots(mp); + + /* + * Try to get rid of all of the unwanted dquots. 
+ */ + list_for_each_entry_safe(dqp, n, &q->qi_dqlist, q_mplist) { + xfs_dqlock(dqp); + if ((dqp->dq_flags & dqtype) != 0 && + !(dqp->dq_flags & XFS_DQ_FREEING)) { + if (dqp->q_nrefs == 0) { + dqp->dq_flags |= XFS_DQ_FREEING; + list_move_tail(&dqp->q_mplist, &dispose_list); + } else + nmisses++; + } + xfs_dqunlock(dqp); + } + mutex_unlock(&q->qi_dqlist_lock); + + list_for_each_entry_safe(dqp, n, &dispose_list, q_mplist) + xfs_qm_dqpurge(dqp); + + return nmisses; +} + +int +xfs_qm_dqpurge_all( + xfs_mount_t *mp, + uint flags) +{ + int ndquots; + + /* + * Purge the dquot cache. + * None of the dquots should really be busy at this point. + */ + if (mp->m_quotainfo) { + while ((ndquots = xfs_qm_dqpurge_int(mp, flags))) { + delay(ndquots * 10); + } + } + return 0; +} + STATIC int xfs_qm_dqattach_one( xfs_inode_t *ip, @@ -608,6 +782,14 @@ xfs_qm_dqdetach( } } +/* + * The hash chains and the mplist use the same xfs_dqhash structure as + * their list head, but we can take the mplist qh_lock and one of the + * hash qh_locks at the same time without any problem as they aren't + * related. + */ +static struct lock_class_key xfs_quota_mplist_class; + /* * This initializes all the quota information that's kept in the * mount structure @@ -622,6 +804,13 @@ xfs_qm_init_quotainfo( ASSERT(XFS_IS_QUOTA_RUNNING(mp)); + /* + * Tell XQM that we exist as soon as possible. 
+ */ + if ((error = xfs_qm_hold_quotafs_ref(mp))) { + return error; + } + qinf = mp->m_quotainfo = kmem_zalloc(sizeof(xfs_quotainfo_t), KM_SLEEP); /* @@ -634,13 +823,11 @@ xfs_qm_init_quotainfo( return error; } - INIT_RADIX_TREE(&qinf->qi_uquota_tree, GFP_NOFS); - INIT_RADIX_TREE(&qinf->qi_gquota_tree, GFP_NOFS); - mutex_init(&qinf->qi_tree_lock); + INIT_LIST_HEAD(&qinf->qi_dqlist); + mutex_init(&qinf->qi_dqlist_lock); + lockdep_set_class(&qinf->qi_dqlist_lock, &xfs_quota_mplist_class); - INIT_LIST_HEAD(&qinf->qi_lru_list); - qinf->qi_lru_count = 0; - mutex_init(&qinf->qi_lru_lock); + qinf->qi_dqreclaims = 0; /* mutex used to serialize quotaoffs */ mutex_init(&qinf->qi_quotaofflock); @@ -707,9 +894,6 @@ xfs_qm_init_quotainfo( qinf->qi_rtbwarnlimit = XFS_QM_RTBWARNLIMIT; } - qinf->qi_shrinker.shrink = xfs_qm_shake; - qinf->qi_shrinker.seeks = DEFAULT_SEEKS; - register_shrinker(&qinf->qi_shrinker); return 0; } @@ -727,8 +911,17 @@ xfs_qm_destroy_quotainfo( qi = mp->m_quotainfo; ASSERT(qi != NULL); + ASSERT(xfs_Gqm != NULL); + + /* + * Release the reference that XQM kept, so that we know + * when the XQM structure should be freed. We cannot assume + * that xfs_Gqm is non-null after this point. 
+ */ + xfs_qm_rele_quotafs_ref(mp); - unregister_shrinker(&qi->qi_shrinker); + ASSERT(list_empty(&qi->qi_dqlist)); + mutex_destroy(&qi->qi_dqlist_lock); if (qi->qi_uquotaip) { IRELE(qi->qi_uquotaip); @@ -743,6 +936,30 @@ xfs_qm_destroy_quotainfo( mp->m_quotainfo = NULL; } + + +/* ------------------- PRIVATE STATIC FUNCTIONS ----------------------- */ + +/* ARGSUSED */ +STATIC void +xfs_qm_list_init( + xfs_dqlist_t *list, + char *str, + int n) +{ + mutex_init(&list->qh_lock); + INIT_LIST_HEAD(&list->qh_list); + list->qh_version = 0; + list->qh_nelems = 0; +} + +STATIC void +xfs_qm_list_destroy( + xfs_dqlist_t *list) +{ + mutex_destroy(&(list->qh_lock)); +} + /* * Create an inode and return with a reference already taken, but unlocked * This is how we create quota inodes @@ -1180,28 +1397,6 @@ xfs_qm_dqusage_adjust( return error; } -STATIC int -xfs_qm_flush_one( - struct xfs_dquot *dqp) -{ - int error = 0; - - xfs_dqlock(dqp); - if (dqp->dq_flags & XFS_DQ_FREEING) - goto out_unlock; - if (!XFS_DQ_IS_DIRTY(dqp)) - goto out_unlock; - - if (!xfs_dqflock_nowait(dqp)) - xfs_dqflock_pushbuf_wait(dqp); - - error = xfs_qm_dqflush(dqp, 0); - -out_unlock: - xfs_dqunlock(dqp); - return error; -} - /* * Walk thru all the filesystem inodes and construct a consistent view * of the disk quota world. If the quotacheck fails, disable quotas. @@ -1210,7 +1405,7 @@ int xfs_qm_quotacheck( xfs_mount_t *mp) { - int done, count, error, error2; + int done, count, error; xfs_ino_t lastino; size_t structsz; xfs_inode_t *uip, *gip; @@ -1224,6 +1419,12 @@ xfs_qm_quotacheck( ASSERT(mp->m_quotainfo->qi_uquotaip || mp->m_quotainfo->qi_gquotaip); ASSERT(XFS_IS_QUOTA_RUNNING(mp)); + /* + * There should be no cached dquots. The (simplistic) quotacheck + * algorithm doesn't like that. 
+ */ + ASSERT(list_empty(&mp->m_quotainfo->qi_dqlist)); + xfs_notice(mp, "Quotacheck needed: Please wait."); /* @@ -1262,21 +1463,12 @@ xfs_qm_quotacheck( } while (!done); /* - * We've made all the changes that we need to make incore. Flush them - * down to disk buffers if everything was updated successfully. + * We've made all the changes that we need to make incore. + * Flush them down to disk buffers if everything was updated + * successfully. */ - if (XFS_IS_UQUOTA_ON(mp)) - error = xfs_qm_dquot_walk(mp, XFS_DQ_USER, xfs_qm_flush_one); - if (XFS_IS_GQUOTA_ON(mp)) { - error2 = xfs_qm_dquot_walk(mp, XFS_DQ_GROUP, xfs_qm_flush_one); - if (!error) - error = error2; - } - if (XFS_IS_PQUOTA_ON(mp)) { - error2 = xfs_qm_dquot_walk(mp, XFS_DQ_PROJ, xfs_qm_flush_one); - if (!error) - error = error2; - } + if (!error) + error = xfs_qm_dqflush_all(mp); /* * We can get this error if we couldn't do a dquot allocation inside @@ -1304,7 +1496,7 @@ xfs_qm_quotacheck( * quotachecked status, since we won't be doing accounting for * that type anymore. */ - mp->m_qflags &= ~XFS_ALL_QUOTA_CHKD; + mp->m_qflags &= ~(XFS_OQUOTA_CHKD | XFS_UQUOTA_CHKD); mp->m_qflags |= flags; error_return: @@ -1316,6 +1508,7 @@ xfs_qm_quotacheck( * We must turn off quotas. 
*/ ASSERT(mp->m_quotainfo != NULL); + ASSERT(xfs_Gqm != NULL); xfs_qm_destroy_quotainfo(mp); if (xfs_mount_reset_sbqflags(mp)) { xfs_warn(mp, @@ -1411,12 +1604,16 @@ xfs_qm_dqfree_one( struct xfs_mount *mp = dqp->q_mount; struct xfs_quotainfo *qi = mp->m_quotainfo; - mutex_lock(&qi->qi_tree_lock); - radix_tree_delete(XFS_DQUOT_TREE(qi, dqp->q_core.d_flags), - be32_to_cpu(dqp->q_core.d_id)); + mutex_lock(&dqp->q_hash->qh_lock); + list_del_init(&dqp->q_hashlist); + dqp->q_hash->qh_version++; + mutex_unlock(&dqp->q_hash->qh_lock); + mutex_lock(&qi->qi_dqlist_lock); + list_del_init(&dqp->q_mplist); qi->qi_dquots--; - mutex_unlock(&qi->qi_tree_lock); + qi->qi_dqreclaims++; + mutex_unlock(&qi->qi_dqlist_lock); xfs_qm_dqdestroy(dqp); } @@ -1427,7 +1624,6 @@ xfs_qm_dqreclaim_one( struct list_head *dispose_list) { struct xfs_mount *mp = dqp->q_mount; - struct xfs_quotainfo *qi = mp->m_quotainfo; int error; if (!xfs_dqlock_nowait(dqp)) @@ -1441,14 +1637,16 @@ xfs_qm_dqreclaim_one( xfs_dqunlock(dqp); trace_xfs_dqreclaim_want(dqp); - XFS_STATS_INC(xs_qm_dqwants); + XQM_STATS_INC(xqmstats.xs_qm_dqwants); - list_del_init(&dqp->q_lru); - qi->qi_lru_count--; - XFS_STATS_DEC(xs_qm_dquot_unused); + list_del_init(&dqp->q_freelist); + xfs_Gqm->qm_dqfrlist_cnt--; return; } + ASSERT(dqp->q_hash); + ASSERT(!list_empty(&dqp->q_mplist)); + /* * Try to grab the flush lock. If this dquot is in the process of * getting flushed to disk, we don't want to reclaim it. @@ -1490,12 +1688,11 @@ xfs_qm_dqreclaim_one( xfs_dqunlock(dqp); ASSERT(dqp->q_nrefs == 0); - list_move_tail(&dqp->q_lru, dispose_list); - qi->qi_lru_count--; - XFS_STATS_DEC(xs_qm_dquot_unused); + list_move_tail(&dqp->q_freelist, dispose_list); + xfs_Gqm->qm_dqfrlist_cnt--; trace_xfs_dqreclaim_done(dqp); - XFS_STATS_INC(xs_qm_dqreclaims); + XQM_STATS_INC(xqmstats.xs_qm_dqreclaims); return; out_busy: @@ -1504,10 +1701,10 @@ xfs_qm_dqreclaim_one( /* * Move the dquot to the tail of the list so that we don't spin on it. 
*/ - list_move_tail(&dqp->q_lru, &qi->qi_lru_list); + list_move_tail(&dqp->q_freelist, &xfs_Gqm->qm_dqfrlist); trace_xfs_dqreclaim_busy(dqp); - XFS_STATS_INC(xs_qm_dqreclaim_misses); + XQM_STATS_INC(xqmstats.xs_qm_dqreclaim_misses); } STATIC int @@ -1515,8 +1712,6 @@ xfs_qm_shake( struct shrinker *shrink, struct shrink_control *sc) { - struct xfs_quotainfo *qi = - container_of(shrink, struct xfs_quotainfo, qi_shrinker); int nr_to_scan = sc->nr_to_scan; LIST_HEAD (dispose_list); struct xfs_dquot *dqp; @@ -1526,23 +1721,24 @@ xfs_qm_shake( if (!nr_to_scan) goto out; - mutex_lock(&qi->qi_lru_lock); - while (!list_empty(&qi->qi_lru_list)) { + mutex_lock(&xfs_Gqm->qm_dqfrlist_lock); + while (!list_empty(&xfs_Gqm->qm_dqfrlist)) { if (nr_to_scan-- <= 0) break; - dqp = list_first_entry(&qi->qi_lru_list, struct xfs_dquot, - q_lru); + dqp = list_first_entry(&xfs_Gqm->qm_dqfrlist, struct xfs_dquot, + q_freelist); xfs_qm_dqreclaim_one(dqp, &dispose_list); } - mutex_unlock(&qi->qi_lru_lock); + mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock); while (!list_empty(&dispose_list)) { - dqp = list_first_entry(&dispose_list, struct xfs_dquot, q_lru); - list_del_init(&dqp->q_lru); + dqp = list_first_entry(&dispose_list, struct xfs_dquot, + q_freelist); + list_del_init(&dqp->q_freelist); xfs_qm_dqfree_one(dqp); } out: - return (qi->qi_lru_count / 100) * sysctl_vfs_cache_pressure; + return (xfs_Gqm->qm_dqfrlist_cnt / 100) * sysctl_vfs_cache_pressure; } /* diff --git a/trunk/fs/xfs/xfs_qm.h b/trunk/fs/xfs/xfs_qm.h index 44b858b79d71..9a9b997e1a0a 100644 --- a/trunk/fs/xfs/xfs_qm.h +++ b/trunk/fs/xfs/xfs_qm.h @@ -21,10 +21,21 @@ #include "xfs_dquot_item.h" #include "xfs_dquot.h" #include "xfs_quota_priv.h" +#include "xfs_qm_stats.h" +struct xfs_qm; struct xfs_inode; -extern struct kmem_zone *xfs_qm_dqtrxzone; +extern struct mutex xfs_Gqm_lock; +extern struct xfs_qm *xfs_Gqm; +extern kmem_zone_t *qm_dqzone; +extern kmem_zone_t *qm_dqtrxzone; + +/* + * Dquot hashtable constants/threshold values. 
+ */ +#define XFS_QM_HASHSIZE_LOW (PAGE_SIZE / sizeof(xfs_dqhash_t)) +#define XFS_QM_HASHSIZE_HIGH ((PAGE_SIZE * 4) / sizeof(xfs_dqhash_t)) /* * This defines the unit of allocation of dquots. @@ -37,20 +48,36 @@ extern struct kmem_zone *xfs_qm_dqtrxzone; */ #define XFS_DQUOT_CLUSTER_SIZE_FSB (xfs_filblks_t)1 +typedef xfs_dqhash_t xfs_dqlist_t; + +/* + * Quota Manager (global) structure. Lives only in core. + */ +typedef struct xfs_qm { + xfs_dqlist_t *qm_usr_dqhtable;/* udquot hash table */ + xfs_dqlist_t *qm_grp_dqhtable;/* gdquot hash table */ + uint qm_dqhashmask; /* # buckets in dq hashtab - 1 */ + struct list_head qm_dqfrlist; /* freelist of dquots */ + struct mutex qm_dqfrlist_lock; + int qm_dqfrlist_cnt; + atomic_t qm_totaldquots; /* total incore dquots */ + uint qm_nrefs; /* file systems with quota on */ + kmem_zone_t *qm_dqzone; /* dquot mem-alloc zone */ + kmem_zone_t *qm_dqtrxzone; /* t_dqinfo of transactions */ +} xfs_qm_t; + /* * Various quota information for individual filesystems. * The mount structure keeps a pointer to this. 
*/ typedef struct xfs_quotainfo { - struct radix_tree_root qi_uquota_tree; - struct radix_tree_root qi_gquota_tree; - struct mutex qi_tree_lock; xfs_inode_t *qi_uquotaip; /* user quota inode */ xfs_inode_t *qi_gquotaip; /* group quota inode */ - struct list_head qi_lru_list; - struct mutex qi_lru_lock; - int qi_lru_count; + struct list_head qi_dqlist; /* all dquots in filesys */ + struct mutex qi_dqlist_lock; int qi_dquots; + int qi_dqreclaims; /* a change here indicates + a removal in the dqlist */ time_t qi_btimelimit; /* limit for blks timer */ time_t qi_itimelimit; /* limit for inodes timer */ time_t qi_rtbtimelimit;/* limit for rt blks timer */ @@ -66,14 +93,8 @@ typedef struct xfs_quotainfo { xfs_qcnt_t qi_isoftlimit; /* default inode count soft limit */ xfs_qcnt_t qi_rtbhardlimit;/* default realtime blk hard limit */ xfs_qcnt_t qi_rtbsoftlimit;/* default realtime blk soft limit */ - struct shrinker qi_shrinker; } xfs_quotainfo_t; -#define XFS_DQUOT_TREE(qi, type) \ - ((type & XFS_DQ_USER) ? \ - &((qi)->qi_uquota_tree) : \ - &((qi)->qi_gquota_tree)) - extern void xfs_trans_mod_dquot(xfs_trans_t *, xfs_dquot_t *, uint, long); extern int xfs_trans_reserve_quota_bydquots(xfs_trans_t *, xfs_mount_t *, @@ -109,7 +130,7 @@ extern int xfs_qm_quotacheck(xfs_mount_t *); extern int xfs_qm_write_sb_changes(xfs_mount_t *, __int64_t); /* dquot stuff */ -extern void xfs_qm_dqpurge_all(xfs_mount_t *, uint); +extern int xfs_qm_dqpurge_all(xfs_mount_t *, uint); extern void xfs_qm_dqrele_all_inodes(xfs_mount_t *, uint); /* quota ops */ diff --git a/trunk/fs/xfs/xfs_qm_bhv.c b/trunk/fs/xfs/xfs_qm_bhv.c index e6986b5d80d8..a0a829addca9 100644 --- a/trunk/fs/xfs/xfs_qm_bhv.c +++ b/trunk/fs/xfs/xfs_qm_bhv.c @@ -40,28 +40,28 @@ STATIC void xfs_fill_statvfs_from_dquot( struct kstatfs *statp, - struct xfs_dquot *dqp) + xfs_disk_dquot_t *dp) { __uint64_t limit; - limit = dqp->q_core.d_blk_softlimit ? 
- be64_to_cpu(dqp->q_core.d_blk_softlimit) : - be64_to_cpu(dqp->q_core.d_blk_hardlimit); + limit = dp->d_blk_softlimit ? + be64_to_cpu(dp->d_blk_softlimit) : + be64_to_cpu(dp->d_blk_hardlimit); if (limit && statp->f_blocks > limit) { statp->f_blocks = limit; statp->f_bfree = statp->f_bavail = - (statp->f_blocks > dqp->q_res_bcount) ? - (statp->f_blocks - dqp->q_res_bcount) : 0; + (statp->f_blocks > be64_to_cpu(dp->d_bcount)) ? + (statp->f_blocks - be64_to_cpu(dp->d_bcount)) : 0; } - limit = dqp->q_core.d_ino_softlimit ? - be64_to_cpu(dqp->q_core.d_ino_softlimit) : - be64_to_cpu(dqp->q_core.d_ino_hardlimit); + limit = dp->d_ino_softlimit ? + be64_to_cpu(dp->d_ino_softlimit) : + be64_to_cpu(dp->d_ino_hardlimit); if (limit && statp->f_files > limit) { statp->f_files = limit; statp->f_ffree = - (statp->f_files > dqp->q_res_icount) ? - (statp->f_ffree - dqp->q_res_icount) : 0; + (statp->f_files > be64_to_cpu(dp->d_icount)) ? + (statp->f_ffree - be64_to_cpu(dp->d_icount)) : 0; } } @@ -82,7 +82,7 @@ xfs_qm_statvfs( xfs_dquot_t *dqp; if (!xfs_qm_dqget(mp, NULL, xfs_get_projid(ip), XFS_DQ_PROJ, 0, &dqp)) { - xfs_fill_statvfs_from_dquot(statp, dqp); + xfs_fill_statvfs_from_dquot(statp, &dqp->q_core); xfs_qm_dqput(dqp); } } @@ -156,3 +156,21 @@ xfs_qm_newmount( return 0; } + +void __init +xfs_qm_init(void) +{ + printk(KERN_INFO "SGI XFS Quota Management subsystem\n"); + mutex_init(&xfs_Gqm_lock); + xfs_qm_init_procfs(); +} + +void __exit +xfs_qm_exit(void) +{ + xfs_qm_cleanup_procfs(); + if (qm_dqzone) + kmem_zone_destroy(qm_dqzone); + if (qm_dqtrxzone) + kmem_zone_destroy(qm_dqtrxzone); +} diff --git a/trunk/fs/xfs/xfs_qm_stats.c b/trunk/fs/xfs/xfs_qm_stats.c new file mode 100644 index 000000000000..5729ba570877 --- /dev/null +++ b/trunk/fs/xfs/xfs_qm_stats.c @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2000-2003 Silicon Graphics, Inc. + * All Rights Reserved. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_bit.h" +#include "xfs_log.h" +#include "xfs_inum.h" +#include "xfs_trans.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_alloc.h" +#include "xfs_quota.h" +#include "xfs_mount.h" +#include "xfs_bmap_btree.h" +#include "xfs_inode.h" +#include "xfs_itable.h" +#include "xfs_bmap.h" +#include "xfs_rtalloc.h" +#include "xfs_error.h" +#include "xfs_attr.h" +#include "xfs_buf_item.h" +#include "xfs_qm.h" + +struct xqmstats xqmstats; + +static int xqm_proc_show(struct seq_file *m, void *v) +{ + /* maximum; incore; ratio free to inuse; freelist */ + seq_printf(m, "%d\t%d\t%d\t%u\n", + 0, + xfs_Gqm? atomic_read(&xfs_Gqm->qm_totaldquots) : 0, + 0, + xfs_Gqm? 
xfs_Gqm->qm_dqfrlist_cnt : 0); + return 0; +} + +static int xqm_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, xqm_proc_show, NULL); +} + +static const struct file_operations xqm_proc_fops = { + .owner = THIS_MODULE, + .open = xqm_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int xqmstat_proc_show(struct seq_file *m, void *v) +{ + /* quota performance statistics */ + seq_printf(m, "qm %u %u %u %u %u %u %u %u\n", + xqmstats.xs_qm_dqreclaims, + xqmstats.xs_qm_dqreclaim_misses, + xqmstats.xs_qm_dquot_dups, + xqmstats.xs_qm_dqcachemisses, + xqmstats.xs_qm_dqcachehits, + xqmstats.xs_qm_dqwants, + xqmstats.xs_qm_dqshake_reclaims, + xqmstats.xs_qm_dqinact_reclaims); + return 0; +} + +static int xqmstat_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, xqmstat_proc_show, NULL); +} + +static const struct file_operations xqmstat_proc_fops = { + .owner = THIS_MODULE, + .open = xqmstat_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +void +xfs_qm_init_procfs(void) +{ + proc_create("fs/xfs/xqmstat", 0, NULL, &xqmstat_proc_fops); + proc_create("fs/xfs/xqm", 0, NULL, &xqm_proc_fops); +} + +void +xfs_qm_cleanup_procfs(void) +{ + remove_proc_entry("fs/xfs/xqm", NULL); + remove_proc_entry("fs/xfs/xqmstat", NULL); +} diff --git a/trunk/fs/xfs/xfs_qm_stats.h b/trunk/fs/xfs/xfs_qm_stats.h new file mode 100644 index 000000000000..5b964fc0dc09 --- /dev/null +++ b/trunk/fs/xfs/xfs_qm_stats.h @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2002 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef __XFS_QM_STATS_H__ +#define __XFS_QM_STATS_H__ + +#if defined(CONFIG_PROC_FS) && !defined(XFS_STATS_OFF) + +/* + * XQM global statistics + */ +struct xqmstats { + __uint32_t xs_qm_dqreclaims; + __uint32_t xs_qm_dqreclaim_misses; + __uint32_t xs_qm_dquot_dups; + __uint32_t xs_qm_dqcachemisses; + __uint32_t xs_qm_dqcachehits; + __uint32_t xs_qm_dqwants; + __uint32_t xs_qm_dqshake_reclaims; + __uint32_t xs_qm_dqinact_reclaims; +}; + +extern struct xqmstats xqmstats; + +# define XQM_STATS_INC(count) ( (count)++ ) + +extern void xfs_qm_init_procfs(void); +extern void xfs_qm_cleanup_procfs(void); + +#else + +# define XQM_STATS_INC(count) do { } while (0) + +static inline void xfs_qm_init_procfs(void) { }; +static inline void xfs_qm_cleanup_procfs(void) { }; + +#endif + +#endif /* __XFS_QM_STATS_H__ */ diff --git a/trunk/fs/xfs/xfs_qm_syscalls.c b/trunk/fs/xfs/xfs_qm_syscalls.c index c4f396e437a8..711a86e39ff0 100644 --- a/trunk/fs/xfs/xfs_qm_syscalls.c +++ b/trunk/fs/xfs/xfs_qm_syscalls.c @@ -47,6 +47,9 @@ STATIC int xfs_qm_log_quotaoff_end(xfs_mount_t *, xfs_qoff_logitem_t *, uint); STATIC uint xfs_qm_export_flags(uint); STATIC uint xfs_qm_export_qtype_flags(uint); +STATIC void xfs_qm_export_dquot(xfs_mount_t *, xfs_disk_dquot_t *, + fs_disk_quota_t *); + /* * Turn off quota accounting and/or enforcement for all udquots and/or @@ -66,6 +69,7 @@ xfs_qm_scall_quotaoff( int error; uint inactivate_flags; xfs_qoff_logitem_t *qoffstart; + int nculprits; /* * No file system can have quotas enabled on disk 
but not in core. @@ -171,13 +175,18 @@ xfs_qm_scall_quotaoff( * This isn't protected by a particular lock directly, because we * don't want to take a mrlock every time we depend on quotas being on. */ - mp->m_qflags &= ~flags; + mp->m_qflags &= ~(flags); /* * Go through all the dquots of this file system and purge them, - * according to what was turned off. + * according to what was turned off. We may not be able to get rid + * of all dquots, because dquots can have temporary references that + * are not attached to inodes. eg. xfs_setattr, xfs_create. + * So, if we couldn't purge all the dquots from the filesystem, + * we can't get rid of the incore data structures. */ - xfs_qm_dqpurge_all(mp, dqtype); + while ((nculprits = xfs_qm_dqpurge_all(mp, dqtype))) + delay(10 * nculprits); /* * Transactions that had started before ACTIVE state bit was cleared @@ -626,6 +635,42 @@ xfs_qm_scall_setqlim( return error; } +int +xfs_qm_scall_getquota( + xfs_mount_t *mp, + xfs_dqid_t id, + uint type, + fs_disk_quota_t *out) +{ + xfs_dquot_t *dqp; + int error; + + /* + * Try to get the dquot. We don't want it allocated on disk, so + * we aren't passing the XFS_QMOPT_DOALLOC flag. If it doesn't + * exist, we'll get ENOENT back. + */ + if ((error = xfs_qm_dqget(mp, NULL, id, type, 0, &dqp))) { + return (error); + } + + /* + * If everything's NULL, this dquot doesn't quite exist as far as + * our utility programs are concerned. + */ + if (XFS_IS_DQUOT_UNINITIALIZED(dqp)) { + xfs_qm_dqput(dqp); + return XFS_ERROR(ENOENT); + } + /* + * Convert the disk dquot to the exportable format + */ + xfs_qm_export_dquot(mp, &dqp->q_core, out); + xfs_qm_dqput(dqp); + return (error ? XFS_ERROR(EFAULT) : 0); +} + + STATIC int xfs_qm_log_quotaoff_end( xfs_mount_t *mp, @@ -714,66 +759,50 @@ xfs_qm_log_quotaoff( } -int -xfs_qm_scall_getquota( - struct xfs_mount *mp, - xfs_dqid_t id, - uint type, +/* + * Translate an internal style on-disk-dquot to the exportable format. 
+ * The main differences are that the counters/limits are all in Basic + * Blocks (BBs) instead of the internal FSBs, and all on-disk data has + * to be converted to the native endianness. + */ +STATIC void +xfs_qm_export_dquot( + xfs_mount_t *mp, + xfs_disk_dquot_t *src, struct fs_disk_quota *dst) { - struct xfs_dquot *dqp; - int error; - - /* - * Try to get the dquot. We don't want it allocated on disk, so - * we aren't passing the XFS_QMOPT_DOALLOC flag. If it doesn't - * exist, we'll get ENOENT back. - */ - error = xfs_qm_dqget(mp, NULL, id, type, 0, &dqp); - if (error) - return error; - - /* - * If everything's NULL, this dquot doesn't quite exist as far as - * our utility programs are concerned. - */ - if (XFS_IS_DQUOT_UNINITIALIZED(dqp)) { - error = XFS_ERROR(ENOENT); - goto out_put; - } - memset(dst, 0, sizeof(*dst)); - dst->d_version = FS_DQUOT_VERSION; - dst->d_flags = xfs_qm_export_qtype_flags(dqp->q_core.d_flags); - dst->d_id = be32_to_cpu(dqp->q_core.d_id); + dst->d_version = FS_DQUOT_VERSION; /* different from src->d_version */ + dst->d_flags = xfs_qm_export_qtype_flags(src->d_flags); + dst->d_id = be32_to_cpu(src->d_id); dst->d_blk_hardlimit = - XFS_FSB_TO_BB(mp, be64_to_cpu(dqp->q_core.d_blk_hardlimit)); + XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_blk_hardlimit)); dst->d_blk_softlimit = - XFS_FSB_TO_BB(mp, be64_to_cpu(dqp->q_core.d_blk_softlimit)); - dst->d_ino_hardlimit = be64_to_cpu(dqp->q_core.d_ino_hardlimit); - dst->d_ino_softlimit = be64_to_cpu(dqp->q_core.d_ino_softlimit); - dst->d_bcount = XFS_FSB_TO_BB(mp, dqp->q_res_bcount); - dst->d_icount = dqp->q_res_icount; - dst->d_btimer = be32_to_cpu(dqp->q_core.d_btimer); - dst->d_itimer = be32_to_cpu(dqp->q_core.d_itimer); - dst->d_iwarns = be16_to_cpu(dqp->q_core.d_iwarns); - dst->d_bwarns = be16_to_cpu(dqp->q_core.d_bwarns); + XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_blk_softlimit)); + dst->d_ino_hardlimit = be64_to_cpu(src->d_ino_hardlimit); + dst->d_ino_softlimit = be64_to_cpu(src->d_ino_softlimit); + 
dst->d_bcount = XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_bcount)); + dst->d_icount = be64_to_cpu(src->d_icount); + dst->d_btimer = be32_to_cpu(src->d_btimer); + dst->d_itimer = be32_to_cpu(src->d_itimer); + dst->d_iwarns = be16_to_cpu(src->d_iwarns); + dst->d_bwarns = be16_to_cpu(src->d_bwarns); dst->d_rtb_hardlimit = - XFS_FSB_TO_BB(mp, be64_to_cpu(dqp->q_core.d_rtb_hardlimit)); + XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_rtb_hardlimit)); dst->d_rtb_softlimit = - XFS_FSB_TO_BB(mp, be64_to_cpu(dqp->q_core.d_rtb_softlimit)); - dst->d_rtbcount = XFS_FSB_TO_BB(mp, dqp->q_res_rtbcount); - dst->d_rtbtimer = be32_to_cpu(dqp->q_core.d_rtbtimer); - dst->d_rtbwarns = be16_to_cpu(dqp->q_core.d_rtbwarns); + XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_rtb_softlimit)); + dst->d_rtbcount = XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_rtbcount)); + dst->d_rtbtimer = be32_to_cpu(src->d_rtbtimer); + dst->d_rtbwarns = be16_to_cpu(src->d_rtbwarns); /* * Internally, we don't reset all the timers when quota enforcement * gets turned off. No need to confuse the user level code, * so return zeroes in that case. 
*/ - if ((!XFS_IS_UQUOTA_ENFORCED(mp) && dqp->q_core.d_flags == XFS_DQ_USER) || + if ((!XFS_IS_UQUOTA_ENFORCED(mp) && src->d_flags == XFS_DQ_USER) || (!XFS_IS_OQUOTA_ENFORCED(mp) && - (dqp->q_core.d_flags & (XFS_DQ_PROJ | XFS_DQ_GROUP)))) { + (src->d_flags & (XFS_DQ_PROJ | XFS_DQ_GROUP)))) { dst->d_btimer = 0; dst->d_itimer = 0; dst->d_rtbtimer = 0; @@ -794,9 +823,6 @@ xfs_qm_scall_getquota( } } #endif -out_put: - xfs_qm_dqput(dqp); - return error; } STATIC uint diff --git a/trunk/fs/xfs/xfs_quota.h b/trunk/fs/xfs/xfs_quota.h index b50ec5b95d5a..8a0807e0f979 100644 --- a/trunk/fs/xfs/xfs_quota.h +++ b/trunk/fs/xfs/xfs_quota.h @@ -174,8 +174,6 @@ typedef struct xfs_qoff_logformat { #define XFS_UQUOTA_ACTIVE 0x0100 /* uquotas are being turned off */ #define XFS_PQUOTA_ACTIVE 0x0200 /* pquotas are being turned off */ #define XFS_GQUOTA_ACTIVE 0x0400 /* gquotas are being turned off */ -#define XFS_ALL_QUOTA_ACTIVE \ - (XFS_UQUOTA_ACTIVE | XFS_PQUOTA_ACTIVE | XFS_GQUOTA_ACTIVE) /* * Checking XFS_IS_*QUOTA_ON() while holding any inode lock guarantees diff --git a/trunk/fs/xfs/xfs_quota_priv.h b/trunk/fs/xfs/xfs_quota_priv.h index 6d86219d93da..94a3d927d716 100644 --- a/trunk/fs/xfs/xfs_quota_priv.h +++ b/trunk/fs/xfs/xfs_quota_priv.h @@ -24,6 +24,17 @@ */ #define XFS_DQITER_MAP_SIZE 10 +/* + * Hash into a bucket in the dquot hash table, based on . + */ +#define XFS_DQ_HASHVAL(mp, id) (((__psunsigned_t)(mp) + \ + (__psunsigned_t)(id)) & \ + (xfs_Gqm->qm_dqhashmask - 1)) +#define XFS_DQ_HASH(mp, id, type) (type == XFS_DQ_USER ? 
\ + (xfs_Gqm->qm_usr_dqhtable + \ + XFS_DQ_HASHVAL(mp, id)) : \ + (xfs_Gqm->qm_grp_dqhtable + \ + XFS_DQ_HASHVAL(mp, id))) #define XFS_IS_DQUOT_UNINITIALIZED(dqp) ( \ !dqp->q_core.d_blk_hardlimit && \ !dqp->q_core.d_blk_softlimit && \ diff --git a/trunk/fs/xfs/xfs_sb.h b/trunk/fs/xfs/xfs_sb.h index f429d9d5d325..cb6ae715814a 100644 --- a/trunk/fs/xfs/xfs_sb.h +++ b/trunk/fs/xfs/xfs_sb.h @@ -529,6 +529,7 @@ static inline int xfs_sb_version_hasprojid32bit(xfs_sb_t *sbp) #define XFS_BB_TO_FSB(mp,bb) \ (((bb) + (XFS_FSB_TO_BB(mp,1) - 1)) >> (mp)->m_blkbb_log) #define XFS_BB_TO_FSBT(mp,bb) ((bb) >> (mp)->m_blkbb_log) +#define XFS_BB_FSB_OFFSET(mp,bb) ((bb) & ((mp)->m_bsize - 1)) /* * File system block to byte conversions. diff --git a/trunk/fs/xfs/xfs_stats.c b/trunk/fs/xfs/xfs_stats.c index ce372b7d5644..76fdc5861932 100644 --- a/trunk/fs/xfs/xfs_stats.c +++ b/trunk/fs/xfs/xfs_stats.c @@ -20,18 +20,9 @@ DEFINE_PER_CPU(struct xfsstats, xfsstats); -static int counter_val(int idx) -{ - int val = 0, cpu; - - for_each_possible_cpu(cpu) - val += *(((__u32 *)&per_cpu(xfsstats, cpu) + idx)); - return val; -} - static int xfs_stat_proc_show(struct seq_file *m, void *v) { - int i, j; + int c, i, j, val; __uint64_t xs_xstrat_bytes = 0; __uint64_t xs_write_bytes = 0; __uint64_t xs_read_bytes = 0; @@ -59,16 +50,20 @@ static int xfs_stat_proc_show(struct seq_file *m, void *v) { "abtc2", XFSSTAT_END_ABTC_V2 }, { "bmbt2", XFSSTAT_END_BMBT_V2 }, { "ibt2", XFSSTAT_END_IBT_V2 }, - /* we print both series of quota information together */ - { "qm", XFSSTAT_END_QM }, }; /* Loop over all stats groups */ - for (i = j = 0; i < ARRAY_SIZE(xstats); i++) { + for (i=j = 0; i < ARRAY_SIZE(xstats); i++) { seq_printf(m, "%s", xstats[i].desc); /* inner loop does each group */ - for (; j < xstats[i].endpoint; j++) - seq_printf(m, " %u", counter_val(j)); + while (j < xstats[i].endpoint) { + val = 0; + /* sum over all cpus */ + for_each_possible_cpu(c) + val += *(((__u32*)&per_cpu(xfsstats, c) + j)); + 
seq_printf(m, " %u", val); + j++; + } seq_putc(m, '\n'); } /* extra precision counters */ @@ -102,58 +97,6 @@ static const struct file_operations xfs_stat_proc_fops = { .release = single_release, }; -/* legacy quota interfaces */ -#ifdef CONFIG_XFS_QUOTA -static int xqm_proc_show(struct seq_file *m, void *v) -{ - /* maximum; incore; ratio free to inuse; freelist */ - seq_printf(m, "%d\t%d\t%d\t%u\n", - 0, - counter_val(XFSSTAT_END_XQMSTAT), - 0, - counter_val(XFSSTAT_END_XQMSTAT + 1)); - return 0; -} - -static int xqm_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, xqm_proc_show, NULL); -} - -static const struct file_operations xqm_proc_fops = { - .owner = THIS_MODULE, - .open = xqm_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -/* legacy quota stats interface no 2 */ -static int xqmstat_proc_show(struct seq_file *m, void *v) -{ - int j; - - seq_printf(m, "qm"); - for (j = XFSSTAT_END_IBT_V2; j < XFSSTAT_END_XQMSTAT; j++) - seq_printf(m, " %u", counter_val(j)); - seq_putc(m, '\n'); - return 0; -} - -static int xqmstat_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, xqmstat_proc_show, NULL); -} - -static const struct file_operations xqmstat_proc_fops = { - .owner = THIS_MODULE, - .open = xqmstat_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; -#endif /* CONFIG_XFS_QUOTA */ - int xfs_init_procfs(void) { @@ -162,24 +105,10 @@ xfs_init_procfs(void) if (!proc_create("fs/xfs/stat", 0, NULL, &xfs_stat_proc_fops)) - goto out_remove_xfs_dir; -#ifdef CONFIG_XFS_QUOTA - if (!proc_create("fs/xfs/xqmstat", 0, NULL, - &xqmstat_proc_fops)) - goto out_remove_stat_file; - if (!proc_create("fs/xfs/xqm", 0, NULL, - &xqm_proc_fops)) - goto out_remove_xqmstat_file; -#endif + goto out_remove_entry; return 0; -#ifdef CONFIG_XFS_QUOTA - out_remove_xqmstat_file: - remove_proc_entry("fs/xfs/xqmstat", NULL); - out_remove_stat_file: - 
remove_proc_entry("fs/xfs/stat", NULL); -#endif - out_remove_xfs_dir: + out_remove_entry: remove_proc_entry("fs/xfs", NULL); out: return -ENOMEM; @@ -188,10 +117,6 @@ xfs_init_procfs(void) void xfs_cleanup_procfs(void) { -#ifdef CONFIG_XFS_QUOTA - remove_proc_entry("fs/xfs/xqm", NULL); - remove_proc_entry("fs/xfs/xqmstat", NULL); -#endif remove_proc_entry("fs/xfs/stat", NULL); remove_proc_entry("fs/xfs", NULL); } diff --git a/trunk/fs/xfs/xfs_stats.h b/trunk/fs/xfs/xfs_stats.h index c03ad38ceaeb..736854b1ca1a 100644 --- a/trunk/fs/xfs/xfs_stats.h +++ b/trunk/fs/xfs/xfs_stats.h @@ -183,16 +183,6 @@ struct xfsstats { __uint32_t xs_ibt_2_alloc; __uint32_t xs_ibt_2_free; __uint32_t xs_ibt_2_moves; -#define XFSSTAT_END_XQMSTAT (XFSSTAT_END_IBT_V2+6) - __uint32_t xs_qm_dqreclaims; - __uint32_t xs_qm_dqreclaim_misses; - __uint32_t xs_qm_dquot_dups; - __uint32_t xs_qm_dqcachemisses; - __uint32_t xs_qm_dqcachehits; - __uint32_t xs_qm_dqwants; -#define XFSSTAT_END_QM (XFSSTAT_END_XQMSTAT+2) - __uint32_t xs_qm_dquot; - __uint32_t xs_qm_dquot_unused; /* Extra precision counters */ __uint64_t xs_xstrat_bytes; __uint64_t xs_write_bytes; diff --git a/trunk/fs/xfs/xfs_super.c b/trunk/fs/xfs/xfs_super.c index 912442cf0f82..baf40e378d35 100644 --- a/trunk/fs/xfs/xfs_super.c +++ b/trunk/fs/xfs/xfs_super.c @@ -324,9 +324,10 @@ xfs_parseargs( } else if (!strcmp(this_char, MNTOPT_FILESTREAM)) { mp->m_flags |= XFS_MOUNT_FILESTREAMS; } else if (!strcmp(this_char, MNTOPT_NOQUOTA)) { - mp->m_qflags &= ~XFS_ALL_QUOTA_ACCT; - mp->m_qflags &= ~XFS_ALL_QUOTA_ENFD; - mp->m_qflags &= ~XFS_ALL_QUOTA_ACTIVE; + mp->m_qflags &= ~(XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE | + XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE | + XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE | + XFS_UQUOTA_ENFD | XFS_OQUOTA_ENFD); } else if (!strcmp(this_char, MNTOPT_QUOTA) || !strcmp(this_char, MNTOPT_UQUOTA) || !strcmp(this_char, MNTOPT_USRQUOTA)) { @@ -759,36 +760,6 @@ xfs_setup_devices( return 0; } -STATIC int -xfs_init_mount_workqueues( - 
struct xfs_mount *mp) -{ - mp->m_data_workqueue = alloc_workqueue("xfs-data/%s", - WQ_MEM_RECLAIM, 0, mp->m_fsname); - if (!mp->m_data_workqueue) - goto out; - - mp->m_unwritten_workqueue = alloc_workqueue("xfs-conv/%s", - WQ_MEM_RECLAIM, 0, mp->m_fsname); - if (!mp->m_unwritten_workqueue) - goto out_destroy_data_iodone_queue; - - return 0; - -out_destroy_data_iodone_queue: - destroy_workqueue(mp->m_data_workqueue); -out: - return -ENOMEM; -} - -STATIC void -xfs_destroy_mount_workqueues( - struct xfs_mount *mp) -{ - destroy_workqueue(mp->m_data_workqueue); - destroy_workqueue(mp->m_unwritten_workqueue); -} - /* Catch misguided souls that try to use this interface on XFS */ STATIC struct inode * xfs_fs_alloc_inode( @@ -863,58 +834,91 @@ xfs_fs_inode_init_once( } /* - * This is called by the VFS when dirtying inode metadata. This can happen - * for a few reasons, but we only care about timestamp updates, given that - * we handled the rest ourselves. In theory no other calls should happen, - * but for example generic_write_end() keeps dirtying the inode after - * updating i_size. Thus we check that the flags are exactly I_DIRTY_SYNC, - * and skip this call otherwise. + * Dirty the XFS inode when mark_inode_dirty_sync() is called so that + * we catch unlogged VFS level updates to the inode. * - * We'll hopefull get a different method just for updating timestamps soon, - * at which point this hack can go away, and maybe we'll also get real - * error handling here. + * We need the barrier() to maintain correct ordering between unlogged + * updates and the transaction commit code that clears the i_update_core + * field. This requires all updates to be completed before marking the + * inode dirty. 
*/ STATIC void xfs_fs_dirty_inode( + struct inode *inode, + int flags) +{ + barrier(); + XFS_I(inode)->i_update_core = 1; +} + +STATIC int +xfs_fs_write_inode( struct inode *inode, - int flags) + struct writeback_control *wbc) { struct xfs_inode *ip = XFS_I(inode); struct xfs_mount *mp = ip->i_mount; - struct xfs_trans *tp; - int error; + int error = EAGAIN; - if (flags != I_DIRTY_SYNC) - return; + trace_xfs_write_inode(ip); - trace_xfs_dirty_inode(ip); + if (XFS_FORCED_SHUTDOWN(mp)) + return -XFS_ERROR(EIO); - tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS); - error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0); - if (error) { - xfs_trans_cancel(tp, 0); - goto trouble; + if (wbc->sync_mode == WB_SYNC_ALL || wbc->for_kupdate) { + /* + * Make sure the inode has made it it into the log. Instead + * of forcing it all the way to stable storage using a + * synchronous transaction we let the log force inside the + * ->sync_fs call do that for thus, which reduces the number + * of synchronous log forces dramatically. + */ + error = xfs_log_dirty_inode(ip, NULL, 0); + if (error) + goto out; + return 0; + } else { + if (!ip->i_update_core) + return 0; + + /* + * We make this non-blocking if the inode is contended, return + * EAGAIN to indicate to the caller that they did not succeed. + * This prevents the flush path from blocking on inodes inside + * another operation right now, they get caught later by + * xfs_sync. + */ + if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) + goto out; + + if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip)) + goto out_unlock; + + /* + * Now we have the flush lock and the inode is not pinned, we + * can check if the inode is really clean as we know that + * there are no pending transaction completions, it is not + * waiting on the delayed write queue and there is no IO in + * progress. 
+ */ + if (xfs_inode_clean(ip)) { + xfs_ifunlock(ip); + error = 0; + goto out_unlock; + } + error = xfs_iflush(ip, SYNC_TRYLOCK); } - xfs_ilock(ip, XFS_ILOCK_EXCL); + + out_unlock: + xfs_iunlock(ip, XFS_ILOCK_SHARED); + out: /* - * Grab all the latest timestamps from the Linux inode. + * if we failed to write out the inode then mark + * it dirty again so we'll try again later. */ - ip->i_d.di_atime.t_sec = (__int32_t)inode->i_atime.tv_sec; - ip->i_d.di_atime.t_nsec = (__int32_t)inode->i_atime.tv_nsec; - ip->i_d.di_ctime.t_sec = (__int32_t)inode->i_ctime.tv_sec; - ip->i_d.di_ctime.t_nsec = (__int32_t)inode->i_ctime.tv_nsec; - ip->i_d.di_mtime.t_sec = (__int32_t)inode->i_mtime.tv_sec; - ip->i_d.di_mtime.t_nsec = (__int32_t)inode->i_mtime.tv_nsec; - - xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); - xfs_trans_log_inode(tp, ip, XFS_ILOG_TIMESTAMP); - error = xfs_trans_commit(tp, 0); if (error) - goto trouble; - return; - -trouble: - xfs_warn(mp, "failed to update timestamps for inode 0x%llx", ip->i_ino); + xfs_mark_inode_dirty_sync(ip); + return -error; } STATIC void @@ -979,7 +983,6 @@ xfs_fs_put_super( xfs_unmountfs(mp); xfs_freesb(mp); xfs_icsb_destroy_counters(mp); - xfs_destroy_mount_workqueues(mp); xfs_close_devices(mp); xfs_free_fsname(mp); kfree(mp); @@ -1306,13 +1309,9 @@ xfs_fs_fill_super( if (error) goto out_free_fsname; - error = xfs_init_mount_workqueues(mp); - if (error) - goto out_close_devices; - error = xfs_icsb_init_counters(mp); if (error) - goto out_destroy_workqueues; + goto out_close_devices; error = xfs_readsb(mp, flags); if (error) @@ -1377,8 +1376,6 @@ xfs_fs_fill_super( xfs_freesb(mp); out_destroy_counters: xfs_icsb_destroy_counters(mp); -out_destroy_workqueues: - xfs_destroy_mount_workqueues(mp); out_close_devices: xfs_close_devices(mp); out_free_fsname: @@ -1432,6 +1429,7 @@ static const struct super_operations xfs_super_operations = { .alloc_inode = xfs_fs_alloc_inode, .destroy_inode = xfs_fs_destroy_inode, .dirty_inode = xfs_fs_dirty_inode, + 
.write_inode = xfs_fs_write_inode, .evict_inode = xfs_fs_evict_inode, .put_super = xfs_fs_put_super, .sync_fs = xfs_fs_sync_fs, @@ -1653,17 +1651,13 @@ init_xfs_fs(void) if (error) goto out_cleanup_procfs; - error = xfs_qm_init(); - if (error) - goto out_sysctl_unregister; + vfs_initquota(); error = register_filesystem(&xfs_fs_type); if (error) - goto out_qm_exit; + goto out_sysctl_unregister; return 0; - out_qm_exit: - xfs_qm_exit(); out_sysctl_unregister: xfs_sysctl_unregister(); out_cleanup_procfs: @@ -1685,7 +1679,7 @@ init_xfs_fs(void) STATIC void __exit exit_xfs_fs(void) { - xfs_qm_exit(); + vfs_exitquota(); unregister_filesystem(&xfs_fs_type); xfs_sysctl_unregister(); xfs_cleanup_procfs(); diff --git a/trunk/fs/xfs/xfs_super.h b/trunk/fs/xfs/xfs_super.h index 09b0c26b2245..50a3266c999e 100644 --- a/trunk/fs/xfs/xfs_super.h +++ b/trunk/fs/xfs/xfs_super.h @@ -21,11 +21,13 @@ #include #ifdef CONFIG_XFS_QUOTA -extern int xfs_qm_init(void); +extern void xfs_qm_init(void); extern void xfs_qm_exit(void); +# define vfs_initquota() xfs_qm_init() +# define vfs_exitquota() xfs_qm_exit() #else -# define xfs_qm_init() (0) -# define xfs_qm_exit() do { } while (0) +# define vfs_initquota() do { } while (0) +# define vfs_exitquota() do { } while (0) #endif #ifdef CONFIG_XFS_POSIX_ACL diff --git a/trunk/fs/xfs/xfs_sync.c b/trunk/fs/xfs/xfs_sync.c index 205ebcb34d9e..40b75eecd2b4 100644 --- a/trunk/fs/xfs/xfs_sync.c +++ b/trunk/fs/xfs/xfs_sync.c @@ -336,6 +336,32 @@ xfs_sync_fsdata( return error; } +int +xfs_log_dirty_inode( + struct xfs_inode *ip, + struct xfs_perag *pag, + int flags) +{ + struct xfs_mount *mp = ip->i_mount; + struct xfs_trans *tp; + int error; + + if (!ip->i_update_core) + return 0; + + tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS); + error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0); + if (error) { + xfs_trans_cancel(tp, 0); + return error; + } + + xfs_ilock(ip, XFS_ILOCK_EXCL); + xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); + 
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); + return xfs_trans_commit(tp, 0); +} + /* * When remounting a filesystem read-only or freezing the filesystem, we have * two phases to execute. This first phase is syncing the data before we @@ -359,6 +385,16 @@ xfs_quiesce_data( { int error, error2 = 0; + /* + * Log all pending size and timestamp updates. The vfs writeback + * code is supposed to do this, but due to its overagressive + * livelock detection it will skip inodes where appending writes + * were written out in the first non-blocking sync phase if their + * completion took long enough that it happened after taking the + * timestamp for the cut-off in the blocking phase. + */ + xfs_inode_ag_iterator(mp, xfs_log_dirty_inode, 0); + /* force out the log */ xfs_log_force(mp, XFS_LOG_SYNC); @@ -877,15 +913,17 @@ xfs_reclaim_inode( * can reference the inodes in the cache without taking references. * * We make that OK here by ensuring that we wait until the inode is - * unlocked after the lookup before we go ahead and free it. + * unlocked after the lookup before we go ahead and free it. We get + * both the ilock and the iolock because the code may need to drop the + * ilock one but will still hold the iolock. 
*/ - xfs_ilock(ip, XFS_ILOCK_EXCL); + xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); xfs_qm_dqdetach(ip); - xfs_iunlock(ip, XFS_ILOCK_EXCL); + xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); xfs_inode_free(ip); - return error; + } /* diff --git a/trunk/fs/xfs/xfs_sync.h b/trunk/fs/xfs/xfs_sync.h index 941202e7ac6e..fa965479d788 100644 --- a/trunk/fs/xfs/xfs_sync.h +++ b/trunk/fs/xfs/xfs_sync.h @@ -34,6 +34,8 @@ void xfs_quiesce_attr(struct xfs_mount *mp); void xfs_flush_inodes(struct xfs_inode *ip); +int xfs_log_dirty_inode(struct xfs_inode *ip, struct xfs_perag *pag, int flags); + int xfs_reclaim_inodes(struct xfs_mount *mp, int mode); int xfs_reclaim_inodes_count(struct xfs_mount *mp); void xfs_reclaim_inodes_nr(struct xfs_mount *mp, int nr_to_scan); diff --git a/trunk/fs/xfs/xfs_trace.h b/trunk/fs/xfs/xfs_trace.h index 75eb54af4d58..bb134a819930 100644 --- a/trunk/fs/xfs/xfs_trace.h +++ b/trunk/fs/xfs/xfs_trace.h @@ -580,7 +580,7 @@ DEFINE_INODE_EVENT(xfs_ioctl_setattr); DEFINE_INODE_EVENT(xfs_dir_fsync); DEFINE_INODE_EVENT(xfs_file_fsync); DEFINE_INODE_EVENT(xfs_destroy_inode); -DEFINE_INODE_EVENT(xfs_dirty_inode); +DEFINE_INODE_EVENT(xfs_write_inode); DEFINE_INODE_EVENT(xfs_evict_inode); DEFINE_INODE_EVENT(xfs_dquot_dqalloc); @@ -741,10 +741,10 @@ DEFINE_DQUOT_EVENT(xfs_dqalloc); DEFINE_DQUOT_EVENT(xfs_dqtobp_read); DEFINE_DQUOT_EVENT(xfs_dqread); DEFINE_DQUOT_EVENT(xfs_dqread_fail); +DEFINE_DQUOT_EVENT(xfs_dqlookup_found); +DEFINE_DQUOT_EVENT(xfs_dqlookup_done); DEFINE_DQUOT_EVENT(xfs_dqget_hit); DEFINE_DQUOT_EVENT(xfs_dqget_miss); -DEFINE_DQUOT_EVENT(xfs_dqget_freeing); -DEFINE_DQUOT_EVENT(xfs_dqget_dup); DEFINE_DQUOT_EVENT(xfs_dqput); DEFINE_DQUOT_EVENT(xfs_dqput_wait); DEFINE_DQUOT_EVENT(xfs_dqput_free); @@ -782,12 +782,12 @@ DECLARE_EVENT_CLASS(xfs_loggrant_class, __entry->curr_res = tic->t_curr_res; __entry->unit_res = tic->t_unit_res; __entry->flags = tic->t_flags; - __entry->reserveq = list_empty(&log->l_reserve_head.waiters); - __entry->writeq = 
list_empty(&log->l_write_head.waiters); - xlog_crack_grant_head(&log->l_reserve_head.grant, + __entry->reserveq = list_empty(&log->l_reserveq); + __entry->writeq = list_empty(&log->l_writeq); + xlog_crack_grant_head(&log->l_grant_reserve_head, &__entry->grant_reserve_cycle, &__entry->grant_reserve_bytes); - xlog_crack_grant_head(&log->l_write_head.grant, + xlog_crack_grant_head(&log->l_grant_write_head, &__entry->grant_write_cycle, &__entry->grant_write_bytes); __entry->curr_cycle = log->l_curr_cycle; @@ -826,14 +826,20 @@ DEFINE_EVENT(xfs_loggrant_class, name, \ TP_ARGS(log, tic)) DEFINE_LOGGRANT_EVENT(xfs_log_done_nonperm); DEFINE_LOGGRANT_EVENT(xfs_log_done_perm); +DEFINE_LOGGRANT_EVENT(xfs_log_reserve); DEFINE_LOGGRANT_EVENT(xfs_log_umount_write); +DEFINE_LOGGRANT_EVENT(xfs_log_grant_enter); +DEFINE_LOGGRANT_EVENT(xfs_log_grant_exit); +DEFINE_LOGGRANT_EVENT(xfs_log_grant_error); DEFINE_LOGGRANT_EVENT(xfs_log_grant_sleep); DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake); DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake_up); -DEFINE_LOGGRANT_EVENT(xfs_log_reserve); -DEFINE_LOGGRANT_EVENT(xfs_log_reserve_exit); -DEFINE_LOGGRANT_EVENT(xfs_log_regrant); -DEFINE_LOGGRANT_EVENT(xfs_log_regrant_exit); +DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_enter); +DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_exit); +DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_error); +DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_sleep); +DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake); +DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake_up); DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_enter); DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_exit); DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_sub); diff --git a/trunk/fs/xfs/xfs_trans.c b/trunk/fs/xfs/xfs_trans.c index 103b00c90004..7adcdf15ae0c 100644 --- a/trunk/fs/xfs/xfs_trans.c +++ b/trunk/fs/xfs/xfs_trans.c @@ -681,6 +681,7 @@ xfs_trans_reserve( uint flags, uint logcount) { + int log_flags; int error = 0; int rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0; 
@@ -706,32 +707,24 @@ xfs_trans_reserve( * Reserve the log space needed for this transaction. */ if (logspace > 0) { - bool permanent = false; - - ASSERT(tp->t_log_res == 0 || tp->t_log_res == logspace); - ASSERT(tp->t_log_count == 0 || tp->t_log_count == logcount); - + ASSERT((tp->t_log_res == 0) || (tp->t_log_res == logspace)); + ASSERT((tp->t_log_count == 0) || + (tp->t_log_count == logcount)); if (flags & XFS_TRANS_PERM_LOG_RES) { + log_flags = XFS_LOG_PERM_RESERV; tp->t_flags |= XFS_TRANS_PERM_LOG_RES; - permanent = true; } else { ASSERT(tp->t_ticket == NULL); ASSERT(!(tp->t_flags & XFS_TRANS_PERM_LOG_RES)); + log_flags = 0; } - if (tp->t_ticket != NULL) { - ASSERT(flags & XFS_TRANS_PERM_LOG_RES); - error = xfs_log_regrant(tp->t_mountp, tp->t_ticket); - } else { - error = xfs_log_reserve(tp->t_mountp, logspace, - logcount, &tp->t_ticket, - XFS_TRANSACTION, permanent, - tp->t_type); - } - - if (error) + error = xfs_log_reserve(tp->t_mountp, logspace, logcount, + &tp->t_ticket, + XFS_TRANSACTION, log_flags, tp->t_type); + if (error) { goto undo_blocks; - + } tp->t_log_res = logspace; tp->t_log_count = logcount; } @@ -759,8 +752,6 @@ xfs_trans_reserve( */ undo_log: if (logspace > 0) { - int log_flags; - if (flags & XFS_TRANS_PERM_LOG_RES) { log_flags = XFS_LOG_REL_PERM_RESERV; } else { diff --git a/trunk/fs/xfs/xfs_trans_ail.c b/trunk/fs/xfs/xfs_trans_ail.c index 1dead07f092c..ed9252bcdac9 100644 --- a/trunk/fs/xfs/xfs_trans_ail.c +++ b/trunk/fs/xfs/xfs_trans_ail.c @@ -610,6 +610,50 @@ xfs_ail_push_all( xfs_ail_push(ailp, threshold_lsn); } +/* + * This is to be called when an item is unlocked that may have + * been in the AIL. It will wake up the first member of the AIL + * wait list if this item's unlocking might allow it to progress. + * If the item is in the AIL, then we need to get the AIL lock + * while doing our checking so we don't race with someone going + * to sleep waiting for this event in xfs_trans_push_ail(). 
+ */ +void +xfs_trans_unlocked_item( + struct xfs_ail *ailp, + xfs_log_item_t *lip) +{ + xfs_log_item_t *min_lip; + + /* + * If we're forcibly shutting down, we may have + * unlocked log items arbitrarily. The last thing + * we want to do is to move the tail of the log + * over some potentially valid data. + */ + if (!(lip->li_flags & XFS_LI_IN_AIL) || + XFS_FORCED_SHUTDOWN(ailp->xa_mount)) { + return; + } + + /* + * This is the one case where we can call into xfs_ail_min() + * without holding the AIL lock because we only care about the + * case where we are at the tail of the AIL. If the object isn't + * at the tail, it doesn't matter what result we get back. This + * is slightly racy because since we were just unlocked, we could + * go to sleep between the call to xfs_ail_min and the call to + * xfs_log_move_tail, have someone else lock us, commit to us disk, + * move us out of the tail of the AIL, and then we wake up. However, + * the call to xfs_log_move_tail() doesn't do anything if there's + * not enough free space to wake people up so we're safe calling it. + */ + min_lip = xfs_ail_min(ailp); + + if (min_lip == lip) + xfs_log_move_tail(ailp->xa_mount, 1); +} /* xfs_trans_unlocked_item */ + /* * xfs_trans_ail_update - bulk AIL insertion operation. * @@ -641,6 +685,7 @@ xfs_trans_ail_update_bulk( xfs_lsn_t lsn) __releases(ailp->xa_lock) { xfs_log_item_t *mlip; + xfs_lsn_t tail_lsn; int mlip_changed = 0; int i; LIST_HEAD(tmp); @@ -667,12 +712,22 @@ xfs_trans_ail_update_bulk( if (!list_empty(&tmp)) xfs_ail_splice(ailp, cur, &tmp, lsn); - spin_unlock(&ailp->xa_lock); - if (mlip_changed && !XFS_FORCED_SHUTDOWN(ailp->xa_mount)) { - xlog_assign_tail_lsn(ailp->xa_mount); - xfs_log_space_wake(ailp->xa_mount); + if (!mlip_changed) { + spin_unlock(&ailp->xa_lock); + return; } + + /* + * It is not safe to access mlip after the AIL lock is dropped, so we + * must get a copy of li_lsn before we do so. 
This is especially + * important on 32-bit platforms where accessing and updating 64-bit + * values like li_lsn is not atomic. + */ + mlip = xfs_ail_min(ailp); + tail_lsn = mlip->li_lsn; + spin_unlock(&ailp->xa_lock); + xfs_log_move_tail(ailp->xa_mount, tail_lsn); } /* @@ -703,6 +758,7 @@ xfs_trans_ail_delete_bulk( int nr_items) __releases(ailp->xa_lock) { xfs_log_item_t *mlip; + xfs_lsn_t tail_lsn; int mlip_changed = 0; int i; @@ -729,12 +785,23 @@ xfs_trans_ail_delete_bulk( if (mlip == lip) mlip_changed = 1; } - spin_unlock(&ailp->xa_lock); - if (mlip_changed && !XFS_FORCED_SHUTDOWN(ailp->xa_mount)) { - xlog_assign_tail_lsn(ailp->xa_mount); - xfs_log_space_wake(ailp->xa_mount); + if (!mlip_changed) { + spin_unlock(&ailp->xa_lock); + return; } + + /* + * It is not safe to access mlip after the AIL lock is dropped, so we + * must get a copy of li_lsn before we do so. This is especially + * important on 32-bit platforms where accessing and updating 64-bit + * values like li_lsn is not atomic. It is possible we've emptied the + * AIL here, so if that is the case, pass an LSN of 0 to the tail move. + */ + mlip = xfs_ail_min(ailp); + tail_lsn = mlip ? mlip->li_lsn : 0; + spin_unlock(&ailp->xa_lock); + xfs_log_move_tail(ailp->xa_mount, tail_lsn); } /* diff --git a/trunk/fs/xfs/xfs_trans_buf.c b/trunk/fs/xfs/xfs_trans_buf.c index 1302d1d95a58..475a4ded4f41 100644 --- a/trunk/fs/xfs/xfs_trans_buf.c +++ b/trunk/fs/xfs/xfs_trans_buf.c @@ -463,7 +463,19 @@ xfs_trans_brelse(xfs_trans_t *tp, * Default to a normal brelse() call if the tp is NULL. */ if (tp == NULL) { + struct xfs_log_item *lip = bp->b_fspriv; + ASSERT(bp->b_transp == NULL); + + /* + * If there's a buf log item attached to the buffer, + * then let the AIL know that the buffer is being + * unlocked. 
+ */ + if (lip != NULL && lip->li_type == XFS_LI_BUF) { + bip = bp->b_fspriv; + xfs_trans_unlocked_item(bip->bli_item.li_ailp, lip); + } xfs_buf_relse(bp); return; } @@ -538,10 +550,21 @@ xfs_trans_brelse(xfs_trans_t *tp, ASSERT(!(bip->bli_item.li_flags & XFS_LI_IN_AIL)); ASSERT(!(bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF)); xfs_buf_item_relse(bp); + bip = NULL; } - bp->b_transp = NULL; + + /* + * If we've still got a buf log item on the buffer, then + * tell the AIL that the buffer is being unlocked. + */ + if (bip != NULL) { + xfs_trans_unlocked_item(bip->bli_item.li_ailp, + (xfs_log_item_t*)bip); + } + xfs_buf_relse(bp); + return; } /* diff --git a/trunk/fs/xfs/xfs_trans_dquot.c b/trunk/fs/xfs/xfs_trans_dquot.c index 279099717ed2..c4ba366d24e6 100644 --- a/trunk/fs/xfs/xfs_trans_dquot.c +++ b/trunk/fs/xfs/xfs_trans_dquot.c @@ -605,7 +605,7 @@ xfs_trans_dqresv( time_t timer; xfs_qwarncnt_t warns; xfs_qwarncnt_t warnlimit; - xfs_qcnt_t total_count; + xfs_qcnt_t count; xfs_qcnt_t *resbcountp; xfs_quotainfo_t *q = mp->m_quotainfo; @@ -648,12 +648,13 @@ xfs_trans_dqresv( * hardlimit or exceed the timelimit if we allocate * nblks. 
*/ - total_count = *resbcountp + nblks; - if (hardlimit && total_count > hardlimit) { + if (hardlimit > 0ULL && + hardlimit < nblks + *resbcountp) { xfs_quota_warn(mp, dqp, QUOTA_NL_BHARDWARN); goto error_return; } - if (softlimit && total_count > softlimit) { + if (softlimit > 0ULL && + softlimit < nblks + *resbcountp) { if ((timer != 0 && get_seconds() > timer) || (warns != 0 && warns >= warnlimit)) { xfs_quota_warn(mp, dqp, @@ -665,7 +666,7 @@ xfs_trans_dqresv( } } if (ninos > 0) { - total_count = be64_to_cpu(dqp->q_core.d_icount) + ninos; + count = be64_to_cpu(dqp->q_core.d_icount); timer = be32_to_cpu(dqp->q_core.d_itimer); warns = be16_to_cpu(dqp->q_core.d_iwarns); warnlimit = dqp->q_mount->m_quotainfo->qi_iwarnlimit; @@ -676,11 +677,13 @@ xfs_trans_dqresv( if (!softlimit) softlimit = q->qi_isoftlimit; - if (hardlimit && total_count > hardlimit) { + if (hardlimit > 0ULL && + hardlimit < ninos + count) { xfs_quota_warn(mp, dqp, QUOTA_NL_IHARDWARN); goto error_return; } - if (softlimit && total_count > softlimit) { + if (softlimit > 0ULL && + softlimit < ninos + count) { if ((timer != 0 && get_seconds() > timer) || (warns != 0 && warns >= warnlimit)) { xfs_quota_warn(mp, dqp, @@ -875,7 +878,7 @@ STATIC void xfs_trans_alloc_dqinfo( xfs_trans_t *tp) { - tp->t_dqinfo = kmem_zone_zalloc(xfs_qm_dqtrxzone, KM_SLEEP); + tp->t_dqinfo = kmem_zone_zalloc(xfs_Gqm->qm_dqtrxzone, KM_SLEEP); } void @@ -884,6 +887,6 @@ xfs_trans_free_dqinfo( { if (!tp->t_dqinfo) return; - kmem_zone_free(xfs_qm_dqtrxzone, tp->t_dqinfo); + kmem_zone_free(xfs_Gqm->qm_dqtrxzone, tp->t_dqinfo); tp->t_dqinfo = NULL; } diff --git a/trunk/fs/xfs/xfs_trans_inode.c b/trunk/fs/xfs/xfs_trans_inode.c index 7a7442c03f2b..32f0288ae10f 100644 --- a/trunk/fs/xfs/xfs_trans_inode.c +++ b/trunk/fs/xfs/xfs_trans_inode.c @@ -95,14 +95,10 @@ xfs_trans_ichgtime( if ((flags & XFS_ICHGTIME_MOD) && !timespec_equal(&inode->i_mtime, &tv)) { inode->i_mtime = tv; - ip->i_d.di_mtime.t_sec = tv.tv_sec; - 
ip->i_d.di_mtime.t_nsec = tv.tv_nsec; } if ((flags & XFS_ICHGTIME_CHG) && !timespec_equal(&inode->i_ctime, &tv)) { inode->i_ctime = tv; - ip->i_d.di_ctime.t_sec = tv.tv_sec; - ip->i_d.di_ctime.t_nsec = tv.tv_nsec; } } @@ -130,12 +126,12 @@ xfs_trans_log_inode( /* * Always OR in the bits from the ili_last_fields field. * This is to coordinate with the xfs_iflush() and xfs_iflush_done() - * routines in the eventual clearing of the ili_fields bits. + * routines in the eventual clearing of the ilf_fields bits. * See the big comment in xfs_iflush() for an explanation of * this coordination mechanism. */ flags |= ip->i_itemp->ili_last_fields; - ip->i_itemp->ili_fields |= flags; + ip->i_itemp->ili_format.ilf_fields |= flags; } #ifdef XFS_TRANS_DEBUG diff --git a/trunk/fs/xfs/xfs_trans_priv.h b/trunk/fs/xfs/xfs_trans_priv.h index 8ab2ced415f1..44820b9fcb43 100644 --- a/trunk/fs/xfs/xfs_trans_priv.h +++ b/trunk/fs/xfs/xfs_trans_priv.h @@ -104,6 +104,9 @@ void xfs_ail_push(struct xfs_ail *, xfs_lsn_t); void xfs_ail_push_all(struct xfs_ail *); xfs_lsn_t xfs_ail_min_lsn(struct xfs_ail *ailp); +void xfs_trans_unlocked_item(struct xfs_ail *, + xfs_log_item_t *); + struct xfs_log_item * xfs_trans_ail_cursor_first(struct xfs_ail *ailp, struct xfs_ail_cursor *cur, xfs_lsn_t lsn); diff --git a/trunk/fs/xfs/xfs_vnode.h b/trunk/fs/xfs/xfs_vnode.h index db14d0c08682..7c220b4227bc 100644 --- a/trunk/fs/xfs/xfs_vnode.h +++ b/trunk/fs/xfs/xfs_vnode.h @@ -22,6 +22,7 @@ struct file; struct xfs_inode; +struct xfs_iomap; struct attrlist_cursor_kern; /* diff --git a/trunk/fs/xfs/xfs_vnodeops.h b/trunk/fs/xfs/xfs_vnodeops.h index 447e146b2ba6..0c877cbde142 100644 --- a/trunk/fs/xfs/xfs_vnodeops.h +++ b/trunk/fs/xfs/xfs_vnodeops.h @@ -10,6 +10,7 @@ struct kiocb; struct pipe_inode_info; struct uio; struct xfs_inode; +struct xfs_iomap; int xfs_setattr_nonsize(struct xfs_inode *ip, struct iattr *vap, int flags); @@ -48,6 +49,8 @@ int xfs_attr_set(struct xfs_inode *dp, const unsigned char *name, int 
xfs_attr_remove(struct xfs_inode *dp, const unsigned char *name, int flags); int xfs_attr_list(struct xfs_inode *dp, char *buffer, int bufsize, int flags, struct attrlist_cursor_kern *cursor); +int xfs_bmap(struct xfs_inode *ip, xfs_off_t offset, ssize_t count, + int flags, struct xfs_iomap *iomapp, int *niomaps); void xfs_tosspages(struct xfs_inode *inode, xfs_off_t first, xfs_off_t last, int fiopt); int xfs_flushinval_pages(struct xfs_inode *ip, xfs_off_t first, diff --git a/trunk/include/asm-generic/pci-bridge.h b/trunk/include/asm-generic/pci-bridge.h index a5b5d5a89a4f..4a5aca2a2c94 100644 --- a/trunk/include/asm-generic/pci-bridge.h +++ b/trunk/include/asm-generic/pci-bridge.h @@ -45,11 +45,6 @@ static inline void pci_add_flags(int flags) pci_flags |= flags; } -static inline void pci_clear_flags(int flags) -{ - pci_flags &= ~flags; -} - static inline int pci_has_flag(int flag) { return pci_flags & flag; @@ -57,7 +52,6 @@ static inline int pci_has_flag(int flag) #else static inline void pci_set_flags(int flags) { } static inline void pci_add_flags(int flags) { } -static inline void pci_clear_flags(int flags) { } static inline int pci_has_flag(int flag) { return 0; diff --git a/trunk/include/asm-generic/pci.h b/trunk/include/asm-generic/pci.h index e80a0495e5b0..26373cff4546 100644 --- a/trunk/include/asm-generic/pci.h +++ b/trunk/include/asm-generic/pci.h @@ -6,6 +6,30 @@ #ifndef _ASM_GENERIC_PCI_H #define _ASM_GENERIC_PCI_H +/** + * pcibios_resource_to_bus - convert resource to PCI bus address + * @dev: device which owns this resource + * @region: converted bus-centric region (start,end) + * @res: resource to convert + * + * Convert a resource to a PCI device bus address or bus window. 
+ */ +static inline void +pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region, + struct resource *res) +{ + region->start = res->start; + region->end = res->end; +} + +static inline void +pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res, + struct pci_bus_region *region) +{ + res->start = region->start; + res->end = region->end; +} + static inline struct resource * pcibios_select_root(struct pci_dev *pdev, struct resource *res) { diff --git a/trunk/include/linux/amd-iommu.h b/trunk/include/linux/amd-iommu.h index 15f6b9edd0b1..ef00610837d4 100644 --- a/trunk/include/linux/amd-iommu.h +++ b/trunk/include/linux/amd-iommu.h @@ -28,7 +28,7 @@ struct task_struct; struct pci_dev; extern int amd_iommu_detect(void); -extern int amd_iommu_init_hardware(void); + /** * amd_iommu_enable_device_erratum() - Enable erratum workaround for device diff --git a/trunk/include/linux/i2c/at24.h b/trunk/include/linux/i2c/at24.h index 285025a9cdc9..8ace93024d60 100644 --- a/trunk/include/linux/i2c/at24.h +++ b/trunk/include/linux/i2c/at24.h @@ -1,42 +1,19 @@ -/* - * at24.h - platform_data for the at24 (generic eeprom) driver - * (C) Copyright 2008 by Pengutronix - * (C) Copyright 2012 by Wolfram Sang - * same license as the driver - */ - #ifndef _LINUX_AT24_H #define _LINUX_AT24_H #include #include -/** - * struct at24_platform_data - data to set up at24 (generic eeprom) driver - * @byte_len: size of eeprom in byte - * @page_size: number of byte which can be written in one go - * @flags: tunable options, check AT24_FLAG_* defines - * @setup: an optional callback invoked after eeprom is probed; enables kernel - code to access eeprom via memory_accessor, see example - * @context: optional parameter passed to setup() +/* + * As seen through Linux I2C, differences between the most common types of I2C + * memory include: + * - How much memory is available (usually specified in bit)? + * - What write page size does it support? 
+ * - Special flags (16 bit addresses, read_only, world readable...)? * * If you set up a custom eeprom type, please double-check the parameters. * Especially page_size needs extra care, as you risk data loss if your value * is bigger than what the chip actually supports! - * - * An example in pseudo code for a setup() callback: - * - * void get_mac_addr(struct memory_accessor *mem_acc, void *context) - * { - * u8 *mac_addr = ethernet_pdata->mac_addr; - * off_t offset = context; - * - * // Read MAC addr from EEPROM - * if (mem_acc->read(mem_acc, mac_addr, offset, ETH_ALEN) == ETH_ALEN) - * pr_info("Read MAC addr from EEPROM: %pM\n", mac_addr); - * } - * - * This function pointer and context can now be set up in at24_platform_data. */ struct at24_platform_data { diff --git a/trunk/include/linux/ioport.h b/trunk/include/linux/ioport.h index e885ba23de70..9d57a71775b5 100644 --- a/trunk/include/linux/ioport.h +++ b/trunk/include/linux/ioport.h @@ -23,6 +23,12 @@ struct resource { struct resource *parent, *sibling, *child; }; +struct resource_list { + struct resource_list *next; + struct resource *res; + struct pci_dev *dev; +}; + /* * IO resources have these defined flags. */ diff --git a/trunk/include/linux/key.h b/trunk/include/linux/key.h index 96933b1e5d24..1600ebf717a7 100644 --- a/trunk/include/linux/key.h +++ b/trunk/include/linux/key.h @@ -277,8 +277,6 @@ static inline key_serial_t key_serial(const struct key *key) return key ? key->serial : 0; } -extern void key_set_timeout(struct key *, unsigned); - /** * key_is_instantiated - Determine if a key has been positively instantiated * @key: The key to check. 
diff --git a/trunk/include/linux/lockd/bind.h b/trunk/include/linux/lockd/bind.h index 11a966e5f829..fbc48f898521 100644 --- a/trunk/include/linux/lockd/bind.h +++ b/trunk/include/linux/lockd/bind.h @@ -42,7 +42,6 @@ struct nlmclnt_initdata { unsigned short protocol; u32 nfs_version; int noresvport; - struct net *net; }; /* diff --git a/trunk/include/linux/lockd/lockd.h b/trunk/include/linux/lockd/lockd.h index f04ce6ac6d04..88a114fce477 100644 --- a/trunk/include/linux/lockd/lockd.h +++ b/trunk/include/linux/lockd/lockd.h @@ -67,7 +67,6 @@ struct nlm_host { struct list_head h_reclaim; /* Locks in RECLAIM state */ struct nsm_handle *h_nsmhandle; /* NSM status handle */ char *h_addrbuf; /* address eyecatcher */ - struct net *net; /* host net */ }; /* @@ -189,7 +188,7 @@ struct nlm_block { /* * Global variables */ -extern const struct rpc_program nlm_program; +extern struct rpc_program nlm_program; extern struct svc_procedure nlmsvc_procedures[]; #ifdef CONFIG_LOCKD_V4 extern struct svc_procedure nlmsvc_procedures4[]; @@ -223,8 +222,7 @@ struct nlm_host *nlmclnt_lookup_host(const struct sockaddr *sap, const unsigned short protocol, const u32 version, const char *hostname, - int noresvport, - struct net *net); + int noresvport); void nlmclnt_release_host(struct nlm_host *); struct nlm_host *nlmsvc_lookup_host(const struct svc_rqst *rqstp, const char *hostname, @@ -234,7 +232,6 @@ struct rpc_clnt * nlm_bind_host(struct nlm_host *); void nlm_rebind_host(struct nlm_host *); struct nlm_host * nlm_get_host(struct nlm_host *); void nlm_shutdown_hosts(void); -void nlm_shutdown_hosts_net(struct net *net); void nlm_host_rebooted(const struct nlm_reboot *); /* diff --git a/trunk/include/linux/lockd/xdr4.h b/trunk/include/linux/lockd/xdr4.h index e58c88b52ce1..7353821341ed 100644 --- a/trunk/include/linux/lockd/xdr4.h +++ b/trunk/include/linux/lockd/xdr4.h @@ -42,6 +42,6 @@ int nlmclt_encode_lockargs(struct rpc_rqst *, u32 *, struct nlm_args *); int nlmclt_encode_cancargs(struct 
rpc_rqst *, u32 *, struct nlm_args *); int nlmclt_encode_unlockargs(struct rpc_rqst *, u32 *, struct nlm_args *); */ -extern const struct rpc_version nlm_version4; +extern struct rpc_version nlm_version4; #endif /* LOCKD_XDR4_H */ diff --git a/trunk/include/linux/nfs.h b/trunk/include/linux/nfs.h index 6d1fb63f5922..8c6ee44914cb 100644 --- a/trunk/include/linux/nfs.h +++ b/trunk/include/linux/nfs.h @@ -29,7 +29,7 @@ #define NFS_MNT_VERSION 1 #define NFS_MNT3_VERSION 3 -#define NFS_PIPE_DIRNAME "nfs" +#define NFS_PIPE_DIRNAME "/nfs" /* * NFS stats. The good thing with these values is that NFSv3 errors are diff --git a/trunk/include/linux/nfs4.h b/trunk/include/linux/nfs4.h index 834df8bf08b6..32345c2805c0 100644 --- a/trunk/include/linux/nfs4.h +++ b/trunk/include/linux/nfs4.h @@ -183,12 +183,15 @@ struct nfs4_acl { typedef struct { char data[NFS4_VERIFIER_SIZE]; } nfs4_verifier; -struct nfs_stateid4 { +struct nfs41_stateid { __be32 seqid; char other[NFS4_STATEID_OTHER_SIZE]; } __attribute__ ((packed)); -typedef struct nfs_stateid4 nfs4_stateid; +typedef union { + char data[NFS4_STATEID_SIZE]; + struct nfs41_stateid stateid; +} nfs4_stateid; enum nfs_opnum4 { OP_ACCESS = 3, diff --git a/trunk/include/linux/nfs_fs.h b/trunk/include/linux/nfs_fs.h index 52a1bdb4ee2b..8c29950d2fa5 100644 --- a/trunk/include/linux/nfs_fs.h +++ b/trunk/include/linux/nfs_fs.h @@ -38,13 +38,6 @@ #ifdef __KERNEL__ -/* - * Enable dprintk() debugging support for nfs client. - */ -#ifdef CONFIG_NFS_DEBUG -# define NFS_DEBUG -#endif - #include #include #include @@ -178,9 +171,13 @@ struct nfs_inode { */ __be32 cookieverf[2]; + /* + * This is the list of dirty unwritten pages. 
+ */ + struct radix_tree_root nfs_page_tree; + unsigned long npages; unsigned long ncommit; - struct list_head commit_list; /* Open contexts for shared mmap writes */ struct list_head open_files; @@ -398,29 +395,6 @@ static inline void nfs_free_fhandle(const struct nfs_fh *fh) kfree(fh); } -#ifdef NFS_DEBUG -extern u32 _nfs_display_fhandle_hash(const struct nfs_fh *fh); -static inline u32 nfs_display_fhandle_hash(const struct nfs_fh *fh) -{ - return _nfs_display_fhandle_hash(fh); -} -extern void _nfs_display_fhandle(const struct nfs_fh *fh, const char *caption); -#define nfs_display_fhandle(fh, caption) \ - do { \ - if (unlikely(nfs_debug & NFSDBG_FACILITY)) \ - _nfs_display_fhandle(fh, caption); \ - } while (0) -#else -static inline u32 nfs_display_fhandle_hash(const struct nfs_fh *fh) -{ - return 0; -} -static inline void nfs_display_fhandle(const struct nfs_fh *fh, - const char *caption) -{ -} -#endif - /* * linux/fs/nfs/nfsroot.c */ @@ -658,13 +632,19 @@ nfs_fileid_to_ino_t(u64 fileid) #ifdef __KERNEL__ +/* + * Enable debugging support for nfs client. + * Requires RPC_DEBUG. 
+ */ +#ifdef RPC_DEBUG +# define NFS_DEBUG +#endif + # undef ifdebug # ifdef NFS_DEBUG # define ifdebug(fac) if (unlikely(nfs_debug & NFSDBG_##fac)) -# define NFS_IFDEBUG(x) x # else # define ifdebug(fac) if (0) -# define NFS_IFDEBUG(x) # endif #endif /* __KERNEL */ diff --git a/trunk/include/linux/nfs_fs_i.h b/trunk/include/linux/nfs_fs_i.h index a5c50d97341e..861730275ba0 100644 --- a/trunk/include/linux/nfs_fs_i.h +++ b/trunk/include/linux/nfs_fs_i.h @@ -1,6 +1,10 @@ #ifndef _NFS_FS_I #define _NFS_FS_I +#include +#include +#include + struct nlm_lockowner; /* diff --git a/trunk/include/linux/nfs_fs_sb.h b/trunk/include/linux/nfs_fs_sb.h index 7073fc74481c..ba4d7656ecfd 100644 --- a/trunk/include/linux/nfs_fs_sb.h +++ b/trunk/include/linux/nfs_fs_sb.h @@ -3,7 +3,6 @@ #include #include -#include #include #include #include @@ -18,7 +17,6 @@ struct nfs4_sequence_res; struct nfs_server; struct nfs4_minor_version_ops; struct server_scope; -struct nfs41_impl_id; /* * The nfs_client identifies our client state to the server. 
@@ -87,8 +85,6 @@ struct nfs_client { #endif struct server_scope *server_scope; /* from exchange_id */ - struct nfs41_impl_id *impl_id; /* from exchange_id */ - struct net *net; }; /* @@ -148,18 +144,15 @@ struct nfs_server { u32 acl_bitmask; /* V4 bitmask representing the ACEs that are supported on this filesystem */ - u32 fh_expire_type; /* V4 bitmask representing file - handle volatility type for - this filesystem */ struct pnfs_layoutdriver_type *pnfs_curr_ld; /* Active layout driver */ struct rpc_wait_queue roc_rpcwaitq; void *pnfs_ld_data; /* per mount point data */ /* the following fields are protected by nfs_client->cl_lock */ struct rb_root state_owners; + struct rb_root openowner_id; + struct rb_root lockowner_id; #endif - struct ida openowner_id; - struct ida lockowner_id; struct list_head state_owners_lru; struct list_head layouts; struct list_head delegations; @@ -195,23 +188,21 @@ struct nfs_server { /* maximum number of slots to use */ -#define NFS4_DEF_SLOT_TABLE_SIZE (16U) -#define NFS4_MAX_SLOT_TABLE (256U) -#define NFS4_NO_SLOT ((u32)-1) +#define NFS4_MAX_SLOT_TABLE RPC_MAX_SLOT_TABLE #if defined(CONFIG_NFS_V4) /* Sessions */ -#define SLOT_TABLE_SZ DIV_ROUND_UP(NFS4_MAX_SLOT_TABLE, 8*sizeof(long)) +#define SLOT_TABLE_SZ (NFS4_MAX_SLOT_TABLE/(8*sizeof(long))) struct nfs4_slot_table { struct nfs4_slot *slots; /* seqid per slot */ unsigned long used_slots[SLOT_TABLE_SZ]; /* used/unused bitmap */ spinlock_t slot_tbl_lock; struct rpc_wait_queue slot_tbl_waitq; /* allocators may wait here */ - u32 max_slots; /* # slots in table */ - u32 highest_used_slotid; /* sent to server on each SEQ. + int max_slots; /* # slots in table */ + int highest_used_slotid; /* sent to server on each SEQ. 
* op for dynamic resizing */ - u32 target_max_slots; /* Set by CB_RECALL_SLOT as + int target_max_slots; /* Set by CB_RECALL_SLOT as * the new max_slots */ struct completion complete; }; diff --git a/trunk/include/linux/nfs_idmap.h b/trunk/include/linux/nfs_idmap.h index 7eed2012d288..308c18877018 100644 --- a/trunk/include/linux/nfs_idmap.h +++ b/trunk/include/linux/nfs_idmap.h @@ -69,22 +69,36 @@ struct nfs_server; struct nfs_fattr; struct nfs4_string; -#ifdef CONFIG_NFS_V4 +#ifdef CONFIG_NFS_USE_NEW_IDMAPPER + int nfs_idmap_init(void); void nfs_idmap_quit(void); -#else + +static inline int nfs_idmap_new(struct nfs_client *clp) +{ + return 0; +} + +static inline void nfs_idmap_delete(struct nfs_client *clp) +{ +} + +#else /* CONFIG_NFS_USE_NEW_IDMAPPER not set */ + static inline int nfs_idmap_init(void) { return 0; } static inline void nfs_idmap_quit(void) -{} -#endif +{ +} int nfs_idmap_new(struct nfs_client *); void nfs_idmap_delete(struct nfs_client *); +#endif /* CONFIG_NFS_USE_NEW_IDMAPPER */ + void nfs_fattr_init_names(struct nfs_fattr *fattr, struct nfs4_string *owner_name, struct nfs4_string *group_name); diff --git a/trunk/include/linux/nfs_iostat.h b/trunk/include/linux/nfs_iostat.h index 9dcbbe9a51fb..8866bb3502ee 100644 --- a/trunk/include/linux/nfs_iostat.h +++ b/trunk/include/linux/nfs_iostat.h @@ -21,7 +21,7 @@ #ifndef _LINUX_NFS_IOSTAT #define _LINUX_NFS_IOSTAT -#define NFS_IOSTAT_VERS "1.1" +#define NFS_IOSTAT_VERS "1.0" /* * NFS byte counters diff --git a/trunk/include/linux/nfs_page.h b/trunk/include/linux/nfs_page.h index eac30d6bec17..ab465fe8c3d6 100644 --- a/trunk/include/linux/nfs_page.h +++ b/trunk/include/linux/nfs_page.h @@ -18,6 +18,12 @@ #include +/* + * Valid flags for the radix tree + */ +#define NFS_PAGE_TAG_LOCKED 0 +#define NFS_PAGE_TAG_COMMIT 1 + /* * Valid flags for a dirty buffer */ @@ -27,13 +33,16 @@ enum { PG_CLEAN, PG_NEED_COMMIT, PG_NEED_RESCHED, + PG_PNFS_COMMIT, PG_PARTIAL_READ_FAILED, - PG_COMMIT_TO_DS, }; struct 
nfs_inode; struct nfs_page { - struct list_head wb_list; /* Defines state of page: */ + union { + struct list_head wb_list; /* Defines state of page: */ + struct pnfs_layout_segment *wb_commit_lseg; /* Used when PG_PNFS_COMMIT set */ + }; struct page *wb_page; /* page to read in/write out */ struct nfs_open_context *wb_context; /* File state context info */ struct nfs_lock_context *wb_lock_context; /* lock context info */ @@ -81,6 +90,8 @@ extern struct nfs_page *nfs_create_request(struct nfs_open_context *ctx, extern void nfs_release_request(struct nfs_page *req); +extern int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *dst, + pgoff_t idx_start, unsigned int npages, int tag); extern void nfs_pageio_init(struct nfs_pageio_descriptor *desc, struct inode *inode, const struct nfs_pageio_ops *pg_ops, @@ -95,6 +106,8 @@ extern bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, struct nfs_page *req); extern int nfs_wait_on_request(struct nfs_page *); extern void nfs_unlock_request(struct nfs_page *req); +extern int nfs_set_page_tag_locked(struct nfs_page *req); +extern void nfs_clear_page_tag_locked(struct nfs_page *req); /* * Lock the page of an asynchronous request without getting a new reference @@ -105,16 +118,6 @@ nfs_lock_request_dontget(struct nfs_page *req) return !test_and_set_bit(PG_BUSY, &req->wb_flags); } -static inline int -nfs_lock_request(struct nfs_page *req) -{ - if (test_and_set_bit(PG_BUSY, &req->wb_flags)) - return 0; - kref_get(&req->wb_kref); - return 1; -} - - /** * nfs_list_add_request - Insert a request into a list * @req: request diff --git a/trunk/include/linux/nfs_xdr.h b/trunk/include/linux/nfs_xdr.h index bfd0d1bf6707..d6ba9a12591e 100644 --- a/trunk/include/linux/nfs_xdr.h +++ b/trunk/include/linux/nfs_xdr.h @@ -2,6 +2,7 @@ #define _LINUX_NFS_XDR_H #include +#include #include /* @@ -88,12 +89,11 @@ struct nfs_fattr { #define NFS_ATTR_FATTR_PRECTIME (1U << 16) #define NFS_ATTR_FATTR_CHANGE (1U << 17) #define 
NFS_ATTR_FATTR_PRECHANGE (1U << 18) -#define NFS_ATTR_FATTR_V4_LOCATIONS (1U << 19) -#define NFS_ATTR_FATTR_V4_REFERRAL (1U << 20) -#define NFS_ATTR_FATTR_MOUNTPOINT (1U << 21) -#define NFS_ATTR_FATTR_MOUNTED_ON_FILEID (1U << 22) -#define NFS_ATTR_FATTR_OWNER_NAME (1U << 23) -#define NFS_ATTR_FATTR_GROUP_NAME (1U << 24) +#define NFS_ATTR_FATTR_V4_REFERRAL (1U << 19) /* NFSv4 referral */ +#define NFS_ATTR_FATTR_MOUNTPOINT (1U << 20) /* Treat as mountpoint */ +#define NFS_ATTR_FATTR_MOUNTED_ON_FILEID (1U << 21) +#define NFS_ATTR_FATTR_OWNER_NAME (1U << 22) +#define NFS_ATTR_FATTR_GROUP_NAME (1U << 23) #define NFS_ATTR_FATTR (NFS_ATTR_FATTR_TYPE \ | NFS_ATTR_FATTR_MODE \ @@ -182,7 +182,7 @@ struct nfs4_slot { struct nfs4_sequence_args { struct nfs4_session *sa_session; - u32 sa_slotid; + u8 sa_slotid; u8 sa_cache_this; }; @@ -977,7 +977,6 @@ struct nfs4_server_caps_res { u32 acl_bitmask; u32 has_links; u32 has_symlinks; - u32 fh_expire_type; struct nfs4_sequence_res seq_res; }; @@ -1056,6 +1055,14 @@ struct nfstime4 { }; #ifdef CONFIG_NFS_V4_1 +struct nfs_impl_id4 { + u32 domain_len; + char *domain; + u32 name_len; + char *name; + struct nfstime4 date; +}; + #define NFS4_EXCHANGE_ID_LEN (48) struct nfs41_exchange_id_args { struct nfs_client *client; @@ -1076,17 +1083,10 @@ struct server_scope { char server_scope[NFS4_OPAQUE_LIMIT]; }; -struct nfs41_impl_id { - char domain[NFS4_OPAQUE_LIMIT + 1]; - char name[NFS4_OPAQUE_LIMIT + 1]; - struct nfstime4 date; -}; - struct nfs41_exchange_id_res { struct nfs_client *client; u32 flags; struct server_scope *server_scope; - struct nfs41_impl_id *impl_id; }; struct nfs41_create_session_args { @@ -1192,27 +1192,6 @@ struct nfs_write_data { struct page *page_array[NFS_PAGEVEC_SIZE]; }; -struct nfs_unlinkdata { - struct hlist_node list; - struct nfs_removeargs args; - struct nfs_removeres res; - struct inode *dir; - struct rpc_cred *cred; - struct nfs_fattr dir_attr; -}; - -struct nfs_renamedata { - struct nfs_renameargs args; - 
struct nfs_renameres res; - struct rpc_cred *cred; - struct inode *old_dir; - struct dentry *old_dentry; - struct nfs_fattr old_fattr; - struct inode *new_dir; - struct dentry *new_dentry; - struct nfs_fattr new_fattr; -}; - struct nfs_access_entry; struct nfs_client; struct rpc_timeout; @@ -1242,12 +1221,10 @@ struct nfs_rpc_ops { struct iattr *, int, struct nfs_open_context *); int (*remove) (struct inode *, struct qstr *); void (*unlink_setup) (struct rpc_message *, struct inode *dir); - void (*unlink_rpc_prepare) (struct rpc_task *, struct nfs_unlinkdata *); int (*unlink_done) (struct rpc_task *, struct inode *); int (*rename) (struct inode *, struct qstr *, struct inode *, struct qstr *); void (*rename_setup) (struct rpc_message *msg, struct inode *dir); - void (*rename_rpc_prepare)(struct rpc_task *task, struct nfs_renamedata *); int (*rename_done) (struct rpc_task *task, struct inode *old_dir, struct inode *new_dir); int (*link) (struct inode *, struct inode *, struct qstr *); int (*symlink) (struct inode *, struct dentry *, struct page *, @@ -1267,10 +1244,8 @@ struct nfs_rpc_ops { int (*set_capabilities)(struct nfs_server *, struct nfs_fh *); int (*decode_dirent)(struct xdr_stream *, struct nfs_entry *, int); void (*read_setup) (struct nfs_read_data *, struct rpc_message *); - void (*read_rpc_prepare)(struct rpc_task *, struct nfs_read_data *); int (*read_done) (struct rpc_task *, struct nfs_read_data *); void (*write_setup) (struct nfs_write_data *, struct rpc_message *); - void (*write_rpc_prepare)(struct rpc_task *, struct nfs_write_data *); int (*write_done) (struct rpc_task *, struct nfs_write_data *); void (*commit_setup) (struct nfs_write_data *, struct rpc_message *); int (*commit_done) (struct rpc_task *, struct nfs_write_data *); @@ -1300,11 +1275,11 @@ struct nfs_rpc_ops { extern const struct nfs_rpc_ops nfs_v2_clientops; extern const struct nfs_rpc_ops nfs_v3_clientops; extern const struct nfs_rpc_ops nfs_v4_clientops; -extern const struct 
rpc_version nfs_version2; -extern const struct rpc_version nfs_version3; -extern const struct rpc_version nfs_version4; +extern struct rpc_version nfs_version2; +extern struct rpc_version nfs_version3; +extern struct rpc_version nfs_version4; -extern const struct rpc_version nfsacl_version3; -extern const struct rpc_program nfsacl_program; +extern struct rpc_version nfsacl_version3; +extern struct rpc_program nfsacl_program; #endif diff --git a/trunk/include/linux/pci.h b/trunk/include/linux/pci.h index e444f5b49118..900da5db60ee 100644 --- a/trunk/include/linux/pci.h +++ b/trunk/include/linux/pci.h @@ -299,6 +299,7 @@ struct pci_dev { */ unsigned int irq; struct resource resource[DEVICE_COUNT_RESOURCE]; /* I/O and memory regions + expansion ROMs */ + resource_size_t fw_addr[DEVICE_COUNT_RESOURCE]; /* FW-assigned addr */ /* These fields are used by common fixups */ unsigned int transparent:1; /* Transparent PCI bridge */ @@ -368,17 +369,24 @@ static inline int pci_channel_offline(struct pci_dev *pdev) return (pdev->error_state != pci_channel_io_normal); } -struct pci_host_bridge_window { - struct list_head list; - struct resource *res; /* host bridge aperture (CPU address) */ - resource_size_t offset; /* bus address + offset = CPU address */ -}; +static inline struct pci_cap_saved_state *pci_find_saved_cap( + struct pci_dev *pci_dev, char cap) +{ + struct pci_cap_saved_state *tmp; + struct hlist_node *pos; -struct pci_host_bridge { - struct list_head list; - struct pci_bus *bus; /* root bus */ - struct list_head windows; /* pci_host_bridge_windows */ -}; + hlist_for_each_entry(tmp, pos, &pci_dev->saved_cap_space, next) { + if (tmp->cap.cap_nr == cap) + return tmp; + } + return NULL; +} + +static inline void pci_add_saved_cap(struct pci_dev *pci_dev, + struct pci_cap_saved_state *new_cap) +{ + hlist_add_head(&new_cap->next, &pci_dev->saved_cap_space); +} /* * The first PCI_BRIDGE_RESOURCE_NUM PCI bus resources (those that correspond @@ -648,10 +656,6 @@ void 
pci_fixup_cardbus(struct pci_bus *); /* Generic PCI functions used internally */ -void pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region, - struct resource *res); -void pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res, - struct pci_bus_region *region); void pcibios_scan_specific_bus(int busn); extern struct pci_bus *pci_find_bus(int domain, int busnr); void pci_bus_add_devices(const struct pci_bus *bus); @@ -686,8 +690,7 @@ u8 pci_common_swizzle(struct pci_dev *dev, u8 *pinp); extern struct pci_dev *pci_dev_get(struct pci_dev *dev); extern void pci_dev_put(struct pci_dev *dev); extern void pci_remove_bus(struct pci_bus *b); -extern void __pci_remove_bus_device(struct pci_dev *dev); -extern void pci_stop_and_remove_bus_device(struct pci_dev *dev); +extern void pci_remove_bus_device(struct pci_dev *dev); extern void pci_stop_bus_device(struct pci_dev *dev); void pci_setup_cardbus(struct pci_bus *bus); extern void pci_sort_breadthfirst(void); @@ -880,7 +883,6 @@ void set_pcie_hotplug_bridge(struct pci_dev *pdev); /* Functions for PCI Hotplug drivers to use */ int pci_bus_find_capability(struct pci_bus *bus, unsigned int devfn, int cap); #ifdef CONFIG_HOTPLUG -unsigned int pci_rescan_bus_bridge_resize(struct pci_dev *bridge); unsigned int pci_rescan_bus(struct pci_bus *bus); #endif @@ -890,13 +892,13 @@ ssize_t pci_write_vpd(struct pci_dev *dev, loff_t pos, size_t count, const void int pci_vpd_truncate(struct pci_dev *dev, size_t size); /* Helper functions for low-level code (drivers/pci/setup-[bus,res].c) */ -resource_size_t pcibios_retrieve_fw_addr(struct pci_dev *dev, int idx); void pci_bus_assign_resources(const struct pci_bus *bus); void pci_bus_size_bridges(struct pci_bus *bus); int pci_claim_resource(struct pci_dev *, int); void pci_assign_unassigned_resources(void); void pci_assign_unassigned_bridge_resources(struct pci_dev *bridge); void pdev_enable_device(struct pci_dev *); +void pdev_sort_resources(struct pci_dev *, 
struct resource_list *); int pci_enable_resources(struct pci_dev *, int mask); void pci_fixup_irqs(u8 (*)(struct pci_dev *, u8 *), int (*)(const struct pci_dev *, u8, u8)); @@ -913,8 +915,6 @@ void pci_release_selected_regions(struct pci_dev *, int); /* drivers/pci/bus.c */ void pci_add_resource(struct list_head *resources, struct resource *res); -void pci_add_resource_offset(struct list_head *resources, struct resource *res, - resource_size_t offset); void pci_free_resource_list(struct list_head *resources); void pci_bus_add_resource(struct pci_bus *bus, struct resource *res, unsigned int flags); struct resource *pci_bus_resource_n(const struct pci_bus *bus, int n); @@ -960,7 +960,7 @@ void pci_unregister_driver(struct pci_driver *dev); module_driver(__pci_driver, pci_register_driver, \ pci_unregister_driver) -void pci_stop_and_remove_behind_bridge(struct pci_dev *dev); +void pci_remove_behind_bridge(struct pci_dev *dev); struct pci_driver *pci_dev_driver(const struct pci_dev *dev); int pci_add_dynid(struct pci_driver *drv, unsigned int vendor, unsigned int device, @@ -1396,10 +1396,7 @@ static inline void pci_resource_to_user(const struct pci_dev *dev, int bar, */ struct pci_fixup { - u16 vendor; /* You can use PCI_ANY_ID here of course */ - u16 device; /* You can use PCI_ANY_ID here of course */ - u32 class; /* You can use PCI_ANY_ID here too */ - unsigned int class_shift; /* should be 0, 8, 16 */ + u16 vendor, device; /* You can use PCI_ANY_ID here of course */ void (*hook)(struct pci_dev *dev); }; @@ -1414,68 +1411,30 @@ enum pci_fixup_pass { }; /* Anonymous variables would be nice... 
*/ -#define DECLARE_PCI_FIXUP_SECTION(section, name, vendor, device, class, \ - class_shift, hook) \ - static const struct pci_fixup const __pci_fixup_##name __used \ - __attribute__((__section__(#section), aligned((sizeof(void *))))) \ - = { vendor, device, class, class_shift, hook }; - -#define DECLARE_PCI_FIXUP_CLASS_EARLY(vendor, device, class, \ - class_shift, hook) \ - DECLARE_PCI_FIXUP_SECTION(.pci_fixup_early, \ - vendor##device##hook, vendor, device, class, class_shift, hook) -#define DECLARE_PCI_FIXUP_CLASS_HEADER(vendor, device, class, \ - class_shift, hook) \ - DECLARE_PCI_FIXUP_SECTION(.pci_fixup_header, \ - vendor##device##hook, vendor, device, class, class_shift, hook) -#define DECLARE_PCI_FIXUP_CLASS_FINAL(vendor, device, class, \ - class_shift, hook) \ - DECLARE_PCI_FIXUP_SECTION(.pci_fixup_final, \ - vendor##device##hook, vendor, device, class, class_shift, hook) -#define DECLARE_PCI_FIXUP_CLASS_ENABLE(vendor, device, class, \ - class_shift, hook) \ - DECLARE_PCI_FIXUP_SECTION(.pci_fixup_enable, \ - vendor##device##hook, vendor, device, class, class_shift, hook) -#define DECLARE_PCI_FIXUP_CLASS_RESUME(vendor, device, class, \ - class_shift, hook) \ - DECLARE_PCI_FIXUP_SECTION(.pci_fixup_resume, \ - resume##vendor##device##hook, vendor, device, class, \ - class_shift, hook) -#define DECLARE_PCI_FIXUP_CLASS_RESUME_EARLY(vendor, device, class, \ - class_shift, hook) \ - DECLARE_PCI_FIXUP_SECTION(.pci_fixup_resume_early, \ - resume_early##vendor##device##hook, vendor, device, \ - class, class_shift, hook) -#define DECLARE_PCI_FIXUP_CLASS_SUSPEND(vendor, device, class, \ - class_shift, hook) \ - DECLARE_PCI_FIXUP_SECTION(.pci_fixup_suspend, \ - suspend##vendor##device##hook, vendor, device, class, \ - class_shift, hook) - +#define DECLARE_PCI_FIXUP_SECTION(section, name, vendor, device, hook) \ + static const struct pci_fixup __pci_fixup_##name __used \ + __attribute__((__section__(#section))) = { vendor, device, hook }; #define 
DECLARE_PCI_FIXUP_EARLY(vendor, device, hook) \ DECLARE_PCI_FIXUP_SECTION(.pci_fixup_early, \ - vendor##device##hook, vendor, device, PCI_ANY_ID, 0, hook) + vendor##device##hook, vendor, device, hook) #define DECLARE_PCI_FIXUP_HEADER(vendor, device, hook) \ DECLARE_PCI_FIXUP_SECTION(.pci_fixup_header, \ - vendor##device##hook, vendor, device, PCI_ANY_ID, 0, hook) + vendor##device##hook, vendor, device, hook) #define DECLARE_PCI_FIXUP_FINAL(vendor, device, hook) \ DECLARE_PCI_FIXUP_SECTION(.pci_fixup_final, \ - vendor##device##hook, vendor, device, PCI_ANY_ID, 0, hook) + vendor##device##hook, vendor, device, hook) #define DECLARE_PCI_FIXUP_ENABLE(vendor, device, hook) \ DECLARE_PCI_FIXUP_SECTION(.pci_fixup_enable, \ - vendor##device##hook, vendor, device, PCI_ANY_ID, 0, hook) + vendor##device##hook, vendor, device, hook) #define DECLARE_PCI_FIXUP_RESUME(vendor, device, hook) \ DECLARE_PCI_FIXUP_SECTION(.pci_fixup_resume, \ - resume##vendor##device##hook, vendor, device, \ - PCI_ANY_ID, 0, hook) + resume##vendor##device##hook, vendor, device, hook) #define DECLARE_PCI_FIXUP_RESUME_EARLY(vendor, device, hook) \ DECLARE_PCI_FIXUP_SECTION(.pci_fixup_resume_early, \ - resume_early##vendor##device##hook, vendor, device, \ - PCI_ANY_ID, 0, hook) + resume_early##vendor##device##hook, vendor, device, hook) #define DECLARE_PCI_FIXUP_SUSPEND(vendor, device, hook) \ DECLARE_PCI_FIXUP_SECTION(.pci_fixup_suspend, \ - suspend##vendor##device##hook, vendor, device, \ - PCI_ANY_ID, 0, hook) + suspend##vendor##device##hook, vendor, device, hook) #ifdef CONFIG_PCI_QUIRKS void pci_fixup_device(enum pci_fixup_pass pass, struct pci_dev *dev); diff --git a/trunk/include/linux/pci_regs.h b/trunk/include/linux/pci_regs.h index 4b608f543412..e41a10f5ae83 100644 --- a/trunk/include/linux/pci_regs.h +++ b/trunk/include/linux/pci_regs.h @@ -391,7 +391,6 @@ #define PCI_EXP_TYPE_UPSTREAM 0x5 /* Upstream Port */ #define PCI_EXP_TYPE_DOWNSTREAM 0x6 /* Downstream Port */ #define 
PCI_EXP_TYPE_PCI_BRIDGE 0x7 /* PCI/PCI-X Bridge */ -#define PCI_EXP_TYPE_PCIE_BRIDGE 0x8 /* PCI/PCI-X to PCIE Bridge */ #define PCI_EXP_TYPE_RC_END 0x9 /* Root Complex Integrated Endpoint */ #define PCI_EXP_TYPE_RC_EC 0xa /* Root Complex Event Collector */ #define PCI_EXP_FLAGS_SLOT 0x0100 /* Slot implemented */ diff --git a/trunk/include/linux/sunrpc/auth.h b/trunk/include/linux/sunrpc/auth.h index 492a36d72829..7874a8a56638 100644 --- a/trunk/include/linux/sunrpc/auth.h +++ b/trunk/include/linux/sunrpc/auth.h @@ -99,8 +99,6 @@ struct rpc_authops { struct rpc_cred * (*lookup_cred)(struct rpc_auth *, struct auth_cred *, int); struct rpc_cred * (*crcreate)(struct rpc_auth*, struct auth_cred *, int); - int (*pipes_create)(struct rpc_auth *); - void (*pipes_destroy)(struct rpc_auth *); }; struct rpc_credops { diff --git a/trunk/include/linux/sunrpc/bc_xprt.h b/trunk/include/linux/sunrpc/bc_xprt.h index 969c0a671dbf..f7f3ce340c08 100644 --- a/trunk/include/linux/sunrpc/bc_xprt.h +++ b/trunk/include/linux/sunrpc/bc_xprt.h @@ -35,7 +35,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
struct rpc_rqst *xprt_alloc_bc_request(struct rpc_xprt *xprt); void xprt_free_bc_request(struct rpc_rqst *req); int xprt_setup_backchannel(struct rpc_xprt *, unsigned int min_reqs); -void xprt_destroy_backchannel(struct rpc_xprt *, unsigned int max_reqs); +void xprt_destroy_backchannel(struct rpc_xprt *, int max_reqs); int bc_send(struct rpc_rqst *req); /* diff --git a/trunk/include/linux/sunrpc/cache.h b/trunk/include/linux/sunrpc/cache.h index f5fd6160dbca..57531f8e5956 100644 --- a/trunk/include/linux/sunrpc/cache.h +++ b/trunk/include/linux/sunrpc/cache.h @@ -117,7 +117,6 @@ struct cache_detail { struct cache_detail_procfs procfs; struct cache_detail_pipefs pipefs; } u; - struct net *net; }; @@ -198,14 +197,11 @@ extern void cache_flush(void); extern void cache_purge(struct cache_detail *detail); #define NEVER (0x7FFFFFFF) extern void __init cache_initialize(void); +extern int cache_register(struct cache_detail *cd); extern int cache_register_net(struct cache_detail *cd, struct net *net); +extern void cache_unregister(struct cache_detail *cd); extern void cache_unregister_net(struct cache_detail *cd, struct net *net); -extern struct cache_detail *cache_create_net(struct cache_detail *tmpl, struct net *net); -extern void cache_destroy_net(struct cache_detail *cd, struct net *net); - -extern void sunrpc_init_cache_detail(struct cache_detail *cd); -extern void sunrpc_destroy_cache_detail(struct cache_detail *cd); extern int sunrpc_cache_register_pipefs(struct dentry *parent, const char *, umode_t, struct cache_detail *); extern void sunrpc_cache_unregister_pipefs(struct cache_detail *); diff --git a/trunk/include/linux/sunrpc/clnt.h b/trunk/include/linux/sunrpc/clnt.h index 523547ecfee2..2c5993a17c33 100644 --- a/trunk/include/linux/sunrpc/clnt.h +++ b/trunk/include/linux/sunrpc/clnt.h @@ -35,13 +35,14 @@ struct rpc_clnt { struct list_head cl_clients; /* Global list of clients */ struct list_head cl_tasks; /* List of tasks */ spinlock_t cl_lock; /* spinlock */ - 
struct rpc_xprt __rcu * cl_xprt; /* transport */ + struct rpc_xprt * cl_xprt; /* transport */ struct rpc_procinfo * cl_procinfo; /* procedure info */ u32 cl_prog, /* RPC program number */ cl_vers, /* RPC version number */ cl_maxproc; /* max procedure number */ - const char * cl_protname; /* protocol name */ + char * cl_server; /* server machine name */ + char * cl_protname; /* protocol name */ struct rpc_auth * cl_auth; /* authenticator */ struct rpc_stat * cl_stats; /* per-program statistics */ struct rpc_iostats * cl_metrics; /* per-client statistics */ @@ -56,11 +57,12 @@ struct rpc_clnt { int cl_nodelen; /* nodename length */ char cl_nodename[UNX_MAXNODENAME]; - struct dentry * cl_dentry; + struct path cl_path; struct rpc_clnt * cl_parent; /* Points to parent of clones */ struct rpc_rtt cl_rtt_default; struct rpc_timeout cl_timeout_default; - const struct rpc_program *cl_program; + struct rpc_program * cl_program; + char cl_inline_name[32]; char *cl_principal; /* target to authenticate to */ }; @@ -69,12 +71,12 @@ struct rpc_clnt { */ #define RPC_MAXVERSION 4 struct rpc_program { - const char * name; /* protocol name */ + char * name; /* protocol name */ u32 number; /* program number */ unsigned int nrvers; /* number of versions */ - const struct rpc_version ** version; /* version array */ + struct rpc_version ** version; /* version array */ struct rpc_stat * stats; /* statistics */ - const char * pipe_dir_name; /* path to rpc_pipefs dir */ + char * pipe_dir_name; /* path to rpc_pipefs dir */ }; struct rpc_version { @@ -95,7 +97,7 @@ struct rpc_procinfo { unsigned int p_count; /* call count */ unsigned int p_timer; /* Which RTT timer to use */ u32 p_statidx; /* Which procedure to account */ - const char * p_name; /* name of procedure */ + char * p_name; /* name of procedure */ }; #ifdef __KERNEL__ @@ -107,8 +109,8 @@ struct rpc_create_args { size_t addrsize; struct sockaddr *saddress; const struct rpc_timeout *timeout; - const char *servername; - const struct 
rpc_program *program; + char *servername; + struct rpc_program *program; u32 prognumber; /* overrides program->number */ u32 version; rpc_authflavor_t authflavor; @@ -127,18 +129,17 @@ struct rpc_create_args { struct rpc_clnt *rpc_create(struct rpc_create_args *args); struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *, - const struct rpc_program *, u32); + struct rpc_program *, u32); void rpc_task_reset_client(struct rpc_task *task, struct rpc_clnt *clnt); struct rpc_clnt *rpc_clone_client(struct rpc_clnt *); void rpc_shutdown_client(struct rpc_clnt *); void rpc_release_client(struct rpc_clnt *); void rpc_task_release_client(struct rpc_task *); -int rpcb_create_local(struct net *); -void rpcb_put_local(struct net *); -int rpcb_register(struct net *, u32, u32, int, unsigned short); -int rpcb_v4_register(struct net *net, const u32 program, - const u32 version, +int rpcb_create_local(void); +void rpcb_put_local(void); +int rpcb_register(u32, u32, int, unsigned short); +int rpcb_v4_register(const u32 program, const u32 version, const struct sockaddr *address, const char *netid); void rpcb_getport_async(struct rpc_task *); @@ -155,19 +156,16 @@ struct rpc_task *rpc_call_null(struct rpc_clnt *clnt, struct rpc_cred *cred, int rpc_restart_call_prepare(struct rpc_task *); int rpc_restart_call(struct rpc_task *); void rpc_setbufsize(struct rpc_clnt *, unsigned int, unsigned int); -int rpc_protocol(struct rpc_clnt *); -struct net * rpc_net_ns(struct rpc_clnt *); size_t rpc_max_payload(struct rpc_clnt *); void rpc_force_rebind(struct rpc_clnt *); size_t rpc_peeraddr(struct rpc_clnt *, struct sockaddr *, size_t); const char *rpc_peeraddr2str(struct rpc_clnt *, enum rpc_display_format_t); -int rpc_localaddr(struct rpc_clnt *, struct sockaddr *, size_t); size_t rpc_ntop(const struct sockaddr *, char *, const size_t); -size_t rpc_pton(struct net *, const char *, const size_t, +size_t rpc_pton(const char *, const size_t, struct sockaddr *, const size_t); char * 
rpc_sockaddr2uaddr(const struct sockaddr *, gfp_t); -size_t rpc_uaddr2sockaddr(struct net *, const char *, const size_t, +size_t rpc_uaddr2sockaddr(const char *, const size_t, struct sockaddr *, const size_t); static inline unsigned short rpc_get_port(const struct sockaddr *sap) diff --git a/trunk/include/linux/sunrpc/debug.h b/trunk/include/linux/sunrpc/debug.h index a76cc20d98ce..c2786f20016f 100644 --- a/trunk/include/linux/sunrpc/debug.h +++ b/trunk/include/linux/sunrpc/debug.h @@ -31,12 +31,9 @@ /* * Enable RPC debugging/profiling. */ -#ifdef CONFIG_SUNRPC_DEBUG +#ifdef CONFIG_SYSCTL #define RPC_DEBUG #endif -#ifdef CONFIG_TRACEPOINTS -#define RPC_TRACEPOINTS -#endif /* #define RPC_PROFILE */ /* @@ -50,32 +47,15 @@ extern unsigned int nlm_debug; #endif #define dprintk(args...) dfprintk(FACILITY, ## args) -#define dprintk_rcu(args...) dfprintk_rcu(FACILITY, ## args) #undef ifdebug #ifdef RPC_DEBUG # define ifdebug(fac) if (unlikely(rpc_debug & RPCDBG_##fac)) - -# define dfprintk(fac, args...) \ - do { \ - ifdebug(fac) \ - printk(KERN_DEFAULT args); \ - } while (0) - -# define dfprintk_rcu(fac, args...) \ - do { \ - ifdebug(fac) { \ - rcu_read_lock(); \ - printk(KERN_DEFAULT args); \ - rcu_read_unlock(); \ - } \ - } while (0) - +# define dfprintk(fac, args...) do { ifdebug(fac) printk(args); } while(0) # define RPC_IFDEBUG(x) x #else # define ifdebug(fac) if (0) -# define dfprintk(fac, args...) do {} while (0) -# define dfprintk_rcu(fac, args...) do {} while (0) +# define dfprintk(fac, args...) 
do ; while (0) # define RPC_IFDEBUG(x) #endif diff --git a/trunk/include/linux/sunrpc/metrics.h b/trunk/include/linux/sunrpc/metrics.h index 1565bbe86d51..b6edbc0ea83d 100644 --- a/trunk/include/linux/sunrpc/metrics.h +++ b/trunk/include/linux/sunrpc/metrics.h @@ -74,16 +74,14 @@ struct rpc_clnt; #ifdef CONFIG_PROC_FS struct rpc_iostats * rpc_alloc_iostats(struct rpc_clnt *); -void rpc_count_iostats(const struct rpc_task *, - struct rpc_iostats *); +void rpc_count_iostats(struct rpc_task *); void rpc_print_iostats(struct seq_file *, struct rpc_clnt *); void rpc_free_iostats(struct rpc_iostats *); #else /* CONFIG_PROC_FS */ static inline struct rpc_iostats *rpc_alloc_iostats(struct rpc_clnt *clnt) { return NULL; } -static inline void rpc_count_iostats(const struct rpc_task *task, - struct rpc_iostats *stats) {} +static inline void rpc_count_iostats(struct rpc_task *task) {} static inline void rpc_print_iostats(struct seq_file *seq, struct rpc_clnt *clnt) {} static inline void rpc_free_iostats(struct rpc_iostats *stats) {} diff --git a/trunk/include/linux/sunrpc/rpc_pipe_fs.h b/trunk/include/linux/sunrpc/rpc_pipe_fs.h index a7b422b33eda..2bb03d77375a 100644 --- a/trunk/include/linux/sunrpc/rpc_pipe_fs.h +++ b/trunk/include/linux/sunrpc/rpc_pipe_fs.h @@ -21,26 +21,21 @@ struct rpc_pipe_ops { void (*destroy_msg)(struct rpc_pipe_msg *); }; -struct rpc_pipe { +struct rpc_inode { + struct inode vfs_inode; + void *private; struct list_head pipe; struct list_head in_upcall; struct list_head in_downcall; int pipelen; int nreaders; int nwriters; + int nkern_readwriters; + wait_queue_head_t waitq; #define RPC_PIPE_WAIT_FOR_OPEN 1 int flags; struct delayed_work queue_timeout; const struct rpc_pipe_ops *ops; - spinlock_t lock; - struct dentry *dentry; -}; - -struct rpc_inode { - struct inode vfs_inode; - void *private; - struct rpc_pipe *pipe; - wait_queue_head_t waitq; }; static inline struct rpc_inode * @@ -49,28 +44,9 @@ RPC_I(struct inode *inode) return container_of(inode, 
struct rpc_inode, vfs_inode); } -enum { - SUNRPC_PIPEFS_NFS_PRIO, - SUNRPC_PIPEFS_RPC_PRIO, -}; - -extern int rpc_pipefs_notifier_register(struct notifier_block *); -extern void rpc_pipefs_notifier_unregister(struct notifier_block *); - -enum { - RPC_PIPEFS_MOUNT, - RPC_PIPEFS_UMOUNT, -}; - -extern struct dentry *rpc_d_lookup_sb(const struct super_block *sb, - const unsigned char *dir_name); -extern void rpc_pipefs_init_net(struct net *net); -extern struct super_block *rpc_get_sb_net(const struct net *net); -extern void rpc_put_sb_net(const struct net *net); - extern ssize_t rpc_pipe_generic_upcall(struct file *, struct rpc_pipe_msg *, char __user *, size_t); -extern int rpc_queue_upcall(struct rpc_pipe *, struct rpc_pipe_msg *); +extern int rpc_queue_upcall(struct inode *, struct rpc_pipe_msg *); struct rpc_clnt; extern struct dentry *rpc_create_client_dir(struct dentry *, struct qstr *, struct rpc_clnt *); @@ -83,13 +59,11 @@ extern struct dentry *rpc_create_cache_dir(struct dentry *, struct cache_detail *); extern void rpc_remove_cache_dir(struct dentry *); -extern int rpc_rmdir(struct dentry *dentry); - -struct rpc_pipe *rpc_mkpipe_data(const struct rpc_pipe_ops *ops, int flags); -void rpc_destroy_pipe_data(struct rpc_pipe *pipe); -extern struct dentry *rpc_mkpipe_dentry(struct dentry *, const char *, void *, - struct rpc_pipe *); +extern struct dentry *rpc_mkpipe(struct dentry *, const char *, void *, + const struct rpc_pipe_ops *, int flags); extern int rpc_unlink(struct dentry *); +extern struct vfsmount *rpc_get_mount(void); +extern void rpc_put_mount(void); extern int register_rpc_pipefs(void); extern void unregister_rpc_pipefs(void); diff --git a/trunk/include/linux/sunrpc/sched.h b/trunk/include/linux/sunrpc/sched.h index dc0c3cc3ada3..e7756896f3ca 100644 --- a/trunk/include/linux/sunrpc/sched.h +++ b/trunk/include/linux/sunrpc/sched.h @@ -103,7 +103,6 @@ typedef void (*rpc_action)(struct rpc_task *); struct rpc_call_ops { void (*rpc_call_prepare)(struct 
rpc_task *, void *); void (*rpc_call_done)(struct rpc_task *, void *); - void (*rpc_count_stats)(struct rpc_task *, void *); void (*rpc_release)(void *); }; @@ -196,7 +195,7 @@ struct rpc_wait_queue { unsigned char nr; /* # tasks remaining for cookie */ unsigned short qlen; /* total # tasks waiting in queue */ struct rpc_timer timer_list; -#if defined(RPC_DEBUG) || defined(RPC_TRACEPOINTS) +#ifdef RPC_DEBUG const char * name; #endif }; @@ -236,9 +235,6 @@ void rpc_wake_up_queued_task(struct rpc_wait_queue *, struct rpc_task *); void rpc_wake_up(struct rpc_wait_queue *); struct rpc_task *rpc_wake_up_next(struct rpc_wait_queue *); -struct rpc_task *rpc_wake_up_first(struct rpc_wait_queue *, - bool (*)(struct rpc_task *, void *), - void *); void rpc_wake_up_status(struct rpc_wait_queue *, int); int rpc_queue_empty(struct rpc_wait_queue *); void rpc_delay(struct rpc_task *, unsigned long); @@ -248,8 +244,7 @@ int rpciod_up(void); void rpciod_down(void); int __rpc_wait_for_completion_task(struct rpc_task *task, int (*)(void *)); #ifdef RPC_DEBUG -struct net; -void rpc_show_tasks(struct net *); +void rpc_show_tasks(void); #endif int rpc_init_mempool(void); void rpc_destroy_mempool(void); @@ -271,22 +266,11 @@ static inline int rpc_task_has_priority(struct rpc_task *task, unsigned char pri return (task->tk_priority + RPC_PRIORITY_LOW == prio); } -#if defined(RPC_DEBUG) || defined (RPC_TRACEPOINTS) -static inline const char * rpc_qname(const struct rpc_wait_queue *q) +#ifdef RPC_DEBUG +static inline const char * rpc_qname(struct rpc_wait_queue *q) { return ((q && q->name) ? 
q->name : "unknown"); } - -static inline void rpc_assign_waitqueue_name(struct rpc_wait_queue *q, - const char *name) -{ - q->name = name; -} -#else -static inline void rpc_assign_waitqueue_name(struct rpc_wait_queue *q, - const char *name) -{ -} #endif #endif /* _LINUX_SUNRPC_SCHED_H_ */ diff --git a/trunk/include/linux/sunrpc/stats.h b/trunk/include/linux/sunrpc/stats.h index edc64219f92b..680471d1f28a 100644 --- a/trunk/include/linux/sunrpc/stats.h +++ b/trunk/include/linux/sunrpc/stats.h @@ -12,7 +12,7 @@ #include struct rpc_stat { - const struct rpc_program *program; + struct rpc_program * program; unsigned int netcnt, netudpcnt, @@ -58,24 +58,24 @@ void rpc_modcount(struct inode *, int); #endif #ifdef CONFIG_PROC_FS -struct proc_dir_entry * rpc_proc_register(struct net *,struct rpc_stat *); -void rpc_proc_unregister(struct net *,const char *); -void rpc_proc_zero(const struct rpc_program *); -struct proc_dir_entry * svc_proc_register(struct net *, struct svc_stat *, +struct proc_dir_entry * rpc_proc_register(struct rpc_stat *); +void rpc_proc_unregister(const char *); +void rpc_proc_zero(struct rpc_program *); +struct proc_dir_entry * svc_proc_register(struct svc_stat *, const struct file_operations *); -void svc_proc_unregister(struct net *, const char *); +void svc_proc_unregister(const char *); void svc_seq_show(struct seq_file *, const struct svc_stat *); #else -static inline struct proc_dir_entry *rpc_proc_register(struct net *net, struct rpc_stat *s) { return NULL; } -static inline void rpc_proc_unregister(struct net *net, const char *p) {} -static inline void rpc_proc_zero(const struct rpc_program *p) {} +static inline struct proc_dir_entry *rpc_proc_register(struct rpc_stat *s) { return NULL; } +static inline void rpc_proc_unregister(const char *p) {} +static inline void rpc_proc_zero(struct rpc_program *p) {} -static inline struct proc_dir_entry *svc_proc_register(struct net *net, struct svc_stat *s, +static inline struct proc_dir_entry 
*svc_proc_register(struct svc_stat *s, const struct file_operations *f) { return NULL; } -static inline void svc_proc_unregister(struct net *net, const char *p) {} +static inline void svc_proc_unregister(const char *p) {} static inline void svc_seq_show(struct seq_file *seq, const struct svc_stat *st) {} diff --git a/trunk/include/linux/sunrpc/svc.h b/trunk/include/linux/sunrpc/svc.h index 51b29ac45a8e..35b37b1e9299 100644 --- a/trunk/include/linux/sunrpc/svc.h +++ b/trunk/include/linux/sunrpc/svc.h @@ -84,8 +84,7 @@ struct svc_serv { unsigned int sv_nrpools; /* number of thread pools */ struct svc_pool * sv_pools; /* array of thread pools */ - void (*sv_shutdown)(struct svc_serv *serv, - struct net *net); + void (*sv_shutdown)(struct svc_serv *serv); /* Callback to use when last thread * exits. */ @@ -414,24 +413,22 @@ struct svc_procedure { /* * Function prototypes. */ -int svc_rpcb_setup(struct svc_serv *serv, struct net *net); -void svc_rpcb_cleanup(struct svc_serv *serv, struct net *net); +void svc_rpcb_cleanup(struct svc_serv *serv); struct svc_serv *svc_create(struct svc_program *, unsigned int, - void (*shutdown)(struct svc_serv *, struct net *net)); + void (*shutdown)(struct svc_serv *)); struct svc_rqst *svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node); void svc_exit_thread(struct svc_rqst *); struct svc_serv * svc_create_pooled(struct svc_program *, unsigned int, - void (*shutdown)(struct svc_serv *, struct net *net), + void (*shutdown)(struct svc_serv *), svc_thread_fn, struct module *); int svc_set_num_threads(struct svc_serv *, struct svc_pool *, int); int svc_pool_stats_open(struct svc_serv *serv, struct file *file); void svc_destroy(struct svc_serv *); -void svc_shutdown_net(struct svc_serv *, struct net *); int svc_process(struct svc_rqst *); int bc_svc_process(struct svc_serv *, struct rpc_rqst *, struct svc_rqst *); -int svc_register(const struct svc_serv *, struct net *, const int, +int svc_register(const struct 
svc_serv *, const int, const unsigned short, const unsigned short); void svc_wake_up(struct svc_serv *); diff --git a/trunk/include/linux/sunrpc/svc_xprt.h b/trunk/include/linux/sunrpc/svc_xprt.h index b3f64b12f141..dfa900948af7 100644 --- a/trunk/include/linux/sunrpc/svc_xprt.h +++ b/trunk/include/linux/sunrpc/svc_xprt.h @@ -121,8 +121,7 @@ void svc_close_xprt(struct svc_xprt *xprt); int svc_port_is_privileged(struct sockaddr *sin); int svc_print_xprts(char *buf, int maxlen); struct svc_xprt *svc_find_xprt(struct svc_serv *serv, const char *xcl_name, - struct net *net, const sa_family_t af, - const unsigned short port); + const sa_family_t af, const unsigned short port); int svc_xprt_names(struct svc_serv *serv, char *buf, const int buflen); static inline void svc_xprt_get(struct svc_xprt *xprt) diff --git a/trunk/include/linux/sunrpc/svcauth.h b/trunk/include/linux/sunrpc/svcauth.h index 548790e9113b..25d333c1b571 100644 --- a/trunk/include/linux/sunrpc/svcauth.h +++ b/trunk/include/linux/sunrpc/svcauth.h @@ -135,9 +135,6 @@ extern void svcauth_unix_purge(void); extern void svcauth_unix_info_release(struct svc_xprt *xpt); extern int svcauth_unix_set_client(struct svc_rqst *rqstp); -extern int unix_gid_cache_create(struct net *net); -extern void unix_gid_cache_destroy(struct net *net); - static inline unsigned long hash_str(char *name, int bits) { unsigned long hash = 0; diff --git a/trunk/include/linux/sunrpc/svcauth_gss.h b/trunk/include/linux/sunrpc/svcauth_gss.h index 7c32daa025eb..83bbee3f089c 100644 --- a/trunk/include/linux/sunrpc/svcauth_gss.h +++ b/trunk/include/linux/sunrpc/svcauth_gss.h @@ -18,8 +18,6 @@ int gss_svc_init(void); void gss_svc_shutdown(void); -int gss_svc_init_net(struct net *net); -void gss_svc_shutdown_net(struct net *net); int svcauth_gss_register_pseudoflavor(u32 pseudoflavor, char * name); u32 svcauth_gss_flavor(struct auth_domain *dom); char *svc_gss_principal(struct svc_rqst *); diff --git a/trunk/include/linux/sunrpc/svcsock.h 
b/trunk/include/linux/sunrpc/svcsock.h index cb4ac69e1f33..c84e9741cb2a 100644 --- a/trunk/include/linux/sunrpc/svcsock.h +++ b/trunk/include/linux/sunrpc/svcsock.h @@ -34,7 +34,7 @@ struct svc_sock { /* * Function prototypes. */ -void svc_close_net(struct svc_serv *, struct net *); +void svc_close_all(struct svc_serv *); int svc_recv(struct svc_rqst *, long); int svc_send(struct svc_rqst *); void svc_drop(struct svc_rqst *); diff --git a/trunk/include/linux/sunrpc/xprt.h b/trunk/include/linux/sunrpc/xprt.h index 77d278defa70..15518a152ac3 100644 --- a/trunk/include/linux/sunrpc/xprt.h +++ b/trunk/include/linux/sunrpc/xprt.h @@ -21,8 +21,8 @@ #define RPC_MIN_SLOT_TABLE (2U) #define RPC_DEF_SLOT_TABLE (16U) +#define RPC_MAX_SLOT_TABLE (128U) #define RPC_MAX_SLOT_TABLE_LIMIT (65536U) -#define RPC_MAX_SLOT_TABLE RPC_MAX_SLOT_TABLE_LIMIT /* * This describes a timeout strategy @@ -219,17 +219,13 @@ struct rpc_xprt { connect_time, /* jiffies waiting for connect */ sends, /* how many complete requests */ recvs, /* how many complete requests */ - bad_xids, /* lookup_rqst didn't find XID */ - max_slots; /* max rpc_slots used */ + bad_xids; /* lookup_rqst didn't find XID */ unsigned long long req_u, /* average requests on the wire */ - bklog_u, /* backlog queue utilization */ - sending_u, /* send q utilization */ - pending_u; /* pend q utilization */ + bklog_u; /* backlog queue utilization */ } stat; struct net *xprt_net; - const char *servername; const char *address_strings[RPC_DISPLAY_MAX]; }; @@ -259,7 +255,6 @@ struct xprt_create { struct sockaddr * srcaddr; /* optional local address */ struct sockaddr * dstaddr; /* remote peer address */ size_t addrlen; - const char *servername; struct svc_xprt *bc_xprt; /* NFSv4.1 backchannel */ }; diff --git a/trunk/include/linux/sunrpc/xprtsock.h b/trunk/include/linux/sunrpc/xprtsock.h index 1ad36cc25b2e..3f14a02e9cc0 100644 --- a/trunk/include/linux/sunrpc/xprtsock.h +++ b/trunk/include/linux/sunrpc/xprtsock.h @@ -12,6 +12,18 @@ int 
init_socket_xprt(void); void cleanup_socket_xprt(void); +/* + * RPC slot table sizes for UDP, TCP transports + */ +extern unsigned int xprt_udp_slot_table_entries; +extern unsigned int xprt_tcp_slot_table_entries; + +/* + * Parameters for choosing a free port + */ +extern unsigned int xprt_min_resvport; +extern unsigned int xprt_max_resvport; + #define RPC_MIN_RESVPORT (1U) #define RPC_MAX_RESVPORT (65535U) #define RPC_DEF_MIN_RESVPORT (665U) diff --git a/trunk/include/trace/events/sunrpc.h b/trunk/include/trace/events/sunrpc.h deleted file mode 100644 index 43be87d5dd58..000000000000 --- a/trunk/include/trace/events/sunrpc.h +++ /dev/null @@ -1,177 +0,0 @@ -#undef TRACE_SYSTEM -#define TRACE_SYSTEM sunrpc - -#if !defined(_TRACE_SUNRPC_H) || defined(TRACE_HEADER_MULTI_READ) -#define _TRACE_SUNRPC_H - -#include -#include -#include - -DECLARE_EVENT_CLASS(rpc_task_status, - - TP_PROTO(struct rpc_task *task), - - TP_ARGS(task), - - TP_STRUCT__entry( - __field(const struct rpc_task *, task) - __field(const struct rpc_clnt *, clnt) - __field(int, status) - ), - - TP_fast_assign( - __entry->task = task; - __entry->clnt = task->tk_client; - __entry->status = task->tk_status; - ), - - TP_printk("task:%p@%p, status %d",__entry->task, __entry->clnt, __entry->status) -); - -DEFINE_EVENT(rpc_task_status, rpc_call_status, - TP_PROTO(struct rpc_task *task), - - TP_ARGS(task) -); - -DEFINE_EVENT(rpc_task_status, rpc_bind_status, - TP_PROTO(struct rpc_task *task), - - TP_ARGS(task) -); - -TRACE_EVENT(rpc_connect_status, - TP_PROTO(struct rpc_task *task, int status), - - TP_ARGS(task, status), - - TP_STRUCT__entry( - __field(const struct rpc_task *, task) - __field(const struct rpc_clnt *, clnt) - __field(int, status) - ), - - TP_fast_assign( - __entry->task = task; - __entry->clnt = task->tk_client; - __entry->status = status; - ), - - TP_printk("task:%p@%p, status %d",__entry->task, __entry->clnt, __entry->status) -); - -DECLARE_EVENT_CLASS(rpc_task_running, - - TP_PROTO(const 
struct rpc_clnt *clnt, const struct rpc_task *task, const void *action), - - TP_ARGS(clnt, task, action), - - TP_STRUCT__entry( - __field(const struct rpc_clnt *, clnt) - __field(const struct rpc_task *, task) - __field(const void *, action) - __field(unsigned long, runstate) - __field(int, status) - __field(unsigned short, flags) - ), - - TP_fast_assign( - __entry->clnt = clnt; - __entry->task = task; - __entry->action = action; - __entry->runstate = task->tk_runstate; - __entry->status = task->tk_status; - __entry->flags = task->tk_flags; - ), - - TP_printk("task:%p@%p flags=%4.4x state=%4.4lx status=%d action=%pf", - __entry->task, - __entry->clnt, - __entry->flags, - __entry->runstate, - __entry->status, - __entry->action - ) -); - -DEFINE_EVENT(rpc_task_running, rpc_task_begin, - - TP_PROTO(const struct rpc_clnt *clnt, const struct rpc_task *task, const void *action), - - TP_ARGS(clnt, task, action) - -); - -DEFINE_EVENT(rpc_task_running, rpc_task_run_action, - - TP_PROTO(const struct rpc_clnt *clnt, const struct rpc_task *task, const void *action), - - TP_ARGS(clnt, task, action) - -); - -DEFINE_EVENT(rpc_task_running, rpc_task_complete, - - TP_PROTO(const struct rpc_clnt *clnt, const struct rpc_task *task, const void *action), - - TP_ARGS(clnt, task, action) - -); - -DECLARE_EVENT_CLASS(rpc_task_queued, - - TP_PROTO(const struct rpc_clnt *clnt, const struct rpc_task *task, const struct rpc_wait_queue *q), - - TP_ARGS(clnt, task, q), - - TP_STRUCT__entry( - __field(const struct rpc_clnt *, clnt) - __field(const struct rpc_task *, task) - __field(unsigned long, timeout) - __field(unsigned long, runstate) - __field(int, status) - __field(unsigned short, flags) - __string(q_name, rpc_qname(q)) - ), - - TP_fast_assign( - __entry->clnt = clnt; - __entry->task = task; - __entry->timeout = task->tk_timeout; - __entry->runstate = task->tk_runstate; - __entry->status = task->tk_status; - __entry->flags = task->tk_flags; - __assign_str(q_name, rpc_qname(q)); - ), - - 
TP_printk("task:%p@%p flags=%4.4x state=%4.4lx status=%d timeout=%lu queue=%s", - __entry->task, - __entry->clnt, - __entry->flags, - __entry->runstate, - __entry->status, - __entry->timeout, - __get_str(q_name) - ) -); - -DEFINE_EVENT(rpc_task_queued, rpc_task_sleep, - - TP_PROTO(const struct rpc_clnt *clnt, const struct rpc_task *task, const struct rpc_wait_queue *q), - - TP_ARGS(clnt, task, q) - -); - -DEFINE_EVENT(rpc_task_queued, rpc_task_wakeup, - - TP_PROTO(const struct rpc_clnt *clnt, const struct rpc_task *task, const struct rpc_wait_queue *q), - - TP_ARGS(clnt, task, q) - -); - -#endif /* _TRACE_SUNRPC_H */ - -#include diff --git a/trunk/kernel/debug/debug_core.c b/trunk/kernel/debug/debug_core.c index 3f88a45e6f0a..0d7c08784efb 100644 --- a/trunk/kernel/debug/debug_core.c +++ b/trunk/kernel/debug/debug_core.c @@ -41,7 +41,6 @@ #include #include #include -#include #include #include #include @@ -76,8 +75,6 @@ static int exception_level; struct kgdb_io *dbg_io_ops; static DEFINE_SPINLOCK(kgdb_registration_lock); -/* Action for the reboot notifiter, a global allow kdb to change it */ -static int kgdbreboot; /* kgdb console driver is loaded */ static int kgdb_con_registered; /* determine if kgdb console output should be used */ @@ -99,7 +96,6 @@ static int __init opt_kgdb_con(char *str) early_param("kgdbcon", opt_kgdb_con); module_param(kgdb_use_con, int, 0644); -module_param(kgdbreboot, int, 0644); /* * Holds information about breakpoints in a kernel. These breakpoints are @@ -788,33 +784,6 @@ void __init dbg_late_init(void) kdb_init(KDB_INIT_FULL); } -static int -dbg_notify_reboot(struct notifier_block *this, unsigned long code, void *x) -{ - /* - * Take the following action on reboot notify depending on value: - * 1 == Enter debugger - * 0 == [the default] detatch debug client - * -1 == Do nothing... 
and use this until the board resets - */ - switch (kgdbreboot) { - case 1: - kgdb_breakpoint(); - case -1: - goto done; - } - if (!dbg_kdb_mode) - gdbstub_exit(code); -done: - return NOTIFY_DONE; -} - -static struct notifier_block dbg_reboot_notifier = { - .notifier_call = dbg_notify_reboot, - .next = NULL, - .priority = INT_MAX, -}; - static void kgdb_register_callbacks(void) { if (!kgdb_io_module_registered) { @@ -822,7 +791,6 @@ static void kgdb_register_callbacks(void) kgdb_arch_init(); if (!dbg_is_early) kgdb_arch_late(); - register_reboot_notifier(&dbg_reboot_notifier); atomic_notifier_chain_register(&panic_notifier_list, &kgdb_panic_event_nb); #ifdef CONFIG_MAGIC_SYSRQ @@ -844,7 +812,6 @@ static void kgdb_unregister_callbacks(void) */ if (kgdb_io_module_registered) { kgdb_io_module_registered = 0; - unregister_reboot_notifier(&dbg_reboot_notifier); atomic_notifier_chain_unregister(&panic_notifier_list, &kgdb_panic_event_nb); kgdb_arch_exit(); diff --git a/trunk/kernel/debug/gdbstub.c b/trunk/kernel/debug/gdbstub.c index ce615e064482..c22d8c28ad84 100644 --- a/trunk/kernel/debug/gdbstub.c +++ b/trunk/kernel/debug/gdbstub.c @@ -1111,13 +1111,6 @@ void gdbstub_exit(int status) unsigned char checksum, ch, buffer[3]; int loop; - if (!kgdb_connected) - return; - kgdb_connected = 0; - - if (!dbg_io_ops || dbg_kdb_mode) - return; - buffer[0] = 'W'; buffer[1] = hex_asc_hi(status); buffer[2] = hex_asc_lo(status); @@ -1136,6 +1129,5 @@ void gdbstub_exit(int status) dbg_io_ops->write_char(hex_asc_lo(checksum)); /* make sure the output is flushed, lest the bootloader clobber it */ - if (dbg_io_ops->flush) - dbg_io_ops->flush(); + dbg_io_ops->flush(); } diff --git a/trunk/kernel/debug/kdb/kdb_bp.c b/trunk/kernel/debug/kdb/kdb_bp.c index 8418c2f8ec5d..20059ef4459a 100644 --- a/trunk/kernel/debug/kdb/kdb_bp.c +++ b/trunk/kernel/debug/kdb/kdb_bp.c @@ -153,13 +153,6 @@ static int _kdb_bp_install(struct pt_regs *regs, kdb_bp_t *bp) } else { kdb_printf("%s: failed to set 
breakpoint at 0x%lx\n", __func__, bp->bp_addr); -#ifdef CONFIG_DEBUG_RODATA - if (!bp->bp_type) { - kdb_printf("Software breakpoints are unavailable.\n" - " Change the kernel CONFIG_DEBUG_RODATA=n\n" - " OR use hw breaks: help bph\n"); - } -#endif return 1; } return 0; diff --git a/trunk/kernel/debug/kdb/kdb_io.c b/trunk/kernel/debug/kdb/kdb_io.c index 9b5f17da1c56..4802eb5840e1 100644 --- a/trunk/kernel/debug/kdb/kdb_io.c +++ b/trunk/kernel/debug/kdb/kdb_io.c @@ -689,7 +689,7 @@ int vkdb_printf(const char *fmt, va_list ap) if (!dbg_kdb_mode && kgdb_connected) { gdbstub_msg_write(kdb_buffer, retlen); } else { - if (dbg_io_ops && !dbg_io_ops->is_console) { + if (!dbg_io_ops->is_console) { len = strlen(kdb_buffer); cp = kdb_buffer; while (len--) { diff --git a/trunk/kernel/debug/kdb/kdb_keyboard.c b/trunk/kernel/debug/kdb/kdb_keyboard.c index 118527aa60ea..4bca634975c0 100644 --- a/trunk/kernel/debug/kdb/kdb_keyboard.c +++ b/trunk/kernel/debug/kdb/kdb_keyboard.c @@ -25,7 +25,6 @@ #define KBD_STAT_MOUSE_OBF 0x20 /* Mouse output buffer full */ static int kbd_exists; -static int kbd_last_ret; /* * Check if the keyboard controller has a keypress for us. @@ -91,11 +90,8 @@ int kdb_get_kbd_char(void) return -1; } - if ((scancode & 0x80) != 0) { - if (scancode == 0x9c) - kbd_last_ret = 0; + if ((scancode & 0x80) != 0) return -1; - } scancode &= 0x7f; @@ -182,82 +178,35 @@ int kdb_get_kbd_char(void) return -1; /* ignore unprintables */ } - if (scancode == 0x1c) { - kbd_last_ret = 1; - return 13; - } - - return keychar & 0xff; -} -EXPORT_SYMBOL_GPL(kdb_get_kbd_char); - -/* - * Best effort cleanup of ENTER break codes on leaving KDB. Called on - * exiting KDB, when we know we processed an ENTER or KP ENTER scan - * code. - */ -void kdb_kbd_cleanup_state(void) -{ - int scancode, scanstatus; - - /* - * Nothing to clean up, since either - * ENTER was never pressed, or has already - * gotten cleaned up. - */ - if (!kbd_last_ret) - return; - - kbd_last_ret = 0; - /* - * Enter key. 
Need to absorb the break code here, lest it gets - * leaked out if we exit KDB as the result of processing 'g'. - * - * This has several interesting implications: - * + Need to handle KP ENTER, which has break code 0xe0 0x9c. - * + Need to handle repeat ENTER and repeat KP ENTER. Repeats - * only get a break code at the end of the repeated - * sequence. This means we can't propagate the repeated key - * press, and must swallow it away. - * + Need to handle possible PS/2 mouse input. - * + Need to handle mashed keys. - */ - - while (1) { + if ((scancode & 0x7f) == 0x1c) { + /* + * enter key. All done. Absorb the release scancode. + */ while ((inb(KBD_STATUS_REG) & KBD_STAT_OBF) == 0) - cpu_relax(); + ; /* - * Fetch the scancode. + * Fetch the scancode */ scancode = inb(KBD_DATA_REG); scanstatus = inb(KBD_STATUS_REG); - /* - * Skip mouse input. - */ - if (scanstatus & KBD_STAT_MOUSE_OBF) - continue; + while (scanstatus & KBD_STAT_MOUSE_OBF) { + scancode = inb(KBD_DATA_REG); + scanstatus = inb(KBD_STATUS_REG); + } - /* - * If we see 0xe0, this is either a break code for KP - * ENTER, or a repeat make for KP ENTER. Either way, - * since the second byte is equivalent to an ENTER, - * skip the 0xe0 and try again. - * - * If we see 0x1c, this must be a repeat ENTER or KP - * ENTER (and we swallowed 0xe0 before). Try again. - * - * We can also see make and break codes for other keys - * mashed before or after pressing ENTER. Thus, if we - * see anything other than 0x9c, we have to try again. - * - * Note, if you held some key as ENTER was depressed, - * that break code would get leaked out. - */ - if (scancode != 0x9c) - continue; + if (scancode != 0x9c) { + /* + * Wasn't an enter-release, why not? 
+ */ + kdb_printf("kdb: expected enter got 0x%x status 0x%x\n", + scancode, scanstatus); + } - return; + return 13; } + + return keychar & 0xff; } +EXPORT_SYMBOL_GPL(kdb_get_kbd_char); diff --git a/trunk/kernel/debug/kdb/kdb_main.c b/trunk/kernel/debug/kdb/kdb_main.c index 67b847dfa2bb..e2ae7349437f 100644 --- a/trunk/kernel/debug/kdb/kdb_main.c +++ b/trunk/kernel/debug/kdb/kdb_main.c @@ -1400,9 +1400,6 @@ int kdb_main_loop(kdb_reason_t reason, kdb_reason_t reason2, int error, if (KDB_STATE(DOING_SS)) KDB_STATE_CLEAR(SSBPT); - /* Clean up any keyboard devices before leaving */ - kdb_kbd_cleanup_state(); - return result; } diff --git a/trunk/kernel/debug/kdb/kdb_private.h b/trunk/kernel/debug/kdb/kdb_private.h index 47c4e56e513b..e381d105b40b 100644 --- a/trunk/kernel/debug/kdb/kdb_private.h +++ b/trunk/kernel/debug/kdb/kdb_private.h @@ -246,13 +246,6 @@ extern void debug_kusage(void); extern void kdb_set_current_task(struct task_struct *); extern struct task_struct *kdb_current_task; - -#ifdef CONFIG_KDB_KEYBOARD -extern void kdb_kbd_cleanup_state(void); -#else /* ! CONFIG_KDB_KEYBOARD */ -#define kdb_kbd_cleanup_state() -#endif /* ! 
CONFIG_KDB_KEYBOARD */ - #ifdef CONFIG_MODULES extern struct list_head *kdb_modules; #endif /* CONFIG_MODULES */ diff --git a/trunk/mm/vmscan.c b/trunk/mm/vmscan.c index 7658fd6536dd..49f15ef0a99a 100644 --- a/trunk/mm/vmscan.c +++ b/trunk/mm/vmscan.c @@ -2817,7 +2817,7 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order, testorder = 0; if ((buffer_heads_over_limit && is_highmem_idx(i)) || - !zone_watermark_ok_safe(zone, testorder, + !zone_watermark_ok_safe(zone, order, high_wmark_pages(zone) + balance_gap, end_zone, 0)) { shrink_zone(priority, zone, &sc); diff --git a/trunk/net/core/dev.c b/trunk/net/core/dev.c index 452db7090d18..0f3eb7d79a2d 100644 --- a/trunk/net/core/dev.c +++ b/trunk/net/core/dev.c @@ -3560,8 +3560,7 @@ EXPORT_SYMBOL(napi_gro_receive); static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb) { __skb_pull(skb, skb_headlen(skb)); - /* restore the reserve we had after netdev_alloc_skb_ip_align() */ - skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN - skb_headroom(skb)); + skb_reserve(skb, NET_IP_ALIGN - skb_headroom(skb)); skb->vlan_tci = 0; skb->dev = napi->dev; skb->skb_iif = 0; diff --git a/trunk/net/ipv4/devinet.c b/trunk/net/ipv4/devinet.c index d4fad5c77447..e41c40f48cfe 100644 --- a/trunk/net/ipv4/devinet.c +++ b/trunk/net/ipv4/devinet.c @@ -1079,7 +1079,6 @@ __be32 inet_confirm_addr(struct in_device *in_dev, return addr; } -EXPORT_SYMBOL(inet_confirm_addr); /* * Device notifier diff --git a/trunk/net/ipv4/netfilter/iptable_filter.c b/trunk/net/ipv4/netfilter/iptable_filter.c index 851acec852d2..0e58f09e59fb 100644 --- a/trunk/net/ipv4/netfilter/iptable_filter.c +++ b/trunk/net/ipv4/netfilter/iptable_filter.c @@ -52,7 +52,7 @@ iptable_filter_hook(unsigned int hook, struct sk_buff *skb, static struct nf_hook_ops *filter_ops __read_mostly; /* Default to forward because I got too much mail already. 
*/ -static bool forward = true; +static bool forward = NF_ACCEPT; module_param(forward, bool, 0000); static int __net_init iptable_filter_net_init(struct net *net) @@ -64,7 +64,7 @@ static int __net_init iptable_filter_net_init(struct net *net) return -ENOMEM; /* Entry 1 is the FORWARD hook */ ((struct ipt_standard *)repl->entries)[1].target.verdict = - forward ? -NF_ACCEPT - 1 : -NF_DROP - 1; + -forward - 1; net->ipv4.iptable_filter = ipt_register_table(net, &packet_filter, repl); @@ -88,6 +88,11 @@ static int __init iptable_filter_init(void) { int ret; + if (forward < 0 || forward > NF_MAX_VERDICT) { + pr_err("iptables forward must be 0 or 1\n"); + return -EINVAL; + } + ret = register_pernet_subsys(&iptable_filter_net_ops); if (ret < 0) return ret; diff --git a/trunk/net/ipv6/netfilter/ip6table_filter.c b/trunk/net/ipv6/netfilter/ip6table_filter.c index 325e59a0224f..a8f6da97e3b2 100644 --- a/trunk/net/ipv6/netfilter/ip6table_filter.c +++ b/trunk/net/ipv6/netfilter/ip6table_filter.c @@ -44,7 +44,7 @@ ip6table_filter_hook(unsigned int hook, struct sk_buff *skb, static struct nf_hook_ops *filter_ops __read_mostly; /* Default to forward because I got too much mail already. */ -static bool forward = true; +static bool forward = NF_ACCEPT; module_param(forward, bool, 0000); static int __net_init ip6table_filter_net_init(struct net *net) @@ -56,7 +56,7 @@ static int __net_init ip6table_filter_net_init(struct net *net) return -ENOMEM; /* Entry 1 is the FORWARD hook */ ((struct ip6t_standard *)repl->entries)[1].target.verdict = - forward ? 
-NF_ACCEPT - 1 : -NF_DROP - 1; + -forward - 1; net->ipv6.ip6table_filter = ip6t_register_table(net, &packet_filter, repl); @@ -80,6 +80,11 @@ static int __init ip6table_filter_init(void) { int ret; + if (forward < 0 || forward > NF_MAX_VERDICT) { + pr_err("iptables forward must be 0 or 1\n"); + return -EINVAL; + } + ret = register_pernet_subsys(&ip6table_filter_net_ops); if (ret < 0) return ret; diff --git a/trunk/net/l2tp/l2tp_ppp.c b/trunk/net/l2tp/l2tp_ppp.c index 1addd9f3f40a..9b071910b4ba 100644 --- a/trunk/net/l2tp/l2tp_ppp.c +++ b/trunk/net/l2tp/l2tp_ppp.c @@ -1845,4 +1845,3 @@ MODULE_AUTHOR("James Chapman "); MODULE_DESCRIPTION("PPP over L2TP over UDP"); MODULE_LICENSE("GPL"); MODULE_VERSION(PPPOL2TP_DRV_VERSION); -MODULE_ALIAS("pppox-proto-" __stringify(PX_PROTO_OL2TP)); diff --git a/trunk/net/netlabel/netlabel_kapi.c b/trunk/net/netlabel/netlabel_kapi.c index 7c94aedd0912..2560e7b441c6 100644 --- a/trunk/net/netlabel/netlabel_kapi.c +++ b/trunk/net/netlabel/netlabel_kapi.c @@ -597,7 +597,7 @@ int netlbl_secattr_catmap_setrng(struct netlbl_lsm_secattr_catmap *catmap, iter = iter->next; iter_max_spot = iter->startbit + NETLBL_CATMAP_SIZE; } - ret_val = netlbl_secattr_catmap_setbit(iter, spot, flags); + ret_val = netlbl_secattr_catmap_setbit(iter, spot, GFP_ATOMIC); } return ret_val; diff --git a/trunk/net/rds/ib_cm.c b/trunk/net/rds/ib_cm.c index a1e116277477..51c868923f64 100644 --- a/trunk/net/rds/ib_cm.c +++ b/trunk/net/rds/ib_cm.c @@ -749,7 +749,7 @@ int rds_ib_conn_alloc(struct rds_connection *conn, gfp_t gfp) int ret; /* XXX too lazy? */ - ic = kzalloc(sizeof(struct rds_ib_connection), gfp); + ic = kzalloc(sizeof(struct rds_ib_connection), GFP_KERNEL); if (!ic) return -ENOMEM; diff --git a/trunk/net/rds/iw_cm.c b/trunk/net/rds/iw_cm.c index a91e1db62ee6..9556d2895f7a 100644 --- a/trunk/net/rds/iw_cm.c +++ b/trunk/net/rds/iw_cm.c @@ -694,7 +694,7 @@ int rds_iw_conn_alloc(struct rds_connection *conn, gfp_t gfp) unsigned long flags; /* XXX too lazy? 
*/ - ic = kzalloc(sizeof(struct rds_iw_connection), gfp); + ic = kzalloc(sizeof(struct rds_iw_connection), GFP_KERNEL); if (!ic) return -ENOMEM; diff --git a/trunk/net/rds/loop.c b/trunk/net/rds/loop.c index 6b12b68541ae..87ff2a8a454b 100644 --- a/trunk/net/rds/loop.c +++ b/trunk/net/rds/loop.c @@ -121,7 +121,7 @@ static int rds_loop_conn_alloc(struct rds_connection *conn, gfp_t gfp) struct rds_loop_connection *lc; unsigned long flags; - lc = kzalloc(sizeof(struct rds_loop_connection), gfp); + lc = kzalloc(sizeof(struct rds_loop_connection), GFP_KERNEL); if (!lc) return -ENOMEM; diff --git a/trunk/net/sunrpc/Kconfig b/trunk/net/sunrpc/Kconfig index 9fe8857d8d59..ffd243d09188 100644 --- a/trunk/net/sunrpc/Kconfig +++ b/trunk/net/sunrpc/Kconfig @@ -39,16 +39,3 @@ config RPCSEC_GSS_KRB5 Kerberos support should be installed. If unsure, say Y. - -config SUNRPC_DEBUG - bool "RPC: Enable dprintk debugging" - depends on SUNRPC && SYSCTL - help - This option enables a sysctl-based debugging interface - that is be used by the 'rpcdebug' utility to turn on or off - logging of different aspects of the kernel RPC activity. - - Disabling this option will make your kernel slightly smaller, - but makes troubleshooting NFS issues significantly harder. - - If unsure, say Y. 
diff --git a/trunk/net/sunrpc/addr.c b/trunk/net/sunrpc/addr.c index d11418f97f1f..ee77742e0ed6 100644 --- a/trunk/net/sunrpc/addr.c +++ b/trunk/net/sunrpc/addr.c @@ -156,9 +156,8 @@ static size_t rpc_pton4(const char *buf, const size_t buflen, } #if IS_ENABLED(CONFIG_IPV6) -static int rpc_parse_scope_id(struct net *net, const char *buf, - const size_t buflen, const char *delim, - struct sockaddr_in6 *sin6) +static int rpc_parse_scope_id(const char *buf, const size_t buflen, + const char *delim, struct sockaddr_in6 *sin6) { char *p; size_t len; @@ -178,7 +177,7 @@ static int rpc_parse_scope_id(struct net *net, const char *buf, unsigned long scope_id = 0; struct net_device *dev; - dev = dev_get_by_name(net, p); + dev = dev_get_by_name(&init_net, p); if (dev != NULL) { scope_id = dev->ifindex; dev_put(dev); @@ -198,7 +197,7 @@ static int rpc_parse_scope_id(struct net *net, const char *buf, return 0; } -static size_t rpc_pton6(struct net *net, const char *buf, const size_t buflen, +static size_t rpc_pton6(const char *buf, const size_t buflen, struct sockaddr *sap, const size_t salen) { struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap; @@ -214,14 +213,14 @@ static size_t rpc_pton6(struct net *net, const char *buf, const size_t buflen, if (in6_pton(buf, buflen, addr, IPV6_SCOPE_DELIMITER, &delim) == 0) return 0; - if (!rpc_parse_scope_id(net, buf, buflen, delim, sin6)) + if (!rpc_parse_scope_id(buf, buflen, delim, sin6)) return 0; sin6->sin6_family = AF_INET6; return sizeof(struct sockaddr_in6); } #else -static size_t rpc_pton6(struct net *net, const char *buf, const size_t buflen, +static size_t rpc_pton6(const char *buf, const size_t buflen, struct sockaddr *sap, const size_t salen) { return 0; @@ -230,7 +229,6 @@ static size_t rpc_pton6(struct net *net, const char *buf, const size_t buflen, /** * rpc_pton - Construct a sockaddr in @sap - * @net: applicable network namespace * @buf: C string containing presentation format IP address * @buflen: length of 
presentation address in bytes * @sap: buffer into which to plant socket address @@ -243,14 +241,14 @@ static size_t rpc_pton6(struct net *net, const char *buf, const size_t buflen, * socket address, if successful. Returns zero if an error * occurred. */ -size_t rpc_pton(struct net *net, const char *buf, const size_t buflen, +size_t rpc_pton(const char *buf, const size_t buflen, struct sockaddr *sap, const size_t salen) { unsigned int i; for (i = 0; i < buflen; i++) if (buf[i] == ':') - return rpc_pton6(net, buf, buflen, sap, salen); + return rpc_pton6(buf, buflen, sap, salen); return rpc_pton4(buf, buflen, sap, salen); } EXPORT_SYMBOL_GPL(rpc_pton); @@ -297,7 +295,6 @@ char *rpc_sockaddr2uaddr(const struct sockaddr *sap, gfp_t gfp_flags) /** * rpc_uaddr2sockaddr - convert a universal address to a socket address. - * @net: applicable network namespace * @uaddr: C string containing universal address to convert * @uaddr_len: length of universal address string * @sap: buffer into which to plant socket address @@ -309,9 +306,8 @@ char *rpc_sockaddr2uaddr(const struct sockaddr *sap, gfp_t gfp_flags) * Returns the size of the socket address if successful; otherwise * zero is returned. 
*/ -size_t rpc_uaddr2sockaddr(struct net *net, const char *uaddr, - const size_t uaddr_len, struct sockaddr *sap, - const size_t salen) +size_t rpc_uaddr2sockaddr(const char *uaddr, const size_t uaddr_len, + struct sockaddr *sap, const size_t salen) { char *c, buf[RPCBIND_MAXUADDRLEN + sizeof('\0')]; unsigned long portlo, porthi; @@ -343,7 +339,7 @@ size_t rpc_uaddr2sockaddr(struct net *net, const char *uaddr, port = (unsigned short)((porthi << 8) | portlo); *c = '\0'; - if (rpc_pton(net, buf, strlen(buf), sap, salen) == 0) + if (rpc_pton(buf, strlen(buf), sap, salen) == 0) return 0; switch (sap->sa_family) { diff --git a/trunk/net/sunrpc/auth_gss/auth_gss.c b/trunk/net/sunrpc/auth_gss/auth_gss.c index d3ad81f8da5b..affa631ac1ab 100644 --- a/trunk/net/sunrpc/auth_gss/auth_gss.c +++ b/trunk/net/sunrpc/auth_gss/auth_gss.c @@ -81,7 +81,7 @@ struct gss_auth { * mechanism (for example, "krb5") and exists for * backwards-compatibility with older gssd's. */ - struct rpc_pipe *pipe[2]; + struct dentry *dentry[2]; }; /* pipe_version >= 0 if and only if someone has a pipe open. */ @@ -112,7 +112,7 @@ gss_put_ctx(struct gss_cl_ctx *ctx) /* gss_cred_set_ctx: * called by gss_upcall_callback and gss_create_upcall in order * to set the gss context. The actual exchange of an old context - * and a new one is protected by the pipe->lock. + * and a new one is protected by the inode->i_lock. 
*/ static void gss_cred_set_ctx(struct rpc_cred *cred, struct gss_cl_ctx *ctx) @@ -251,7 +251,7 @@ struct gss_upcall_msg { struct rpc_pipe_msg msg; struct list_head list; struct gss_auth *auth; - struct rpc_pipe *pipe; + struct rpc_inode *inode; struct rpc_wait_queue rpc_waitqueue; wait_queue_head_t waitqueue; struct gss_cl_ctx *ctx; @@ -294,10 +294,10 @@ gss_release_msg(struct gss_upcall_msg *gss_msg) } static struct gss_upcall_msg * -__gss_find_upcall(struct rpc_pipe *pipe, uid_t uid) +__gss_find_upcall(struct rpc_inode *rpci, uid_t uid) { struct gss_upcall_msg *pos; - list_for_each_entry(pos, &pipe->in_downcall, list) { + list_for_each_entry(pos, &rpci->in_downcall, list) { if (pos->uid != uid) continue; atomic_inc(&pos->count); @@ -315,17 +315,18 @@ __gss_find_upcall(struct rpc_pipe *pipe, uid_t uid) static inline struct gss_upcall_msg * gss_add_msg(struct gss_upcall_msg *gss_msg) { - struct rpc_pipe *pipe = gss_msg->pipe; + struct rpc_inode *rpci = gss_msg->inode; + struct inode *inode = &rpci->vfs_inode; struct gss_upcall_msg *old; - spin_lock(&pipe->lock); - old = __gss_find_upcall(pipe, gss_msg->uid); + spin_lock(&inode->i_lock); + old = __gss_find_upcall(rpci, gss_msg->uid); if (old == NULL) { atomic_inc(&gss_msg->count); - list_add(&gss_msg->list, &pipe->in_downcall); + list_add(&gss_msg->list, &rpci->in_downcall); } else gss_msg = old; - spin_unlock(&pipe->lock); + spin_unlock(&inode->i_lock); return gss_msg; } @@ -341,14 +342,14 @@ __gss_unhash_msg(struct gss_upcall_msg *gss_msg) static void gss_unhash_msg(struct gss_upcall_msg *gss_msg) { - struct rpc_pipe *pipe = gss_msg->pipe; + struct inode *inode = &gss_msg->inode->vfs_inode; if (list_empty(&gss_msg->list)) return; - spin_lock(&pipe->lock); + spin_lock(&inode->i_lock); if (!list_empty(&gss_msg->list)) __gss_unhash_msg(gss_msg); - spin_unlock(&pipe->lock); + spin_unlock(&inode->i_lock); } static void @@ -375,11 +376,11 @@ gss_upcall_callback(struct rpc_task *task) struct gss_cred *gss_cred = 
container_of(task->tk_rqstp->rq_cred, struct gss_cred, gc_base); struct gss_upcall_msg *gss_msg = gss_cred->gc_upcall; - struct rpc_pipe *pipe = gss_msg->pipe; + struct inode *inode = &gss_msg->inode->vfs_inode; - spin_lock(&pipe->lock); + spin_lock(&inode->i_lock); gss_handle_downcall_result(gss_cred, gss_msg); - spin_unlock(&pipe->lock); + spin_unlock(&inode->i_lock); task->tk_status = gss_msg->msg.errno; gss_release_msg(gss_msg); } @@ -449,7 +450,7 @@ gss_alloc_msg(struct gss_auth *gss_auth, struct rpc_clnt *clnt, kfree(gss_msg); return ERR_PTR(vers); } - gss_msg->pipe = gss_auth->pipe[vers]; + gss_msg->inode = RPC_I(gss_auth->dentry[vers]->d_inode); INIT_LIST_HEAD(&gss_msg->list); rpc_init_wait_queue(&gss_msg->rpc_waitqueue, "RPCSEC_GSS upcall waitq"); init_waitqueue_head(&gss_msg->waitqueue); @@ -473,7 +474,8 @@ gss_setup_upcall(struct rpc_clnt *clnt, struct gss_auth *gss_auth, struct rpc_cr return gss_new; gss_msg = gss_add_msg(gss_new); if (gss_msg == gss_new) { - int res = rpc_queue_upcall(gss_new->pipe, &gss_new->msg); + struct inode *inode = &gss_new->inode->vfs_inode; + int res = rpc_queue_upcall(inode, &gss_new->msg); if (res) { gss_unhash_msg(gss_new); gss_msg = ERR_PTR(res); @@ -504,7 +506,7 @@ gss_refresh_upcall(struct rpc_task *task) struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base); struct gss_upcall_msg *gss_msg; - struct rpc_pipe *pipe; + struct inode *inode; int err = 0; dprintk("RPC: %5u gss_refresh_upcall for uid %u\n", task->tk_pid, @@ -522,8 +524,8 @@ gss_refresh_upcall(struct rpc_task *task) err = PTR_ERR(gss_msg); goto out; } - pipe = gss_msg->pipe; - spin_lock(&pipe->lock); + inode = &gss_msg->inode->vfs_inode; + spin_lock(&inode->i_lock); if (gss_cred->gc_upcall != NULL) rpc_sleep_on(&gss_cred->gc_upcall->rpc_waitqueue, task, NULL); else if (gss_msg->ctx == NULL && gss_msg->msg.errno >= 0) { @@ -536,7 +538,7 @@ gss_refresh_upcall(struct rpc_task *task) gss_handle_downcall_result(gss_cred, gss_msg); err = 
gss_msg->msg.errno; } - spin_unlock(&pipe->lock); + spin_unlock(&inode->i_lock); gss_release_msg(gss_msg); out: dprintk("RPC: %5u gss_refresh_upcall for uid %u result %d\n", @@ -547,7 +549,7 @@ gss_refresh_upcall(struct rpc_task *task) static inline int gss_create_upcall(struct gss_auth *gss_auth, struct gss_cred *gss_cred) { - struct rpc_pipe *pipe; + struct inode *inode; struct rpc_cred *cred = &gss_cred->gc_base; struct gss_upcall_msg *gss_msg; DEFINE_WAIT(wait); @@ -571,14 +573,14 @@ gss_create_upcall(struct gss_auth *gss_auth, struct gss_cred *gss_cred) err = PTR_ERR(gss_msg); goto out; } - pipe = gss_msg->pipe; + inode = &gss_msg->inode->vfs_inode; for (;;) { prepare_to_wait(&gss_msg->waitqueue, &wait, TASK_KILLABLE); - spin_lock(&pipe->lock); + spin_lock(&inode->i_lock); if (gss_msg->ctx != NULL || gss_msg->msg.errno < 0) { break; } - spin_unlock(&pipe->lock); + spin_unlock(&inode->i_lock); if (fatal_signal_pending(current)) { err = -ERESTARTSYS; goto out_intr; @@ -589,7 +591,7 @@ gss_create_upcall(struct gss_auth *gss_auth, struct gss_cred *gss_cred) gss_cred_set_ctx(cred, gss_msg->ctx); else err = gss_msg->msg.errno; - spin_unlock(&pipe->lock); + spin_unlock(&inode->i_lock); out_intr: finish_wait(&gss_msg->waitqueue, &wait); gss_release_msg(gss_msg); @@ -607,7 +609,7 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) const void *p, *end; void *buf; struct gss_upcall_msg *gss_msg; - struct rpc_pipe *pipe = RPC_I(filp->f_dentry->d_inode)->pipe; + struct inode *inode = filp->f_path.dentry->d_inode; struct gss_cl_ctx *ctx; uid_t uid; ssize_t err = -EFBIG; @@ -637,14 +639,14 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) err = -ENOENT; /* Find a matching upcall */ - spin_lock(&pipe->lock); - gss_msg = __gss_find_upcall(pipe, uid); + spin_lock(&inode->i_lock); + gss_msg = __gss_find_upcall(RPC_I(inode), uid); if (gss_msg == NULL) { - spin_unlock(&pipe->lock); + spin_unlock(&inode->i_lock); goto err_put_ctx; 
} list_del_init(&gss_msg->list); - spin_unlock(&pipe->lock); + spin_unlock(&inode->i_lock); p = gss_fill_context(p, end, ctx, gss_msg->auth->mech); if (IS_ERR(p)) { @@ -672,9 +674,9 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) err = mlen; err_release_msg: - spin_lock(&pipe->lock); + spin_lock(&inode->i_lock); __gss_unhash_msg(gss_msg); - spin_unlock(&pipe->lock); + spin_unlock(&inode->i_lock); gss_release_msg(gss_msg); err_put_ctx: gss_put_ctx(ctx); @@ -720,23 +722,23 @@ static int gss_pipe_open_v1(struct inode *inode) static void gss_pipe_release(struct inode *inode) { - struct rpc_pipe *pipe = RPC_I(inode)->pipe; + struct rpc_inode *rpci = RPC_I(inode); struct gss_upcall_msg *gss_msg; restart: - spin_lock(&pipe->lock); - list_for_each_entry(gss_msg, &pipe->in_downcall, list) { + spin_lock(&inode->i_lock); + list_for_each_entry(gss_msg, &rpci->in_downcall, list) { if (!list_empty(&gss_msg->msg.list)) continue; gss_msg->msg.errno = -EPIPE; atomic_inc(&gss_msg->count); __gss_unhash_msg(gss_msg); - spin_unlock(&pipe->lock); + spin_unlock(&inode->i_lock); gss_release_msg(gss_msg); goto restart; } - spin_unlock(&pipe->lock); + spin_unlock(&inode->i_lock); put_pipe_version(); } @@ -757,75 +759,6 @@ gss_pipe_destroy_msg(struct rpc_pipe_msg *msg) } } -static void gss_pipes_dentries_destroy(struct rpc_auth *auth) -{ - struct gss_auth *gss_auth; - - gss_auth = container_of(auth, struct gss_auth, rpc_auth); - if (gss_auth->pipe[0]->dentry) - rpc_unlink(gss_auth->pipe[0]->dentry); - if (gss_auth->pipe[1]->dentry) - rpc_unlink(gss_auth->pipe[1]->dentry); -} - -static int gss_pipes_dentries_create(struct rpc_auth *auth) -{ - int err; - struct gss_auth *gss_auth; - struct rpc_clnt *clnt; - - gss_auth = container_of(auth, struct gss_auth, rpc_auth); - clnt = gss_auth->client; - - gss_auth->pipe[1]->dentry = rpc_mkpipe_dentry(clnt->cl_dentry, - "gssd", - clnt, gss_auth->pipe[1]); - if (IS_ERR(gss_auth->pipe[1]->dentry)) - return 
PTR_ERR(gss_auth->pipe[1]->dentry); - gss_auth->pipe[0]->dentry = rpc_mkpipe_dentry(clnt->cl_dentry, - gss_auth->mech->gm_name, - clnt, gss_auth->pipe[0]); - if (IS_ERR(gss_auth->pipe[0]->dentry)) { - err = PTR_ERR(gss_auth->pipe[0]->dentry); - goto err_unlink_pipe_1; - } - return 0; - -err_unlink_pipe_1: - rpc_unlink(gss_auth->pipe[1]->dentry); - return err; -} - -static void gss_pipes_dentries_destroy_net(struct rpc_clnt *clnt, - struct rpc_auth *auth) -{ - struct net *net = rpc_net_ns(clnt); - struct super_block *sb; - - sb = rpc_get_sb_net(net); - if (sb) { - if (clnt->cl_dentry) - gss_pipes_dentries_destroy(auth); - rpc_put_sb_net(net); - } -} - -static int gss_pipes_dentries_create_net(struct rpc_clnt *clnt, - struct rpc_auth *auth) -{ - struct net *net = rpc_net_ns(clnt); - struct super_block *sb; - int err = 0; - - sb = rpc_get_sb_net(net); - if (sb) { - if (clnt->cl_dentry) - err = gss_pipes_dentries_create(auth); - rpc_put_sb_net(net); - } - return err; -} - /* * NOTE: we have the opportunity to use different * parameters based on the input flavor (which must be a pseudoflavor) @@ -868,33 +801,32 @@ gss_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor) * that we supported only the old pipe. So we instead create * the new pipe first. 
*/ - gss_auth->pipe[1] = rpc_mkpipe_data(&gss_upcall_ops_v1, - RPC_PIPE_WAIT_FOR_OPEN); - if (IS_ERR(gss_auth->pipe[1])) { - err = PTR_ERR(gss_auth->pipe[1]); + gss_auth->dentry[1] = rpc_mkpipe(clnt->cl_path.dentry, + "gssd", + clnt, &gss_upcall_ops_v1, + RPC_PIPE_WAIT_FOR_OPEN); + if (IS_ERR(gss_auth->dentry[1])) { + err = PTR_ERR(gss_auth->dentry[1]); goto err_put_mech; } - gss_auth->pipe[0] = rpc_mkpipe_data(&gss_upcall_ops_v0, - RPC_PIPE_WAIT_FOR_OPEN); - if (IS_ERR(gss_auth->pipe[0])) { - err = PTR_ERR(gss_auth->pipe[0]); - goto err_destroy_pipe_1; + gss_auth->dentry[0] = rpc_mkpipe(clnt->cl_path.dentry, + gss_auth->mech->gm_name, + clnt, &gss_upcall_ops_v0, + RPC_PIPE_WAIT_FOR_OPEN); + if (IS_ERR(gss_auth->dentry[0])) { + err = PTR_ERR(gss_auth->dentry[0]); + goto err_unlink_pipe_1; } - err = gss_pipes_dentries_create_net(clnt, auth); - if (err) - goto err_destroy_pipe_0; err = rpcauth_init_credcache(auth); if (err) - goto err_unlink_pipes; + goto err_unlink_pipe_0; return auth; -err_unlink_pipes: - gss_pipes_dentries_destroy_net(clnt, auth); -err_destroy_pipe_0: - rpc_destroy_pipe_data(gss_auth->pipe[0]); -err_destroy_pipe_1: - rpc_destroy_pipe_data(gss_auth->pipe[1]); +err_unlink_pipe_0: + rpc_unlink(gss_auth->dentry[0]); +err_unlink_pipe_1: + rpc_unlink(gss_auth->dentry[1]); err_put_mech: gss_mech_put(gss_auth->mech); err_free: @@ -907,9 +839,8 @@ gss_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor) static void gss_free(struct gss_auth *gss_auth) { - gss_pipes_dentries_destroy_net(gss_auth->client, &gss_auth->rpc_auth); - rpc_destroy_pipe_data(gss_auth->pipe[0]); - rpc_destroy_pipe_data(gss_auth->pipe[1]); + rpc_unlink(gss_auth->dentry[1]); + rpc_unlink(gss_auth->dentry[0]); gss_mech_put(gss_auth->mech); kfree(gss_auth); @@ -1616,9 +1547,7 @@ static const struct rpc_authops authgss_ops = { .create = gss_create, .destroy = gss_destroy, .lookup_cred = gss_lookup_cred, - .crcreate = gss_create_cred, - .pipes_create = gss_pipes_dentries_create, - 
.pipes_destroy = gss_pipes_dentries_destroy, + .crcreate = gss_create_cred }; static const struct rpc_credops gss_credops = { @@ -1662,21 +1591,6 @@ static const struct rpc_pipe_ops gss_upcall_ops_v1 = { .release_pipe = gss_pipe_release, }; -static __net_init int rpcsec_gss_init_net(struct net *net) -{ - return gss_svc_init_net(net); -} - -static __net_exit void rpcsec_gss_exit_net(struct net *net) -{ - gss_svc_shutdown_net(net); -} - -static struct pernet_operations rpcsec_gss_net_ops = { - .init = rpcsec_gss_init_net, - .exit = rpcsec_gss_exit_net, -}; - /* * Initialize RPCSEC_GSS module */ @@ -1690,13 +1604,8 @@ static int __init init_rpcsec_gss(void) err = gss_svc_init(); if (err) goto out_unregister; - err = register_pernet_subsys(&rpcsec_gss_net_ops); - if (err) - goto out_svc_exit; rpc_init_wait_queue(&pipe_version_rpc_waitqueue, "gss pipe version"); return 0; -out_svc_exit: - gss_svc_shutdown(); out_unregister: rpcauth_unregister(&authgss_ops); out: @@ -1705,7 +1614,6 @@ static int __init init_rpcsec_gss(void) static void __exit exit_rpcsec_gss(void) { - unregister_pernet_subsys(&rpcsec_gss_net_ops); gss_svc_shutdown(); rpcauth_unregister(&authgss_ops); rcu_barrier(); /* Wait for completion of call_rcu()'s */ diff --git a/trunk/net/sunrpc/auth_gss/gss_krb5_crypto.c b/trunk/net/sunrpc/auth_gss/gss_krb5_crypto.c index 0f43e894bc0a..9576f35ab701 100644 --- a/trunk/net/sunrpc/auth_gss/gss_krb5_crypto.c +++ b/trunk/net/sunrpc/auth_gss/gss_krb5_crypto.c @@ -600,14 +600,11 @@ gss_krb5_cts_crypt(struct crypto_blkcipher *cipher, struct xdr_buf *buf, u32 ret; struct scatterlist sg[1]; struct blkcipher_desc desc = { .tfm = cipher, .info = iv }; - u8 data[GSS_KRB5_MAX_BLOCKSIZE * 2]; + u8 data[crypto_blkcipher_blocksize(cipher) * 2]; struct page **save_pages; u32 len = buf->len - offset; - if (len > ARRAY_SIZE(data)) { - WARN_ON(0); - return -ENOMEM; - } + BUG_ON(len > crypto_blkcipher_blocksize(cipher) * 2); /* * For encryption, we want to read from the cleartext diff 
--git a/trunk/net/sunrpc/auth_gss/gss_krb5_mech.c b/trunk/net/sunrpc/auth_gss/gss_krb5_mech.c index 8eff8c32d1b9..8c67890de427 100644 --- a/trunk/net/sunrpc/auth_gss/gss_krb5_mech.c +++ b/trunk/net/sunrpc/auth_gss/gss_krb5_mech.c @@ -344,7 +344,7 @@ gss_import_v1_context(const void *p, const void *end, struct krb5_ctx *ctx) return PTR_ERR(p); } -static struct crypto_blkcipher * +struct crypto_blkcipher * context_v2_alloc_cipher(struct krb5_ctx *ctx, const char *cname, u8 *key) { struct crypto_blkcipher *cp; diff --git a/trunk/net/sunrpc/auth_gss/gss_krb5_seal.c b/trunk/net/sunrpc/auth_gss/gss_krb5_seal.c index 62ae3273186c..d7941eab7796 100644 --- a/trunk/net/sunrpc/auth_gss/gss_krb5_seal.c +++ b/trunk/net/sunrpc/auth_gss/gss_krb5_seal.c @@ -159,7 +159,7 @@ gss_get_mic_v1(struct krb5_ctx *ctx, struct xdr_buf *text, return (ctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE; } -static u32 +u32 gss_get_mic_v2(struct krb5_ctx *ctx, struct xdr_buf *text, struct xdr_netobj *token) { diff --git a/trunk/net/sunrpc/auth_gss/svcauth_gss.c b/trunk/net/sunrpc/auth_gss/svcauth_gss.c index 1600cfb1618c..8d0f7d3c71c8 100644 --- a/trunk/net/sunrpc/auth_gss/svcauth_gss.c +++ b/trunk/net/sunrpc/auth_gss/svcauth_gss.c @@ -48,8 +48,6 @@ #include #include -#include "../netns.h" - #ifdef RPC_DEBUG # define RPCDBG_FACILITY RPCDBG_AUTH #endif @@ -77,8 +75,10 @@ struct rsi { int major_status, minor_status; }; -static struct rsi *rsi_update(struct cache_detail *cd, struct rsi *new, struct rsi *old); -static struct rsi *rsi_lookup(struct cache_detail *cd, struct rsi *item); +static struct cache_head *rsi_table[RSI_HASHMAX]; +static struct cache_detail rsi_cache; +static struct rsi *rsi_update(struct rsi *new, struct rsi *old); +static struct rsi *rsi_lookup(struct rsi *item); static void rsi_free(struct rsi *rsii) { @@ -216,7 +216,7 @@ static int rsi_parse(struct cache_detail *cd, if (dup_to_netobj(&rsii.in_token, buf, len)) goto out; - rsip = rsi_lookup(cd, &rsii); + rsip = 
rsi_lookup(&rsii); if (!rsip) goto out; @@ -258,20 +258,21 @@ static int rsi_parse(struct cache_detail *cd, if (dup_to_netobj(&rsii.out_token, buf, len)) goto out; rsii.h.expiry_time = expiry; - rsip = rsi_update(cd, &rsii, rsip); + rsip = rsi_update(&rsii, rsip); status = 0; out: rsi_free(&rsii); if (rsip) - cache_put(&rsip->h, cd); + cache_put(&rsip->h, &rsi_cache); else status = -ENOMEM; return status; } -static struct cache_detail rsi_cache_template = { +static struct cache_detail rsi_cache = { .owner = THIS_MODULE, .hash_size = RSI_HASHMAX, + .hash_table = rsi_table, .name = "auth.rpcsec.init", .cache_put = rsi_put, .cache_upcall = rsi_upcall, @@ -282,24 +283,24 @@ static struct cache_detail rsi_cache_template = { .alloc = rsi_alloc, }; -static struct rsi *rsi_lookup(struct cache_detail *cd, struct rsi *item) +static struct rsi *rsi_lookup(struct rsi *item) { struct cache_head *ch; int hash = rsi_hash(item); - ch = sunrpc_cache_lookup(cd, &item->h, hash); + ch = sunrpc_cache_lookup(&rsi_cache, &item->h, hash); if (ch) return container_of(ch, struct rsi, h); else return NULL; } -static struct rsi *rsi_update(struct cache_detail *cd, struct rsi *new, struct rsi *old) +static struct rsi *rsi_update(struct rsi *new, struct rsi *old) { struct cache_head *ch; int hash = rsi_hash(new); - ch = sunrpc_cache_update(cd, &new->h, + ch = sunrpc_cache_update(&rsi_cache, &new->h, &old->h, hash); if (ch) return container_of(ch, struct rsi, h); @@ -338,8 +339,10 @@ struct rsc { char *client_name; }; -static struct rsc *rsc_update(struct cache_detail *cd, struct rsc *new, struct rsc *old); -static struct rsc *rsc_lookup(struct cache_detail *cd, struct rsc *item); +static struct cache_head *rsc_table[RSC_HASHMAX]; +static struct cache_detail rsc_cache; +static struct rsc *rsc_update(struct rsc *new, struct rsc *old); +static struct rsc *rsc_lookup(struct rsc *item); static void rsc_free(struct rsc *rsci) { @@ -441,7 +444,7 @@ static int rsc_parse(struct cache_detail *cd, if 
(expiry == 0) goto out; - rscp = rsc_lookup(cd, &rsci); + rscp = rsc_lookup(&rsci); if (!rscp) goto out; @@ -503,21 +506,22 @@ static int rsc_parse(struct cache_detail *cd, } rsci.h.expiry_time = expiry; - rscp = rsc_update(cd, &rsci, rscp); + rscp = rsc_update(&rsci, rscp); status = 0; out: gss_mech_put(gm); rsc_free(&rsci); if (rscp) - cache_put(&rscp->h, cd); + cache_put(&rscp->h, &rsc_cache); else status = -ENOMEM; return status; } -static struct cache_detail rsc_cache_template = { +static struct cache_detail rsc_cache = { .owner = THIS_MODULE, .hash_size = RSC_HASHMAX, + .hash_table = rsc_table, .name = "auth.rpcsec.context", .cache_put = rsc_put, .cache_parse = rsc_parse, @@ -527,24 +531,24 @@ static struct cache_detail rsc_cache_template = { .alloc = rsc_alloc, }; -static struct rsc *rsc_lookup(struct cache_detail *cd, struct rsc *item) +static struct rsc *rsc_lookup(struct rsc *item) { struct cache_head *ch; int hash = rsc_hash(item); - ch = sunrpc_cache_lookup(cd, &item->h, hash); + ch = sunrpc_cache_lookup(&rsc_cache, &item->h, hash); if (ch) return container_of(ch, struct rsc, h); else return NULL; } -static struct rsc *rsc_update(struct cache_detail *cd, struct rsc *new, struct rsc *old) +static struct rsc *rsc_update(struct rsc *new, struct rsc *old) { struct cache_head *ch; int hash = rsc_hash(new); - ch = sunrpc_cache_update(cd, &new->h, + ch = sunrpc_cache_update(&rsc_cache, &new->h, &old->h, hash); if (ch) return container_of(ch, struct rsc, h); @@ -554,7 +558,7 @@ static struct rsc *rsc_update(struct cache_detail *cd, struct rsc *new, struct r static struct rsc * -gss_svc_searchbyctx(struct cache_detail *cd, struct xdr_netobj *handle) +gss_svc_searchbyctx(struct xdr_netobj *handle) { struct rsc rsci; struct rsc *found; @@ -562,11 +566,11 @@ gss_svc_searchbyctx(struct cache_detail *cd, struct xdr_netobj *handle) memset(&rsci, 0, sizeof(rsci)); if (dup_to_netobj(&rsci.handle, handle->data, handle->len)) return NULL; - found = rsc_lookup(cd, &rsci); 
+ found = rsc_lookup(&rsci); rsc_free(&rsci); if (!found) return NULL; - if (cache_check(cd, &found->h, NULL)) + if (cache_check(&rsc_cache, &found->h, NULL)) return NULL; return found; } @@ -964,20 +968,20 @@ svcauth_gss_set_client(struct svc_rqst *rqstp) } static inline int -gss_write_init_verf(struct cache_detail *cd, struct svc_rqst *rqstp, struct rsi *rsip) +gss_write_init_verf(struct svc_rqst *rqstp, struct rsi *rsip) { struct rsc *rsci; int rc; if (rsip->major_status != GSS_S_COMPLETE) return gss_write_null_verf(rqstp); - rsci = gss_svc_searchbyctx(cd, &rsip->out_handle); + rsci = gss_svc_searchbyctx(&rsip->out_handle); if (rsci == NULL) { rsip->major_status = GSS_S_NO_CONTEXT; return gss_write_null_verf(rqstp); } rc = gss_write_verf(rqstp, rsci->mechctx, GSS_SEQ_WIN); - cache_put(&rsci->h, cd); + cache_put(&rsci->h, &rsc_cache); return rc; } @@ -996,7 +1000,6 @@ static int svcauth_gss_handle_init(struct svc_rqst *rqstp, struct xdr_netobj tmpobj; struct rsi *rsip, rsikey; int ret; - struct sunrpc_net *sn = net_generic(rqstp->rq_xprt->xpt_net, sunrpc_net_id); /* Read the verifier; should be NULL: */ *authp = rpc_autherr_badverf; @@ -1025,17 +1028,17 @@ static int svcauth_gss_handle_init(struct svc_rqst *rqstp, } /* Perform upcall, or find upcall result: */ - rsip = rsi_lookup(sn->rsi_cache, &rsikey); + rsip = rsi_lookup(&rsikey); rsi_free(&rsikey); if (!rsip) return SVC_CLOSE; - if (cache_check(sn->rsi_cache, &rsip->h, &rqstp->rq_chandle) < 0) + if (cache_check(&rsi_cache, &rsip->h, &rqstp->rq_chandle) < 0) /* No upcall result: */ return SVC_CLOSE; ret = SVC_CLOSE; /* Got an answer to the upcall; use it: */ - if (gss_write_init_verf(sn->rsc_cache, rqstp, rsip)) + if (gss_write_init_verf(rqstp, rsip)) goto out; if (resv->iov_len + 4 > PAGE_SIZE) goto out; @@ -1052,7 +1055,7 @@ static int svcauth_gss_handle_init(struct svc_rqst *rqstp, ret = SVC_COMPLETE; out: - cache_put(&rsip->h, sn->rsi_cache); + cache_put(&rsip->h, &rsi_cache); return ret; } @@ -1076,7 
+1079,6 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp) __be32 *rpcstart; __be32 *reject_stat = resv->iov_base + resv->iov_len; int ret; - struct sunrpc_net *sn = net_generic(rqstp->rq_xprt->xpt_net, sunrpc_net_id); dprintk("RPC: svcauth_gss: argv->iov_len = %zd\n", argv->iov_len); @@ -1127,7 +1129,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp) case RPC_GSS_PROC_DESTROY: /* Look up the context, and check the verifier: */ *authp = rpcsec_gsserr_credproblem; - rsci = gss_svc_searchbyctx(sn->rsc_cache, &gc->gc_ctx); + rsci = gss_svc_searchbyctx(&gc->gc_ctx); if (!rsci) goto auth_err; switch (gss_verify_header(rqstp, rsci, rpcstart, gc, authp)) { @@ -1207,7 +1209,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp) ret = SVC_DROP; out: if (rsci) - cache_put(&rsci->h, sn->rsc_cache); + cache_put(&rsci->h, &rsc_cache); return ret; } @@ -1360,7 +1362,6 @@ svcauth_gss_release(struct svc_rqst *rqstp) struct rpc_gss_wire_cred *gc = &gsd->clcred; struct xdr_buf *resbuf = &rqstp->rq_res; int stat = -EINVAL; - struct sunrpc_net *sn = net_generic(rqstp->rq_xprt->xpt_net, sunrpc_net_id); if (gc->gc_proc != RPC_GSS_PROC_DATA) goto out; @@ -1403,7 +1404,7 @@ svcauth_gss_release(struct svc_rqst *rqstp) put_group_info(rqstp->rq_cred.cr_group_info); rqstp->rq_cred.cr_group_info = NULL; if (gsd->rsci) - cache_put(&gsd->rsci->h, sn->rsc_cache); + cache_put(&gsd->rsci->h, &rsc_cache); gsd->rsci = NULL; return stat; @@ -1428,96 +1429,30 @@ static struct auth_ops svcauthops_gss = { .set_client = svcauth_gss_set_client, }; -static int rsi_cache_create_net(struct net *net) -{ - struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); - struct cache_detail *cd; - int err; - - cd = cache_create_net(&rsi_cache_template, net); - if (IS_ERR(cd)) - return PTR_ERR(cd); - err = cache_register_net(cd, net); - if (err) { - cache_destroy_net(cd, net); - return err; - } - sn->rsi_cache = cd; - return 0; -} - -static void rsi_cache_destroy_net(struct net *net) -{ - 
struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); - struct cache_detail *cd = sn->rsi_cache; - - sn->rsi_cache = NULL; - cache_purge(cd); - cache_unregister_net(cd, net); - cache_destroy_net(cd, net); -} - -static int rsc_cache_create_net(struct net *net) -{ - struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); - struct cache_detail *cd; - int err; - - cd = cache_create_net(&rsc_cache_template, net); - if (IS_ERR(cd)) - return PTR_ERR(cd); - err = cache_register_net(cd, net); - if (err) { - cache_destroy_net(cd, net); - return err; - } - sn->rsc_cache = cd; - return 0; -} - -static void rsc_cache_destroy_net(struct net *net) -{ - struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); - struct cache_detail *cd = sn->rsc_cache; - - sn->rsc_cache = NULL; - cache_purge(cd); - cache_unregister_net(cd, net); - cache_destroy_net(cd, net); -} - int -gss_svc_init_net(struct net *net) +gss_svc_init(void) { - int rv; - - rv = rsc_cache_create_net(net); + int rv = svc_auth_register(RPC_AUTH_GSS, &svcauthops_gss); if (rv) return rv; - rv = rsi_cache_create_net(net); + rv = cache_register(&rsc_cache); if (rv) goto out1; + rv = cache_register(&rsi_cache); + if (rv) + goto out2; return 0; +out2: + cache_unregister(&rsc_cache); out1: - rsc_cache_destroy_net(net); + svc_auth_unregister(RPC_AUTH_GSS); return rv; } -void -gss_svc_shutdown_net(struct net *net) -{ - rsi_cache_destroy_net(net); - rsc_cache_destroy_net(net); -} - -int -gss_svc_init(void) -{ - return svc_auth_register(RPC_AUTH_GSS, &svcauthops_gss); -} - void gss_svc_shutdown(void) { + cache_unregister(&rsc_cache); + cache_unregister(&rsi_cache); svc_auth_unregister(RPC_AUTH_GSS); } diff --git a/trunk/net/sunrpc/backchannel_rqst.c b/trunk/net/sunrpc/backchannel_rqst.c index 31def68a0f6e..3ad435a14ada 100644 --- a/trunk/net/sunrpc/backchannel_rqst.c +++ b/trunk/net/sunrpc/backchannel_rqst.c @@ -25,7 +25,6 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include #include #include -#include #ifdef RPC_DEBUG #define RPCDBG_FACILITY RPCDBG_TRANS diff --git a/trunk/net/sunrpc/cache.c b/trunk/net/sunrpc/cache.c index f21ece088764..465df9ae1046 100644 --- a/trunk/net/sunrpc/cache.c +++ b/trunk/net/sunrpc/cache.c @@ -344,7 +344,7 @@ static int current_index; static void do_cache_clean(struct work_struct *work); static struct delayed_work cache_cleaner; -void sunrpc_init_cache_detail(struct cache_detail *cd) +static void sunrpc_init_cache_detail(struct cache_detail *cd) { rwlock_init(&cd->hash_lock); INIT_LIST_HEAD(&cd->queue); @@ -360,9 +360,8 @@ void sunrpc_init_cache_detail(struct cache_detail *cd) /* start the cleaning process */ schedule_delayed_work(&cache_cleaner, 0); } -EXPORT_SYMBOL_GPL(sunrpc_init_cache_detail); -void sunrpc_destroy_cache_detail(struct cache_detail *cd) +static void sunrpc_destroy_cache_detail(struct cache_detail *cd) { cache_purge(cd); spin_lock(&cache_list_lock); @@ -385,7 +384,6 @@ void sunrpc_destroy_cache_detail(struct cache_detail *cd) out: printk(KERN_ERR "nfsd: failed to unregister %s cache\n", cd->name); } -EXPORT_SYMBOL_GPL(sunrpc_destroy_cache_detail); /* clean cache tries to find something to clean * and cleans it. 
@@ -1645,6 +1643,12 @@ int cache_register_net(struct cache_detail *cd, struct net *net) } EXPORT_SYMBOL_GPL(cache_register_net); +int cache_register(struct cache_detail *cd) +{ + return cache_register_net(cd, &init_net); +} +EXPORT_SYMBOL_GPL(cache_register); + void cache_unregister_net(struct cache_detail *cd, struct net *net) { remove_cache_proc_entries(cd, net); @@ -1652,31 +1656,11 @@ void cache_unregister_net(struct cache_detail *cd, struct net *net) } EXPORT_SYMBOL_GPL(cache_unregister_net); -struct cache_detail *cache_create_net(struct cache_detail *tmpl, struct net *net) -{ - struct cache_detail *cd; - - cd = kmemdup(tmpl, sizeof(struct cache_detail), GFP_KERNEL); - if (cd == NULL) - return ERR_PTR(-ENOMEM); - - cd->hash_table = kzalloc(cd->hash_size * sizeof(struct cache_head *), - GFP_KERNEL); - if (cd->hash_table == NULL) { - kfree(cd); - return ERR_PTR(-ENOMEM); - } - cd->net = net; - return cd; -} -EXPORT_SYMBOL_GPL(cache_create_net); - -void cache_destroy_net(struct cache_detail *cd, struct net *net) +void cache_unregister(struct cache_detail *cd) { - kfree(cd->hash_table); - kfree(cd); + cache_unregister_net(cd, &init_net); } -EXPORT_SYMBOL_GPL(cache_destroy_net); +EXPORT_SYMBOL_GPL(cache_unregister); static ssize_t cache_read_pipefs(struct file *filp, char __user *buf, size_t count, loff_t *ppos) @@ -1803,14 +1787,17 @@ int sunrpc_cache_register_pipefs(struct dentry *parent, struct dentry *dir; int ret = 0; + sunrpc_init_cache_detail(cd); q.name = name; q.len = strlen(name); q.hash = full_name_hash(q.name, q.len); dir = rpc_create_cache_dir(parent, &q, umode, cd); if (!IS_ERR(dir)) cd->u.pipefs.dir = dir; - else + else { + sunrpc_destroy_cache_detail(cd); ret = PTR_ERR(dir); + } return ret; } EXPORT_SYMBOL_GPL(sunrpc_cache_register_pipefs); @@ -1819,6 +1806,7 @@ void sunrpc_cache_unregister_pipefs(struct cache_detail *cd) { rpc_remove_cache_dir(cd->u.pipefs.dir); cd->u.pipefs.dir = NULL; + sunrpc_destroy_cache_detail(cd); } 
EXPORT_SYMBOL_GPL(sunrpc_cache_unregister_pipefs); diff --git a/trunk/net/sunrpc/clnt.c b/trunk/net/sunrpc/clnt.c index 7a4cb5fdc212..f0268ea7e711 100644 --- a/trunk/net/sunrpc/clnt.c +++ b/trunk/net/sunrpc/clnt.c @@ -31,16 +31,13 @@ #include #include #include -#include #include #include #include #include -#include #include "sunrpc.h" -#include "netns.h" #ifdef RPC_DEBUG # define RPCDBG_FACILITY RPCDBG_CALL @@ -53,6 +50,8 @@ /* * All RPC clients are linked into this list */ +static LIST_HEAD(all_clients); +static DEFINE_SPINLOCK(rpc_client_lock); static DECLARE_WAIT_QUEUE_HEAD(destroy_wait); @@ -82,191 +81,82 @@ static int rpc_ping(struct rpc_clnt *clnt); static void rpc_register_client(struct rpc_clnt *clnt) { - struct net *net = rpc_net_ns(clnt); - struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); - - spin_lock(&sn->rpc_client_lock); - list_add(&clnt->cl_clients, &sn->all_clients); - spin_unlock(&sn->rpc_client_lock); + spin_lock(&rpc_client_lock); + list_add(&clnt->cl_clients, &all_clients); + spin_unlock(&rpc_client_lock); } static void rpc_unregister_client(struct rpc_clnt *clnt) { - struct net *net = rpc_net_ns(clnt); - struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); - - spin_lock(&sn->rpc_client_lock); + spin_lock(&rpc_client_lock); list_del(&clnt->cl_clients); - spin_unlock(&sn->rpc_client_lock); -} - -static void __rpc_clnt_remove_pipedir(struct rpc_clnt *clnt) -{ - if (clnt->cl_dentry) { - if (clnt->cl_auth && clnt->cl_auth->au_ops->pipes_destroy) - clnt->cl_auth->au_ops->pipes_destroy(clnt->cl_auth); - rpc_remove_client_dir(clnt->cl_dentry); - } - clnt->cl_dentry = NULL; -} - -static void rpc_clnt_remove_pipedir(struct rpc_clnt *clnt) -{ - struct net *net = rpc_net_ns(clnt); - struct super_block *pipefs_sb; - - pipefs_sb = rpc_get_sb_net(net); - if (pipefs_sb) { - __rpc_clnt_remove_pipedir(clnt); - rpc_put_sb_net(net); - } + spin_unlock(&rpc_client_lock); } -static struct dentry *rpc_setup_pipedir_sb(struct super_block *sb, - struct 
rpc_clnt *clnt, - const char *dir_name) +static int +rpc_setup_pipedir(struct rpc_clnt *clnt, char *dir_name) { static uint32_t clntid; + struct path path, dir; char name[15]; struct qstr q = { .name = name, }; - struct dentry *dir, *dentry; int error; - dir = rpc_d_lookup_sb(sb, dir_name); - if (dir == NULL) - return dir; + clnt->cl_path.mnt = ERR_PTR(-ENOENT); + clnt->cl_path.dentry = ERR_PTR(-ENOENT); + if (dir_name == NULL) + return 0; + + path.mnt = rpc_get_mount(); + if (IS_ERR(path.mnt)) + return PTR_ERR(path.mnt); + error = vfs_path_lookup(path.mnt->mnt_root, path.mnt, dir_name, 0, &dir); + if (error) + goto err; + for (;;) { q.len = snprintf(name, sizeof(name), "clnt%x", (unsigned int)clntid++); name[sizeof(name) - 1] = '\0'; q.hash = full_name_hash(q.name, q.len); - dentry = rpc_create_client_dir(dir, &q, clnt); - if (!IS_ERR(dentry)) + path.dentry = rpc_create_client_dir(dir.dentry, &q, clnt); + if (!IS_ERR(path.dentry)) break; - error = PTR_ERR(dentry); + error = PTR_ERR(path.dentry); if (error != -EEXIST) { printk(KERN_INFO "RPC: Couldn't create pipefs entry" " %s/%s, error %d\n", dir_name, name, error); - break; + goto err_path_put; } } - dput(dir); - return dentry; -} - -static int -rpc_setup_pipedir(struct rpc_clnt *clnt, const char *dir_name) -{ - struct net *net = rpc_net_ns(clnt); - struct super_block *pipefs_sb; - struct dentry *dentry; - - clnt->cl_dentry = NULL; - if (dir_name == NULL) - return 0; - pipefs_sb = rpc_get_sb_net(net); - if (!pipefs_sb) - return 0; - dentry = rpc_setup_pipedir_sb(pipefs_sb, clnt, dir_name); - rpc_put_sb_net(net); - if (IS_ERR(dentry)) - return PTR_ERR(dentry); - clnt->cl_dentry = dentry; + path_put(&dir); + clnt->cl_path = path; return 0; -} - -static int __rpc_pipefs_event(struct rpc_clnt *clnt, unsigned long event, - struct super_block *sb) -{ - struct dentry *dentry; - int err = 0; - - switch (event) { - case RPC_PIPEFS_MOUNT: - if (clnt->cl_program->pipe_dir_name == NULL) - break; - dentry = 
rpc_setup_pipedir_sb(sb, clnt, - clnt->cl_program->pipe_dir_name); - BUG_ON(dentry == NULL); - if (IS_ERR(dentry)) - return PTR_ERR(dentry); - clnt->cl_dentry = dentry; - if (clnt->cl_auth->au_ops->pipes_create) { - err = clnt->cl_auth->au_ops->pipes_create(clnt->cl_auth); - if (err) - __rpc_clnt_remove_pipedir(clnt); - } - break; - case RPC_PIPEFS_UMOUNT: - __rpc_clnt_remove_pipedir(clnt); - break; - default: - printk(KERN_ERR "%s: unknown event: %ld\n", __func__, event); - return -ENOTSUPP; - } - return err; -} - -static struct rpc_clnt *rpc_get_client_for_event(struct net *net, int event) -{ - struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); - struct rpc_clnt *clnt; - - spin_lock(&sn->rpc_client_lock); - list_for_each_entry(clnt, &sn->all_clients, cl_clients) { - if (((event == RPC_PIPEFS_MOUNT) && clnt->cl_dentry) || - ((event == RPC_PIPEFS_UMOUNT) && !clnt->cl_dentry)) - continue; - atomic_inc(&clnt->cl_count); - spin_unlock(&sn->rpc_client_lock); - return clnt; - } - spin_unlock(&sn->rpc_client_lock); - return NULL; -} - -static int rpc_pipefs_event(struct notifier_block *nb, unsigned long event, - void *ptr) -{ - struct super_block *sb = ptr; - struct rpc_clnt *clnt; - int error = 0; - - while ((clnt = rpc_get_client_for_event(sb->s_fs_info, event))) { - error = __rpc_pipefs_event(clnt, event, sb); - rpc_release_client(clnt); - if (error) - break; - } +err_path_put: + path_put(&dir); +err: + rpc_put_mount(); return error; } -static struct notifier_block rpc_clients_block = { - .notifier_call = rpc_pipefs_event, - .priority = SUNRPC_PIPEFS_RPC_PRIO, -}; - -int rpc_clients_notifier_register(void) -{ - return rpc_pipefs_notifier_register(&rpc_clients_block); -} - -void rpc_clients_notifier_unregister(void) -{ - return rpc_pipefs_notifier_unregister(&rpc_clients_block); -} - static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, struct rpc_xprt *xprt) { - const struct rpc_program *program = args->program; - const struct rpc_version 
*version; + struct rpc_program *program = args->program; + struct rpc_version *version; struct rpc_clnt *clnt = NULL; struct rpc_auth *auth; int err; + size_t len; /* sanity check the name before trying to print it */ + err = -EINVAL; + len = strlen(args->servername); + if (len > RPC_MAXNETNAMELEN) + goto out_no_rpciod; + len++; + dprintk("RPC: creating %s client for %s (xprt %p)\n", program->name, args->servername, xprt); @@ -289,7 +179,17 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, stru goto out_err; clnt->cl_parent = clnt; - rcu_assign_pointer(clnt->cl_xprt, xprt); + clnt->cl_server = clnt->cl_inline_name; + if (len > sizeof(clnt->cl_inline_name)) { + char *buf = kmalloc(len, GFP_KERNEL); + if (buf != NULL) + clnt->cl_server = buf; + else + len = sizeof(clnt->cl_inline_name); + } + strlcpy(clnt->cl_server, args->servername, len); + + clnt->cl_xprt = xprt; clnt->cl_procinfo = version->procs; clnt->cl_maxproc = version->nrprocs; clnt->cl_protname = program->name; @@ -304,7 +204,7 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, stru INIT_LIST_HEAD(&clnt->cl_tasks); spin_lock_init(&clnt->cl_lock); - if (!xprt_bound(xprt)) + if (!xprt_bound(clnt->cl_xprt)) clnt->cl_autobind = 1; clnt->cl_timeout = xprt->timeout; @@ -346,12 +246,17 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, stru return clnt; out_no_auth: - rpc_clnt_remove_pipedir(clnt); + if (!IS_ERR(clnt->cl_path.dentry)) { + rpc_remove_client_dir(clnt->cl_path.dentry); + rpc_put_mount(); + } out_no_path: kfree(clnt->cl_principal); out_no_principal: rpc_free_iostats(clnt->cl_metrics); out_no_stats: + if (clnt->cl_server != clnt->cl_inline_name) + kfree(clnt->cl_server); kfree(clnt); out_err: xprt_put(xprt); @@ -381,7 +286,6 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args) .srcaddr = args->saddress, .dstaddr = args->address, .addrlen = args->addrsize, - .servername = args->servername, .bc_xprt = 
args->bc_xprt, }; char servername[48]; @@ -390,7 +294,7 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args) * If the caller chooses not to specify a hostname, whip * up a string representation of the passed-in address. */ - if (xprtargs.servername == NULL) { + if (args->servername == NULL) { struct sockaddr_un *sun = (struct sockaddr_un *)args->address; struct sockaddr_in *sin = @@ -417,7 +321,7 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args) * address family isn't recognized. */ return ERR_PTR(-EINVAL); } - xprtargs.servername = servername; + args->servername = servername; } xprt = xprt_create_transport(&xprtargs); @@ -470,7 +374,6 @@ struct rpc_clnt * rpc_clone_client(struct rpc_clnt *clnt) { struct rpc_clnt *new; - struct rpc_xprt *xprt; int err = -ENOMEM; new = kmemdup(clnt, sizeof(*new), GFP_KERNEL); @@ -490,25 +393,18 @@ rpc_clone_client(struct rpc_clnt *clnt) if (new->cl_principal == NULL) goto out_no_principal; } - rcu_read_lock(); - xprt = xprt_get(rcu_dereference(clnt->cl_xprt)); - rcu_read_unlock(); - if (xprt == NULL) - goto out_no_transport; - rcu_assign_pointer(new->cl_xprt, xprt); atomic_set(&new->cl_count, 1); err = rpc_setup_pipedir(new, clnt->cl_program->pipe_dir_name); if (err != 0) goto out_no_path; if (new->cl_auth) atomic_inc(&new->cl_auth->au_count); + xprt_get(clnt->cl_xprt); atomic_inc(&clnt->cl_count); rpc_register_client(new); rpciod_up(); return new; out_no_path: - xprt_put(xprt); -out_no_transport: kfree(new->cl_principal); out_no_principal: rpc_free_iostats(new->cl_metrics); @@ -557,9 +453,8 @@ EXPORT_SYMBOL_GPL(rpc_killall_tasks); */ void rpc_shutdown_client(struct rpc_clnt *clnt) { - dprintk_rcu("RPC: shutting down %s client for %s\n", - clnt->cl_protname, - rcu_dereference(clnt->cl_xprt)->servername); + dprintk("RPC: shutting down %s client for %s\n", + clnt->cl_protname, clnt->cl_server); while (!list_empty(&clnt->cl_tasks)) { rpc_killall_tasks(clnt); @@ -577,17 +472,24 @@ 
EXPORT_SYMBOL_GPL(rpc_shutdown_client); static void rpc_free_client(struct rpc_clnt *clnt) { - dprintk_rcu("RPC: destroying %s client for %s\n", - clnt->cl_protname, - rcu_dereference(clnt->cl_xprt)->servername); - if (clnt->cl_parent != clnt) + dprintk("RPC: destroying %s client for %s\n", + clnt->cl_protname, clnt->cl_server); + if (!IS_ERR(clnt->cl_path.dentry)) { + rpc_remove_client_dir(clnt->cl_path.dentry); + rpc_put_mount(); + } + if (clnt->cl_parent != clnt) { rpc_release_client(clnt->cl_parent); + goto out_free; + } + if (clnt->cl_server != clnt->cl_inline_name) + kfree(clnt->cl_server); +out_free: rpc_unregister_client(clnt); - rpc_clnt_remove_pipedir(clnt); rpc_free_iostats(clnt->cl_metrics); kfree(clnt->cl_principal); clnt->cl_metrics = NULL; - xprt_put(rcu_dereference_raw(clnt->cl_xprt)); + xprt_put(clnt->cl_xprt); rpciod_down(); kfree(clnt); } @@ -640,11 +542,11 @@ rpc_release_client(struct rpc_clnt *clnt) * The Sun NFSv2/v3 ACL protocol can do this. */ struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *old, - const struct rpc_program *program, + struct rpc_program *program, u32 vers) { struct rpc_clnt *clnt; - const struct rpc_version *version; + struct rpc_version *version; int err; BUG_ON(vers >= program->nrvers || !program->version[vers]); @@ -876,18 +778,13 @@ EXPORT_SYMBOL_GPL(rpc_call_start); size_t rpc_peeraddr(struct rpc_clnt *clnt, struct sockaddr *buf, size_t bufsize) { size_t bytes; - struct rpc_xprt *xprt; + struct rpc_xprt *xprt = clnt->cl_xprt; - rcu_read_lock(); - xprt = rcu_dereference(clnt->cl_xprt); - - bytes = xprt->addrlen; + bytes = sizeof(xprt->addr); if (bytes > bufsize) bytes = bufsize; - memcpy(buf, &xprt->addr, bytes); - rcu_read_unlock(); - - return bytes; + memcpy(buf, &clnt->cl_xprt->addr, bytes); + return xprt->addrlen; } EXPORT_SYMBOL_GPL(rpc_peeraddr); @@ -896,16 +793,11 @@ EXPORT_SYMBOL_GPL(rpc_peeraddr); * @clnt: RPC client structure * @format: address format * - * NB: the lifetime of the memory referenced by the 
returned pointer is - * the same as the rpc_xprt itself. As long as the caller uses this - * pointer, it must hold the RCU read lock. */ const char *rpc_peeraddr2str(struct rpc_clnt *clnt, enum rpc_display_format_t format) { - struct rpc_xprt *xprt; - - xprt = rcu_dereference(clnt->cl_xprt); + struct rpc_xprt *xprt = clnt->cl_xprt; if (xprt->address_strings[format] != NULL) return xprt->address_strings[format]; @@ -914,203 +806,17 @@ const char *rpc_peeraddr2str(struct rpc_clnt *clnt, } EXPORT_SYMBOL_GPL(rpc_peeraddr2str); -static const struct sockaddr_in rpc_inaddr_loopback = { - .sin_family = AF_INET, - .sin_addr.s_addr = htonl(INADDR_ANY), -}; - -static const struct sockaddr_in6 rpc_in6addr_loopback = { - .sin6_family = AF_INET6, - .sin6_addr = IN6ADDR_ANY_INIT, -}; - -/* - * Try a getsockname() on a connected datagram socket. Using a - * connected datagram socket prevents leaving a socket in TIME_WAIT. - * This conserves the ephemeral port number space. - * - * Returns zero and fills in "buf" if successful; otherwise, a - * negative errno is returned. 
- */ -static int rpc_sockname(struct net *net, struct sockaddr *sap, size_t salen, - struct sockaddr *buf, int buflen) -{ - struct socket *sock; - int err; - - err = __sock_create(net, sap->sa_family, - SOCK_DGRAM, IPPROTO_UDP, &sock, 1); - if (err < 0) { - dprintk("RPC: can't create UDP socket (%d)\n", err); - goto out; - } - - switch (sap->sa_family) { - case AF_INET: - err = kernel_bind(sock, - (struct sockaddr *)&rpc_inaddr_loopback, - sizeof(rpc_inaddr_loopback)); - break; - case AF_INET6: - err = kernel_bind(sock, - (struct sockaddr *)&rpc_in6addr_loopback, - sizeof(rpc_in6addr_loopback)); - break; - default: - err = -EAFNOSUPPORT; - goto out; - } - if (err < 0) { - dprintk("RPC: can't bind UDP socket (%d)\n", err); - goto out_release; - } - - err = kernel_connect(sock, sap, salen, 0); - if (err < 0) { - dprintk("RPC: can't connect UDP socket (%d)\n", err); - goto out_release; - } - - err = kernel_getsockname(sock, buf, &buflen); - if (err < 0) { - dprintk("RPC: getsockname failed (%d)\n", err); - goto out_release; - } - - err = 0; - if (buf->sa_family == AF_INET6) { - struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)buf; - sin6->sin6_scope_id = 0; - } - dprintk("RPC: %s succeeded\n", __func__); - -out_release: - sock_release(sock); -out: - return err; -} - -/* - * Scraping a connected socket failed, so we don't have a useable - * local address. Fallback: generate an address that will prevent - * the server from calling us back. - * - * Returns zero and fills in "buf" if successful; otherwise, a - * negative errno is returned. 
- */ -static int rpc_anyaddr(int family, struct sockaddr *buf, size_t buflen) -{ - switch (family) { - case AF_INET: - if (buflen < sizeof(rpc_inaddr_loopback)) - return -EINVAL; - memcpy(buf, &rpc_inaddr_loopback, - sizeof(rpc_inaddr_loopback)); - break; - case AF_INET6: - if (buflen < sizeof(rpc_in6addr_loopback)) - return -EINVAL; - memcpy(buf, &rpc_in6addr_loopback, - sizeof(rpc_in6addr_loopback)); - default: - dprintk("RPC: %s: address family not supported\n", - __func__); - return -EAFNOSUPPORT; - } - dprintk("RPC: %s: succeeded\n", __func__); - return 0; -} - -/** - * rpc_localaddr - discover local endpoint address for an RPC client - * @clnt: RPC client structure - * @buf: target buffer - * @buflen: size of target buffer, in bytes - * - * Returns zero and fills in "buf" and "buflen" if successful; - * otherwise, a negative errno is returned. - * - * This works even if the underlying transport is not currently connected, - * or if the upper layer never previously provided a source address. - * - * The result of this function call is transient: multiple calls in - * succession may give different results, depending on how local - * networking configuration changes over time. 
- */ -int rpc_localaddr(struct rpc_clnt *clnt, struct sockaddr *buf, size_t buflen) -{ - struct sockaddr_storage address; - struct sockaddr *sap = (struct sockaddr *)&address; - struct rpc_xprt *xprt; - struct net *net; - size_t salen; - int err; - - rcu_read_lock(); - xprt = rcu_dereference(clnt->cl_xprt); - salen = xprt->addrlen; - memcpy(sap, &xprt->addr, salen); - net = get_net(xprt->xprt_net); - rcu_read_unlock(); - - rpc_set_port(sap, 0); - err = rpc_sockname(net, sap, salen, buf, buflen); - put_net(net); - if (err != 0) - /* Couldn't discover local address, return ANYADDR */ - return rpc_anyaddr(sap->sa_family, buf, buflen); - return 0; -} -EXPORT_SYMBOL_GPL(rpc_localaddr); - void rpc_setbufsize(struct rpc_clnt *clnt, unsigned int sndsize, unsigned int rcvsize) { - struct rpc_xprt *xprt; - - rcu_read_lock(); - xprt = rcu_dereference(clnt->cl_xprt); + struct rpc_xprt *xprt = clnt->cl_xprt; if (xprt->ops->set_buffer_size) xprt->ops->set_buffer_size(xprt, sndsize, rcvsize); - rcu_read_unlock(); } EXPORT_SYMBOL_GPL(rpc_setbufsize); -/** - * rpc_protocol - Get transport protocol number for an RPC client - * @clnt: RPC client to query - * - */ -int rpc_protocol(struct rpc_clnt *clnt) -{ - int protocol; - - rcu_read_lock(); - protocol = rcu_dereference(clnt->cl_xprt)->prot; - rcu_read_unlock(); - return protocol; -} -EXPORT_SYMBOL_GPL(rpc_protocol); - -/** - * rpc_net_ns - Get the network namespace for this RPC client - * @clnt: RPC client to query - * - */ -struct net *rpc_net_ns(struct rpc_clnt *clnt) -{ - struct net *ret; - - rcu_read_lock(); - ret = rcu_dereference(clnt->cl_xprt)->xprt_net; - rcu_read_unlock(); - return ret; -} -EXPORT_SYMBOL_GPL(rpc_net_ns); - -/** - * rpc_max_payload - Get maximum payload size for a transport, in bytes - * @clnt: RPC client to query +/* + * Return size of largest payload RPC client can support, in bytes * * For stream transports, this is one RPC record fragment (see RFC * 1831), as we don't support multi-record requests yet. 
For datagram @@ -1119,12 +825,7 @@ EXPORT_SYMBOL_GPL(rpc_net_ns); */ size_t rpc_max_payload(struct rpc_clnt *clnt) { - size_t ret; - - rcu_read_lock(); - ret = rcu_dereference(clnt->cl_xprt)->max_payload; - rcu_read_unlock(); - return ret; + return clnt->cl_xprt->max_payload; } EXPORT_SYMBOL_GPL(rpc_max_payload); @@ -1135,11 +836,8 @@ EXPORT_SYMBOL_GPL(rpc_max_payload); */ void rpc_force_rebind(struct rpc_clnt *clnt) { - if (clnt->cl_autobind) { - rcu_read_lock(); - xprt_clear_bound(rcu_dereference(clnt->cl_xprt)); - rcu_read_unlock(); - } + if (clnt->cl_autobind) + xprt_clear_bound(clnt->cl_xprt); } EXPORT_SYMBOL_GPL(rpc_force_rebind); @@ -1465,7 +1163,6 @@ call_bind_status(struct rpc_task *task) return; } - trace_rpc_bind_status(task); switch (task->tk_status) { case -ENOMEM: dprintk("RPC: %5u rpcbind out of memory\n", task->tk_pid); @@ -1565,7 +1262,6 @@ call_connect_status(struct rpc_task *task) return; } - trace_rpc_connect_status(task, status); switch (status) { /* if soft mounted, test if we've timed out */ case -ETIMEDOUT: @@ -1754,7 +1450,6 @@ call_status(struct rpc_task *task) return; } - trace_rpc_call_status(task); task->tk_status = 0; switch(status) { case -EHOSTDOWN: @@ -1818,11 +1513,8 @@ call_timeout(struct rpc_task *task) } if (RPC_IS_SOFT(task)) { if (clnt->cl_chatty) - rcu_read_lock(); printk(KERN_NOTICE "%s: server %s not responding, timed out\n", - clnt->cl_protname, - rcu_dereference(clnt->cl_xprt)->servername); - rcu_read_unlock(); + clnt->cl_protname, clnt->cl_server); if (task->tk_flags & RPC_TASK_TIMEOUT) rpc_exit(task, -ETIMEDOUT); else @@ -1832,13 +1524,9 @@ call_timeout(struct rpc_task *task) if (!(task->tk_flags & RPC_CALL_MAJORSEEN)) { task->tk_flags |= RPC_CALL_MAJORSEEN; - if (clnt->cl_chatty) { - rcu_read_lock(); + if (clnt->cl_chatty) printk(KERN_NOTICE "%s: server %s not responding, still trying\n", - clnt->cl_protname, - rcu_dereference(clnt->cl_xprt)->servername); - rcu_read_unlock(); - } + clnt->cl_protname, clnt->cl_server); 
} rpc_force_rebind(clnt); /* @@ -1867,13 +1555,9 @@ call_decode(struct rpc_task *task) dprint_status(task); if (task->tk_flags & RPC_CALL_MAJORSEEN) { - if (clnt->cl_chatty) { - rcu_read_lock(); + if (clnt->cl_chatty) printk(KERN_NOTICE "%s: server %s OK\n", - clnt->cl_protname, - rcu_dereference(clnt->cl_xprt)->servername); - rcu_read_unlock(); - } + clnt->cl_protname, clnt->cl_server); task->tk_flags &= ~RPC_CALL_MAJORSEEN; } @@ -1951,7 +1635,6 @@ rpc_encode_header(struct rpc_task *task) static __be32 * rpc_verify_header(struct rpc_task *task) { - struct rpc_clnt *clnt = task->tk_client; struct kvec *iov = &task->tk_rqstp->rq_rcv_buf.head[0]; int len = task->tk_rqstp->rq_rcv_buf.len >> 2; __be32 *p = iov->iov_base; @@ -2024,11 +1707,8 @@ rpc_verify_header(struct rpc_task *task) task->tk_action = call_bind; goto out_retry; case RPC_AUTH_TOOWEAK: - rcu_read_lock(); printk(KERN_NOTICE "RPC: server %s requires stronger " - "authentication.\n", - rcu_dereference(clnt->cl_xprt)->servername); - rcu_read_unlock(); + "authentication.\n", task->tk_client->cl_server); break; default: dprintk("RPC: %5u %s: unknown auth error: %x\n", @@ -2051,27 +1731,28 @@ rpc_verify_header(struct rpc_task *task) case RPC_SUCCESS: return p; case RPC_PROG_UNAVAIL: - dprintk_rcu("RPC: %5u %s: program %u is unsupported " - "by server %s\n", task->tk_pid, __func__, - (unsigned int)clnt->cl_prog, - rcu_dereference(clnt->cl_xprt)->servername); + dprintk("RPC: %5u %s: program %u is unsupported by server %s\n", + task->tk_pid, __func__, + (unsigned int)task->tk_client->cl_prog, + task->tk_client->cl_server); error = -EPFNOSUPPORT; goto out_err; case RPC_PROG_MISMATCH: - dprintk_rcu("RPC: %5u %s: program %u, version %u unsupported " - "by server %s\n", task->tk_pid, __func__, - (unsigned int)clnt->cl_prog, - (unsigned int)clnt->cl_vers, - rcu_dereference(clnt->cl_xprt)->servername); + dprintk("RPC: %5u %s: program %u, version %u unsupported by " + "server %s\n", task->tk_pid, __func__, + (unsigned 
int)task->tk_client->cl_prog, + (unsigned int)task->tk_client->cl_vers, + task->tk_client->cl_server); error = -EPROTONOSUPPORT; goto out_err; case RPC_PROC_UNAVAIL: - dprintk_rcu("RPC: %5u %s: proc %s unsupported by program %u, " + dprintk("RPC: %5u %s: proc %s unsupported by program %u, " "version %u on server %s\n", task->tk_pid, __func__, rpc_proc_name(task), - clnt->cl_prog, clnt->cl_vers, - rcu_dereference(clnt->cl_xprt)->servername); + task->tk_client->cl_prog, + task->tk_client->cl_vers, + task->tk_client->cl_server); error = -EOPNOTSUPP; goto out_err; case RPC_GARBAGE_ARGS: @@ -2085,7 +1766,7 @@ rpc_verify_header(struct rpc_task *task) } out_garbage: - clnt->cl_stats->rpcgarbage++; + task->tk_client->cl_stats->rpcgarbage++; if (task->tk_garb_retry) { task->tk_garb_retry--; dprintk("RPC: %5u %s: retrying\n", @@ -2171,15 +1852,14 @@ static void rpc_show_task(const struct rpc_clnt *clnt, task->tk_action, rpc_waitq); } -void rpc_show_tasks(struct net *net) +void rpc_show_tasks(void) { struct rpc_clnt *clnt; struct rpc_task *task; int header = 0; - struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); - spin_lock(&sn->rpc_client_lock); - list_for_each_entry(clnt, &sn->all_clients, cl_clients) { + spin_lock(&rpc_client_lock); + list_for_each_entry(clnt, &all_clients, cl_clients) { spin_lock(&clnt->cl_lock); list_for_each_entry(task, &clnt->cl_tasks, tk_task) { if (!header) { @@ -2190,6 +1870,6 @@ void rpc_show_tasks(struct net *net) } spin_unlock(&clnt->cl_lock); } - spin_unlock(&sn->rpc_client_lock); + spin_unlock(&rpc_client_lock); } #endif diff --git a/trunk/net/sunrpc/netns.h b/trunk/net/sunrpc/netns.h index ce7bd449173d..d013bf211cae 100644 --- a/trunk/net/sunrpc/netns.h +++ b/trunk/net/sunrpc/netns.h @@ -9,20 +9,6 @@ struct cache_detail; struct sunrpc_net { struct proc_dir_entry *proc_net_rpc; struct cache_detail *ip_map_cache; - struct cache_detail *unix_gid_cache; - struct cache_detail *rsc_cache; - struct cache_detail *rsi_cache; - - struct 
super_block *pipefs_sb; - struct mutex pipefs_sb_lock; - - struct list_head all_clients; - spinlock_t rpc_client_lock; - - struct rpc_clnt *rpcb_local_clnt; - struct rpc_clnt *rpcb_local_clnt4; - spinlock_t rpcb_clnt_lock; - unsigned int rpcb_users; }; extern int sunrpc_net_id; diff --git a/trunk/net/sunrpc/rpc_pipe.c b/trunk/net/sunrpc/rpc_pipe.c index c84c0e0c41cb..7d6dd6efbdbe 100644 --- a/trunk/net/sunrpc/rpc_pipe.c +++ b/trunk/net/sunrpc/rpc_pipe.c @@ -16,9 +16,9 @@ #include #include #include -#include #include +#include #include #include #include @@ -27,15 +27,9 @@ #include #include #include -#include -#include -#include "netns.h" -#include "sunrpc.h" - -#define RPCDBG_FACILITY RPCDBG_DEBUG - -#define NET_NAME(net) ((net == &init_net) ? " (init_net)" : "") +static struct vfsmount *rpc_mnt __read_mostly; +static int rpc_mount_count; static struct file_system_type rpc_pipe_fs_type; @@ -44,21 +38,7 @@ static struct kmem_cache *rpc_inode_cachep __read_mostly; #define RPC_UPCALL_TIMEOUT (30*HZ) -static BLOCKING_NOTIFIER_HEAD(rpc_pipefs_notifier_list); - -int rpc_pipefs_notifier_register(struct notifier_block *nb) -{ - return blocking_notifier_chain_cond_register(&rpc_pipefs_notifier_list, nb); -} -EXPORT_SYMBOL_GPL(rpc_pipefs_notifier_register); - -void rpc_pipefs_notifier_unregister(struct notifier_block *nb) -{ - blocking_notifier_chain_unregister(&rpc_pipefs_notifier_list, nb); -} -EXPORT_SYMBOL_GPL(rpc_pipefs_notifier_unregister); - -static void rpc_purge_list(wait_queue_head_t *waitq, struct list_head *head, +static void rpc_purge_list(struct rpc_inode *rpci, struct list_head *head, void (*destroy_msg)(struct rpc_pipe_msg *), int err) { struct rpc_pipe_msg *msg; @@ -71,31 +51,30 @@ static void rpc_purge_list(wait_queue_head_t *waitq, struct list_head *head, msg->errno = err; destroy_msg(msg); } while (!list_empty(head)); - wake_up(waitq); + wake_up(&rpci->waitq); } static void rpc_timeout_upcall_queue(struct work_struct *work) { LIST_HEAD(free_list); - struct 
rpc_pipe *pipe = - container_of(work, struct rpc_pipe, queue_timeout.work); + struct rpc_inode *rpci = + container_of(work, struct rpc_inode, queue_timeout.work); + struct inode *inode = &rpci->vfs_inode; void (*destroy_msg)(struct rpc_pipe_msg *); - struct dentry *dentry; - spin_lock(&pipe->lock); - destroy_msg = pipe->ops->destroy_msg; - if (pipe->nreaders == 0) { - list_splice_init(&pipe->pipe, &free_list); - pipe->pipelen = 0; + spin_lock(&inode->i_lock); + if (rpci->ops == NULL) { + spin_unlock(&inode->i_lock); + return; } - dentry = dget(pipe->dentry); - spin_unlock(&pipe->lock); - if (dentry) { - rpc_purge_list(&RPC_I(dentry->d_inode)->waitq, - &free_list, destroy_msg, -ETIMEDOUT); - dput(dentry); + destroy_msg = rpci->ops->destroy_msg; + if (rpci->nreaders == 0) { + list_splice_init(&rpci->pipe, &free_list); + rpci->pipelen = 0; } + spin_unlock(&inode->i_lock); + rpc_purge_list(rpci, &free_list, destroy_msg, -ETIMEDOUT); } ssize_t rpc_pipe_generic_upcall(struct file *filp, struct rpc_pipe_msg *msg, @@ -129,31 +108,30 @@ EXPORT_SYMBOL_GPL(rpc_pipe_generic_upcall); * initialize the fields of @msg (other than @msg->list) appropriately. 
*/ int -rpc_queue_upcall(struct rpc_pipe *pipe, struct rpc_pipe_msg *msg) +rpc_queue_upcall(struct inode *inode, struct rpc_pipe_msg *msg) { + struct rpc_inode *rpci = RPC_I(inode); int res = -EPIPE; - struct dentry *dentry; - spin_lock(&pipe->lock); - if (pipe->nreaders) { - list_add_tail(&msg->list, &pipe->pipe); - pipe->pipelen += msg->len; + spin_lock(&inode->i_lock); + if (rpci->ops == NULL) + goto out; + if (rpci->nreaders) { + list_add_tail(&msg->list, &rpci->pipe); + rpci->pipelen += msg->len; res = 0; - } else if (pipe->flags & RPC_PIPE_WAIT_FOR_OPEN) { - if (list_empty(&pipe->pipe)) + } else if (rpci->flags & RPC_PIPE_WAIT_FOR_OPEN) { + if (list_empty(&rpci->pipe)) queue_delayed_work(rpciod_workqueue, - &pipe->queue_timeout, + &rpci->queue_timeout, RPC_UPCALL_TIMEOUT); - list_add_tail(&msg->list, &pipe->pipe); - pipe->pipelen += msg->len; + list_add_tail(&msg->list, &rpci->pipe); + rpci->pipelen += msg->len; res = 0; } - dentry = dget(pipe->dentry); - spin_unlock(&pipe->lock); - if (dentry) { - wake_up(&RPC_I(dentry->d_inode)->waitq); - dput(dentry); - } +out: + spin_unlock(&inode->i_lock); + wake_up(&rpci->waitq); return res; } EXPORT_SYMBOL_GPL(rpc_queue_upcall); @@ -167,26 +145,29 @@ rpc_inode_setowner(struct inode *inode, void *private) static void rpc_close_pipes(struct inode *inode) { - struct rpc_pipe *pipe = RPC_I(inode)->pipe; + struct rpc_inode *rpci = RPC_I(inode); + const struct rpc_pipe_ops *ops; int need_release; - LIST_HEAD(free_list); mutex_lock(&inode->i_mutex); - spin_lock(&pipe->lock); - need_release = pipe->nreaders != 0 || pipe->nwriters != 0; - pipe->nreaders = 0; - list_splice_init(&pipe->in_upcall, &free_list); - list_splice_init(&pipe->pipe, &free_list); - pipe->pipelen = 0; - pipe->dentry = NULL; - spin_unlock(&pipe->lock); - rpc_purge_list(&RPC_I(inode)->waitq, &free_list, pipe->ops->destroy_msg, -EPIPE); - pipe->nwriters = 0; - if (need_release && pipe->ops->release_pipe) - pipe->ops->release_pipe(inode); - 
cancel_delayed_work_sync(&pipe->queue_timeout); + ops = rpci->ops; + if (ops != NULL) { + LIST_HEAD(free_list); + spin_lock(&inode->i_lock); + need_release = rpci->nreaders != 0 || rpci->nwriters != 0; + rpci->nreaders = 0; + list_splice_init(&rpci->in_upcall, &free_list); + list_splice_init(&rpci->pipe, &free_list); + rpci->pipelen = 0; + rpci->ops = NULL; + spin_unlock(&inode->i_lock); + rpc_purge_list(rpci, &free_list, ops->destroy_msg, -EPIPE); + rpci->nwriters = 0; + if (need_release && ops->release_pipe) + ops->release_pipe(inode); + cancel_delayed_work_sync(&rpci->queue_timeout); + } rpc_inode_setowner(inode, NULL); - RPC_I(inode)->pipe = NULL; mutex_unlock(&inode->i_mutex); } @@ -216,24 +197,23 @@ rpc_destroy_inode(struct inode *inode) static int rpc_pipe_open(struct inode *inode, struct file *filp) { - struct rpc_pipe *pipe; + struct rpc_inode *rpci = RPC_I(inode); int first_open; int res = -ENXIO; mutex_lock(&inode->i_mutex); - pipe = RPC_I(inode)->pipe; - if (pipe == NULL) + if (rpci->ops == NULL) goto out; - first_open = pipe->nreaders == 0 && pipe->nwriters == 0; - if (first_open && pipe->ops->open_pipe) { - res = pipe->ops->open_pipe(inode); + first_open = rpci->nreaders == 0 && rpci->nwriters == 0; + if (first_open && rpci->ops->open_pipe) { + res = rpci->ops->open_pipe(inode); if (res) goto out; } if (filp->f_mode & FMODE_READ) - pipe->nreaders++; + rpci->nreaders++; if (filp->f_mode & FMODE_WRITE) - pipe->nwriters++; + rpci->nwriters++; res = 0; out: mutex_unlock(&inode->i_mutex); @@ -243,39 +223,38 @@ rpc_pipe_open(struct inode *inode, struct file *filp) static int rpc_pipe_release(struct inode *inode, struct file *filp) { - struct rpc_pipe *pipe; + struct rpc_inode *rpci = RPC_I(inode); struct rpc_pipe_msg *msg; int last_close; mutex_lock(&inode->i_mutex); - pipe = RPC_I(inode)->pipe; - if (pipe == NULL) + if (rpci->ops == NULL) goto out; msg = filp->private_data; if (msg != NULL) { - spin_lock(&pipe->lock); + spin_lock(&inode->i_lock); 
msg->errno = -EAGAIN; list_del_init(&msg->list); - spin_unlock(&pipe->lock); - pipe->ops->destroy_msg(msg); + spin_unlock(&inode->i_lock); + rpci->ops->destroy_msg(msg); } if (filp->f_mode & FMODE_WRITE) - pipe->nwriters --; + rpci->nwriters --; if (filp->f_mode & FMODE_READ) { - pipe->nreaders --; - if (pipe->nreaders == 0) { + rpci->nreaders --; + if (rpci->nreaders == 0) { LIST_HEAD(free_list); - spin_lock(&pipe->lock); - list_splice_init(&pipe->pipe, &free_list); - pipe->pipelen = 0; - spin_unlock(&pipe->lock); - rpc_purge_list(&RPC_I(inode)->waitq, &free_list, - pipe->ops->destroy_msg, -EAGAIN); + spin_lock(&inode->i_lock); + list_splice_init(&rpci->pipe, &free_list); + rpci->pipelen = 0; + spin_unlock(&inode->i_lock); + rpc_purge_list(rpci, &free_list, + rpci->ops->destroy_msg, -EAGAIN); } } - last_close = pipe->nwriters == 0 && pipe->nreaders == 0; - if (last_close && pipe->ops->release_pipe) - pipe->ops->release_pipe(inode); + last_close = rpci->nwriters == 0 && rpci->nreaders == 0; + if (last_close && rpci->ops->release_pipe) + rpci->ops->release_pipe(inode); out: mutex_unlock(&inode->i_mutex); return 0; @@ -285,40 +264,39 @@ static ssize_t rpc_pipe_read(struct file *filp, char __user *buf, size_t len, loff_t *offset) { struct inode *inode = filp->f_path.dentry->d_inode; - struct rpc_pipe *pipe; + struct rpc_inode *rpci = RPC_I(inode); struct rpc_pipe_msg *msg; int res = 0; mutex_lock(&inode->i_mutex); - pipe = RPC_I(inode)->pipe; - if (pipe == NULL) { + if (rpci->ops == NULL) { res = -EPIPE; goto out_unlock; } msg = filp->private_data; if (msg == NULL) { - spin_lock(&pipe->lock); - if (!list_empty(&pipe->pipe)) { - msg = list_entry(pipe->pipe.next, + spin_lock(&inode->i_lock); + if (!list_empty(&rpci->pipe)) { + msg = list_entry(rpci->pipe.next, struct rpc_pipe_msg, list); - list_move(&msg->list, &pipe->in_upcall); - pipe->pipelen -= msg->len; + list_move(&msg->list, &rpci->in_upcall); + rpci->pipelen -= msg->len; filp->private_data = msg; msg->copied = 
0; } - spin_unlock(&pipe->lock); + spin_unlock(&inode->i_lock); if (msg == NULL) goto out_unlock; } /* NOTE: it is up to the callback to update msg->copied */ - res = pipe->ops->upcall(filp, msg, buf, len); + res = rpci->ops->upcall(filp, msg, buf, len); if (res < 0 || msg->len == msg->copied) { filp->private_data = NULL; - spin_lock(&pipe->lock); + spin_lock(&inode->i_lock); list_del_init(&msg->list); - spin_unlock(&pipe->lock); - pipe->ops->destroy_msg(msg); + spin_unlock(&inode->i_lock); + rpci->ops->destroy_msg(msg); } out_unlock: mutex_unlock(&inode->i_mutex); @@ -329,12 +307,13 @@ static ssize_t rpc_pipe_write(struct file *filp, const char __user *buf, size_t len, loff_t *offset) { struct inode *inode = filp->f_path.dentry->d_inode; + struct rpc_inode *rpci = RPC_I(inode); int res; mutex_lock(&inode->i_mutex); res = -EPIPE; - if (RPC_I(inode)->pipe != NULL) - res = RPC_I(inode)->pipe->ops->downcall(filp, buf, len); + if (rpci->ops != NULL) + res = rpci->ops->downcall(filp, buf, len); mutex_unlock(&inode->i_mutex); return res; } @@ -342,18 +321,17 @@ rpc_pipe_write(struct file *filp, const char __user *buf, size_t len, loff_t *of static unsigned int rpc_pipe_poll(struct file *filp, struct poll_table_struct *wait) { - struct inode *inode = filp->f_path.dentry->d_inode; - struct rpc_inode *rpci = RPC_I(inode); - unsigned int mask = POLLOUT | POLLWRNORM; + struct rpc_inode *rpci; + unsigned int mask = 0; + rpci = RPC_I(filp->f_path.dentry->d_inode); poll_wait(filp, &rpci->waitq, wait); - mutex_lock(&inode->i_mutex); - if (rpci->pipe == NULL) + mask = POLLOUT | POLLWRNORM; + if (rpci->ops == NULL) mask |= POLLERR | POLLHUP; - else if (filp->private_data || !list_empty(&rpci->pipe->pipe)) + if (filp->private_data || !list_empty(&rpci->pipe)) mask |= POLLIN | POLLRDNORM; - mutex_unlock(&inode->i_mutex); return mask; } @@ -361,26 +339,23 @@ static long rpc_pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { struct inode *inode = 
filp->f_path.dentry->d_inode; - struct rpc_pipe *pipe; + struct rpc_inode *rpci = RPC_I(inode); int len; switch (cmd) { case FIONREAD: - mutex_lock(&inode->i_mutex); - pipe = RPC_I(inode)->pipe; - if (pipe == NULL) { - mutex_unlock(&inode->i_mutex); + spin_lock(&inode->i_lock); + if (rpci->ops == NULL) { + spin_unlock(&inode->i_lock); return -EPIPE; } - spin_lock(&pipe->lock); - len = pipe->pipelen; + len = rpci->pipelen; if (filp->private_data) { struct rpc_pipe_msg *msg; msg = filp->private_data; len += msg->len - msg->copied; } - spin_unlock(&pipe->lock); - mutex_unlock(&inode->i_mutex); + spin_unlock(&inode->i_lock); return put_user(len, (int __user *)arg); default: return -EINVAL; @@ -403,15 +378,12 @@ rpc_show_info(struct seq_file *m, void *v) { struct rpc_clnt *clnt = m->private; - rcu_read_lock(); - seq_printf(m, "RPC server: %s\n", - rcu_dereference(clnt->cl_xprt)->servername); + seq_printf(m, "RPC server: %s\n", clnt->cl_server); seq_printf(m, "service: %s (%d) version %d\n", clnt->cl_protname, clnt->cl_prog, clnt->cl_vers); seq_printf(m, "address: %s\n", rpc_peeraddr2str(clnt, RPC_DISPLAY_ADDR)); seq_printf(m, "protocol: %s\n", rpc_peeraddr2str(clnt, RPC_DISPLAY_PROTO)); seq_printf(m, "port: %s\n", rpc_peeraddr2str(clnt, RPC_DISPLAY_PORT)); - rcu_read_unlock(); return 0; } @@ -468,6 +440,23 @@ struct rpc_filelist { umode_t mode; }; +struct vfsmount *rpc_get_mount(void) +{ + int err; + + err = simple_pin_fs(&rpc_pipe_fs_type, &rpc_mnt, &rpc_mount_count); + if (err != 0) + return ERR_PTR(err); + return rpc_mnt; +} +EXPORT_SYMBOL_GPL(rpc_get_mount); + +void rpc_put_mount(void) +{ + simple_release_fs(&rpc_mnt, &rpc_mount_count); +} +EXPORT_SYMBOL_GPL(rpc_put_mount); + static int rpc_delete_dentry(const struct dentry *dentry) { return 1; @@ -551,47 +540,12 @@ static int __rpc_mkdir(struct inode *dir, struct dentry *dentry, return 0; } -static void -init_pipe(struct rpc_pipe *pipe) -{ - pipe->nreaders = 0; - pipe->nwriters = 0; - 
INIT_LIST_HEAD(&pipe->in_upcall); - INIT_LIST_HEAD(&pipe->in_downcall); - INIT_LIST_HEAD(&pipe->pipe); - pipe->pipelen = 0; - INIT_DELAYED_WORK(&pipe->queue_timeout, - rpc_timeout_upcall_queue); - pipe->ops = NULL; - spin_lock_init(&pipe->lock); - pipe->dentry = NULL; -} - -void rpc_destroy_pipe_data(struct rpc_pipe *pipe) -{ - kfree(pipe); -} -EXPORT_SYMBOL_GPL(rpc_destroy_pipe_data); - -struct rpc_pipe *rpc_mkpipe_data(const struct rpc_pipe_ops *ops, int flags) -{ - struct rpc_pipe *pipe; - - pipe = kzalloc(sizeof(struct rpc_pipe), GFP_KERNEL); - if (!pipe) - return ERR_PTR(-ENOMEM); - init_pipe(pipe); - pipe->ops = ops; - pipe->flags = flags; - return pipe; -} -EXPORT_SYMBOL_GPL(rpc_mkpipe_data); - -static int __rpc_mkpipe_dentry(struct inode *dir, struct dentry *dentry, - umode_t mode, - const struct file_operations *i_fop, - void *private, - struct rpc_pipe *pipe) +static int __rpc_mkpipe(struct inode *dir, struct dentry *dentry, + umode_t mode, + const struct file_operations *i_fop, + void *private, + const struct rpc_pipe_ops *ops, + int flags) { struct rpc_inode *rpci; int err; @@ -600,8 +554,10 @@ static int __rpc_mkpipe_dentry(struct inode *dir, struct dentry *dentry, if (err) return err; rpci = RPC_I(dentry->d_inode); + rpci->nkern_readwriters = 1; rpci->private = private; - rpci->pipe = pipe; + rpci->flags = flags; + rpci->ops = ops; fsnotify_create(dir, dentry); return 0; } @@ -617,22 +573,6 @@ static int __rpc_rmdir(struct inode *dir, struct dentry *dentry) return ret; } -int rpc_rmdir(struct dentry *dentry) -{ - struct dentry *parent; - struct inode *dir; - int error; - - parent = dget_parent(dentry); - dir = parent->d_inode; - mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); - error = __rpc_rmdir(dir, dentry); - mutex_unlock(&dir->i_mutex); - dput(parent); - return error; -} -EXPORT_SYMBOL_GPL(rpc_rmdir); - static int __rpc_unlink(struct inode *dir, struct dentry *dentry) { int ret; @@ -647,12 +587,16 @@ static int __rpc_unlink(struct inode *dir, 
struct dentry *dentry) static int __rpc_rmpipe(struct inode *dir, struct dentry *dentry) { struct inode *inode = dentry->d_inode; + struct rpc_inode *rpci = RPC_I(inode); + rpci->nkern_readwriters--; + if (rpci->nkern_readwriters != 0) + return 0; rpc_close_pipes(inode); return __rpc_unlink(dir, dentry); } -static struct dentry *__rpc_lookup_create_exclusive(struct dentry *parent, +static struct dentry *__rpc_lookup_create(struct dentry *parent, struct qstr *name) { struct dentry *dentry; @@ -660,13 +604,27 @@ static struct dentry *__rpc_lookup_create_exclusive(struct dentry *parent, dentry = d_lookup(parent, name); if (!dentry) { dentry = d_alloc(parent, name); - if (!dentry) - return ERR_PTR(-ENOMEM); + if (!dentry) { + dentry = ERR_PTR(-ENOMEM); + goto out_err; + } } - if (dentry->d_inode == NULL) { + if (!dentry->d_inode) d_set_d_op(dentry, &rpc_dentry_operations); +out_err: + return dentry; +} + +static struct dentry *__rpc_lookup_create_exclusive(struct dentry *parent, + struct qstr *name) +{ + struct dentry *dentry; + + dentry = __rpc_lookup_create(parent, name); + if (IS_ERR(dentry)) + return dentry; + if (dentry->d_inode == NULL) return dentry; - } dput(dentry); return ERR_PTR(-EEXIST); } @@ -821,7 +779,7 @@ static int rpc_rmdir_depopulate(struct dentry *dentry, * @private: private data to associate with the pipe, for the caller's use * @ops: operations defining the behavior of the pipe: upcall, downcall, * release_pipe, open_pipe, and destroy_msg. - * @flags: rpc_pipe flags + * @flags: rpc_inode flags * * Data is made available for userspace to read by calls to * rpc_queue_upcall(). The actual reads will result in calls to @@ -834,8 +792,9 @@ static int rpc_rmdir_depopulate(struct dentry *dentry, * The @private argument passed here will be available to all these methods * from the file pointer, via RPC_I(file->f_dentry->d_inode)->private. 
*/ -struct dentry *rpc_mkpipe_dentry(struct dentry *parent, const char *name, - void *private, struct rpc_pipe *pipe) +struct dentry *rpc_mkpipe(struct dentry *parent, const char *name, + void *private, const struct rpc_pipe_ops *ops, + int flags) { struct dentry *dentry; struct inode *dir = parent->d_inode; @@ -843,9 +802,9 @@ struct dentry *rpc_mkpipe_dentry(struct dentry *parent, const char *name, struct qstr q; int err; - if (pipe->ops->upcall == NULL) + if (ops->upcall == NULL) umode &= ~S_IRUGO; - if (pipe->ops->downcall == NULL) + if (ops->downcall == NULL) umode &= ~S_IWUGO; q.name = name; @@ -853,11 +812,24 @@ struct dentry *rpc_mkpipe_dentry(struct dentry *parent, const char *name, q.hash = full_name_hash(q.name, q.len), mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); - dentry = __rpc_lookup_create_exclusive(parent, &q); + dentry = __rpc_lookup_create(parent, &q); if (IS_ERR(dentry)) goto out; - err = __rpc_mkpipe_dentry(dir, dentry, umode, &rpc_pipe_fops, - private, pipe); + if (dentry->d_inode) { + struct rpc_inode *rpci = RPC_I(dentry->d_inode); + if (rpci->private != private || + rpci->ops != ops || + rpci->flags != flags) { + dput (dentry); + err = -EBUSY; + goto out_err; + } + rpci->nkern_readwriters++; + goto out; + } + + err = __rpc_mkpipe(dir, dentry, umode, &rpc_pipe_fops, + private, ops, flags); if (err) goto out_err; out: @@ -870,7 +842,7 @@ struct dentry *rpc_mkpipe_dentry(struct dentry *parent, const char *name, err); goto out; } -EXPORT_SYMBOL_GPL(rpc_mkpipe_dentry); +EXPORT_SYMBOL_GPL(rpc_mkpipe); /** * rpc_unlink - remove a pipe @@ -943,7 +915,7 @@ struct dentry *rpc_create_client_dir(struct dentry *dentry, /** * rpc_remove_client_dir - Remove a directory created with rpc_create_client_dir() - * @clnt: rpc client + * @dentry: directory to remove */ int rpc_remove_client_dir(struct dentry *dentry) { @@ -1048,64 +1020,11 @@ static const struct rpc_filelist files[] = { }, }; -/* - * This call can be used only in RPC pipefs mount 
notification hooks. - */ -struct dentry *rpc_d_lookup_sb(const struct super_block *sb, - const unsigned char *dir_name) -{ - struct qstr dir = { - .name = dir_name, - .len = strlen(dir_name), - .hash = full_name_hash(dir_name, strlen(dir_name)), - }; - - return d_lookup(sb->s_root, &dir); -} -EXPORT_SYMBOL_GPL(rpc_d_lookup_sb); - -void rpc_pipefs_init_net(struct net *net) -{ - struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); - - mutex_init(&sn->pipefs_sb_lock); -} - -/* - * This call will be used for per network namespace operations calls. - * Note: Function will be returned with pipefs_sb_lock taken if superblock was - * found. This lock have to be released by rpc_put_sb_net() when all operations - * will be completed. - */ -struct super_block *rpc_get_sb_net(const struct net *net) -{ - struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); - - mutex_lock(&sn->pipefs_sb_lock); - if (sn->pipefs_sb) - return sn->pipefs_sb; - mutex_unlock(&sn->pipefs_sb_lock); - return NULL; -} -EXPORT_SYMBOL_GPL(rpc_get_sb_net); - -void rpc_put_sb_net(const struct net *net) -{ - struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); - - BUG_ON(sn->pipefs_sb == NULL); - mutex_unlock(&sn->pipefs_sb_lock); -} -EXPORT_SYMBOL_GPL(rpc_put_sb_net); - static int rpc_fill_super(struct super_block *sb, void *data, int silent) { struct inode *inode; struct dentry *root; - struct net *net = data; - struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); - int err; sb->s_blocksize = PAGE_CACHE_SIZE; sb->s_blocksize_bits = PAGE_CACHE_SHIFT; @@ -1119,54 +1038,21 @@ rpc_fill_super(struct super_block *sb, void *data, int silent) return -ENOMEM; if (rpc_populate(root, files, RPCAUTH_lockd, RPCAUTH_RootEOF, NULL)) return -ENOMEM; - dprintk("RPC: sending pipefs MOUNT notification for net %p%s\n", net, - NET_NAME(net)); - err = blocking_notifier_call_chain(&rpc_pipefs_notifier_list, - RPC_PIPEFS_MOUNT, - sb); - if (err) - goto err_depopulate; - sb->s_fs_info = get_net(net); - sn->pipefs_sb 
= sb; return 0; - -err_depopulate: - blocking_notifier_call_chain(&rpc_pipefs_notifier_list, - RPC_PIPEFS_UMOUNT, - sb); - __rpc_depopulate(root, files, RPCAUTH_lockd, RPCAUTH_RootEOF); - return err; } static struct dentry * rpc_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { - return mount_ns(fs_type, flags, current->nsproxy->net_ns, rpc_fill_super); -} - -static void rpc_kill_sb(struct super_block *sb) -{ - struct net *net = sb->s_fs_info; - struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); - - mutex_lock(&sn->pipefs_sb_lock); - sn->pipefs_sb = NULL; - mutex_unlock(&sn->pipefs_sb_lock); - put_net(net); - dprintk("RPC: sending pipefs UMOUNT notification for net %p%s\n", net, - NET_NAME(net)); - blocking_notifier_call_chain(&rpc_pipefs_notifier_list, - RPC_PIPEFS_UMOUNT, - sb); - kill_litter_super(sb); + return mount_single(fs_type, flags, data, rpc_fill_super); } static struct file_system_type rpc_pipe_fs_type = { .owner = THIS_MODULE, .name = "rpc_pipefs", .mount = rpc_mount, - .kill_sb = rpc_kill_sb, + .kill_sb = kill_litter_super, }; static void @@ -1176,8 +1062,16 @@ init_once(void *foo) inode_init_once(&rpci->vfs_inode); rpci->private = NULL; - rpci->pipe = NULL; + rpci->nreaders = 0; + rpci->nwriters = 0; + INIT_LIST_HEAD(&rpci->in_upcall); + INIT_LIST_HEAD(&rpci->in_downcall); + INIT_LIST_HEAD(&rpci->pipe); + rpci->pipelen = 0; init_waitqueue_head(&rpci->waitq); + INIT_DELAYED_WORK(&rpci->queue_timeout, + rpc_timeout_upcall_queue); + rpci->ops = NULL; } int register_rpc_pipefs(void) @@ -1191,24 +1085,17 @@ int register_rpc_pipefs(void) init_once); if (!rpc_inode_cachep) return -ENOMEM; - err = rpc_clients_notifier_register(); - if (err) - goto err_notifier; err = register_filesystem(&rpc_pipe_fs_type); - if (err) - goto err_register; - return 0; + if (err) { + kmem_cache_destroy(rpc_inode_cachep); + return err; + } -err_register: - rpc_clients_notifier_unregister(); -err_notifier: - 
kmem_cache_destroy(rpc_inode_cachep); - return err; + return 0; } void unregister_rpc_pipefs(void) { - rpc_clients_notifier_unregister(); kmem_cache_destroy(rpc_inode_cachep); unregister_filesystem(&rpc_pipe_fs_type); } diff --git a/trunk/net/sunrpc/rpcb_clnt.c b/trunk/net/sunrpc/rpcb_clnt.c index 207a74696c9f..8761bf8e36fc 100644 --- a/trunk/net/sunrpc/rpcb_clnt.c +++ b/trunk/net/sunrpc/rpcb_clnt.c @@ -23,15 +23,12 @@ #include #include #include -#include #include #include #include #include -#include "netns.h" - #ifdef RPC_DEBUG # define RPCDBG_FACILITY RPCDBG_BIND #endif @@ -112,7 +109,13 @@ enum { static void rpcb_getport_done(struct rpc_task *, void *); static void rpcb_map_release(void *data); -static const struct rpc_program rpcb_program; +static struct rpc_program rpcb_program; + +static struct rpc_clnt * rpcb_local_clnt; +static struct rpc_clnt * rpcb_local_clnt4; + +DEFINE_SPINLOCK(rpcb_clnt_lock); +unsigned int rpcb_users; struct rpcbind_args { struct rpc_xprt * r_xprt; @@ -137,8 +140,8 @@ struct rpcb_info { struct rpc_procinfo * rpc_proc; }; -static const struct rpcb_info rpcb_next_version[]; -static const struct rpcb_info rpcb_next_version6[]; +static struct rpcb_info rpcb_next_version[]; +static struct rpcb_info rpcb_next_version6[]; static const struct rpc_call_ops rpcb_getport_ops = { .rpc_call_done = rpcb_getport_done, @@ -161,34 +164,32 @@ static void rpcb_map_release(void *data) kfree(map); } -static int rpcb_get_local(struct net *net) +static int rpcb_get_local(void) { int cnt; - struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); - spin_lock(&sn->rpcb_clnt_lock); - if (sn->rpcb_users) - sn->rpcb_users++; - cnt = sn->rpcb_users; - spin_unlock(&sn->rpcb_clnt_lock); + spin_lock(&rpcb_clnt_lock); + if (rpcb_users) + rpcb_users++; + cnt = rpcb_users; + spin_unlock(&rpcb_clnt_lock); return cnt; } -void rpcb_put_local(struct net *net) +void rpcb_put_local(void) { - struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); - struct rpc_clnt *clnt = 
sn->rpcb_local_clnt; - struct rpc_clnt *clnt4 = sn->rpcb_local_clnt4; + struct rpc_clnt *clnt = rpcb_local_clnt; + struct rpc_clnt *clnt4 = rpcb_local_clnt4; int shutdown; - spin_lock(&sn->rpcb_clnt_lock); - if (--sn->rpcb_users == 0) { - sn->rpcb_local_clnt = NULL; - sn->rpcb_local_clnt4 = NULL; + spin_lock(&rpcb_clnt_lock); + if (--rpcb_users == 0) { + rpcb_local_clnt = NULL; + rpcb_local_clnt4 = NULL; } - shutdown = !sn->rpcb_users; - spin_unlock(&sn->rpcb_clnt_lock); + shutdown = !rpcb_users; + spin_unlock(&rpcb_clnt_lock); if (shutdown) { /* @@ -201,34 +202,30 @@ void rpcb_put_local(struct net *net) } } -static void rpcb_set_local(struct net *net, struct rpc_clnt *clnt, - struct rpc_clnt *clnt4) +static void rpcb_set_local(struct rpc_clnt *clnt, struct rpc_clnt *clnt4) { - struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); - /* Protected by rpcb_create_local_mutex */ - sn->rpcb_local_clnt = clnt; - sn->rpcb_local_clnt4 = clnt4; + rpcb_local_clnt = clnt; + rpcb_local_clnt4 = clnt4; smp_wmb(); - sn->rpcb_users = 1; + rpcb_users = 1; dprintk("RPC: created new rpcb local clients (rpcb_local_clnt: " - "%p, rpcb_local_clnt4: %p) for net %p%s\n", - sn->rpcb_local_clnt, sn->rpcb_local_clnt4, - net, (net == &init_net) ? " (init_net)" : ""); + "%p, rpcb_local_clnt4: %p)\n", rpcb_local_clnt, + rpcb_local_clnt4); } /* * Returns zero on success, otherwise a negative errno value * is returned. 
*/ -static int rpcb_create_local_unix(struct net *net) +static int rpcb_create_local_unix(void) { static const struct sockaddr_un rpcb_localaddr_rpcbind = { .sun_family = AF_LOCAL, .sun_path = RPCBIND_SOCK_PATHNAME, }; struct rpc_create_args args = { - .net = net, + .net = &init_net, .protocol = XPRT_TRANSPORT_LOCAL, .address = (struct sockaddr *)&rpcb_localaddr_rpcbind, .addrsize = sizeof(rpcb_localaddr_rpcbind), @@ -261,7 +258,7 @@ static int rpcb_create_local_unix(struct net *net) clnt4 = NULL; } - rpcb_set_local(net, clnt, clnt4); + rpcb_set_local(clnt, clnt4); out: return result; @@ -271,7 +268,7 @@ static int rpcb_create_local_unix(struct net *net) * Returns zero on success, otherwise a negative errno value * is returned. */ -static int rpcb_create_local_net(struct net *net) +static int rpcb_create_local_net(void) { static const struct sockaddr_in rpcb_inaddr_loopback = { .sin_family = AF_INET, @@ -279,7 +276,7 @@ static int rpcb_create_local_net(struct net *net) .sin_port = htons(RPCBIND_PORT), }; struct rpc_create_args args = { - .net = net, + .net = &init_net, .protocol = XPRT_TRANSPORT_TCP, .address = (struct sockaddr *)&rpcb_inaddr_loopback, .addrsize = sizeof(rpcb_inaddr_loopback), @@ -313,7 +310,7 @@ static int rpcb_create_local_net(struct net *net) clnt4 = NULL; } - rpcb_set_local(net, clnt, clnt4); + rpcb_set_local(clnt, clnt4); out: return result; @@ -323,32 +320,31 @@ static int rpcb_create_local_net(struct net *net) * Returns zero on success, otherwise a negative errno value * is returned. 
*/ -int rpcb_create_local(struct net *net) +int rpcb_create_local(void) { static DEFINE_MUTEX(rpcb_create_local_mutex); int result = 0; - if (rpcb_get_local(net)) + if (rpcb_get_local()) return result; mutex_lock(&rpcb_create_local_mutex); - if (rpcb_get_local(net)) + if (rpcb_get_local()) goto out; - if (rpcb_create_local_unix(net) != 0) - result = rpcb_create_local_net(net); + if (rpcb_create_local_unix() != 0) + result = rpcb_create_local_net(); out: mutex_unlock(&rpcb_create_local_mutex); return result; } -static struct rpc_clnt *rpcb_create(struct net *net, const char *hostname, - struct sockaddr *srvaddr, size_t salen, - int proto, u32 version) +static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr, + size_t salen, int proto, u32 version) { struct rpc_create_args args = { - .net = net, + .net = &init_net, .protocol = proto, .address = srvaddr, .addrsize = salen, @@ -424,7 +420,7 @@ static int rpcb_register_call(struct rpc_clnt *clnt, struct rpc_message *msg) * IN6ADDR_ANY (ie available for all AF_INET and AF_INET6 * addresses). */ -int rpcb_register(struct net *net, u32 prog, u32 vers, int prot, unsigned short port) +int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port) { struct rpcbind_args map = { .r_prog = prog, @@ -435,7 +431,6 @@ int rpcb_register(struct net *net, u32 prog, u32 vers, int prot, unsigned short struct rpc_message msg = { .rpc_argp = &map, }; - struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); dprintk("RPC: %sregistering (%u, %u, %d, %u) with local " "rpcbind\n", (port ? 
"" : "un"), @@ -445,14 +440,13 @@ int rpcb_register(struct net *net, u32 prog, u32 vers, int prot, unsigned short if (port) msg.rpc_proc = &rpcb_procedures2[RPCBPROC_SET]; - return rpcb_register_call(sn->rpcb_local_clnt, &msg); + return rpcb_register_call(rpcb_local_clnt, &msg); } /* * Fill in AF_INET family-specific arguments to register */ -static int rpcb_register_inet4(struct sunrpc_net *sn, - const struct sockaddr *sap, +static int rpcb_register_inet4(const struct sockaddr *sap, struct rpc_message *msg) { const struct sockaddr_in *sin = (const struct sockaddr_in *)sap; @@ -471,7 +465,7 @@ static int rpcb_register_inet4(struct sunrpc_net *sn, if (port) msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET]; - result = rpcb_register_call(sn->rpcb_local_clnt4, msg); + result = rpcb_register_call(rpcb_local_clnt4, msg); kfree(map->r_addr); return result; } @@ -479,8 +473,7 @@ static int rpcb_register_inet4(struct sunrpc_net *sn, /* * Fill in AF_INET6 family-specific arguments to register */ -static int rpcb_register_inet6(struct sunrpc_net *sn, - const struct sockaddr *sap, +static int rpcb_register_inet6(const struct sockaddr *sap, struct rpc_message *msg) { const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *)sap; @@ -499,13 +492,12 @@ static int rpcb_register_inet6(struct sunrpc_net *sn, if (port) msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET]; - result = rpcb_register_call(sn->rpcb_local_clnt4, msg); + result = rpcb_register_call(rpcb_local_clnt4, msg); kfree(map->r_addr); return result; } -static int rpcb_unregister_all_protofamilies(struct sunrpc_net *sn, - struct rpc_message *msg) +static int rpcb_unregister_all_protofamilies(struct rpc_message *msg) { struct rpcbind_args *map = msg->rpc_argp; @@ -516,7 +508,7 @@ static int rpcb_unregister_all_protofamilies(struct sunrpc_net *sn, map->r_addr = ""; msg->rpc_proc = &rpcb_procedures4[RPCBPROC_UNSET]; - return rpcb_register_call(sn->rpcb_local_clnt4, msg); + return rpcb_register_call(rpcb_local_clnt4, msg); 
} /** @@ -562,7 +554,7 @@ static int rpcb_unregister_all_protofamilies(struct sunrpc_net *sn, * service on any IPv4 address, but not on IPv6. The latter * advertises the service on all IPv4 and IPv6 addresses. */ -int rpcb_v4_register(struct net *net, const u32 program, const u32 version, +int rpcb_v4_register(const u32 program, const u32 version, const struct sockaddr *address, const char *netid) { struct rpcbind_args map = { @@ -574,19 +566,18 @@ int rpcb_v4_register(struct net *net, const u32 program, const u32 version, struct rpc_message msg = { .rpc_argp = &map, }; - struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); - if (sn->rpcb_local_clnt4 == NULL) + if (rpcb_local_clnt4 == NULL) return -EPROTONOSUPPORT; if (address == NULL) - return rpcb_unregister_all_protofamilies(sn, &msg); + return rpcb_unregister_all_protofamilies(&msg); switch (address->sa_family) { case AF_INET: - return rpcb_register_inet4(sn, address, &msg); + return rpcb_register_inet4(address, &msg); case AF_INET6: - return rpcb_register_inet6(sn, address, &msg); + return rpcb_register_inet6(address, &msg); } return -EAFNOSUPPORT; @@ -620,10 +611,9 @@ static struct rpc_task *rpcb_call_async(struct rpc_clnt *rpcb_clnt, struct rpcbi static struct rpc_clnt *rpcb_find_transport_owner(struct rpc_clnt *clnt) { struct rpc_clnt *parent = clnt->cl_parent; - struct rpc_xprt *xprt = rcu_dereference(clnt->cl_xprt); while (parent != clnt) { - if (rcu_dereference(parent->cl_xprt) != xprt) + if (parent->cl_xprt != clnt->cl_xprt) break; if (clnt->cl_autobind) break; @@ -654,16 +644,12 @@ void rpcb_getport_async(struct rpc_task *task) size_t salen; int status; - rcu_read_lock(); - do { - clnt = rpcb_find_transport_owner(task->tk_client); - xprt = xprt_get(rcu_dereference(clnt->cl_xprt)); - } while (xprt == NULL); - rcu_read_unlock(); + clnt = rpcb_find_transport_owner(task->tk_client); + xprt = clnt->cl_xprt; dprintk("RPC: %5u %s(%s, %u, %u, %d)\n", task->tk_pid, __func__, - xprt->servername, 
clnt->cl_prog, clnt->cl_vers, xprt->prot); + clnt->cl_server, clnt->cl_prog, clnt->cl_vers, xprt->prot); /* Put self on the wait queue to ensure we get notified if * some other task is already attempting to bind the port */ @@ -672,7 +658,6 @@ void rpcb_getport_async(struct rpc_task *task) if (xprt_test_and_set_binding(xprt)) { dprintk("RPC: %5u %s: waiting for another binder\n", task->tk_pid, __func__); - xprt_put(xprt); return; } @@ -714,8 +699,8 @@ void rpcb_getport_async(struct rpc_task *task) dprintk("RPC: %5u %s: trying rpcbind version %u\n", task->tk_pid, __func__, bind_version); - rpcb_clnt = rpcb_create(xprt->xprt_net, xprt->servername, sap, salen, - xprt->prot, bind_version); + rpcb_clnt = rpcb_create(clnt->cl_server, sap, salen, xprt->prot, + bind_version); if (IS_ERR(rpcb_clnt)) { status = PTR_ERR(rpcb_clnt); dprintk("RPC: %5u %s: rpcb_create failed, error %ld\n", @@ -740,7 +725,7 @@ void rpcb_getport_async(struct rpc_task *task) switch (bind_version) { case RPCBVERS_4: case RPCBVERS_3: - map->r_netid = xprt->address_strings[RPC_DISPLAY_NETID]; + map->r_netid = rpc_peeraddr2str(clnt, RPC_DISPLAY_NETID); map->r_addr = rpc_sockaddr2uaddr(sap, GFP_ATOMIC); map->r_owner = ""; break; @@ -769,7 +754,6 @@ void rpcb_getport_async(struct rpc_task *task) bailout_nofree: rpcb_wake_rpcbind_waiters(xprt, status); task->tk_status = status; - xprt_put(xprt); } EXPORT_SYMBOL_GPL(rpcb_getport_async); @@ -817,11 +801,11 @@ static void rpcb_getport_done(struct rpc_task *child, void *data) static void rpcb_enc_mapping(struct rpc_rqst *req, struct xdr_stream *xdr, const struct rpcbind_args *rpcb) { + struct rpc_task *task = req->rq_task; __be32 *p; dprintk("RPC: %5u encoding PMAP_%s call (%u, %u, %d, %u)\n", - req->rq_task->tk_pid, - req->rq_task->tk_msg.rpc_proc->p_name, + task->tk_pid, task->tk_msg.rpc_proc->p_name, rpcb->r_prog, rpcb->r_vers, rpcb->r_prot, rpcb->r_port); p = xdr_reserve_space(xdr, RPCB_mappingargs_sz << 2); @@ -834,6 +818,7 @@ static void 
rpcb_enc_mapping(struct rpc_rqst *req, struct xdr_stream *xdr, static int rpcb_dec_getport(struct rpc_rqst *req, struct xdr_stream *xdr, struct rpcbind_args *rpcb) { + struct rpc_task *task = req->rq_task; unsigned long port; __be32 *p; @@ -844,8 +829,8 @@ static int rpcb_dec_getport(struct rpc_rqst *req, struct xdr_stream *xdr, return -EIO; port = be32_to_cpup(p); - dprintk("RPC: %5u PMAP_%s result: %lu\n", req->rq_task->tk_pid, - req->rq_task->tk_msg.rpc_proc->p_name, port); + dprintk("RPC: %5u PMAP_%s result: %lu\n", task->tk_pid, + task->tk_msg.rpc_proc->p_name, port); if (unlikely(port > USHRT_MAX)) return -EIO; @@ -856,6 +841,7 @@ static int rpcb_dec_getport(struct rpc_rqst *req, struct xdr_stream *xdr, static int rpcb_dec_set(struct rpc_rqst *req, struct xdr_stream *xdr, unsigned int *boolp) { + struct rpc_task *task = req->rq_task; __be32 *p; p = xdr_inline_decode(xdr, 4); @@ -867,8 +853,7 @@ static int rpcb_dec_set(struct rpc_rqst *req, struct xdr_stream *xdr, *boolp = 1; dprintk("RPC: %5u RPCB_%s call %s\n", - req->rq_task->tk_pid, - req->rq_task->tk_msg.rpc_proc->p_name, + task->tk_pid, task->tk_msg.rpc_proc->p_name, (*boolp ? 
"succeeded" : "failed")); return 0; } @@ -888,11 +873,11 @@ static void encode_rpcb_string(struct xdr_stream *xdr, const char *string, static void rpcb_enc_getaddr(struct rpc_rqst *req, struct xdr_stream *xdr, const struct rpcbind_args *rpcb) { + struct rpc_task *task = req->rq_task; __be32 *p; dprintk("RPC: %5u encoding RPCB_%s call (%u, %u, '%s', '%s')\n", - req->rq_task->tk_pid, - req->rq_task->tk_msg.rpc_proc->p_name, + task->tk_pid, task->tk_msg.rpc_proc->p_name, rpcb->r_prog, rpcb->r_vers, rpcb->r_netid, rpcb->r_addr); @@ -910,6 +895,7 @@ static int rpcb_dec_getaddr(struct rpc_rqst *req, struct xdr_stream *xdr, { struct sockaddr_storage address; struct sockaddr *sap = (struct sockaddr *)&address; + struct rpc_task *task = req->rq_task; __be32 *p; u32 len; @@ -926,7 +912,7 @@ static int rpcb_dec_getaddr(struct rpc_rqst *req, struct xdr_stream *xdr, */ if (len == 0) { dprintk("RPC: %5u RPCB reply: program not registered\n", - req->rq_task->tk_pid); + task->tk_pid); return 0; } @@ -936,11 +922,10 @@ static int rpcb_dec_getaddr(struct rpc_rqst *req, struct xdr_stream *xdr, p = xdr_inline_decode(xdr, len); if (unlikely(p == NULL)) goto out_fail; - dprintk("RPC: %5u RPCB_%s reply: %s\n", req->rq_task->tk_pid, - req->rq_task->tk_msg.rpc_proc->p_name, (char *)p); + dprintk("RPC: %5u RPCB_%s reply: %s\n", task->tk_pid, + task->tk_msg.rpc_proc->p_name, (char *)p); - if (rpc_uaddr2sockaddr(req->rq_xprt->xprt_net, (char *)p, len, - sap, sizeof(address)) == 0) + if (rpc_uaddr2sockaddr((char *)p, len, sap, sizeof(address)) == 0) goto out_fail; rpcb->r_port = rpc_get_port(sap); @@ -948,8 +933,7 @@ static int rpcb_dec_getaddr(struct rpc_rqst *req, struct xdr_stream *xdr, out_fail: dprintk("RPC: %5u malformed RPCB_%s reply\n", - req->rq_task->tk_pid, - req->rq_task->tk_msg.rpc_proc->p_name); + task->tk_pid, task->tk_msg.rpc_proc->p_name); return -EIO; } @@ -1057,7 +1041,7 @@ static struct rpc_procinfo rpcb_procedures4[] = { }, }; -static const struct rpcb_info 
rpcb_next_version[] = { +static struct rpcb_info rpcb_next_version[] = { { .rpc_vers = RPCBVERS_2, .rpc_proc = &rpcb_procedures2[RPCBPROC_GETPORT], @@ -1067,7 +1051,7 @@ static const struct rpcb_info rpcb_next_version[] = { }, }; -static const struct rpcb_info rpcb_next_version6[] = { +static struct rpcb_info rpcb_next_version6[] = { { .rpc_vers = RPCBVERS_4, .rpc_proc = &rpcb_procedures4[RPCBPROC_GETADDR], @@ -1081,25 +1065,25 @@ static const struct rpcb_info rpcb_next_version6[] = { }, }; -static const struct rpc_version rpcb_version2 = { +static struct rpc_version rpcb_version2 = { .number = RPCBVERS_2, .nrprocs = ARRAY_SIZE(rpcb_procedures2), .procs = rpcb_procedures2 }; -static const struct rpc_version rpcb_version3 = { +static struct rpc_version rpcb_version3 = { .number = RPCBVERS_3, .nrprocs = ARRAY_SIZE(rpcb_procedures3), .procs = rpcb_procedures3 }; -static const struct rpc_version rpcb_version4 = { +static struct rpc_version rpcb_version4 = { .number = RPCBVERS_4, .nrprocs = ARRAY_SIZE(rpcb_procedures4), .procs = rpcb_procedures4 }; -static const struct rpc_version *rpcb_version[] = { +static struct rpc_version *rpcb_version[] = { NULL, NULL, &rpcb_version2, @@ -1109,7 +1093,7 @@ static const struct rpc_version *rpcb_version[] = { static struct rpc_stat rpcb_stats; -static const struct rpc_program rpcb_program = { +static struct rpc_program rpcb_program = { .name = "rpcbind", .number = RPCBIND_PROGRAM, .nrvers = ARRAY_SIZE(rpcb_version), diff --git a/trunk/net/sunrpc/sched.c b/trunk/net/sunrpc/sched.c index 994cfea2bad6..3341d8962786 100644 --- a/trunk/net/sunrpc/sched.c +++ b/trunk/net/sunrpc/sched.c @@ -28,9 +28,6 @@ #define RPCDBG_FACILITY RPCDBG_SCHED #endif -#define CREATE_TRACE_POINTS -#include - /* * RPC slabs and memory pools */ @@ -208,7 +205,9 @@ static void __rpc_init_priority_wait_queue(struct rpc_wait_queue *queue, const c queue->qlen = 0; setup_timer(&queue->timer_list.timer, __rpc_queue_timer_fn, (unsigned long)queue); 
INIT_LIST_HEAD(&queue->timer_list.list); - rpc_assign_waitqueue_name(queue, qname); +#ifdef RPC_DEBUG + queue->name = qname; +#endif } void rpc_init_priority_wait_queue(struct rpc_wait_queue *queue, const char *qname) @@ -252,8 +251,6 @@ static inline void rpc_task_set_debuginfo(struct rpc_task *task) static void rpc_set_active(struct rpc_task *task) { - trace_rpc_task_begin(task->tk_client, task, NULL); - rpc_task_set_debuginfo(task); set_bit(RPC_TASK_ACTIVE, &task->tk_runstate); } @@ -270,8 +267,6 @@ static int rpc_complete_task(struct rpc_task *task) unsigned long flags; int ret; - trace_rpc_task_complete(task->tk_client, task, NULL); - spin_lock_irqsave(&wq->lock, flags); clear_bit(RPC_TASK_ACTIVE, &task->tk_runstate); ret = atomic_dec_and_test(&task->tk_count); @@ -329,8 +324,6 @@ static void __rpc_sleep_on_priority(struct rpc_wait_queue *q, dprintk("RPC: %5u sleep_on(queue \"%s\" time %lu)\n", task->tk_pid, rpc_qname(q), jiffies); - trace_rpc_task_sleep(task->tk_client, task, q); - __rpc_add_wait_queue(q, task, queue_priority); BUG_ON(task->tk_callback != NULL); @@ -385,8 +378,6 @@ static void __rpc_do_wake_up_task(struct rpc_wait_queue *queue, struct rpc_task return; } - trace_rpc_task_wakeup(task->tk_client, task, queue); - __rpc_remove_wait_queue(queue, task); rpc_make_runnable(task); @@ -431,7 +422,7 @@ EXPORT_SYMBOL_GPL(rpc_wake_up_queued_task); /* * Wake up the next task on a priority queue. 
*/ -static struct rpc_task *__rpc_find_next_queued_priority(struct rpc_wait_queue *queue) +static struct rpc_task * __rpc_wake_up_next_priority(struct rpc_wait_queue *queue) { struct list_head *q; struct rpc_task *task; @@ -476,54 +467,30 @@ static struct rpc_task *__rpc_find_next_queued_priority(struct rpc_wait_queue *q new_owner: rpc_set_waitqueue_owner(queue, task->tk_owner); out: + rpc_wake_up_task_queue_locked(queue, task); return task; } -static struct rpc_task *__rpc_find_next_queued(struct rpc_wait_queue *queue) -{ - if (RPC_IS_PRIORITY(queue)) - return __rpc_find_next_queued_priority(queue); - if (!list_empty(&queue->tasks[0])) - return list_first_entry(&queue->tasks[0], struct rpc_task, u.tk_wait.list); - return NULL; -} - /* - * Wake up the first task on the wait queue. + * Wake up the next task on the wait queue. */ -struct rpc_task *rpc_wake_up_first(struct rpc_wait_queue *queue, - bool (*func)(struct rpc_task *, void *), void *data) +struct rpc_task * rpc_wake_up_next(struct rpc_wait_queue *queue) { struct rpc_task *task = NULL; - dprintk("RPC: wake_up_first(%p \"%s\")\n", + dprintk("RPC: wake_up_next(%p \"%s\")\n", queue, rpc_qname(queue)); spin_lock_bh(&queue->lock); - task = __rpc_find_next_queued(queue); - if (task != NULL) { - if (func(task, data)) + if (RPC_IS_PRIORITY(queue)) + task = __rpc_wake_up_next_priority(queue); + else { + task_for_first(task, &queue->tasks[0]) rpc_wake_up_task_queue_locked(queue, task); - else - task = NULL; } spin_unlock_bh(&queue->lock); return task; } -EXPORT_SYMBOL_GPL(rpc_wake_up_first); - -static bool rpc_wake_up_next_func(struct rpc_task *task, void *data) -{ - return true; -} - -/* - * Wake up the next task on the wait queue. 
-*/ -struct rpc_task *rpc_wake_up_next(struct rpc_wait_queue *queue) -{ - return rpc_wake_up_first(queue, rpc_wake_up_next_func, NULL); -} EXPORT_SYMBOL_GPL(rpc_wake_up_next); /** @@ -534,18 +501,14 @@ EXPORT_SYMBOL_GPL(rpc_wake_up_next); */ void rpc_wake_up(struct rpc_wait_queue *queue) { + struct rpc_task *task, *next; struct list_head *head; spin_lock_bh(&queue->lock); head = &queue->tasks[queue->maxpriority]; for (;;) { - while (!list_empty(head)) { - struct rpc_task *task; - task = list_first_entry(head, - struct rpc_task, - u.tk_wait.list); + list_for_each_entry_safe(task, next, head, u.tk_wait.list) rpc_wake_up_task_queue_locked(queue, task); - } if (head == &queue->tasks[0]) break; head--; @@ -563,16 +526,13 @@ EXPORT_SYMBOL_GPL(rpc_wake_up); */ void rpc_wake_up_status(struct rpc_wait_queue *queue, int status) { + struct rpc_task *task, *next; struct list_head *head; spin_lock_bh(&queue->lock); head = &queue->tasks[queue->maxpriority]; for (;;) { - while (!list_empty(head)) { - struct rpc_task *task; - task = list_first_entry(head, - struct rpc_task, - u.tk_wait.list); + list_for_each_entry_safe(task, next, head, u.tk_wait.list) { task->tk_status = status; rpc_wake_up_task_queue_locked(queue, task); } @@ -717,7 +677,6 @@ static void __rpc_execute(struct rpc_task *task) if (do_action == NULL) break; } - trace_rpc_task_run_action(task->tk_client, task, task->tk_action); do_action(task); /* diff --git a/trunk/net/sunrpc/stats.c b/trunk/net/sunrpc/stats.c index bc2068ee795b..80df89d957ba 100644 --- a/trunk/net/sunrpc/stats.c +++ b/trunk/net/sunrpc/stats.c @@ -22,7 +22,6 @@ #include #include #include -#include #include "netns.h" @@ -134,19 +133,20 @@ EXPORT_SYMBOL_GPL(rpc_free_iostats); /** * rpc_count_iostats - tally up per-task stats * @task: completed rpc_task - * @stats: array of stat structures * * Relies on the caller for serialization. 
*/ -void rpc_count_iostats(const struct rpc_task *task, struct rpc_iostats *stats) +void rpc_count_iostats(struct rpc_task *task) { struct rpc_rqst *req = task->tk_rqstp; + struct rpc_iostats *stats; struct rpc_iostats *op_metrics; ktime_t delta; - if (!stats || !req) + if (!task->tk_client || !task->tk_client->cl_metrics || !req) return; + stats = task->tk_client->cl_metrics; op_metrics = &stats[task->tk_msg.rpc_proc->p_statidx]; op_metrics->om_ops++; @@ -164,7 +164,6 @@ void rpc_count_iostats(const struct rpc_task *task, struct rpc_iostats *stats) delta = ktime_sub(ktime_get(), task->tk_start); op_metrics->om_execute = ktime_add(op_metrics->om_execute, delta); } -EXPORT_SYMBOL_GPL(rpc_count_iostats); static void _print_name(struct seq_file *seq, unsigned int op, struct rpc_procinfo *procs) @@ -180,7 +179,7 @@ static void _print_name(struct seq_file *seq, unsigned int op, void rpc_print_iostats(struct seq_file *seq, struct rpc_clnt *clnt) { struct rpc_iostats *stats = clnt->cl_metrics; - struct rpc_xprt *xprt; + struct rpc_xprt *xprt = clnt->cl_xprt; unsigned int op, maxproc = clnt->cl_maxproc; if (!stats) @@ -190,11 +189,8 @@ void rpc_print_iostats(struct seq_file *seq, struct rpc_clnt *clnt) seq_printf(seq, "p/v: %u/%u (%s)\n", clnt->cl_prog, clnt->cl_vers, clnt->cl_protname); - rcu_read_lock(); - xprt = rcu_dereference(clnt->cl_xprt); if (xprt) xprt->ops->print_stats(xprt, seq); - rcu_read_unlock(); seq_printf(seq, "\tper-op statistics\n"); for (op = 0; op < maxproc; op++) { @@ -217,46 +213,45 @@ EXPORT_SYMBOL_GPL(rpc_print_iostats); * Register/unregister RPC proc files */ static inline struct proc_dir_entry * -do_register(struct net *net, const char *name, void *data, - const struct file_operations *fops) +do_register(const char *name, void *data, const struct file_operations *fops) { struct sunrpc_net *sn; dprintk("RPC: registering /proc/net/rpc/%s\n", name); - sn = net_generic(net, sunrpc_net_id); + sn = net_generic(&init_net, sunrpc_net_id); return 
proc_create_data(name, 0, sn->proc_net_rpc, fops, data); } struct proc_dir_entry * -rpc_proc_register(struct net *net, struct rpc_stat *statp) +rpc_proc_register(struct rpc_stat *statp) { - return do_register(net, statp->program->name, statp, &rpc_proc_fops); + return do_register(statp->program->name, statp, &rpc_proc_fops); } EXPORT_SYMBOL_GPL(rpc_proc_register); void -rpc_proc_unregister(struct net *net, const char *name) +rpc_proc_unregister(const char *name) { struct sunrpc_net *sn; - sn = net_generic(net, sunrpc_net_id); + sn = net_generic(&init_net, sunrpc_net_id); remove_proc_entry(name, sn->proc_net_rpc); } EXPORT_SYMBOL_GPL(rpc_proc_unregister); struct proc_dir_entry * -svc_proc_register(struct net *net, struct svc_stat *statp, const struct file_operations *fops) +svc_proc_register(struct svc_stat *statp, const struct file_operations *fops) { - return do_register(net, statp->program->pg_name, statp, fops); + return do_register(statp->program->pg_name, statp, fops); } EXPORT_SYMBOL_GPL(svc_proc_register); void -svc_proc_unregister(struct net *net, const char *name) +svc_proc_unregister(const char *name) { struct sunrpc_net *sn; - sn = net_generic(net, sunrpc_net_id); + sn = net_generic(&init_net, sunrpc_net_id); remove_proc_entry(name, sn->proc_net_rpc); } EXPORT_SYMBOL_GPL(svc_proc_unregister); diff --git a/trunk/net/sunrpc/sunrpc.h b/trunk/net/sunrpc/sunrpc.h index 14c9f6d1c5ff..90c292e2738b 100644 --- a/trunk/net/sunrpc/sunrpc.h +++ b/trunk/net/sunrpc/sunrpc.h @@ -47,7 +47,5 @@ int svc_send_common(struct socket *sock, struct xdr_buf *xdr, struct page *headpage, unsigned long headoffset, struct page *tailpage, unsigned long tailoffset); -int rpc_clients_notifier_register(void); -void rpc_clients_notifier_unregister(void); #endif /* _NET_SUNRPC_SUNRPC_H */ diff --git a/trunk/net/sunrpc/sunrpc_syms.c b/trunk/net/sunrpc/sunrpc_syms.c index 8adfc88e793a..8ec9778c3f4a 100644 --- a/trunk/net/sunrpc/sunrpc_syms.c +++ b/trunk/net/sunrpc/sunrpc_syms.c @@ -25,12 
+25,10 @@ #include "netns.h" int sunrpc_net_id; -EXPORT_SYMBOL_GPL(sunrpc_net_id); static __net_init int sunrpc_init_net(struct net *net) { int err; - struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); err = rpc_proc_init(net); if (err) @@ -40,18 +38,8 @@ static __net_init int sunrpc_init_net(struct net *net) if (err) goto err_ipmap; - err = unix_gid_cache_create(net); - if (err) - goto err_unixgid; - - rpc_pipefs_init_net(net); - INIT_LIST_HEAD(&sn->all_clients); - spin_lock_init(&sn->rpc_client_lock); - spin_lock_init(&sn->rpcb_clnt_lock); return 0; -err_unixgid: - ip_map_cache_destroy(net); err_ipmap: rpc_proc_exit(net); err_proc: @@ -60,7 +48,6 @@ static __net_init int sunrpc_init_net(struct net *net) static __net_exit void sunrpc_exit_net(struct net *net) { - unix_gid_cache_destroy(net); ip_map_cache_destroy(net); rpc_proc_exit(net); } @@ -72,6 +59,8 @@ static struct pernet_operations sunrpc_net_ops = { .size = sizeof(struct sunrpc_net), }; +extern struct cache_detail unix_gid_cache; + static int __init init_sunrpc(void) { @@ -93,6 +82,7 @@ init_sunrpc(void) #ifdef RPC_DEBUG rpc_register_sysctl(); #endif + cache_register(&unix_gid_cache); svc_init_xprt_sock(); /* svc sock transport */ init_socket_xprt(); /* clnt sock transport */ return 0; @@ -115,6 +105,7 @@ cleanup_sunrpc(void) svc_cleanup_xprt_sock(); unregister_rpc_pipefs(); rpc_destroy_mempool(); + cache_unregister(&unix_gid_cache); unregister_pernet_subsys(&sunrpc_net_ops); #ifdef RPC_DEBUG rpc_unregister_sysctl(); diff --git a/trunk/net/sunrpc/svc.c b/trunk/net/sunrpc/svc.c index 4153846984ac..e4aabc02368b 100644 --- a/trunk/net/sunrpc/svc.c +++ b/trunk/net/sunrpc/svc.c @@ -20,7 +20,6 @@ #include #include #include -#include #include #include @@ -31,7 +30,7 @@ #define RPCDBG_FACILITY RPCDBG_SVCDSP -static void svc_unregister(const struct svc_serv *serv, struct net *net); +static void svc_unregister(const struct svc_serv *serv); #define svc_serv_is_pooled(serv) ((serv)->sv_function) @@ -369,24 
+368,23 @@ svc_pool_for_cpu(struct svc_serv *serv, int cpu) return &serv->sv_pools[pidx % serv->sv_nrpools]; } -int svc_rpcb_setup(struct svc_serv *serv, struct net *net) +static int svc_rpcb_setup(struct svc_serv *serv) { int err; - err = rpcb_create_local(net); + err = rpcb_create_local(); if (err) return err; /* Remove any stale portmap registrations */ - svc_unregister(serv, net); + svc_unregister(serv); return 0; } -EXPORT_SYMBOL_GPL(svc_rpcb_setup); -void svc_rpcb_cleanup(struct svc_serv *serv, struct net *net) +void svc_rpcb_cleanup(struct svc_serv *serv) { - svc_unregister(serv, net); - rpcb_put_local(net); + svc_unregister(serv); + rpcb_put_local(); } EXPORT_SYMBOL_GPL(svc_rpcb_cleanup); @@ -412,7 +410,7 @@ static int svc_uses_rpcbind(struct svc_serv *serv) */ static struct svc_serv * __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, - void (*shutdown)(struct svc_serv *serv, struct net *net)) + void (*shutdown)(struct svc_serv *serv)) { struct svc_serv *serv; unsigned int vers; @@ -472,7 +470,7 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, } if (svc_uses_rpcbind(serv)) { - if (svc_rpcb_setup(serv, current->nsproxy->net_ns) < 0) { + if (svc_rpcb_setup(serv) < 0) { kfree(serv->sv_pools); kfree(serv); return NULL; @@ -486,7 +484,7 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, struct svc_serv * svc_create(struct svc_program *prog, unsigned int bufsize, - void (*shutdown)(struct svc_serv *serv, struct net *net)) + void (*shutdown)(struct svc_serv *serv)) { return __svc_create(prog, bufsize, /*npools*/1, shutdown); } @@ -494,7 +492,7 @@ EXPORT_SYMBOL_GPL(svc_create); struct svc_serv * svc_create_pooled(struct svc_program *prog, unsigned int bufsize, - void (*shutdown)(struct svc_serv *serv, struct net *net), + void (*shutdown)(struct svc_serv *serv), svc_thread_fn func, struct module *mod) { struct svc_serv *serv; @@ -511,24 +509,6 @@ svc_create_pooled(struct svc_program *prog, 
unsigned int bufsize, } EXPORT_SYMBOL_GPL(svc_create_pooled); -void svc_shutdown_net(struct svc_serv *serv, struct net *net) -{ - /* - * The set of xprts (contained in the sv_tempsocks and - * sv_permsocks lists) is now constant, since it is modified - * only by accepting new sockets (done by service threads in - * svc_recv) or aging old ones (done by sv_temptimer), or - * configuration changes (excluded by whatever locking the - * caller is using--nfsd_mutex in the case of nfsd). So it's - * safe to traverse those lists and shut everything down: - */ - svc_close_net(serv, net); - - if (serv->sv_shutdown) - serv->sv_shutdown(serv, net); -} -EXPORT_SYMBOL_GPL(svc_shutdown_net); - /* * Destroy an RPC service. Should be called with appropriate locking to * protect the sv_nrthreads, sv_permsocks and sv_tempsocks. @@ -536,8 +516,6 @@ EXPORT_SYMBOL_GPL(svc_shutdown_net); void svc_destroy(struct svc_serv *serv) { - struct net *net = current->nsproxy->net_ns; - dprintk("svc: svc_destroy(%s, %d)\n", serv->sv_program->pg_name, serv->sv_nrthreads); @@ -551,15 +529,19 @@ svc_destroy(struct svc_serv *serv) printk("svc_destroy: no threads for serv=%p!\n", serv); del_timer_sync(&serv->sv_temptimer); - - svc_shutdown_net(serv, net); - /* - * The last user is gone and thus all sockets have to be destroyed to - * the point. Check this. + * The set of xprts (contained in the sv_tempsocks and + * sv_permsocks lists) is now constant, since it is modified + * only by accepting new sockets (done by service threads in + * svc_recv) or aging old ones (done by sv_temptimer), or + * configuration changes (excluded by whatever locking the + * caller is using--nfsd_mutex in the case of nfsd). 
So it's + * safe to traverse those lists and shut everything down: */ - BUG_ON(!list_empty(&serv->sv_permsocks)); - BUG_ON(!list_empty(&serv->sv_tempsocks)); + svc_close_all(serv); + + if (serv->sv_shutdown) + serv->sv_shutdown(serv); cache_clean_deferred(serv); @@ -813,8 +795,7 @@ EXPORT_SYMBOL_GPL(svc_exit_thread); * Returns zero on success; a negative errno value is returned * if any error occurs. */ -static int __svc_rpcb_register4(struct net *net, const u32 program, - const u32 version, +static int __svc_rpcb_register4(const u32 program, const u32 version, const unsigned short protocol, const unsigned short port) { @@ -837,7 +818,7 @@ static int __svc_rpcb_register4(struct net *net, const u32 program, return -ENOPROTOOPT; } - error = rpcb_v4_register(net, program, version, + error = rpcb_v4_register(program, version, (const struct sockaddr *)&sin, netid); /* @@ -845,7 +826,7 @@ static int __svc_rpcb_register4(struct net *net, const u32 program, * registration request with the legacy rpcbind v2 protocol. */ if (error == -EPROTONOSUPPORT) - error = rpcb_register(net, program, version, protocol, port); + error = rpcb_register(program, version, protocol, port); return error; } @@ -861,8 +842,7 @@ static int __svc_rpcb_register4(struct net *net, const u32 program, * Returns zero on success; a negative errno value is returned * if any error occurs. 
*/ -static int __svc_rpcb_register6(struct net *net, const u32 program, - const u32 version, +static int __svc_rpcb_register6(const u32 program, const u32 version, const unsigned short protocol, const unsigned short port) { @@ -885,7 +865,7 @@ static int __svc_rpcb_register6(struct net *net, const u32 program, return -ENOPROTOOPT; } - error = rpcb_v4_register(net, program, version, + error = rpcb_v4_register(program, version, (const struct sockaddr *)&sin6, netid); /* @@ -905,7 +885,7 @@ static int __svc_rpcb_register6(struct net *net, const u32 program, * Returns zero on success; a negative errno value is returned * if any error occurs. */ -static int __svc_register(struct net *net, const char *progname, +static int __svc_register(const char *progname, const u32 program, const u32 version, const int family, const unsigned short protocol, @@ -915,12 +895,12 @@ static int __svc_register(struct net *net, const char *progname, switch (family) { case PF_INET: - error = __svc_rpcb_register4(net, program, version, + error = __svc_rpcb_register4(program, version, protocol, port); break; #if IS_ENABLED(CONFIG_IPV6) case PF_INET6: - error = __svc_rpcb_register6(net, program, version, + error = __svc_rpcb_register6(program, version, protocol, port); #endif } @@ -934,16 +914,14 @@ static int __svc_register(struct net *net, const char *progname, /** * svc_register - register an RPC service with the local portmapper * @serv: svc_serv struct for the service to register - * @net: net namespace for the service to register * @family: protocol family of service's listener socket * @proto: transport protocol number to advertise * @port: port to advertise * * Service is registered for any address in the passed-in protocol family */ -int svc_register(const struct svc_serv *serv, struct net *net, - const int family, const unsigned short proto, - const unsigned short port) +int svc_register(const struct svc_serv *serv, const int family, + const unsigned short proto, const unsigned short 
port) { struct svc_program *progp; unsigned int i; @@ -968,7 +946,7 @@ int svc_register(const struct svc_serv *serv, struct net *net, if (progp->pg_vers[i]->vs_hidden) continue; - error = __svc_register(net, progp->pg_name, progp->pg_prog, + error = __svc_register(progp->pg_name, progp->pg_prog, i, family, proto, port); if (error < 0) break; @@ -985,19 +963,19 @@ int svc_register(const struct svc_serv *serv, struct net *net, * any "inet6" entries anyway. So a PMAP_UNSET should be sufficient * in this case to clear all existing entries for [program, version]. */ -static void __svc_unregister(struct net *net, const u32 program, const u32 version, +static void __svc_unregister(const u32 program, const u32 version, const char *progname) { int error; - error = rpcb_v4_register(net, program, version, NULL, ""); + error = rpcb_v4_register(program, version, NULL, ""); /* * User space didn't support rpcbind v4, so retry this * request with the legacy rpcbind v2 protocol. */ if (error == -EPROTONOSUPPORT) - error = rpcb_register(net, program, version, 0, 0); + error = rpcb_register(program, version, 0, 0); dprintk("svc: %s(%sv%u), error %d\n", __func__, progname, version, error); @@ -1011,7 +989,7 @@ static void __svc_unregister(struct net *net, const u32 program, const u32 versi * The result of unregistration is reported via dprintk for those who want * verification of the result, but is otherwise not important. 
*/ -static void svc_unregister(const struct svc_serv *serv, struct net *net) +static void svc_unregister(const struct svc_serv *serv) { struct svc_program *progp; unsigned long flags; @@ -1028,7 +1006,7 @@ static void svc_unregister(const struct svc_serv *serv, struct net *net) dprintk("svc: attempting to unregister %sv%u\n", progp->pg_name, i); - __svc_unregister(net, progp->pg_prog, i, progp->pg_name); + __svc_unregister(progp->pg_prog, i, progp->pg_name); } } diff --git a/trunk/net/sunrpc/svc_xprt.c b/trunk/net/sunrpc/svc_xprt.c index 4bda09d7e1a4..74cb0d8e9ca1 100644 --- a/trunk/net/sunrpc/svc_xprt.c +++ b/trunk/net/sunrpc/svc_xprt.c @@ -922,65 +922,48 @@ void svc_close_xprt(struct svc_xprt *xprt) } EXPORT_SYMBOL_GPL(svc_close_xprt); -static void svc_close_list(struct list_head *xprt_list, struct net *net) +static void svc_close_list(struct list_head *xprt_list) { struct svc_xprt *xprt; list_for_each_entry(xprt, xprt_list, xpt_list) { - if (xprt->xpt_net != net) - continue; set_bit(XPT_CLOSE, &xprt->xpt_flags); set_bit(XPT_BUSY, &xprt->xpt_flags); } } -static void svc_clear_pools(struct svc_serv *serv, struct net *net) +void svc_close_all(struct svc_serv *serv) { struct svc_pool *pool; struct svc_xprt *xprt; struct svc_xprt *tmp; int i; + svc_close_list(&serv->sv_tempsocks); + svc_close_list(&serv->sv_permsocks); + for (i = 0; i < serv->sv_nrpools; i++) { pool = &serv->sv_pools[i]; spin_lock_bh(&pool->sp_lock); - list_for_each_entry_safe(xprt, tmp, &pool->sp_sockets, xpt_ready) { - if (xprt->xpt_net != net) - continue; + while (!list_empty(&pool->sp_sockets)) { + xprt = list_first_entry(&pool->sp_sockets, struct svc_xprt, xpt_ready); list_del_init(&xprt->xpt_ready); } spin_unlock_bh(&pool->sp_lock); } -} - -static void svc_clear_list(struct list_head *xprt_list, struct net *net) -{ - struct svc_xprt *xprt; - struct svc_xprt *tmp; - - list_for_each_entry_safe(xprt, tmp, xprt_list, xpt_list) { - if (xprt->xpt_net != net) - continue; - svc_delete_xprt(xprt); - } - 
list_for_each_entry(xprt, xprt_list, xpt_list) - BUG_ON(xprt->xpt_net == net); -} - -void svc_close_net(struct svc_serv *serv, struct net *net) -{ - svc_close_list(&serv->sv_tempsocks, net); - svc_close_list(&serv->sv_permsocks, net); - - svc_clear_pools(serv, net); /* * At this point the sp_sockets lists will stay empty, since * svc_enqueue will not add new entries without taking the * sp_lock and checking XPT_BUSY. */ - svc_clear_list(&serv->sv_tempsocks, net); - svc_clear_list(&serv->sv_permsocks, net); + list_for_each_entry_safe(xprt, tmp, &serv->sv_tempsocks, xpt_list) + svc_delete_xprt(xprt); + list_for_each_entry_safe(xprt, tmp, &serv->sv_permsocks, xpt_list) + svc_delete_xprt(xprt); + + BUG_ON(!list_empty(&serv->sv_permsocks)); + BUG_ON(!list_empty(&serv->sv_tempsocks)); } /* @@ -1106,7 +1089,6 @@ static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt) * svc_find_xprt - find an RPC transport instance * @serv: pointer to svc_serv to search * @xcl_name: C string containing transport's class name - * @net: owner net pointer * @af: Address family of transport's local address * @port: transport's IP port number * @@ -1119,8 +1101,7 @@ static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt) * service's list that has a matching class name. 
*/ struct svc_xprt *svc_find_xprt(struct svc_serv *serv, const char *xcl_name, - struct net *net, const sa_family_t af, - const unsigned short port) + const sa_family_t af, const unsigned short port) { struct svc_xprt *xprt; struct svc_xprt *found = NULL; @@ -1131,8 +1112,6 @@ struct svc_xprt *svc_find_xprt(struct svc_serv *serv, const char *xcl_name, spin_lock_bh(&serv->sv_lock); list_for_each_entry(xprt, &serv->sv_permsocks, xpt_list) { - if (xprt->xpt_net != net) - continue; if (strcmp(xprt->xpt_class->xcl_name, xcl_name)) continue; if (af != AF_UNSPEC && af != xprt->xpt_local.ss_family) diff --git a/trunk/net/sunrpc/svcauth_unix.c b/trunk/net/sunrpc/svcauth_unix.c index bcd574f2ac56..01153ead1dba 100644 --- a/trunk/net/sunrpc/svcauth_unix.c +++ b/trunk/net/sunrpc/svcauth_unix.c @@ -211,7 +211,7 @@ static int ip_map_parse(struct cache_detail *cd, len = qword_get(&mesg, buf, mlen); if (len <= 0) return -EINVAL; - if (rpc_pton(cd->net, buf, len, &address.sa, sizeof(address)) == 0) + if (rpc_pton(buf, len, &address.sa, sizeof(address)) == 0) return -EINVAL; switch (address.sa.sa_family) { case AF_INET: @@ -436,6 +436,7 @@ struct unix_gid { uid_t uid; struct group_info *gi; }; +static struct cache_head *gid_table[GID_HASHMAX]; static void unix_gid_put(struct kref *kref) { @@ -493,7 +494,8 @@ static int unix_gid_upcall(struct cache_detail *cd, struct cache_head *h) return sunrpc_cache_pipe_upcall(cd, h, unix_gid_request); } -static struct unix_gid *unix_gid_lookup(struct cache_detail *cd, uid_t uid); +static struct unix_gid *unix_gid_lookup(uid_t uid); +extern struct cache_detail unix_gid_cache; static int unix_gid_parse(struct cache_detail *cd, char *mesg, int mlen) @@ -537,19 +539,19 @@ static int unix_gid_parse(struct cache_detail *cd, GROUP_AT(ug.gi, i) = gid; } - ugp = unix_gid_lookup(cd, uid); + ugp = unix_gid_lookup(uid); if (ugp) { struct cache_head *ch; ug.h.flags = 0; ug.h.expiry_time = expiry; - ch = sunrpc_cache_update(cd, + ch = 
sunrpc_cache_update(&unix_gid_cache, &ug.h, &ugp->h, hash_long(uid, GID_HASHBITS)); if (!ch) err = -ENOMEM; else { err = 0; - cache_put(ch, cd); + cache_put(ch, &unix_gid_cache); } } else err = -ENOMEM; @@ -585,9 +587,10 @@ static int unix_gid_show(struct seq_file *m, return 0; } -static struct cache_detail unix_gid_cache_template = { +struct cache_detail unix_gid_cache = { .owner = THIS_MODULE, .hash_size = GID_HASHMAX, + .hash_table = gid_table, .name = "auth.unix.gid", .cache_put = unix_gid_put, .cache_upcall = unix_gid_upcall, @@ -599,42 +602,14 @@ static struct cache_detail unix_gid_cache_template = { .alloc = unix_gid_alloc, }; -int unix_gid_cache_create(struct net *net) -{ - struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); - struct cache_detail *cd; - int err; - - cd = cache_create_net(&unix_gid_cache_template, net); - if (IS_ERR(cd)) - return PTR_ERR(cd); - err = cache_register_net(cd, net); - if (err) { - cache_destroy_net(cd, net); - return err; - } - sn->unix_gid_cache = cd; - return 0; -} - -void unix_gid_cache_destroy(struct net *net) -{ - struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); - struct cache_detail *cd = sn->unix_gid_cache; - - sn->unix_gid_cache = NULL; - cache_purge(cd); - cache_unregister_net(cd, net); - cache_destroy_net(cd, net); -} - -static struct unix_gid *unix_gid_lookup(struct cache_detail *cd, uid_t uid) +static struct unix_gid *unix_gid_lookup(uid_t uid) { struct unix_gid ug; struct cache_head *ch; ug.uid = uid; - ch = sunrpc_cache_lookup(cd, &ug.h, hash_long(uid, GID_HASHBITS)); + ch = sunrpc_cache_lookup(&unix_gid_cache, &ug.h, + hash_long(uid, GID_HASHBITS)); if (ch) return container_of(ch, struct unix_gid, h); else @@ -646,13 +621,11 @@ static struct group_info *unix_gid_find(uid_t uid, struct svc_rqst *rqstp) struct unix_gid *ug; struct group_info *gi; int ret; - struct sunrpc_net *sn = net_generic(rqstp->rq_xprt->xpt_net, - sunrpc_net_id); - ug = unix_gid_lookup(sn->unix_gid_cache, uid); + ug = 
unix_gid_lookup(uid); if (!ug) return ERR_PTR(-EAGAIN); - ret = cache_check(sn->unix_gid_cache, &ug->h, &rqstp->rq_chandle); + ret = cache_check(&unix_gid_cache, &ug->h, &rqstp->rq_chandle); switch (ret) { case -ENOENT: return ERR_PTR(-ENOENT); @@ -660,7 +633,7 @@ static struct group_info *unix_gid_find(uid_t uid, struct svc_rqst *rqstp) return ERR_PTR(-ESHUTDOWN); case 0: gi = get_group_info(ug->gi); - cache_put(&ug->h, sn->unix_gid_cache); + cache_put(&ug->h, &unix_gid_cache); return gi; default: return ERR_PTR(-EAGAIN); @@ -876,45 +849,56 @@ struct auth_ops svcauth_unix = { .set_client = svcauth_unix_set_client, }; -static struct cache_detail ip_map_cache_template = { - .owner = THIS_MODULE, - .hash_size = IP_HASHMAX, - .name = "auth.unix.ip", - .cache_put = ip_map_put, - .cache_upcall = ip_map_upcall, - .cache_parse = ip_map_parse, - .cache_show = ip_map_show, - .match = ip_map_match, - .init = ip_map_init, - .update = update, - .alloc = ip_map_alloc, -}; - int ip_map_cache_create(struct net *net) { - struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); + int err = -ENOMEM; struct cache_detail *cd; - int err; + struct cache_head **tbl; + struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); + + cd = kzalloc(sizeof(struct cache_detail), GFP_KERNEL); + if (cd == NULL) + goto err_cd; + + tbl = kzalloc(IP_HASHMAX * sizeof(struct cache_head *), GFP_KERNEL); + if (tbl == NULL) + goto err_tbl; + + cd->owner = THIS_MODULE, + cd->hash_size = IP_HASHMAX, + cd->hash_table = tbl, + cd->name = "auth.unix.ip", + cd->cache_put = ip_map_put, + cd->cache_upcall = ip_map_upcall, + cd->cache_parse = ip_map_parse, + cd->cache_show = ip_map_show, + cd->match = ip_map_match, + cd->init = ip_map_init, + cd->update = update, + cd->alloc = ip_map_alloc, - cd = cache_create_net(&ip_map_cache_template, net); - if (IS_ERR(cd)) - return PTR_ERR(cd); err = cache_register_net(cd, net); - if (err) { - cache_destroy_net(cd, net); - return err; - } + if (err) + goto err_reg; + 
sn->ip_map_cache = cd; return 0; + +err_reg: + kfree(tbl); +err_tbl: + kfree(cd); +err_cd: + return err; } void ip_map_cache_destroy(struct net *net) { - struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); - struct cache_detail *cd = sn->ip_map_cache; + struct sunrpc_net *sn; - sn->ip_map_cache = NULL; - cache_purge(cd); - cache_unregister_net(cd, net); - cache_destroy_net(cd, net); + sn = net_generic(net, sunrpc_net_id); + cache_purge(sn->ip_map_cache); + cache_unregister_net(sn->ip_map_cache, net); + kfree(sn->ip_map_cache->hash_table); + kfree(sn->ip_map_cache); } diff --git a/trunk/net/sunrpc/svcsock.c b/trunk/net/sunrpc/svcsock.c index 40ae884db865..464570906f80 100644 --- a/trunk/net/sunrpc/svcsock.c +++ b/trunk/net/sunrpc/svcsock.c @@ -396,7 +396,7 @@ static int svc_partial_recvfrom(struct svc_rqst *rqstp, int buflen, unsigned int base) { size_t save_iovlen; - void *save_iovbase; + void __user *save_iovbase; unsigned int i; int ret; @@ -1409,8 +1409,7 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv, /* Register socket with portmapper */ if (*errp >= 0 && pmap_register) - *errp = svc_register(serv, sock_net(sock->sk), inet->sk_family, - inet->sk_protocol, + *errp = svc_register(serv, inet->sk_family, inet->sk_protocol, ntohs(inet_sk(inet)->inet_sport)); if (*errp < 0) { diff --git a/trunk/net/sunrpc/sysctl.c b/trunk/net/sunrpc/sysctl.c index af7d339add9d..e65dcc613339 100644 --- a/trunk/net/sunrpc/sysctl.c +++ b/trunk/net/sunrpc/sysctl.c @@ -20,8 +20,6 @@ #include #include -#include "netns.h" - /* * Declare the debug flags here */ @@ -112,7 +110,7 @@ proc_dodebug(ctl_table *table, int write, *(unsigned int *) table->data = value; /* Display the RPC tasks on writing to rpc_debug */ if (strcmp(table->procname, "rpc_debug") == 0) - rpc_show_tasks(&init_net); + rpc_show_tasks(); } else { if (!access_ok(VERIFY_WRITE, buffer, left)) return -EFAULT; diff --git a/trunk/net/sunrpc/xprt.c b/trunk/net/sunrpc/xprt.c index 0cbcd1ab49ab..c64c0ef519b5 
100644 --- a/trunk/net/sunrpc/xprt.c +++ b/trunk/net/sunrpc/xprt.c @@ -66,7 +66,6 @@ static void xprt_init(struct rpc_xprt *xprt, struct net *net); static void xprt_request_init(struct rpc_task *, struct rpc_xprt *); static void xprt_connect_status(struct rpc_task *task); static int __xprt_get_cong(struct rpc_xprt *, struct rpc_task *); -static void xprt_destroy(struct rpc_xprt *xprt); static DEFINE_SPINLOCK(xprt_list_lock); static LIST_HEAD(xprt_list); @@ -293,57 +292,54 @@ static inline int xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task) return retval; } -static bool __xprt_lock_write_func(struct rpc_task *task, void *data) +static void __xprt_lock_write_next(struct rpc_xprt *xprt) { - struct rpc_xprt *xprt = data; + struct rpc_task *task; struct rpc_rqst *req; + if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) + return; + + task = rpc_wake_up_next(&xprt->sending); + if (task == NULL) + goto out_unlock; + req = task->tk_rqstp; xprt->snd_task = task; if (req) { req->rq_bytes_sent = 0; req->rq_ntrans++; } - return true; -} - -static void __xprt_lock_write_next(struct rpc_xprt *xprt) -{ - if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) - return; + return; - if (rpc_wake_up_first(&xprt->sending, __xprt_lock_write_func, xprt)) - return; +out_unlock: xprt_clear_locked(xprt); } -static bool __xprt_lock_write_cong_func(struct rpc_task *task, void *data) +static void __xprt_lock_write_next_cong(struct rpc_xprt *xprt) { - struct rpc_xprt *xprt = data; + struct rpc_task *task; struct rpc_rqst *req; + if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) + return; + if (RPCXPRT_CONGESTED(xprt)) + goto out_unlock; + task = rpc_wake_up_next(&xprt->sending); + if (task == NULL) + goto out_unlock; + req = task->tk_rqstp; if (req == NULL) { xprt->snd_task = task; - return true; + return; } if (__xprt_get_cong(xprt, task)) { xprt->snd_task = task; req->rq_bytes_sent = 0; req->rq_ntrans++; - return true; - } - return false; -} - -static void 
__xprt_lock_write_next_cong(struct rpc_xprt *xprt) -{ - if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) - return; - if (RPCXPRT_CONGESTED(xprt)) - goto out_unlock; - if (rpc_wake_up_first(&xprt->sending, __xprt_lock_write_cong_func, xprt)) return; + } out_unlock: xprt_clear_locked(xprt); } @@ -716,7 +712,9 @@ void xprt_connect(struct rpc_task *task) if (xprt_connected(xprt)) xprt_release_write(xprt, task); else { - task->tk_rqstp->rq_bytes_sent = 0; + if (task->tk_rqstp) + task->tk_rqstp->rq_bytes_sent = 0; + task->tk_timeout = task->tk_rqstp->rq_timeout; rpc_sleep_on(&xprt->pending, task, xprt_connect_status); @@ -752,7 +750,7 @@ static void xprt_connect_status(struct rpc_task *task) default: dprintk("RPC: %5u xprt_connect_status: error %d connecting to " "server %s\n", task->tk_pid, -task->tk_status, - xprt->servername); + task->tk_client->cl_server); xprt_release_write(xprt, task); task->tk_status = -EIO; } @@ -886,7 +884,7 @@ void xprt_transmit(struct rpc_task *task) { struct rpc_rqst *req = task->tk_rqstp; struct rpc_xprt *xprt = req->rq_xprt; - int status, numreqs; + int status; dprintk("RPC: %5u xprt_transmit(%u)\n", task->tk_pid, req->rq_slen); @@ -923,14 +921,9 @@ void xprt_transmit(struct rpc_task *task) xprt->ops->set_retrans_timeout(task); - numreqs = atomic_read(&xprt->num_reqs); - if (numreqs > xprt->stat.max_slots) - xprt->stat.max_slots = numreqs; xprt->stat.sends++; xprt->stat.req_u += xprt->stat.sends - xprt->stat.recvs; xprt->stat.bklog_u += xprt->backlog.qlen; - xprt->stat.sending_u += xprt->sending.qlen; - xprt->stat.pending_u += xprt->pending.qlen; /* Don't race with disconnect */ if (!xprt_connected(xprt)) @@ -1138,10 +1131,7 @@ void xprt_release(struct rpc_task *task) return; xprt = req->rq_xprt; - if (task->tk_ops->rpc_count_stats != NULL) - task->tk_ops->rpc_count_stats(task, task->tk_calldata); - else if (task->tk_client) - rpc_count_iostats(task, task->tk_client->cl_metrics); + rpc_count_iostats(task); 
spin_lock_bh(&xprt->transport_lock); xprt->ops->release_xprt(xprt, task); if (xprt->ops->release_request) @@ -1230,17 +1220,6 @@ struct rpc_xprt *xprt_create_transport(struct xprt_create *args) (unsigned long)xprt); else init_timer(&xprt->timer); - - if (strlen(args->servername) > RPC_MAXNETNAMELEN) { - xprt_destroy(xprt); - return ERR_PTR(-EINVAL); - } - xprt->servername = kstrdup(args->servername, GFP_KERNEL); - if (xprt->servername == NULL) { - xprt_destroy(xprt); - return ERR_PTR(-ENOMEM); - } - dprintk("RPC: created transport %p with %u slots\n", xprt, xprt->max_reqs); out: @@ -1263,7 +1242,6 @@ static void xprt_destroy(struct rpc_xprt *xprt) rpc_destroy_wait_queue(&xprt->sending); rpc_destroy_wait_queue(&xprt->backlog); cancel_work_sync(&xprt->task_cleanup); - kfree(xprt->servername); /* * Tear down transport state and free the rpc_xprt */ diff --git a/trunk/net/sunrpc/xprtrdma/rpc_rdma.c b/trunk/net/sunrpc/xprtrdma/rpc_rdma.c index 558fbab574f0..1776e5731dcf 100644 --- a/trunk/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/trunk/net/sunrpc/xprtrdma/rpc_rdma.c @@ -771,18 +771,13 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep) /* get request object */ req = rpcr_to_rdmar(rqst); - if (req->rl_reply) { - spin_unlock(&xprt->transport_lock); - dprintk("RPC: %s: duplicate reply 0x%p to RPC " - "request 0x%p: xid 0x%08x\n", __func__, rep, req, - headerp->rm_xid); - goto repost; - } dprintk("RPC: %s: reply 0x%p completes request 0x%p\n" " RPC request 0x%p xid 0x%08x\n", __func__, rep, req, rqst, headerp->rm_xid); + BUG_ON(!req || req->rl_reply); + /* from here on, the reply is no longer an orphan */ req->rl_reply = rep; diff --git a/trunk/net/sunrpc/xprtrdma/verbs.c b/trunk/net/sunrpc/xprtrdma/verbs.c index 745973b729af..28236bab57f9 100644 --- a/trunk/net/sunrpc/xprtrdma/verbs.c +++ b/trunk/net/sunrpc/xprtrdma/verbs.c @@ -1490,9 +1490,6 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg, u8 key; int len, pageoff; int i, rc; - int seg_len; - u64 pa; - int page_no; 
pageoff = offset_in_page(seg1->mr_offset); seg1->mr_offset -= pageoff; /* start of page */ @@ -1500,15 +1497,11 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg, len = -pageoff; if (*nsegs > RPCRDMA_MAX_DATA_SEGS) *nsegs = RPCRDMA_MAX_DATA_SEGS; - for (page_no = i = 0; i < *nsegs;) { + for (i = 0; i < *nsegs;) { rpcrdma_map_one(ia, seg, writing); - pa = seg->mr_dma; - for (seg_len = seg->mr_len; seg_len > 0; seg_len -= PAGE_SIZE) { - seg1->mr_chunk.rl_mw->r.frmr.fr_pgl-> - page_list[page_no++] = pa; - pa += PAGE_SIZE; - } + seg1->mr_chunk.rl_mw->r.frmr.fr_pgl->page_list[i] = seg->mr_dma; len += seg->mr_len; + BUG_ON(seg->mr_len > PAGE_SIZE); ++seg; ++i; /* Check for holes */ @@ -1547,9 +1540,9 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg, frmr_wr.send_flags = IB_SEND_SIGNALED; frmr_wr.wr.fast_reg.iova_start = seg1->mr_dma; frmr_wr.wr.fast_reg.page_list = seg1->mr_chunk.rl_mw->r.frmr.fr_pgl; - frmr_wr.wr.fast_reg.page_list_len = page_no; + frmr_wr.wr.fast_reg.page_list_len = i; frmr_wr.wr.fast_reg.page_shift = PAGE_SHIFT; - frmr_wr.wr.fast_reg.length = page_no << PAGE_SHIFT; + frmr_wr.wr.fast_reg.length = i << PAGE_SHIFT; BUG_ON(frmr_wr.wr.fast_reg.length < len); frmr_wr.wr.fast_reg.access_flags = (writing ? 
IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE : diff --git a/trunk/net/sunrpc/xprtsock.c b/trunk/net/sunrpc/xprtsock.c index 92bc5181dbeb..55472c48825e 100644 --- a/trunk/net/sunrpc/xprtsock.c +++ b/trunk/net/sunrpc/xprtsock.c @@ -53,12 +53,12 @@ static void xs_close(struct rpc_xprt *xprt); /* * xprtsock tunables */ -static unsigned int xprt_udp_slot_table_entries = RPC_DEF_SLOT_TABLE; -static unsigned int xprt_tcp_slot_table_entries = RPC_MIN_SLOT_TABLE; -static unsigned int xprt_max_tcp_slot_table_entries = RPC_MAX_SLOT_TABLE; +unsigned int xprt_udp_slot_table_entries = RPC_DEF_SLOT_TABLE; +unsigned int xprt_tcp_slot_table_entries = RPC_MIN_SLOT_TABLE; +unsigned int xprt_max_tcp_slot_table_entries = RPC_MAX_SLOT_TABLE; -static unsigned int xprt_min_resvport = RPC_DEF_MIN_RESVPORT; -static unsigned int xprt_max_resvport = RPC_DEF_MAX_RESVPORT; +unsigned int xprt_min_resvport = RPC_DEF_MIN_RESVPORT; +unsigned int xprt_max_resvport = RPC_DEF_MAX_RESVPORT; #define XS_TCP_LINGER_TO (15U * HZ) static unsigned int xs_tcp_fin_timeout __read_mostly = XS_TCP_LINGER_TO; @@ -2227,7 +2227,7 @@ static void xs_local_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) idle_time = (long)(jiffies - xprt->last_used) / HZ; seq_printf(seq, "\txprt:\tlocal %lu %lu %lu %ld %lu %lu %lu " - "%llu %llu %lu %llu %llu\n", + "%llu %llu\n", xprt->stat.bind_count, xprt->stat.connect_count, xprt->stat.connect_time, @@ -2236,10 +2236,7 @@ static void xs_local_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) xprt->stat.recvs, xprt->stat.bad_xids, xprt->stat.req_u, - xprt->stat.bklog_u, - xprt->stat.max_slots, - xprt->stat.sending_u, - xprt->stat.pending_u); + xprt->stat.bklog_u); } /** @@ -2252,18 +2249,14 @@ static void xs_udp_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) { struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); - seq_printf(seq, "\txprt:\tudp %u %lu %lu %lu %lu %llu %llu " - "%lu %llu %llu\n", + seq_printf(seq, "\txprt:\tudp %u %lu %lu 
%lu %lu %Lu %Lu\n", transport->srcport, xprt->stat.bind_count, xprt->stat.sends, xprt->stat.recvs, xprt->stat.bad_xids, xprt->stat.req_u, - xprt->stat.bklog_u, - xprt->stat.max_slots, - xprt->stat.sending_u, - xprt->stat.pending_u); + xprt->stat.bklog_u); } /** @@ -2280,8 +2273,7 @@ static void xs_tcp_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) if (xprt_connected(xprt)) idle_time = (long)(jiffies - xprt->last_used) / HZ; - seq_printf(seq, "\txprt:\ttcp %u %lu %lu %lu %ld %lu %lu %lu " - "%llu %llu %lu %llu %llu\n", + seq_printf(seq, "\txprt:\ttcp %u %lu %lu %lu %ld %lu %lu %lu %Lu %Lu\n", transport->srcport, xprt->stat.bind_count, xprt->stat.connect_count, @@ -2291,10 +2283,7 @@ static void xs_tcp_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) xprt->stat.recvs, xprt->stat.bad_xids, xprt->stat.req_u, - xprt->stat.bklog_u, - xprt->stat.max_slots, - xprt->stat.sending_u, - xprt->stat.pending_u); + xprt->stat.bklog_u); } /* diff --git a/trunk/net/xfrm/xfrm_output.c b/trunk/net/xfrm/xfrm_output.c index 95a338c89f99..47bacd8c0250 100644 --- a/trunk/net/xfrm/xfrm_output.c +++ b/trunk/net/xfrm/xfrm_output.c @@ -21,7 +21,7 @@ static int xfrm_output2(struct sk_buff *skb); -static int xfrm_skb_check_space(struct sk_buff *skb) +static int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); int nhead = dst->header_len + LL_RESERVED_SPACE(dst->dev) @@ -48,7 +48,7 @@ static int xfrm_output_one(struct sk_buff *skb, int err) goto resume; do { - err = xfrm_skb_check_space(skb); + err = xfrm_state_check_space(x, skb); if (err) { XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR); goto error_nolock; diff --git a/trunk/net/xfrm/xfrm_replay.c b/trunk/net/xfrm/xfrm_replay.c index 2f6d11d04a2b..39e02c54ed26 100644 --- a/trunk/net/xfrm/xfrm_replay.c +++ b/trunk/net/xfrm/xfrm_replay.c @@ -167,7 +167,7 @@ static void xfrm_replay_advance(struct xfrm_state *x, __be32 net_seq) } if (xfrm_aevent_is_on(xs_net(x))) - 
x->repl->notify(x, XFRM_REPLAY_UPDATE); + xfrm_replay_notify(x, XFRM_REPLAY_UPDATE); } static int xfrm_replay_overflow_bmp(struct xfrm_state *x, struct sk_buff *skb) @@ -279,7 +279,7 @@ static void xfrm_replay_advance_bmp(struct xfrm_state *x, __be32 net_seq) replay_esn->bmp[nr] |= (1U << bitnr); if (xfrm_aevent_is_on(xs_net(x))) - x->repl->notify(x, XFRM_REPLAY_UPDATE); + xfrm_replay_notify(x, XFRM_REPLAY_UPDATE); } static void xfrm_replay_notify_bmp(struct xfrm_state *x, int event) @@ -473,7 +473,7 @@ static void xfrm_replay_advance_esn(struct xfrm_state *x, __be32 net_seq) replay_esn->bmp[nr] |= (1U << bitnr); if (xfrm_aevent_is_on(xs_net(x))) - x->repl->notify(x, XFRM_REPLAY_UPDATE); + xfrm_replay_notify(x, XFRM_REPLAY_UPDATE); } static struct xfrm_replay xfrm_replay_legacy = { diff --git a/trunk/security/keys/key.c b/trunk/security/keys/key.c index 06783cffb3af..7ada8019be1f 100644 --- a/trunk/security/keys/key.c +++ b/trunk/security/keys/key.c @@ -671,26 +671,6 @@ struct key_type *key_type_lookup(const char *type) return ktype; } -void key_set_timeout(struct key *key, unsigned timeout) -{ - struct timespec now; - time_t expiry = 0; - - /* make the changes with the locks held to prevent races */ - down_write(&key->sem); - - if (timeout > 0) { - now = current_kernel_time(); - expiry = now.tv_sec + timeout; - } - - key->expiry = expiry; - key_schedule_gc(key->expiry + key_gc_delay); - - up_write(&key->sem); -} -EXPORT_SYMBOL_GPL(key_set_timeout); - /* * Unlock a key type locked by key_type_lookup(). 
*/ diff --git a/trunk/security/keys/keyctl.c b/trunk/security/keys/keyctl.c index fb767c6cd99f..6523599e9ac0 100644 --- a/trunk/security/keys/keyctl.c +++ b/trunk/security/keys/keyctl.c @@ -14,7 +14,6 @@ #include #include #include -#include #include #include #include @@ -1258,8 +1257,10 @@ long keyctl_set_reqkey_keyring(int reqkey_defl) */ long keyctl_set_timeout(key_serial_t id, unsigned timeout) { + struct timespec now; struct key *key, *instkey; key_ref_t key_ref; + time_t expiry; long ret; key_ref = lookup_user_key(id, KEY_LOOKUP_CREATE | KEY_LOOKUP_PARTIAL, @@ -1285,7 +1286,20 @@ long keyctl_set_timeout(key_serial_t id, unsigned timeout) okay: key = key_ref_to_ptr(key_ref); - key_set_timeout(key, timeout); + + /* make the changes with the locks held to prevent races */ + down_write(&key->sem); + + expiry = 0; + if (timeout > 0) { + now = current_kernel_time(); + expiry = now.tv_sec + timeout; + } + + key->expiry = expiry; + key_schedule_gc(key->expiry + key_gc_delay); + + up_write(&key->sem); key_put(key); ret = 0; diff --git a/trunk/tools/testing/ktest/ktest.pl b/trunk/tools/testing/ktest/ktest.pl index 95d6a6f7c33a..758ec2a08c40 100755 --- a/trunk/tools/testing/ktest/ktest.pl +++ b/trunk/tools/testing/ktest/ktest.pl @@ -46,7 +46,6 @@ "DIE_ON_FAILURE" => 1, "SSH_EXEC" => "ssh \$SSH_USER\@\$MACHINE \$SSH_COMMAND", "SCP_TO_TARGET" => "scp \$SRC_FILE \$SSH_USER\@\$MACHINE:\$DST_FILE", - "SCP_TO_TARGET_INSTALL" => "\${SCP_TO_TARGET}", "REBOOT" => "ssh \$SSH_USER\@\$MACHINE reboot", "STOP_AFTER_SUCCESS" => 10, "STOP_AFTER_FAILURE" => 60, @@ -87,13 +86,11 @@ my $switch_to_good; my $switch_to_test; my $poweroff_on_error; -my $reboot_on_success; my $die_on_failure; my $powercycle_after_reboot; my $poweroff_after_halt; my $ssh_exec; my $scp_to_target; -my $scp_to_target_install; my $power_off; my $grub_menu; my $grub_number; @@ -214,7 +211,6 @@ "SWITCH_TO_GOOD" => \$switch_to_good, "SWITCH_TO_TEST" => \$switch_to_test, "POWEROFF_ON_ERROR" => \$poweroff_on_error, - 
"REBOOT_ON_SUCCESS" => \$reboot_on_success, "DIE_ON_FAILURE" => \$die_on_failure, "POWER_OFF" => \$power_off, "POWERCYCLE_AFTER_REBOOT" => \$powercycle_after_reboot, @@ -247,7 +243,6 @@ "BUILD_TARGET" => \$build_target, "SSH_EXEC" => \$ssh_exec, "SCP_TO_TARGET" => \$scp_to_target, - "SCP_TO_TARGET_INSTALL" => \$scp_to_target_install, "CHECKOUT" => \$checkout, "TARGET_IMAGE" => \$target_image, "LOCALVERSION" => \$localversion, @@ -1118,6 +1113,7 @@ sub reboot_to_good { if (defined($switch_to_good)) { run_command $switch_to_good; + return; } reboot $time; @@ -1353,7 +1349,8 @@ sub run_ssh { } sub run_scp { - my ($src, $dst, $cp_scp) = @_; + my ($src, $dst) = @_; + my $cp_scp = $scp_to_target; $cp_scp =~ s/\$SRC_FILE/$src/g; $cp_scp =~ s/\$DST_FILE/$dst/g; @@ -1361,22 +1358,6 @@ sub run_scp { return run_command "$cp_scp"; } -sub run_scp_install { - my ($src, $dst) = @_; - - my $cp_scp = $scp_to_target_install; - - return run_scp($src, $dst, $cp_scp); -} - -sub run_scp_mod { - my ($src, $dst) = @_; - - my $cp_scp = $scp_to_target; - - return run_scp($src, $dst, $cp_scp); -} - sub get_grub_index { if ($reboot_type ne "grub") { @@ -1479,7 +1460,6 @@ sub get_sha1 { sub monitor { my $booted = 0; my $bug = 0; - my $bug_ignored = 0; my $skip_call_trace = 0; my $loops; @@ -1551,13 +1531,9 @@ sub monitor { } if ($full_line =~ /call trace:/i) { - if (!$bug && !$skip_call_trace) { - if ($ignore_errors) { - $bug_ignored = 1; - } else { - $bug = 1; - $failure_start = time; - } + if (!$ignore_errors && !$bug && !$skip_call_trace) { + $bug = 1; + $failure_start = time; } } @@ -1619,10 +1595,6 @@ sub monitor { fail "failed - never got a boot prompt." 
and return 0; } - if ($bug_ignored) { - doprint "WARNING: Call Trace detected but ignored due to IGNORE_ERRORS=1\n"; - } - return 1; } @@ -1649,7 +1621,7 @@ sub install { my $cp_target = eval_kernel_version $target_image; - run_scp_install "$outputdir/$build_target", "$cp_target" or + run_scp "$outputdir/$build_target", "$cp_target" or dodie "failed to copy image"; my $install_mods = 0; @@ -1671,7 +1643,7 @@ sub install { return; } - run_command "$make INSTALL_MOD_STRIP=1 INSTALL_MOD_PATH=$tmpdir modules_install" or + run_command "$make INSTALL_MOD_PATH=$tmpdir modules_install" or dodie "Failed to install modules"; my $modlib = "/lib/modules/$version"; @@ -1684,7 +1656,7 @@ sub install { run_command "cd $tmpdir && tar -cjf $modtar lib/modules/$version" or dodie "making tarball"; - run_scp_mod "$tmpdir/$modtar", "/tmp" or + run_scp "$tmpdir/$modtar", "/tmp" or dodie "failed to copy modules"; unlink "$tmpdir/$modtar"; @@ -3554,10 +3526,8 @@ sub set_test_option { die "failed to checkout $checkout"; } - # A test may opt to not reboot the box - if ($reboot_on_success) { - $no_reboot = 0; - } + $no_reboot = 0; + if ($test_type eq "bisect") { bisect $i; @@ -3602,12 +3572,8 @@ sub set_test_option { halt; } elsif ($opt{"REBOOT_ON_SUCCESS"} && !do_not_reboot) { reboot_to_good; -} elsif (defined($switch_to_good)) { - # still need to get to the good kernel - run_command $switch_to_good; } - doprint "\n $successes of $opt{NUM_TESTS} tests were successful\n\n"; exit 0; diff --git a/trunk/tools/testing/ktest/sample.conf b/trunk/tools/testing/ktest/sample.conf index b682456afda8..5ea04c6a71bf 100644 --- a/trunk/tools/testing/ktest/sample.conf +++ b/trunk/tools/testing/ktest/sample.conf @@ -710,18 +710,10 @@ # The variables SSH_USER, MACHINE and SSH_COMMAND are defined #SSH_EXEC = ssh $SSH_USER@$MACHINE $SSH_COMMAND"; -# The way to copy a file to the target (install and modules) +# The way to copy a file to the target # (default scp $SRC_FILE $SSH_USER@$MACHINE:$DST_FILE) -# The 
variables SSH_USER, MACHINE are defined by the config -# SRC_FILE and DST_FILE are ktest internal variables and -# should only have '$' and not the '${}' notation. -# (default scp $SRC_FILE ${SSH_USER}@${MACHINE}:$DST_FILE) -#SCP_TO_TARGET = echo skip scp for $SRC_FILE $DST_FILE - -# If install needs to be different than modules, then this -# option will override the SCP_TO_TARGET for installation. -# (default ${SCP_TO_TARGET} ) -#SCP_TO_TARGET_INSTALL = scp $SRC_FILE tftp@tftpserver:$DST_FILE +# The variables SSH_USER, MACHINE, SRC_FILE and DST_FILE are defined. +#SCP_TO_TARGET = scp $SRC_FILE $SSH_USER@$MACHINE:$DST_FILE # The nice way to reboot the target # (default ssh $SSH_USER@$MACHINE reboot)