diff --git a/[refs] b/[refs] index 8f1af7b95d9d..cc9d7b8a0a36 100644 --- a/[refs] +++ b/[refs] @@ -1,2 +1,2 @@ --- -refs/heads/master: db2fb9db5735cc532fd4fc55e94b9a3c3750378e +refs/heads/master: 80fc9f532d8c05d4cb12d55660624ce53a378349 diff --git a/trunk/Documentation/cpu-freq/core.txt b/trunk/Documentation/cpu-freq/core.txt index ce0666e51036..29b3f9ffc66c 100644 --- a/trunk/Documentation/cpu-freq/core.txt +++ b/trunk/Documentation/cpu-freq/core.txt @@ -24,7 +24,7 @@ Contents: 1. General Information ======================= -The CPUFreq core code is located in drivers/cpufreq/cpufreq.c. This +The CPUFreq core code is located in linux/kernel/cpufreq.c. This cpufreq code offers a standardized interface for the CPUFreq architecture drivers (those pieces of code that do actual frequency transitions), as well as to "notifiers". These are device diff --git a/trunk/Documentation/feature-removal-schedule.txt b/trunk/Documentation/feature-removal-schedule.txt index 7ebca0775ec1..24f3c63b3017 100644 --- a/trunk/Documentation/feature-removal-schedule.txt +++ b/trunk/Documentation/feature-removal-schedule.txt @@ -280,25 +280,3 @@ Why: Orphaned for ages. SMP bugs long unfixed. Few users left Who: Jeff Garzik --------------------------- - -What: ACPI hooks (X86_SPEEDSTEP_CENTRINO_ACPI) in speedstep-centrino driver -When: December 2006 -Why: Speedstep-centrino driver with ACPI hooks and acpi-cpufreq driver are - functionally very much similar. They talk to ACPI in same way. Only - difference between them is the way they do frequency transitions. - One uses MSRs and the other one uses IO ports. Functionaliy of - speedstep_centrino with ACPI hooks is now merged into acpi-cpufreq. - That means one common driver will support all Intel Enhanced Speedstep - capable CPUs. That means less confusion over name of - speedstep-centrino driver (with that driver supposed to be used on - non-centrino platforms). That means less duplication of code and - less maintenance effort and no possibility of these two drivers - going out of sync. - Current users of speedstep_centrino with ACPI hooks are requested to - switch over to acpi-cpufreq driver. speedstep-centrino will continue - to work using older non-ACPI static table based scheme even after this - date. - -Who: Venkatesh Pallipadi - ---------------------------- diff --git a/trunk/Documentation/filesystems/00-INDEX b/trunk/Documentation/filesystems/00-INDEX index 4dc28cc93503..3c384c0cf86e 100644 --- a/trunk/Documentation/filesystems/00-INDEX +++ b/trunk/Documentation/filesystems/00-INDEX @@ -34,8 +34,6 @@ ext2.txt - info, mount options and specifications for the Ext2 filesystem. ext3.txt - info, mount options and specifications for the Ext3 filesystem. -ext4.txt - - info, mount options and specifications for the Ext4 filesystem. files.txt - info on file management in the Linux kernel. fuse.txt diff --git a/trunk/Documentation/filesystems/ext4.txt b/trunk/Documentation/filesystems/ext4.txt deleted file mode 100644 index 6a4adcae9f9a..000000000000 --- a/trunk/Documentation/filesystems/ext4.txt +++ /dev/null @@ -1,236 +0,0 @@ - -Ext4 Filesystem -=============== - -This is a development version of the ext4 filesystem, an advanced level -of the ext3 filesystem which incorporates scalability and reliability -enhancements for supporting large filesystems (64 bit) in keeping with -increasing disk capacities and state-of-the-art feature requirements. - -Mailing list: linux-ext4@vger.kernel.org - - -1. Quick usage instructions: -=========================== - - - Grab updated e2fsprogs from - ftp://ftp.kernel.org/pub/linux/kernel/people/tytso/e2fsprogs-interim/ - This is a patchset on top of e2fsprogs-1.39, which can be found at - ftp://ftp.kernel.org/pub/linux/kernel/people/tytso/e2fsprogs/ - - - It's still mke2fs -j /dev/hda1 - - - mount /dev/hda1 /wherever -t ext4dev - - - To enable extents, - - mount /dev/hda1 /wherever -t ext4dev -o extents - - - The filesystem is compatible with the ext3 driver until you add a file - which has extents (ie: `mount -o extents', then create a file). - - NOTE: The "extents" mount flag is temporary. It will soon go away and - extents will be enabled by the "-o extents" flag to mke2fs or tune2fs - - - When comparing performance with other filesystems, remember that - ext3/4 by default offers higher data integrity guarantees than most. So - when comparing with a metadata-only journalling filesystem, use `mount -o - data=writeback'. And you might as well use `mount -o nobh' too along - with it. Making the journal larger than the mke2fs default often helps - performance with metadata-intensive workloads. - -2. Features -=========== - -2.1 Currently available - -* ability to use filesystems > 16TB -* extent format reduces metadata overhead (RAM, IO for access, transactions) -* extent format more robust in face of on-disk corruption due to magics, -* internal redunancy in tree - -2.1 Previously available, soon to be enabled by default by "mkefs.ext4": - -* dir_index and resize inode will be on by default -* large inodes will be used by default for fast EAs, nsec timestamps, etc - -2.2 Candidate features for future inclusion - -There are several under discussion, whether they all make it in is -partly a function of how much time everyone has to work on them: - -* improved file allocation (multi-block alloc, delayed alloc; basically done) -* fix 32000 subdirectory limit (patch exists, needs some e2fsck work) -* nsec timestamps for mtime, atime, ctime, create time (patch exists, - needs some e2fsck work) -* inode version field on disk (NFSv4, Lustre; prototype exists) -* reduced mke2fs/e2fsck time via uninitialized groups (prototype exists) -* journal checksumming for robustness, performance (prototype exists) -* persistent file preallocation (e.g for streaming media, databases) - -Features like metadata checksumming have been discussed and planned for -a bit but no patches exist yet so I'm not sure they're in the near-term -roadmap. - -The big performance win will come with mballoc and delalloc. CFS has -been using mballoc for a few years already with Lustre, and IBM + Bull -did a lot of benchmarking on it. The reason it isn't in the first set of -patches is partly a manageability issue, and partly because it doesn't -directly affect the on-disk format (outside of much better allocation) -so it isn't critical to get into the first round of changes. I believe -Alex is working on a new set of patches right now. - -3. Options -========== - -When mounting an ext4 filesystem, the following option are accepted: -(*) == default - -extents ext4 will use extents to address file data. The - file system will no longer be mountable by ext3. - -journal=update Update the ext4 file system's journal to the current - format. - -journal=inum When a journal already exists, this option is ignored. - Otherwise, it specifies the number of the inode which - will represent the ext4 file system's journal file. - -journal_dev=devnum When the external journal device's major/minor numbers - have changed, this option allows the user to specify - the new journal location. The journal device is - identified through its new major/minor numbers encoded - in devnum. - -noload Don't load the journal on mounting. - -data=journal All data are committed into the journal prior to being - written into the main file system. - -data=ordered (*) All data are forced directly out to the main file - system prior to its metadata being committed to the - journal. - -data=writeback Data ordering is not preserved, data may be written - into the main file system after its metadata has been - committed to the journal. - -commit=nrsec (*) Ext4 can be told to sync all its data and metadata - every 'nrsec' seconds. The default value is 5 seconds. - This means that if you lose your power, you will lose - as much as the latest 5 seconds of work (your - filesystem will not be damaged though, thanks to the - journaling). This default value (or any low value) - will hurt performance, but it's good for data-safety. - Setting it to 0 will have the same effect as leaving - it at the default (5 seconds). - Setting it to very large values will improve - performance. - -barrier=1 This enables/disables barriers. barrier=0 disables - it, barrier=1 enables it. - -orlov (*) This enables the new Orlov block allocator. It is - enabled by default. - -oldalloc This disables the Orlov block allocator and enables - the old block allocator. Orlov should have better - performance - we'd like to get some feedback if it's - the contrary for you. - -user_xattr Enables Extended User Attributes. Additionally, you - need to have extended attribute support enabled in the - kernel configuration (CONFIG_EXT4_FS_XATTR). See the - attr(5) manual page and http://acl.bestbits.at/ to - learn more about extended attributes. - -nouser_xattr Disables Extended User Attributes. - -acl Enables POSIX Access Control Lists support. - Additionally, you need to have ACL support enabled in - the kernel configuration (CONFIG_EXT4_FS_POSIX_ACL). - See the acl(5) manual page and http://acl.bestbits.at/ - for more information. - -noacl This option disables POSIX Access Control List - support. - -reservation - -noreservation - -bsddf (*) Make 'df' act like BSD. -minixdf Make 'df' act like Minix. - -check=none Don't do extra checking of bitmaps on mount. -nocheck - -debug Extra debugging information is sent to syslog. - -errors=remount-ro(*) Remount the filesystem read-only on an error. -errors=continue Keep going on a filesystem error. -errors=panic Panic and halt the machine if an error occurs. - -grpid Give objects the same group ID as their creator. -bsdgroups - -nogrpid (*) New objects have the group ID of their creator. -sysvgroups - -resgid=n The group ID which may use the reserved blocks. - -resuid=n The user ID which may use the reserved blocks. - -sb=n Use alternate superblock at this location. - -quota -noquota -grpquota -usrquota - -bh (*) ext4 associates buffer heads to data pages to -nobh (a) cache disk block mapping information - (b) link pages into transaction to provide - ordering guarantees. - "bh" option forces use of buffer heads. - "nobh" option tries to avoid associating buffer - heads (supported only for "writeback" mode). - - -Data Mode ---------- -There are 3 different data modes: - -* writeback mode -In data=writeback mode, ext4 does not journal data at all. This mode provides -a similar level of journaling as that of XFS, JFS, and ReiserFS in its default -mode - metadata journaling. A crash+recovery can cause incorrect data to -appear in files which were written shortly before the crash. This mode will -typically provide the best ext4 performance. - -* ordered mode -In data=ordered mode, ext4 only officially journals metadata, but it logically -groups metadata and data blocks into a single unit called a transaction. When -it's time to write the new metadata out to disk, the associated data blocks -are written first. In general, this mode performs slightly slower than -writeback but significantly faster than journal mode. - -* journal mode -data=journal mode provides full data and metadata journaling. All new data is -written to the journal first, and then to its final location. -In the event of a crash, the journal can be replayed, bringing both data and -metadata into a consistent state. This mode is the slowest except when data -needs to be read from and written to disk at the same time where it -outperforms all others modes. - -References -========== - -kernel source: - - -programs: http://e2fsprogs.sourceforge.net/ - http://ext2resize.sourceforge.net - -useful links: http://fedoraproject.org/wiki/ext3-devel - http://www.bullopensource.org/ext4/ diff --git a/trunk/Documentation/ibm-acpi.txt b/trunk/Documentation/ibm-acpi.txt index e50595bfd8ea..71aa40345272 100644 --- a/trunk/Documentation/ibm-acpi.txt +++ b/trunk/Documentation/ibm-acpi.txt @@ -30,10 +30,9 @@ detailed description): - ACPI sounds - temperature sensors - Experimental: embedded controller register dump - - LCD brightness control - - Volume control + - Experimental: LCD brightness control + - Experimental: volume control - Experimental: fan speed, fan enable/disable - - Experimental: WAN enable and disable A compatibility table by model and feature is maintained on the web site, http://ibm-acpi.sf.net/. I appreciate any success or failure @@ -53,7 +52,40 @@ Installation If you are compiling this driver as included in the Linux kernel sources, simply enable the CONFIG_ACPI_IBM option (Power Management / -ACPI / IBM ThinkPad Laptop Extras). +ACPI / IBM ThinkPad Laptop Extras). The rest of this section describes +how to install this driver when downloaded from the web site. + +First, you need to get a kernel with ACPI support up and running. +Please refer to http://acpi.sourceforge.net/ for help with this +step. How successful you will be depends a lot on you ThinkPad model, +the kernel you are using and any additional patches applied. The +kernel provided with your distribution may not be good enough. I +needed to compile a 2.6.7 kernel with the 20040715 ACPI patch to get +ACPI working reliably on my ThinkPad X40. Old ThinkPad models may not +be supported at all. + +Assuming you have the basic ACPI support working (e.g. you can see the +/proc/acpi directory), follow the following steps to install this +driver: + + - unpack the archive: + + tar xzvf ibm-acpi-x.y.tar.gz; cd ibm-acpi-x.y + + - compile the driver: + + make + + - install the module in your kernel modules directory: + + make install + + - load the module: + + modprobe ibm_acpi + +After loading the module, check the "dmesg" output for any error messages. + Features -------- @@ -491,8 +523,13 @@ registers contain the current battery capacity, etc. If you experiment with this, do send me your results (including some complete dumps with a description of the conditions when they were taken.) -LCD brightness control -- /proc/acpi/ibm/brightness ---------------------------------------------------- +EXPERIMENTAL: LCD brightness control -- /proc/acpi/ibm/brightness +----------------------------------------------------------------- + +This feature is marked EXPERIMENTAL because the implementation +directly accesses hardware registers and may not work as expected. USE +WITH CAUTION! To use this feature, you need to supply the +experimental=1 parameter when loading the module. This feature allows software control of the LCD brightness on ThinkPad models which don't have a hardware brightness slider. The available @@ -505,8 +542,13 @@ commands are: The number range is 0 to 7, although not all of them may be distinct. The current brightness level is shown in the file. -Volume control -- /proc/acpi/ibm/volume ---------------------------------------- +EXPERIMENTAL: Volume control -- /proc/acpi/ibm/volume +----------------------------------------------------- + +This feature is marked EXPERIMENTAL because the implementation +directly accesses hardware registers and may not work as expected. USE +WITH CAUTION! To use this feature, you need to supply the +experimental=1 parameter when loading the module. This feature allows volume control on ThinkPad models which don't have a hardware volume knob. The available commands are: @@ -569,23 +611,6 @@ with the following command: echo 'level ' > /proc/acpi/ibm/thermal -EXPERIMENTAL: WAN -- /proc/acpi/ibm/wan ---------------------------------------- - -This feature is marked EXPERIMENTAL because the implementation -directly accesses hardware registers and may not work as expected. USE -WITH CAUTION! To use this feature, you need to supply the -experimental=1 parameter when loading the module. - -This feature shows the presence and current state of a WAN (Sierra -Wireless EV-DO) device. If WAN is installed, the following commands can -be used: - - echo enable > /proc/acpi/ibm/wan - echo disable > /proc/acpi/ibm/wan - -It was tested on a Lenovo Thinkpad X60. It should probably work on other -Thinkpad models which come with this module installed. Multiple Commands, Module Parameters ------------------------------------ diff --git a/trunk/Documentation/lockdep-design.txt b/trunk/Documentation/lockdep-design.txt index 488773018152..dab123db5a4f 100644 --- a/trunk/Documentation/lockdep-design.txt +++ b/trunk/Documentation/lockdep-design.txt @@ -50,10 +50,10 @@ The bit position indicates hardirq, softirq, hardirq-read, softirq-read respectively, and the character displayed in each indicates: - '.' acquired while irqs disabled + '.' acquired while irqs enabled '+' acquired in irq context - '-' acquired with irqs enabled - '?' read acquired in irq context with irqs enabled. + '-' acquired in process context with irqs disabled + '?' read-acquired both with irqs enabled and in irq context Unused mutexes cannot be part of the cause of an error. diff --git a/trunk/Documentation/sysctl/kernel.txt b/trunk/Documentation/sysctl/kernel.txt index 0bc7f1e3c9e6..89bf8c20a586 100644 --- a/trunk/Documentation/sysctl/kernel.txt +++ b/trunk/Documentation/sysctl/kernel.txt @@ -86,7 +86,7 @@ valid for 30 seconds. core_pattern: core_pattern is used to specify a core dumpfile pattern name. -. max length 128 characters; default value is "core" +. max length 64 characters; default value is "core" . core_pattern is used as a pattern template for the output filename; certain string patterns (beginning with '%') are substituted with their actual values. @@ -105,9 +105,6 @@ core_pattern is used to specify a core dumpfile pattern name. %h hostname %e executable filename % both are dropped -. If the first character of the pattern is a '|', the kernel will treat - the rest of the pattern as a command to run. The core dump will be - written to the standard input of that program instead of to a file. ============================================================== diff --git a/trunk/Documentation/video4linux/CARDLIST.cx88 b/trunk/Documentation/video4linux/CARDLIST.cx88 index 8755b3e7b09e..126e59d935cd 100644 --- a/trunk/Documentation/video4linux/CARDLIST.cx88 +++ b/trunk/Documentation/video4linux/CARDLIST.cx88 @@ -51,7 +51,7 @@ 50 -> NPG Tech Real TV FM Top 10 [14f1:0842] 51 -> WinFast DTV2000 H [107d:665e] 52 -> Geniatech DVB-S [14f1:0084] - 53 -> Hauppauge WinTV-HVR3000 TriMode Analog/DVB-S/DVB-T [0070:1404,0070:1400,0070:1401,0070:1402] + 53 -> Hauppauge WinTV-HVR3000 TriMode Analog/DVB-S/DVB-T [0070:1404] 54 -> Norwood Micro TV Tuner 55 -> Shenzhen Tungsten Ages Tech TE-DTV-250 / Swann OEM [c180:c980] 56 -> Hauppauge WinTV-HVR1300 DVB-T/Hybrid MPEG Encoder [0070:9600,0070:9601,0070:9602] diff --git a/trunk/MAINTAINERS b/trunk/MAINTAINERS index b444b0b5c772..931e6e40c08b 100644 --- a/trunk/MAINTAINERS +++ b/trunk/MAINTAINERS @@ -720,7 +720,7 @@ P: Dave Jones M: davej@codemonkey.org.uk L: cpufreq@lists.linux.org.uk W: http://www.codemonkey.org.uk/projects/cpufreq/ -T: git kernel.org/pub/scm/linux/kernel/git/davej/cpufreq.git +T: git kernel.org/pub/scm/linux/kernel/davej/cpufreq.git S: Maintained CPUID/MSR DRIVER @@ -905,8 +905,7 @@ P: David Teigland M: teigland@redhat.com L: cluster-devel@redhat.com W: http://sources.redhat.com/cluster/ -T: git kernel.org:/pub/scm/linux/kernel/git/steve/gfs2-2.6-fixes.git -T: git kernel.org:/pub/scm/linux/kernel/git/steve/gfs2-2.6-nmw.git +T: git kernel.org:/pub/scm/linux/kernel/git/steve/gfs-2.6.git S: Supported DAVICOM FAST ETHERNET (DMFE) NETWORK DRIVER @@ -1189,8 +1188,7 @@ P: Steven Whitehouse M: swhiteho@redhat.com L: cluster-devel@redhat.com W: http://sources.redhat.com/cluster/ -T: git kernel.org:/pub/scm/linux/kernel/git/steve/gfs2-2.6-fixes.git -T: git kernel.org:/pub/scm/linux/kernel/git/steve/gfs2-2.6-nmw.git +T: git kernel.org:/pub/scm/linux/kernel/git/steve/gfs-2.6.git S: Supported GIGASET ISDN DRIVERS @@ -1998,13 +1996,6 @@ M: rubini@ipvvis.unipv.it L: linux-kernel@vger.kernel.org S: Maintained -MSI LAPTOP SUPPORT -P: Lennart Poettering -M: mzxreary@0pointer.de -L: https://tango.0pointer.de/mailman/listinfo/s270-linux -W: http://0pointer.de/lennart/tchibo.html -S: Maintained - MTRR AND SIMILAR SUPPORT [i386] P: Richard Gooch M: rgooch@atnf.csiro.au diff --git a/trunk/Makefile b/trunk/Makefile index 703d40a65e7e..274b780029b1 100644 --- a/trunk/Makefile +++ b/trunk/Makefile @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 6 SUBLEVEL = 19 -EXTRAVERSION =-rc2 +EXTRAVERSION =-rc1 NAME=Avast! A bilge rat! # *DOCUMENTATION* @@ -741,9 +741,6 @@ endif # ifdef CONFIG_KALLSYMS # vmlinux image - including updated kernel symbols vmlinux: $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) $(kallsyms.o) FORCE -ifdef CONFIG_HEADERS_CHECK - $(Q)$(MAKE) -f $(srctree)/Makefile headers_check -endif $(call if_changed_rule,vmlinux__) $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modpost $@ $(Q)rm -f .old_version @@ -935,7 +932,7 @@ headers_install_all: include/linux/version.h scripts_basic FORCE PHONY += headers_install headers_install: include/linux/version.h scripts_basic FORCE - @if [ ! -r $(srctree)/include/asm-$(ARCH)/Kbuild ]; then \ + @if [ ! -r include/asm-$(ARCH)/Kbuild ]; then \ echo '*** Error: Headers not exportable for this architecture ($(ARCH))'; \ exit 1 ; fi $(Q)$(MAKE) $(build)=scripts scripts/unifdef diff --git a/trunk/arch/alpha/kernel/alpha_ksyms.c b/trunk/arch/alpha/kernel/alpha_ksyms.c index e9762a33b043..8b02420f732e 100644 --- a/trunk/arch/alpha/kernel/alpha_ksyms.c +++ b/trunk/arch/alpha/kernel/alpha_ksyms.c @@ -6,13 +6,40 @@ */ #include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include #include +#include #include +#include #include +#include #include +#include #include +#include +#include +#include +#include +#include -#include +#include + +extern struct hwrpb_struct *hwrpb; +extern spinlock_t rtc_lock; /* these are C runtime functions with special calling conventions: */ extern void __divl (void); @@ -25,9 +52,14 @@ extern void __divqu (void); extern void __remqu (void); EXPORT_SYMBOL(alpha_mv); +EXPORT_SYMBOL(screen_info); +EXPORT_SYMBOL(perf_irq); EXPORT_SYMBOL(callback_getenv); EXPORT_SYMBOL(callback_setenv); EXPORT_SYMBOL(callback_save_env); +#ifdef CONFIG_ALPHA_GENERIC +EXPORT_SYMBOL(alpha_using_srm); +#endif /* CONFIG_ALPHA_GENERIC */ /* platform dependent support */ EXPORT_SYMBOL(strcat); @@ -45,14 +77,47 @@ EXPORT_SYMBOL(__constant_c_memset); EXPORT_SYMBOL(copy_page); EXPORT_SYMBOL(clear_page); +EXPORT_SYMBOL(__direct_map_base); +EXPORT_SYMBOL(__direct_map_size); + +#ifdef CONFIG_PCI +EXPORT_SYMBOL(pci_alloc_consistent); +EXPORT_SYMBOL(pci_free_consistent); +EXPORT_SYMBOL(pci_map_single); +EXPORT_SYMBOL(pci_map_page); +EXPORT_SYMBOL(pci_unmap_single); +EXPORT_SYMBOL(pci_unmap_page); +EXPORT_SYMBOL(pci_map_sg); +EXPORT_SYMBOL(pci_unmap_sg); +EXPORT_SYMBOL(pci_dma_supported); +EXPORT_SYMBOL(pci_dac_dma_supported); +EXPORT_SYMBOL(pci_dac_page_to_dma); +EXPORT_SYMBOL(pci_dac_dma_to_page); +EXPORT_SYMBOL(pci_dac_dma_to_offset); +EXPORT_SYMBOL(alpha_gendev_to_pci); +#endif +EXPORT_SYMBOL(dma_set_mask); + +EXPORT_SYMBOL(dump_thread); +EXPORT_SYMBOL(dump_elf_thread); +EXPORT_SYMBOL(dump_elf_task); +EXPORT_SYMBOL(dump_elf_task_fp); +EXPORT_SYMBOL(hwrpb); +EXPORT_SYMBOL(start_thread); EXPORT_SYMBOL(alpha_read_fp_reg); EXPORT_SYMBOL(alpha_read_fp_reg_s); EXPORT_SYMBOL(alpha_write_fp_reg); EXPORT_SYMBOL(alpha_write_fp_reg_s); -/* entry.S */ +/* In-kernel system calls. */ EXPORT_SYMBOL(kernel_thread); +EXPORT_SYMBOL(sys_dup); +EXPORT_SYMBOL(sys_exit); +EXPORT_SYMBOL(sys_write); +EXPORT_SYMBOL(sys_lseek); EXPORT_SYMBOL(kernel_execve); +EXPORT_SYMBOL(sys_setsid); +EXPORT_SYMBOL(sys_wait4); /* Networking helper routines. */ EXPORT_SYMBOL(csum_tcpudp_magic); @@ -69,6 +134,10 @@ EXPORT_SYMBOL(alpha_fp_emul_imprecise); EXPORT_SYMBOL(alpha_fp_emul); #endif +#ifdef CONFIG_ALPHA_BROKEN_IRQ_MASK +EXPORT_SYMBOL(__min_ipl); +#endif + /* * The following are specially called from the uaccess assembly stubs. */ @@ -91,9 +160,26 @@ EXPORT_SYMBOL(up); */ #ifdef CONFIG_SMP +EXPORT_SYMBOL(flush_tlb_mm); +EXPORT_SYMBOL(flush_tlb_range); +EXPORT_SYMBOL(flush_tlb_page); +EXPORT_SYMBOL(smp_imb); +EXPORT_SYMBOL(cpu_data); +EXPORT_SYMBOL(smp_num_cpus); +EXPORT_SYMBOL(smp_call_function); +EXPORT_SYMBOL(smp_call_function_on_cpu); EXPORT_SYMBOL(_atomic_dec_and_lock); #endif /* CONFIG_SMP */ +/* + * NUMA specific symbols + */ +#ifdef CONFIG_DISCONTIGMEM +EXPORT_SYMBOL(node_data); +#endif /* CONFIG_DISCONTIGMEM */ + +EXPORT_SYMBOL(rtc_lock); + /* * The following are special because they're not called * explicitly (the C compiler or assembler generates them in @@ -114,3 +200,8 @@ EXPORT_SYMBOL(__remqu); EXPORT_SYMBOL(memcpy); EXPORT_SYMBOL(memset); EXPORT_SYMBOL(memchr); + +#ifdef CONFIG_ALPHA_IRONGATE +EXPORT_SYMBOL(irongate_ioremap); +EXPORT_SYMBOL(irongate_iounmap); +#endif diff --git a/trunk/arch/alpha/kernel/core_irongate.c b/trunk/arch/alpha/kernel/core_irongate.c index e4a0bcf1d28b..138d497d1cca 100644 --- a/trunk/arch/alpha/kernel/core_irongate.c +++ b/trunk/arch/alpha/kernel/core_irongate.c @@ -404,7 +404,6 @@ irongate_ioremap(unsigned long addr, unsigned long size) #endif return (void __iomem *)vaddr; } -EXPORT_SYMBOL(irongate_ioremap); void irongate_iounmap(volatile void __iomem *xaddr) @@ -415,4 +414,3 @@ irongate_iounmap(volatile void __iomem *xaddr) if (addr) return vfree((void *)(PAGE_MASK & addr)); } -EXPORT_SYMBOL(irongate_iounmap); diff --git a/trunk/arch/alpha/kernel/irq_alpha.c b/trunk/arch/alpha/kernel/irq_alpha.c index e16aeb6e79ef..6dd126b8be85 100644 --- a/trunk/arch/alpha/kernel/irq_alpha.c +++ b/trunk/arch/alpha/kernel/irq_alpha.c @@ -6,7 +6,6 @@ #include #include #include -#include #include #include @@ -17,7 +16,6 @@ /* Hack minimum IPL during interrupt processing for broken hardware. */ #ifdef CONFIG_ALPHA_BROKEN_IRQ_MASK int __min_ipl; -EXPORT_SYMBOL(__min_ipl); #endif /* @@ -32,7 +30,6 @@ dummy_perf(unsigned long vector, struct pt_regs *regs) } void (*perf_irq)(unsigned long, struct pt_regs *) = dummy_perf; -EXPORT_SYMBOL(perf_irq); /* * The main interrupt entry point. diff --git a/trunk/arch/alpha/kernel/pci-noop.c b/trunk/arch/alpha/kernel/pci-noop.c index 174b729c504b..fff5cf93e816 100644 --- a/trunk/arch/alpha/kernel/pci-noop.c +++ b/trunk/arch/alpha/kernel/pci-noop.c @@ -201,7 +201,6 @@ dma_set_mask(struct device *dev, u64 mask) return 0; } -EXPORT_SYMBOL(dma_set_mask); void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen) { diff --git a/trunk/arch/alpha/kernel/pci_iommu.c b/trunk/arch/alpha/kernel/pci_iommu.c index 6e7d1fe6e935..c468e312e5f8 100644 --- a/trunk/arch/alpha/kernel/pci_iommu.c +++ b/trunk/arch/alpha/kernel/pci_iommu.c @@ -300,7 +300,6 @@ pci_map_single(struct pci_dev *pdev, void *cpu_addr, size_t size, int dir) dac_allowed = pdev ? pci_dac_dma_supported(pdev, pdev->dma_mask) : 0; return pci_map_single_1(pdev, cpu_addr, size, dac_allowed); } -EXPORT_SYMBOL(pci_map_single); dma_addr_t pci_map_page(struct pci_dev *pdev, struct page *page, unsigned long offset, @@ -315,7 +314,6 @@ pci_map_page(struct pci_dev *pdev, struct page *page, unsigned long offset, return pci_map_single_1(pdev, (char *)page_address(page) + offset, size, dac_allowed); } -EXPORT_SYMBOL(pci_map_page); /* Unmap a single streaming mode DMA translation. The DMA_ADDR and SIZE must match what was provided for in a previous pci_map_single @@ -381,7 +379,6 @@ pci_unmap_single(struct pci_dev *pdev, dma_addr_t dma_addr, size_t size, DBGA2("pci_unmap_single: sg [%lx,%lx] np %ld from %p\n", dma_addr, size, npages, __builtin_return_address(0)); } -EXPORT_SYMBOL(pci_unmap_single); void pci_unmap_page(struct pci_dev *pdev, dma_addr_t dma_addr, @@ -389,7 +386,6 @@ pci_unmap_page(struct pci_dev *pdev, dma_addr_t dma_addr, { pci_unmap_single(pdev, dma_addr, size, direction); } -EXPORT_SYMBOL(pci_unmap_page); /* Allocate and map kernel buffer using consistent mode DMA for PCI device. Returns non-NULL cpu-view pointer to the buffer if @@ -431,7 +427,6 @@ pci_alloc_consistent(struct pci_dev *pdev, size_t size, dma_addr_t *dma_addrp) return cpu_addr; } -EXPORT_SYMBOL(pci_alloc_consistent); /* Free and unmap a consistent DMA buffer. CPU_ADDR and DMA_ADDR must be values that were returned from pci_alloc_consistent. SIZE must @@ -449,7 +444,7 @@ pci_free_consistent(struct pci_dev *pdev, size_t size, void *cpu_addr, DBGA2("pci_free_consistent: [%x,%lx] from %p\n", dma_addr, size, __builtin_return_address(0)); } -EXPORT_SYMBOL(pci_free_consistent); + /* Classify the elements of the scatterlist. Write dma_address of each element with: @@ -677,7 +672,6 @@ pci_map_sg(struct pci_dev *pdev, struct scatterlist *sg, int nents, pci_unmap_sg(pdev, start, out - start, direction); return 0; } -EXPORT_SYMBOL(pci_map_sg); /* Unmap a set of streaming mode DMA translations. Again, cpu read rules concerning calls here are the same as for pci_unmap_single() @@ -758,7 +752,6 @@ pci_unmap_sg(struct pci_dev *pdev, struct scatterlist *sg, int nents, DBGA("pci_unmap_sg: %ld entries\n", nents - (end - sg)); } -EXPORT_SYMBOL(pci_unmap_sg); /* Return whether the given PCI device DMA address mask can be @@ -793,7 +786,6 @@ pci_dma_supported(struct pci_dev *pdev, u64 mask) return 0; } -EXPORT_SYMBOL(pci_dma_supported); /* @@ -916,7 +908,6 @@ pci_dac_dma_supported(struct pci_dev *dev, u64 mask) return ok; } -EXPORT_SYMBOL(pci_dac_dma_supported); dma64_addr_t pci_dac_page_to_dma(struct pci_dev *pdev, struct page *page, @@ -926,7 +917,6 @@ pci_dac_page_to_dma(struct pci_dev *pdev, struct page *page, + __pa(page_address(page)) + (dma64_addr_t) offset); } -EXPORT_SYMBOL(pci_dac_page_to_dma); struct page * pci_dac_dma_to_page(struct pci_dev *pdev, dma64_addr_t dma_addr) @@ -934,14 +924,13 @@ pci_dac_dma_to_page(struct pci_dev *pdev, dma64_addr_t dma_addr) unsigned long paddr = (dma_addr & PAGE_MASK) - alpha_mv.pci_dac_offset; return virt_to_page(__va(paddr)); } -EXPORT_SYMBOL(pci_dac_dma_to_page); unsigned long pci_dac_dma_to_offset(struct pci_dev *pdev, dma64_addr_t dma_addr) { return (dma_addr & ~PAGE_MASK); } -EXPORT_SYMBOL(pci_dac_dma_to_offset); + /* Helper for generic DMA-mapping functions. */ @@ -968,7 +957,6 @@ alpha_gendev_to_pci(struct device *dev) /* This assumes ISA bus master with dma_mask 0xffffff. */ return NULL; } -EXPORT_SYMBOL(alpha_gendev_to_pci); int dma_set_mask(struct device *dev, u64 mask) @@ -981,4 +969,3 @@ dma_set_mask(struct device *dev, u64 mask) return 0; } -EXPORT_SYMBOL(dma_set_mask); diff --git a/trunk/arch/alpha/kernel/process.c b/trunk/arch/alpha/kernel/process.c index 3370e6faeae0..b3a8a2980365 100644 --- a/trunk/arch/alpha/kernel/process.c +++ b/trunk/arch/alpha/kernel/process.c @@ -205,7 +205,6 @@ start_thread(struct pt_regs * regs, unsigned long pc, unsigned long sp) regs->ps = 8; wrusp(sp); } -EXPORT_SYMBOL(start_thread); /* * Free current thread data structures etc.. @@ -377,7 +376,6 @@ dump_thread(struct pt_regs * pt, struct user * dump) dump->regs[EF_A2] = pt->r18; memcpy((char *)dump->regs + EF_SIZE, sw->fp, 32 * 8); } -EXPORT_SYMBOL(dump_thread); /* * Fill in the user structure for a ELF core dump. @@ -426,7 +424,6 @@ dump_elf_thread(elf_greg_t *dest, struct pt_regs *pt, struct thread_info *ti) useful value of the thread's UNIQUE field. */ dest[32] = ti->pcb.unique; } -EXPORT_SYMBOL(dump_elf_thread); int dump_elf_task(elf_greg_t *dest, struct task_struct *task) @@ -434,7 +431,6 @@ dump_elf_task(elf_greg_t *dest, struct task_struct *task) dump_elf_thread(dest, task_pt_regs(task), task_thread_info(task)); return 1; } -EXPORT_SYMBOL(dump_elf_task); int dump_elf_task_fp(elf_fpreg_t *dest, struct task_struct *task) @@ -443,7 +439,6 @@ dump_elf_task_fp(elf_fpreg_t *dest, struct task_struct *task) memcpy(dest, sw->fp, 32 * 8); return 1; } -EXPORT_SYMBOL(dump_elf_task_fp); /* * sys_execve() executes a new program. diff --git a/trunk/arch/alpha/kernel/setup.c b/trunk/arch/alpha/kernel/setup.c index 1aea7c7c683c..a94e6d93e2ee 100644 --- a/trunk/arch/alpha/kernel/setup.c +++ b/trunk/arch/alpha/kernel/setup.c @@ -66,7 +66,6 @@ static struct notifier_block alpha_panic_block = { struct hwrpb_struct *hwrpb; -EXPORT_SYMBOL(hwrpb); unsigned long srm_hae; int alpha_l1i_cacheshape; @@ -112,7 +111,6 @@ unsigned long alpha_agpgart_size = DEFAULT_AGP_APER_SIZE; #ifdef CONFIG_ALPHA_GENERIC struct alpha_machine_vector alpha_mv; int alpha_using_srm; -EXPORT_SYMBOL(alpha_using_srm); #endif static struct alpha_machine_vector *get_sysvec(unsigned long, unsigned long, @@ -139,8 +137,6 @@ struct screen_info screen_info = { .orig_video_points = 16 }; -EXPORT_SYMBOL(screen_info); - /* * The direct map I/O window, if any. This should be the same * for all busses, since it's used by virt_to_bus. @@ -148,8 +144,6 @@ EXPORT_SYMBOL(screen_info); unsigned long __direct_map_base; unsigned long __direct_map_size; -EXPORT_SYMBOL(__direct_map_base); -EXPORT_SYMBOL(__direct_map_size); /* * Declare all of the machine vectors. diff --git a/trunk/arch/alpha/kernel/smp.c b/trunk/arch/alpha/kernel/smp.c index d1ec4f51df1a..596780e2c7da 100644 --- a/trunk/arch/alpha/kernel/smp.c +++ b/trunk/arch/alpha/kernel/smp.c @@ -52,7 +52,6 @@ /* A collection of per-processor data. */ struct cpuinfo_alpha cpu_data[NR_CPUS]; -EXPORT_SYMBOL(cpu_data); /* A collection of single bit ipi messages. */ static struct { @@ -75,7 +74,6 @@ EXPORT_SYMBOL(cpu_online_map); int smp_num_probed; /* Internal processor count */ int smp_num_cpus = 1; /* Number that came online. */ -EXPORT_SYMBOL(smp_num_cpus); extern void calibrate_delay(void); @@ -792,7 +790,6 @@ smp_call_function_on_cpu (void (*func) (void *info), void *info, int retry, return 0; } -EXPORT_SYMBOL(smp_call_function_on_cpu); int smp_call_function (void (*func) (void *info), void *info, int retry, int wait) @@ -800,7 +797,6 @@ smp_call_function (void (*func) (void *info), void *info, int retry, int wait) return smp_call_function_on_cpu (func, info, retry, wait, cpu_online_map); } -EXPORT_SYMBOL(smp_call_function); static void ipi_imb(void *ignored) @@ -815,7 +811,6 @@ smp_imb(void) if (on_each_cpu(ipi_imb, NULL, 1, 1)) printk(KERN_CRIT "smp_imb: timed out\n"); } -EXPORT_SYMBOL(smp_imb); static void ipi_flush_tlb_all(void *ignored) @@ -871,7 +866,6 @@ flush_tlb_mm(struct mm_struct *mm) preempt_enable(); } -EXPORT_SYMBOL(flush_tlb_mm); struct flush_tlb_page_struct { struct vm_area_struct *vma; @@ -924,7 +918,6 @@ flush_tlb_page(struct vm_area_struct *vma, unsigned long addr) preempt_enable(); } -EXPORT_SYMBOL(flush_tlb_page); void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) @@ -932,7 +925,6 @@ flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long e /* On the Alpha we always flush the whole user tlb. */ flush_tlb_mm(vma->vm_mm); } -EXPORT_SYMBOL(flush_tlb_range); static void ipi_flush_icache_page(void *x) diff --git a/trunk/arch/alpha/kernel/time.c b/trunk/arch/alpha/kernel/time.c index d7053eb4ffcf..cf0666523989 100644 --- a/trunk/arch/alpha/kernel/time.c +++ b/trunk/arch/alpha/kernel/time.c @@ -57,7 +57,6 @@ static int set_rtc_mmss(unsigned long); DEFINE_SPINLOCK(rtc_lock); -EXPORT_SYMBOL(rtc_lock); #define TICK_SIZE (tick_nsec / 1000) diff --git a/trunk/arch/alpha/mm/numa.c b/trunk/arch/alpha/mm/numa.c index e3e3806a6f25..b826f58c6e72 100644 --- a/trunk/arch/alpha/mm/numa.c +++ b/trunk/arch/alpha/mm/numa.c @@ -13,14 +13,12 @@ #include #include #include -#include #include #include pg_data_t node_data[MAX_NUMNODES]; bootmem_data_t node_bdata[MAX_NUMNODES]; -EXPORT_SYMBOL(node_data); #undef DEBUG_DISCONTIG #ifdef DEBUG_DISCONTIG diff --git a/trunk/arch/arm/kernel/armksyms.c b/trunk/arch/arm/kernel/armksyms.c index 4779f474f911..da69e660574b 100644 --- a/trunk/arch/arm/kernel/armksyms.c +++ b/trunk/arch/arm/kernel/armksyms.c @@ -178,3 +178,9 @@ EXPORT_SYMBOL(_find_next_zero_bit_be); EXPORT_SYMBOL(_find_first_bit_be); EXPORT_SYMBOL(_find_next_bit_be); #endif + + /* syscalls */ +EXPORT_SYMBOL(sys_write); +EXPORT_SYMBOL(sys_lseek); +EXPORT_SYMBOL(sys_exit); +EXPORT_SYMBOL(sys_wait4); diff --git a/trunk/arch/arm/mach-versatile/core.c b/trunk/arch/arm/mach-versatile/core.c index 3b8576111c16..2aa150b57ba1 100644 --- a/trunk/arch/arm/mach-versatile/core.c +++ b/trunk/arch/arm/mach-versatile/core.c @@ -188,12 +188,12 @@ static struct map_desc versatile_io_desc[] __initdata = { .length = SZ_4K, .type = MT_DEVICE }, { - .virtual = (unsigned long)VERSATILE_PCI_VIRT_BASE, + .virtual = VERSATILE_PCI_VIRT_BASE, .pfn = __phys_to_pfn(VERSATILE_PCI_BASE), .length = VERSATILE_PCI_BASE_SIZE, .type = MT_DEVICE }, { - .virtual = (unsigned long)VERSATILE_PCI_CFG_VIRT_BASE, + .virtual = VERSATILE_PCI_CFG_VIRT_BASE, .pfn = __phys_to_pfn(VERSATILE_PCI_CFG_BASE), .length = VERSATILE_PCI_CFG_BASE_SIZE, .type = MT_DEVICE diff --git a/trunk/arch/arm/mach-versatile/pci.c b/trunk/arch/arm/mach-versatile/pci.c index 5cd0b5d9e7eb..13bbd08ff841 100644 --- a/trunk/arch/arm/mach-versatile/pci.c +++ b/trunk/arch/arm/mach-versatile/pci.c @@ -40,15 +40,14 @@ * Cfg 42000000 - 42FFFFFF PCI config * */ -#define __IO_ADDRESS(n) ((void __iomem *)(unsigned long)IO_ADDRESS(n)) -#define SYS_PCICTL __IO_ADDRESS(VERSATILE_SYS_PCICTL) -#define PCI_IMAP0 __IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0x0) -#define PCI_IMAP1 __IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0x4) -#define PCI_IMAP2 __IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0x8) -#define PCI_SMAP0 __IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0x10) -#define PCI_SMAP1 __IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0x14) -#define PCI_SMAP2 __IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0x18) -#define PCI_SELFID __IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0xc) +#define SYS_PCICTL IO_ADDRESS(VERSATILE_SYS_PCICTL) +#define PCI_IMAP0 IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0x0) +#define PCI_IMAP1 IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0x4) +#define PCI_IMAP2 IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0x8) +#define PCI_SMAP0 IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0x10) +#define PCI_SMAP1 IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0x14) +#define PCI_SMAP2 IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0x18) +#define PCI_SELFID IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0xc) #define DEVICE_ID_OFFSET 0x00 #define CSR_OFFSET 0x04 @@ -77,7 +76,7 @@ static int __init versatile_pci_slot_ignore(char *str) __setup("pci_slot_ignore=", versatile_pci_slot_ignore); -static void __iomem *__pci_addr(struct pci_bus *bus, +static unsigned long __pci_addr(struct pci_bus *bus, unsigned int devfn, int offset) { unsigned int busnr = bus->number; @@ -92,14 +91,14 @@ static void __iomem *__pci_addr(struct pci_bus *bus, if (devfn > 255) BUG(); - return VERSATILE_PCI_CFG_VIRT_BASE + ((busnr << 16) | + return (VERSATILE_PCI_CFG_VIRT_BASE | (busnr << 16) | (PCI_SLOT(devfn) << 11) | (PCI_FUNC(devfn) << 8) | offset); } static int versatile_read_config(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 *val) { - void __iomem *addr = __pci_addr(bus, devfn, where & ~3); + unsigned long addr = __pci_addr(bus, devfn, where); u32 v; int slot = PCI_SLOT(devfn); @@ -122,12 +121,13 @@ static int versatile_read_config(struct pci_bus *bus, unsigned int devfn, int wh break; case 2: - v = __raw_readl(addr); - if (where & 2) v >>= 16; + v = __raw_readl(addr & ~3); + if (addr & 2) v >>= 16; v &= 0xffff; break; default: + addr &= ~3; v = __raw_readl(addr); break; } @@ -140,7 +140,7 @@ static int versatile_read_config(struct pci_bus *bus, unsigned int devfn, int wh static int versatile_write_config(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 val) { - void __iomem *addr = __pci_addr(bus, devfn, where); + unsigned long addr = __pci_addr(bus, devfn, where); int slot = PCI_SLOT(devfn); if (pci_slot_ignore & (1 << slot)) { @@ -279,7 +279,7 @@ int __init pci_versatile_setup(int nr, struct pci_sys_data *sys) printk("PCI core found (slot %d)\n",myslot); __raw_writel(myslot, PCI_SELFID); - local_pci_cfg_base = VERSATILE_PCI_CFG_VIRT_BASE + (myslot << 11); + local_pci_cfg_base = (void *) VERSATILE_PCI_CFG_VIRT_BASE + (myslot << 11); val = __raw_readl(local_pci_cfg_base + CSR_OFFSET); val |= PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER | PCI_COMMAND_INVALIDATE; diff --git a/trunk/arch/arm/vfp/vfpmodule.c b/trunk/arch/arm/vfp/vfpmodule.c index a657a28f08db..dedbb449632e 100644 --- a/trunk/arch/arm/vfp/vfpmodule.c +++ b/trunk/arch/arm/vfp/vfpmodule.c @@ -90,7 +90,7 @@ void vfp_raise_sigfpe(unsigned int sicode, struct pt_regs *regs) info.si_signo = SIGFPE; info.si_code = sicode; - info.si_addr = (void __user *)(instruction_pointer(regs) - 4); + info.si_addr = (void *)(instruction_pointer(regs) - 4); /* * This is the same as NWFPE, because it's not clear what diff --git a/trunk/arch/arm26/kernel/armksyms.c b/trunk/arch/arm26/kernel/armksyms.c index 93293d04b303..07907b6ecb63 100644 --- a/trunk/arch/arm26/kernel/armksyms.c +++ b/trunk/arch/arm26/kernel/armksyms.c @@ -202,6 +202,14 @@ EXPORT_SYMBOL(_find_next_zero_bit_le); EXPORT_SYMBOL(elf_platform); EXPORT_SYMBOL(elf_hwcap); + /* syscalls */ +EXPORT_SYMBOL(sys_write); +EXPORT_SYMBOL(sys_read); +EXPORT_SYMBOL(sys_lseek); +EXPORT_SYMBOL(sys_open); +EXPORT_SYMBOL(sys_exit); +EXPORT_SYMBOL(sys_wait4); + #ifdef CONFIG_PREEMPT EXPORT_SYMBOL(kernel_flag); #endif diff --git a/trunk/arch/avr32/kernel/time.c b/trunk/arch/avr32/kernel/time.c index 5a247ba71a72..3e56b9f4358a 100644 --- a/trunk/arch/avr32/kernel/time.c +++ b/trunk/arch/avr32/kernel/time.c @@ -124,15 +124,15 @@ unsigned long long sched_clock(void) * * In UP mode, it is invoked from the (global) timer_interrupt. */ -static void local_timer_interrupt(int irq, void *dev_id) +static void local_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) { if (current->pid) - profile_tick(CPU_PROFILING); - update_process_times(user_mode(get_irq_regs())); + profile_tick(CPU_PROFILING, regs); + update_process_times(user_mode(regs)); } static irqreturn_t -timer_interrupt(int irq, void *dev_id) +timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) { unsigned int count; @@ -157,7 +157,7 @@ timer_interrupt(int irq, void *dev_id) * * SMP is not supported yet. */ - local_timer_interrupt(irq, dev_id); + local_timer_interrupt(irq, dev_id, regs); return IRQ_HANDLED; } diff --git a/trunk/arch/avr32/mach-at32ap/extint.c b/trunk/arch/avr32/mach-at32ap/extint.c index 4dff1f988900..7da9c5f7a0eb 100644 --- a/trunk/arch/avr32/mach-at32ap/extint.c +++ b/trunk/arch/avr32/mach-at32ap/extint.c @@ -102,7 +102,8 @@ struct irq_chip eim_chip = { .set_type = eim_set_irq_type, }; -static void demux_eim_irq(unsigned int irq, struct irq_desc *desc) +static void demux_eim_irq(unsigned int irq, struct irq_desc *desc, + struct pt_regs *regs) { struct at32_sm *sm = desc->handler_data; struct irq_desc *ext_desc; @@ -120,7 +121,7 @@ static void demux_eim_irq(unsigned int irq, struct irq_desc *desc) ext_irq = i + sm->eim_first_irq; ext_desc = irq_desc + ext_irq; - ext_desc->handle_irq(ext_irq, ext_desc); + ext_desc->handle_irq(ext_irq, ext_desc, regs); } spin_unlock(&sm->lock); diff --git a/trunk/arch/avr32/mach-at32ap/intc.c b/trunk/arch/avr32/mach-at32ap/intc.c index eb87a18ad7b2..74f8c9f2f03d 100644 --- a/trunk/arch/avr32/mach-at32ap/intc.c +++ b/trunk/arch/avr32/mach-at32ap/intc.c @@ -52,19 +52,16 @@ static struct intc intc0 = { asmlinkage void do_IRQ(int level, struct pt_regs *regs) { struct irq_desc *desc; - struct pt_regs *old_regs; unsigned int irq; unsigned long status_reg; local_irq_disable(); - old_regs = set_irq_regs(regs); - irq_enter(); irq = intc_readl(&intc0, INTCAUSE0 - 4 * level); desc = irq_desc + irq; - desc->handle_irq(irq, desc); + desc->handle_irq(irq, desc, regs); /* * Clear all interrupt level masks so that we may handle @@ -78,8 +75,6 @@ asmlinkage void do_IRQ(int level, struct pt_regs *regs) sysreg_write(SR, status_reg); irq_exit(); - - set_irq_regs(old_regs); } void __init init_IRQ(void) diff --git a/trunk/arch/i386/Kconfig.cpu b/trunk/arch/i386/Kconfig.cpu index fc4f2abccf06..21c9a4e71104 100644 --- a/trunk/arch/i386/Kconfig.cpu +++ b/trunk/arch/i386/Kconfig.cpu @@ -7,7 +7,6 @@ choice config M386 bool "386" - depends on !UML ---help--- This is the processor type of your CPU. This information is used for optimizing purposes. In order to compile a kernel that can run on @@ -302,7 +301,7 @@ config X86_USE_PPRO_CHECKSUM config X86_USE_3DNOW bool - depends on (MCYRIXIII || MK7 || MGEODE_LX) && !UML + depends on MCYRIXIII || MK7 || MGEODE_LX default y config X86_OOSTORE diff --git a/trunk/arch/i386/kernel/acpi/boot.c b/trunk/arch/i386/kernel/acpi/boot.c index ab974ff97073..92f79cdd9a48 100644 --- a/trunk/arch/i386/kernel/acpi/boot.c +++ b/trunk/arch/i386/kernel/acpi/boot.c @@ -332,7 +332,7 @@ acpi_parse_ioapic(acpi_table_entry_header * header, const unsigned long end) /* * Parse Interrupt Source Override for the ACPI SCI */ -static void acpi_sci_ioapic_setup(u32 bus_irq, u32 gsi, u16 polarity, u16 trigger) +static void acpi_sci_ioapic_setup(u32 gsi, u16 polarity, u16 trigger) { if (trigger == 0) /* compatible SCI trigger is level */ trigger = 3; @@ -352,13 +352,13 @@ static void acpi_sci_ioapic_setup(u32 bus_irq, u32 gsi, u16 polarity, u16 trigge * If GSI is < 16, this will update its flags, * else it will create a new mp_irqs[] entry. */ - mp_override_legacy_irq(bus_irq, polarity, trigger, gsi); + mp_override_legacy_irq(gsi, polarity, trigger, gsi); /* * stash over-ride to indicate we've been here * and for later update of acpi_fadt */ - acpi_sci_override_gsi = bus_irq; + acpi_sci_override_gsi = gsi; return; } @@ -376,7 +376,7 @@ acpi_parse_int_src_ovr(acpi_table_entry_header * header, acpi_table_print_madt_entry(header); if (intsrc->bus_irq == acpi_fadt.sci_int) { - acpi_sci_ioapic_setup(intsrc->bus_irq, intsrc->global_irq, + acpi_sci_ioapic_setup(intsrc->global_irq, intsrc->flags.polarity, intsrc->flags.trigger); return 0; @@ -879,7 +879,7 @@ static int __init acpi_parse_madt_ioapic_entries(void) * pretend we got one so we can set the SCI flags. */ if (!acpi_sci_override_gsi) - acpi_sci_ioapic_setup(acpi_fadt.sci_int, acpi_fadt.sci_int, 0, 0); + acpi_sci_ioapic_setup(acpi_fadt.sci_int, 0, 0); /* Fill in identity legacy mapings where no override */ mp_config_acpi_legacy_irqs(); diff --git a/trunk/arch/i386/kernel/acpi/cstate.c b/trunk/arch/i386/kernel/acpi/cstate.c index 20563e52c622..25db49ef1770 100644 --- a/trunk/arch/i386/kernel/acpi/cstate.c +++ b/trunk/arch/i386/kernel/acpi/cstate.c @@ -10,7 +10,6 @@ #include #include #include -#include #include #include @@ -42,124 +41,5 @@ void acpi_processor_power_init_bm_check(struct acpi_processor_flags *flags, flags->bm_check = 1; } } -EXPORT_SYMBOL(acpi_processor_power_init_bm_check); - -/* The code below handles cstate entry with monitor-mwait pair on Intel*/ - -struct cstate_entry_s { - struct { - unsigned int eax; - unsigned int ecx; - } states[ACPI_PROCESSOR_MAX_POWER]; -}; -static struct cstate_entry_s *cpu_cstate_entry; /* per CPU ptr */ - -static short mwait_supported[ACPI_PROCESSOR_MAX_POWER]; - -#define MWAIT_SUBSTATE_MASK (0xf) -#define MWAIT_SUBSTATE_SIZE (4) - -#define CPUID_MWAIT_LEAF (5) -#define CPUID5_ECX_EXTENSIONS_SUPPORTED (0x1) -#define CPUID5_ECX_INTERRUPT_BREAK (0x2) - -#define MWAIT_ECX_INTERRUPT_BREAK (0x1) - -#define NATIVE_CSTATE_BEYOND_HALT (2) - -int acpi_processor_ffh_cstate_probe(unsigned int cpu, - struct acpi_processor_cx *cx, struct acpi_power_register *reg) -{ - struct cstate_entry_s *percpu_entry; - struct cpuinfo_x86 *c = cpu_data + cpu; - - cpumask_t saved_mask; - int retval; - unsigned int eax, ebx, ecx, edx; - unsigned int edx_part; - unsigned int cstate_type; /* C-state type and not ACPI C-state type */ - unsigned int num_cstate_subtype; - - if (!cpu_cstate_entry || c->cpuid_level < CPUID_MWAIT_LEAF ) - return -1; - - if (reg->bit_offset != NATIVE_CSTATE_BEYOND_HALT) - return -1; - - percpu_entry = per_cpu_ptr(cpu_cstate_entry, cpu); - percpu_entry->states[cx->index].eax = 0; - percpu_entry->states[cx->index].ecx = 0; - - /* Make sure we are running on right CPU */ - saved_mask = current->cpus_allowed; - retval = set_cpus_allowed(current, cpumask_of_cpu(cpu)); - if (retval) - return -1; - - cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &edx); - - /* Check whether this particular cx_type (in CST) is supported or not */ - cstate_type = (cx->address >> MWAIT_SUBSTATE_SIZE) + 1; - edx_part = edx >> (cstate_type * MWAIT_SUBSTATE_SIZE); - num_cstate_subtype = edx_part & MWAIT_SUBSTATE_MASK; - - retval = 0; - if (num_cstate_subtype < (cx->address & MWAIT_SUBSTATE_MASK)) { - retval = -1; - goto out; - } - /* mwait ecx extensions INTERRUPT_BREAK should be supported for C2/C3 */ - if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) || - !(ecx & CPUID5_ECX_INTERRUPT_BREAK)) { - retval = -1; - goto out; - } - percpu_entry->states[cx->index].ecx = MWAIT_ECX_INTERRUPT_BREAK; - - /* Use the hint in CST */ - percpu_entry->states[cx->index].eax = cx->address; - - if (!mwait_supported[cstate_type]) { - mwait_supported[cstate_type] = 1; - printk(KERN_DEBUG "Monitor-Mwait will be used to enter C-%d " - "state\n", cx->type); - } - -out: - set_cpus_allowed(current, saved_mask); - return retval; -} -EXPORT_SYMBOL_GPL(acpi_processor_ffh_cstate_probe); - -void acpi_processor_ffh_cstate_enter(struct acpi_processor_cx *cx) -{ - unsigned int cpu = smp_processor_id(); - struct cstate_entry_s *percpu_entry; - - percpu_entry = per_cpu_ptr(cpu_cstate_entry, cpu); - mwait_idle_with_hints(percpu_entry->states[cx->index].eax, - percpu_entry->states[cx->index].ecx); -} -EXPORT_SYMBOL_GPL(acpi_processor_ffh_cstate_enter); - -static int __init ffh_cstate_init(void) -{ - struct cpuinfo_x86 *c = &boot_cpu_data; - if (c->x86_vendor != X86_VENDOR_INTEL) - return -1; - - cpu_cstate_entry = alloc_percpu(struct cstate_entry_s); - return 0; -} - -static void __exit ffh_cstate_exit(void) -{ - if (cpu_cstate_entry) { - free_percpu(cpu_cstate_entry); - cpu_cstate_entry = NULL; - } -} - -arch_initcall(ffh_cstate_init); -__exitcall(ffh_cstate_exit); +EXPORT_SYMBOL(acpi_processor_power_init_bm_check); diff --git a/trunk/arch/i386/kernel/cpu/cpufreq/Kconfig b/trunk/arch/i386/kernel/cpu/cpufreq/Kconfig index 5299c5bf4454..ccc1edff5c97 100644 --- a/trunk/arch/i386/kernel/cpu/cpufreq/Kconfig +++ b/trunk/arch/i386/kernel/cpu/cpufreq/Kconfig @@ -17,7 +17,6 @@ config X86_ACPI_CPUFREQ help This driver adds a CPUFreq driver which utilizes the ACPI Processor Performance States. - This driver also supports Intel Enhanced Speedstep. For details, take a look at . @@ -122,14 +121,11 @@ config X86_SPEEDSTEP_CENTRINO If in doubt, say N. config X86_SPEEDSTEP_CENTRINO_ACPI - bool "Use ACPI tables to decode valid frequency/voltage (deprecated)" + bool "Use ACPI tables to decode valid frequency/voltage pairs" depends on X86_SPEEDSTEP_CENTRINO && ACPI_PROCESSOR depends on !(X86_SPEEDSTEP_CENTRINO = y && ACPI_PROCESSOR = m) default y help - This is deprecated and this functionality is now merged into - acpi_cpufreq (X86_ACPI_CPUFREQ). Use that driver instead of - speedstep_centrino. Use primarily the information provided in the BIOS ACPI tables to determine valid CPU frequency and voltage pairings. It is required for the driver to work on non-Banias CPUs. diff --git a/trunk/arch/i386/kernel/cpu/cpufreq/Makefile b/trunk/arch/i386/kernel/cpu/cpufreq/Makefile index 8de3abe322a9..2e894f1c8910 100644 --- a/trunk/arch/i386/kernel/cpu/cpufreq/Makefile +++ b/trunk/arch/i386/kernel/cpu/cpufreq/Makefile @@ -7,9 +7,9 @@ obj-$(CONFIG_SC520_CPUFREQ) += sc520_freq.o obj-$(CONFIG_X86_LONGRUN) += longrun.o obj-$(CONFIG_X86_GX_SUSPMOD) += gx-suspmod.o obj-$(CONFIG_X86_SPEEDSTEP_ICH) += speedstep-ich.o +obj-$(CONFIG_X86_SPEEDSTEP_CENTRINO) += speedstep-centrino.o obj-$(CONFIG_X86_SPEEDSTEP_LIB) += speedstep-lib.o obj-$(CONFIG_X86_SPEEDSTEP_SMI) += speedstep-smi.o obj-$(CONFIG_X86_ACPI_CPUFREQ) += acpi-cpufreq.o -obj-$(CONFIG_X86_SPEEDSTEP_CENTRINO) += speedstep-centrino.o obj-$(CONFIG_X86_P4_CLOCKMOD) += p4-clockmod.o obj-$(CONFIG_X86_CPUFREQ_NFORCE2) += cpufreq-nforce2.o diff --git a/trunk/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c b/trunk/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c index 18f4715c655d..57c880bf0bd6 100644 --- a/trunk/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/trunk/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -1,10 +1,9 @@ /* - * acpi-cpufreq.c - ACPI Processor P-States Driver ($Revision: 1.4 $) + * acpi-cpufreq.c - ACPI Processor P-States Driver ($Revision: 1.3 $) * * Copyright (C) 2001, 2002 Andy Grover * Copyright (C) 2001, 2002 Paul Diefenbaugh * Copyright (C) 2002 - 2004 Dominik Brodowski - * Copyright (C) 2006 Denis Sadykov * * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * @@ -28,387 +27,202 @@ #include #include #include -#include -#include #include +#include +#include #include +#include /* current */ #include - -#include -#include - #include -#include -#include -#include #include #include +#include +#include + #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "acpi-cpufreq", msg) MODULE_AUTHOR("Paul Diefenbaugh, Dominik Brodowski"); MODULE_DESCRIPTION("ACPI Processor P-States Driver"); MODULE_LICENSE("GPL"); -enum { - UNDEFINED_CAPABLE = 0, - SYSTEM_INTEL_MSR_CAPABLE, - SYSTEM_IO_CAPABLE, -}; - -#define INTEL_MSR_RANGE (0xffff) -#define CPUID_6_ECX_APERFMPERF_CAPABILITY (0x1) -struct acpi_cpufreq_data { - struct acpi_processor_performance *acpi_data; - struct cpufreq_frequency_table *freq_table; - unsigned int max_freq; - unsigned int resume; - unsigned int cpu_feature; +struct cpufreq_acpi_io { + struct acpi_processor_performance *acpi_data; + struct cpufreq_frequency_table *freq_table; + unsigned int resume; }; -static struct acpi_cpufreq_data *drv_data[NR_CPUS]; -static struct acpi_processor_performance *acpi_perf_data[NR_CPUS]; +static struct cpufreq_acpi_io *acpi_io_data[NR_CPUS]; +static struct acpi_processor_performance *acpi_perf_data[NR_CPUS]; static struct cpufreq_driver acpi_cpufreq_driver; static unsigned int acpi_pstate_strict; -static int check_est_cpu(unsigned int cpuid) -{ - struct cpuinfo_x86 *cpu = &cpu_data[cpuid]; - - if (cpu->x86_vendor != X86_VENDOR_INTEL || - !cpu_has(cpu, X86_FEATURE_EST)) - return 0; - - return 1; -} - -static unsigned extract_io(u32 value, struct acpi_cpufreq_data *data) -{ - struct acpi_processor_performance *perf; - int i; - - perf = data->acpi_data; - - for (i=0; istate_count; i++) { - if (value == perf->states[i].status) - return data->freq_table[i].frequency; - } - return 0; -} - -static unsigned extract_msr(u32 msr, struct acpi_cpufreq_data *data) +static int +acpi_processor_write_port( + u16 port, + u8 bit_width, + u32 value) { - int i; - struct acpi_processor_performance *perf; - - msr &= INTEL_MSR_RANGE; - perf = data->acpi_data; - - for (i=0; data->freq_table[i].frequency != CPUFREQ_TABLE_END; i++) { - if (msr == perf->states[data->freq_table[i].index].status) - return data->freq_table[i].frequency; - } - return data->freq_table[0].frequency; -} - -static unsigned extract_freq(u32 val, struct acpi_cpufreq_data *data) -{ - switch (data->cpu_feature) { - case SYSTEM_INTEL_MSR_CAPABLE: - return extract_msr(val, data); - case SYSTEM_IO_CAPABLE: - return extract_io(val, data); - default: - return 0; - } -} - -static void wrport(u16 port, u8 bit_width, u32 value) -{ - if (bit_width <= 8) + if (bit_width <= 8) { outb(value, port); - else if (bit_width <= 16) + } else if (bit_width <= 16) { outw(value, port); - else if (bit_width <= 32) + } else if (bit_width <= 32) { outl(value, port); + } else { + return -ENODEV; + } + return 0; } -static void rdport(u16 port, u8 bit_width, u32 * ret) +static int +acpi_processor_read_port( + u16 port, + u8 bit_width, + u32 *ret) { *ret = 0; - if (bit_width <= 8) + if (bit_width <= 8) { *ret = inb(port); - else if (bit_width <= 16) + } else if (bit_width <= 16) { *ret = inw(port); - else if (bit_width <= 32) + } else if (bit_width <= 32) { *ret = inl(port); -} - -struct msr_addr { - u32 reg; -}; - -struct io_addr { - u16 port; - u8 bit_width; -}; - -typedef union { - struct msr_addr msr; - struct io_addr io; -} drv_addr_union; - -struct drv_cmd { - unsigned int type; - cpumask_t mask; - drv_addr_union addr; - u32 val; -}; - -static void do_drv_read(struct drv_cmd *cmd) -{ - u32 h; - - switch (cmd->type) { - case SYSTEM_INTEL_MSR_CAPABLE: - rdmsr(cmd->addr.msr.reg, cmd->val, h); - break; - case SYSTEM_IO_CAPABLE: - rdport(cmd->addr.io.port, cmd->addr.io.bit_width, &cmd->val); - break; - default: - break; - } -} - -static void do_drv_write(struct drv_cmd *cmd) -{ - u32 h = 0; - - switch (cmd->type) { - case SYSTEM_INTEL_MSR_CAPABLE: - wrmsr(cmd->addr.msr.reg, cmd->val, h); - break; - case SYSTEM_IO_CAPABLE: - wrport(cmd->addr.io.port, cmd->addr.io.bit_width, cmd->val); - break; - default: - break; - } -} - -static void drv_read(struct drv_cmd *cmd) -{ - cpumask_t saved_mask = current->cpus_allowed; - cmd->val = 0; - - set_cpus_allowed(current, cmd->mask); - do_drv_read(cmd); - set_cpus_allowed(current, saved_mask); -} - -static void drv_write(struct drv_cmd *cmd) -{ - cpumask_t saved_mask = current->cpus_allowed; - unsigned int i; - - for_each_cpu_mask(i, cmd->mask) { - set_cpus_allowed(current, cpumask_of_cpu(i)); - do_drv_write(cmd); - } - - set_cpus_allowed(current, saved_mask); - return; -} - -static u32 get_cur_val(cpumask_t mask) -{ - struct acpi_processor_performance *perf; - struct drv_cmd cmd; - - if (unlikely(cpus_empty(mask))) - return 0; - - switch (drv_data[first_cpu(mask)]->cpu_feature) { - case SYSTEM_INTEL_MSR_CAPABLE: - cmd.type = SYSTEM_INTEL_MSR_CAPABLE; - cmd.addr.msr.reg = MSR_IA32_PERF_STATUS; - break; - case SYSTEM_IO_CAPABLE: - cmd.type = SYSTEM_IO_CAPABLE; - perf = drv_data[first_cpu(mask)]->acpi_data; - cmd.addr.io.port = perf->control_register.address; - cmd.addr.io.bit_width = perf->control_register.bit_width; - break; - default: - return 0; + } else { + return -ENODEV; } - - cmd.mask = mask; - - drv_read(&cmd); - - dprintk("get_cur_val = %u\n", cmd.val); - - return cmd.val; + return 0; } -/* - * Return the measured active (C0) frequency on this CPU since last call - * to this function. - * Input: cpu number - * Return: Average CPU frequency in terms of max frequency (zero on error) - * - * We use IA32_MPERF and IA32_APERF MSRs to get the measured performance - * over a period of time, while CPU is in C0 state. - * IA32_MPERF counts at the rate of max advertised frequency - * IA32_APERF counts at the rate of actual CPU frequency - * Only IA32_APERF/IA32_MPERF ratio is architecturally defined and - * no meaning should be associated with absolute values of these MSRs. - */ -static unsigned int get_measured_perf(unsigned int cpu) +static int +acpi_processor_set_performance ( + struct cpufreq_acpi_io *data, + unsigned int cpu, + int state) { - union { - struct { - u32 lo; - u32 hi; - } split; - u64 whole; - } aperf_cur, mperf_cur; - - cpumask_t saved_mask; - unsigned int perf_percent; - unsigned int retval; - - saved_mask = current->cpus_allowed; - set_cpus_allowed(current, cpumask_of_cpu(cpu)); - if (get_cpu() != cpu) { - /* We were not able to run on requested processor */ - put_cpu(); - return 0; + u16 port = 0; + u8 bit_width = 0; + int i = 0; + int ret = 0; + u32 value = 0; + int retval; + struct acpi_processor_performance *perf; + + dprintk("acpi_processor_set_performance\n"); + + retval = 0; + perf = data->acpi_data; + if (state == perf->state) { + if (unlikely(data->resume)) { + dprintk("Called after resume, resetting to P%d\n", state); + data->resume = 0; + } else { + dprintk("Already at target state (P%d)\n", state); + return (retval); + } } - rdmsr(MSR_IA32_APERF, aperf_cur.split.lo, aperf_cur.split.hi); - rdmsr(MSR_IA32_MPERF, mperf_cur.split.lo, mperf_cur.split.hi); - - wrmsr(MSR_IA32_APERF, 0,0); - wrmsr(MSR_IA32_MPERF, 0,0); + dprintk("Transitioning from P%d to P%d\n", perf->state, state); -#ifdef __i386__ /* - * We dont want to do 64 bit divide with 32 bit kernel - * Get an approximate value. Return failure in case we cannot get - * an approximate value. + * First we write the target state's 'control' value to the + * control_register. */ - if (unlikely(aperf_cur.split.hi || mperf_cur.split.hi)) { - int shift_count; - u32 h; - h = max_t(u32, aperf_cur.split.hi, mperf_cur.split.hi); - shift_count = fls(h); + port = perf->control_register.address; + bit_width = perf->control_register.bit_width; + value = (u32) perf->states[state].control; - aperf_cur.whole >>= shift_count; - mperf_cur.whole >>= shift_count; - } + dprintk("Writing 0x%08x to port 0x%04x\n", value, port); - if (((unsigned long)(-1) / 100) < aperf_cur.split.lo) { - int shift_count = 7; - aperf_cur.split.lo >>= shift_count; - mperf_cur.split.lo >>= shift_count; + ret = acpi_processor_write_port(port, bit_width, value); + if (ret) { + dprintk("Invalid port width 0x%04x\n", bit_width); + return (ret); } - if (aperf_cur.split.lo && mperf_cur.split.lo) - perf_percent = (aperf_cur.split.lo * 100) / mperf_cur.split.lo; - else - perf_percent = 0; - -#else - if (unlikely(((unsigned long)(-1) / 100) < aperf_cur.whole)) { - int shift_count = 7; - aperf_cur.whole >>= shift_count; - mperf_cur.whole >>= shift_count; + /* + * Assume the write went through when acpi_pstate_strict is not used. + * As read status_register is an expensive operation and there + * are no specific error cases where an IO port write will fail. + */ + if (acpi_pstate_strict) { + /* Then we read the 'status_register' and compare the value + * with the target state's 'status' to make sure the + * transition was successful. + * Note that we'll poll for up to 1ms (100 cycles of 10us) + * before giving up. + */ + + port = perf->status_register.address; + bit_width = perf->status_register.bit_width; + + dprintk("Looking for 0x%08x from port 0x%04x\n", + (u32) perf->states[state].status, port); + + for (i = 0; i < 100; i++) { + ret = acpi_processor_read_port(port, bit_width, &value); + if (ret) { + dprintk("Invalid port width 0x%04x\n", bit_width); + return (ret); + } + if (value == (u32) perf->states[state].status) + break; + udelay(10); + } + } else { + value = (u32) perf->states[state].status; } - if (aperf_cur.whole && mperf_cur.whole) - perf_percent = (aperf_cur.whole * 100) / mperf_cur.whole; - else - perf_percent = 0; - -#endif - - retval = drv_data[cpu]->max_freq * perf_percent / 100; - - put_cpu(); - set_cpus_allowed(current, saved_mask); - - dprintk("cpu %d: performance percent %d\n", cpu, perf_percent); - return retval; -} - -static unsigned int get_cur_freq_on_cpu(unsigned int cpu) -{ - struct acpi_cpufreq_data *data = drv_data[cpu]; - unsigned int freq; - - dprintk("get_cur_freq_on_cpu (%d)\n", cpu); - - if (unlikely(data == NULL || - data->acpi_data == NULL || data->freq_table == NULL)) { - return 0; + if (unlikely(value != (u32) perf->states[state].status)) { + printk(KERN_WARNING "acpi-cpufreq: Transition failed\n"); + retval = -ENODEV; + return (retval); } - freq = extract_freq(get_cur_val(cpumask_of_cpu(cpu)), data); - dprintk("cur freq = %u\n", freq); + dprintk("Transition successful after %d microseconds\n", i * 10); - return freq; + perf->state = state; + return (retval); } -static unsigned int check_freqs(cpumask_t mask, unsigned int freq, - struct acpi_cpufreq_data *data) -{ - unsigned int cur_freq; - unsigned int i; - - for (i=0; i<100; i++) { - cur_freq = extract_freq(get_cur_val(mask), data); - if (cur_freq == freq) - return 1; - udelay(10); - } - return 0; -} -static int acpi_cpufreq_target(struct cpufreq_policy *policy, - unsigned int target_freq, unsigned int relation) +static int +acpi_cpufreq_target ( + struct cpufreq_policy *policy, + unsigned int target_freq, + unsigned int relation) { - struct acpi_cpufreq_data *data = drv_data[policy->cpu]; + struct cpufreq_acpi_io *data = acpi_io_data[policy->cpu]; struct acpi_processor_performance *perf; struct cpufreq_freqs freqs; cpumask_t online_policy_cpus; - struct drv_cmd cmd; - unsigned int msr; + cpumask_t saved_mask; + cpumask_t set_mask; + cpumask_t covered_cpus; + unsigned int cur_state = 0; unsigned int next_state = 0; - unsigned int next_perf_state = 0; - unsigned int i; - int result = 0; - - dprintk("acpi_cpufreq_target %d (%d)\n", target_freq, policy->cpu); + unsigned int result = 0; + unsigned int j; + unsigned int tmp; - if (unlikely(data == NULL || - data->acpi_data == NULL || data->freq_table == NULL)) { - return -ENODEV; - } + dprintk("acpi_cpufreq_setpolicy\n"); - perf = data->acpi_data; result = cpufreq_frequency_table_target(policy, - data->freq_table, - target_freq, - relation, &next_state); + data->freq_table, + target_freq, + relation, + &next_state); if (unlikely(result)) - return -ENODEV; + return (result); + + perf = data->acpi_data; + cur_state = perf->state; + freqs.old = data->freq_table[cur_state].frequency; + freqs.new = data->freq_table[next_state].frequency; #ifdef CONFIG_HOTPLUG_CPU /* cpufreq holds the hotplug lock, so we are safe from here on */ @@ -417,84 +231,106 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, online_policy_cpus = policy->cpus; #endif - next_perf_state = data->freq_table[next_state].index; - if (perf->state == next_perf_state) { - if (unlikely(data->resume)) { - dprintk("Called after resume, resetting to P%d\n", - next_perf_state); - data->resume = 0; - } else { - dprintk("Already at target state (P%d)\n", - next_perf_state); - return 0; - } - } - - switch (data->cpu_feature) { - case SYSTEM_INTEL_MSR_CAPABLE: - cmd.type = SYSTEM_INTEL_MSR_CAPABLE; - cmd.addr.msr.reg = MSR_IA32_PERF_CTL; - msr = - (u32) perf->states[next_perf_state]. - control & INTEL_MSR_RANGE; - cmd.val = (cmd.val & ~INTEL_MSR_RANGE) | msr; - break; - case SYSTEM_IO_CAPABLE: - cmd.type = SYSTEM_IO_CAPABLE; - cmd.addr.io.port = perf->control_register.address; - cmd.addr.io.bit_width = perf->control_register.bit_width; - cmd.val = (u32) perf->states[next_perf_state].control; - break; - default: - return -ENODEV; + for_each_cpu_mask(j, online_policy_cpus) { + freqs.cpu = j; + cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); } - cpus_clear(cmd.mask); + /* + * We need to call driver->target() on all or any CPU in + * policy->cpus, depending on policy->shared_type. + */ + saved_mask = current->cpus_allowed; + cpus_clear(covered_cpus); + for_each_cpu_mask(j, online_policy_cpus) { + /* + * Support for SMP systems. + * Make sure we are running on CPU that wants to change freq + */ + cpus_clear(set_mask); + if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) + cpus_or(set_mask, set_mask, online_policy_cpus); + else + cpu_set(j, set_mask); + + set_cpus_allowed(current, set_mask); + if (unlikely(!cpu_isset(smp_processor_id(), set_mask))) { + dprintk("couldn't limit to CPUs in this domain\n"); + result = -EAGAIN; + break; + } - if (policy->shared_type != CPUFREQ_SHARED_TYPE_ANY) - cmd.mask = online_policy_cpus; - else - cpu_set(policy->cpu, cmd.mask); + result = acpi_processor_set_performance (data, j, next_state); + if (result) { + result = -EAGAIN; + break; + } - freqs.old = data->freq_table[perf->state].frequency; - freqs.new = data->freq_table[next_perf_state].frequency; - for_each_cpu_mask(i, cmd.mask) { - freqs.cpu = i; - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) + break; + + cpu_set(j, covered_cpus); } - drv_write(&cmd); + for_each_cpu_mask(j, online_policy_cpus) { + freqs.cpu = j; + cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + } - if (acpi_pstate_strict) { - if (!check_freqs(cmd.mask, freqs.new, data)) { - dprintk("acpi_cpufreq_target failed (%d)\n", - policy->cpu); - return -EAGAIN; + if (unlikely(result)) { + /* + * We have failed halfway through the frequency change. + * We have sent callbacks to online_policy_cpus and + * acpi_processor_set_performance() has been called on + * coverd_cpus. Best effort undo.. + */ + + if (!cpus_empty(covered_cpus)) { + for_each_cpu_mask(j, covered_cpus) { + policy->cpu = j; + acpi_processor_set_performance (data, + j, + cur_state); + } } - } - for_each_cpu_mask(i, cmd.mask) { - freqs.cpu = i; - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + tmp = freqs.new; + freqs.new = freqs.old; + freqs.old = tmp; + for_each_cpu_mask(j, online_policy_cpus) { + freqs.cpu = j; + cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + } } - perf->state = next_perf_state; - return result; + set_cpus_allowed(current, saved_mask); + return (result); } -static int acpi_cpufreq_verify(struct cpufreq_policy *policy) + +static int +acpi_cpufreq_verify ( + struct cpufreq_policy *policy) { - struct acpi_cpufreq_data *data = drv_data[policy->cpu]; + unsigned int result = 0; + struct cpufreq_acpi_io *data = acpi_io_data[policy->cpu]; dprintk("acpi_cpufreq_verify\n"); - return cpufreq_frequency_table_verify(policy, data->freq_table); + result = cpufreq_frequency_table_verify(policy, + data->freq_table); + + return (result); } + static unsigned long -acpi_cpufreq_guess_freq(struct acpi_cpufreq_data *data, unsigned int cpu) +acpi_cpufreq_guess_freq ( + struct cpufreq_acpi_io *data, + unsigned int cpu) { - struct acpi_processor_performance *perf = data->acpi_data; + struct acpi_processor_performance *perf = data->acpi_data; if (cpu_khz) { /* search the closest match to cpu_khz */ @@ -502,16 +338,16 @@ acpi_cpufreq_guess_freq(struct acpi_cpufreq_data *data, unsigned int cpu) unsigned long freq; unsigned long freqn = perf->states[0].core_frequency * 1000; - for (i=0; i<(perf->state_count-1); i++) { + for (i = 0; i < (perf->state_count - 1); i++) { freq = freqn; freqn = perf->states[i+1].core_frequency * 1000; if ((2 * cpu_khz) > (freqn + freq)) { perf->state = i; - return freq; + return (freq); } } - perf->state = perf->state_count-1; - return freqn; + perf->state = perf->state_count - 1; + return (freqn); } else { /* assume CPU is at P0... */ perf->state = 0; @@ -519,6 +355,7 @@ acpi_cpufreq_guess_freq(struct acpi_cpufreq_data *data, unsigned int cpu) } } + /* * acpi_cpufreq_early_init - initialize ACPI P-States library * @@ -527,34 +364,30 @@ acpi_cpufreq_guess_freq(struct acpi_cpufreq_data *data, unsigned int cpu) * do _PDC and _PSD and find out the processor dependency for the * actual init that will happen later... */ -static int acpi_cpufreq_early_init(void) +static int acpi_cpufreq_early_init_acpi(void) { - struct acpi_processor_performance *data; - cpumask_t covered; - unsigned int i, j; + struct acpi_processor_performance *data; + unsigned int i, j; dprintk("acpi_cpufreq_early_init\n"); for_each_possible_cpu(i) { - data = kzalloc(sizeof(struct acpi_processor_performance), - GFP_KERNEL); + data = kzalloc(sizeof(struct acpi_processor_performance), + GFP_KERNEL); if (!data) { - for_each_cpu_mask(j, covered) { + for_each_possible_cpu(j) { kfree(acpi_perf_data[j]); acpi_perf_data[j] = NULL; } - return -ENOMEM; + return (-ENOMEM); } acpi_perf_data[i] = data; - cpu_set(i, covered); } /* Do initialization in ACPI core */ - acpi_processor_preregister_performance(acpi_perf_data); - return 0; + return acpi_processor_preregister_performance(acpi_perf_data); } -#ifdef CONFIG_SMP /* * Some BIOSes do SW_ANY coordination internally, either set it up in hw * or do it in BIOS firmware and won't inform about it to OS. If not @@ -581,42 +414,39 @@ static struct dmi_system_id sw_any_bug_dmi_table[] = { }, { } }; -#endif -static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) +static int +acpi_cpufreq_cpu_init ( + struct cpufreq_policy *policy) { - unsigned int i; - unsigned int valid_states = 0; - unsigned int cpu = policy->cpu; - struct acpi_cpufreq_data *data; - unsigned int result = 0; + unsigned int i; + unsigned int cpu = policy->cpu; + struct cpufreq_acpi_io *data; + unsigned int result = 0; struct cpuinfo_x86 *c = &cpu_data[policy->cpu]; - struct acpi_processor_performance *perf; + struct acpi_processor_performance *perf; dprintk("acpi_cpufreq_cpu_init\n"); if (!acpi_perf_data[cpu]) - return -ENODEV; + return (-ENODEV); - data = kzalloc(sizeof(struct acpi_cpufreq_data), GFP_KERNEL); + data = kzalloc(sizeof(struct cpufreq_acpi_io), GFP_KERNEL); if (!data) - return -ENOMEM; + return (-ENOMEM); data->acpi_data = acpi_perf_data[cpu]; - drv_data[cpu] = data; - - if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) - acpi_cpufreq_driver.flags |= CPUFREQ_CONST_LOOPS; + acpi_io_data[cpu] = data; result = acpi_processor_register_performance(data->acpi_data, cpu); + if (result) goto err_free; perf = data->acpi_data; policy->shared_type = perf->shared_type; - /* - * Will let policy->cpus know about dependency only when software + * Will let policy->cpus know about dependency only when software * coordination is required. */ if (policy->shared_type == CPUFREQ_SHARED_TYPE_ALL || @@ -632,6 +462,10 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) } #endif + if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) { + acpi_cpufreq_driver.flags |= CPUFREQ_CONST_LOOPS; + } + /* capability check */ if (perf->state_count <= 1) { dprintk("No P-States\n"); @@ -639,33 +473,17 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) goto err_unreg; } - if (perf->control_register.space_id != perf->status_register.space_id) { - result = -ENODEV; - goto err_unreg; - } - - switch (perf->control_register.space_id) { - case ACPI_ADR_SPACE_SYSTEM_IO: - dprintk("SYSTEM IO addr space\n"); - data->cpu_feature = SYSTEM_IO_CAPABLE; - break; - case ACPI_ADR_SPACE_FIXED_HARDWARE: - dprintk("HARDWARE addr space\n"); - if (!check_est_cpu(cpu)) { - result = -ENODEV; - goto err_unreg; - } - data->cpu_feature = SYSTEM_INTEL_MSR_CAPABLE; - break; - default: - dprintk("Unknown addr space %d\n", - (u32) (perf->control_register.space_id)); + if ((perf->control_register.space_id != ACPI_ADR_SPACE_SYSTEM_IO) || + (perf->status_register.space_id != ACPI_ADR_SPACE_SYSTEM_IO)) { + dprintk("Unsupported address space [%d, %d]\n", + (u32) (perf->control_register.space_id), + (u32) (perf->status_register.space_id)); result = -ENODEV; goto err_unreg; } - data->freq_table = kmalloc(sizeof(struct cpufreq_frequency_table) * - (perf->state_count+1), GFP_KERNEL); + /* alloc freq_table */ + data->freq_table = kmalloc(sizeof(struct cpufreq_frequency_table) * (perf->state_count + 1), GFP_KERNEL); if (!data->freq_table) { result = -ENOMEM; goto err_unreg; @@ -674,140 +492,129 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) /* detect transition latency */ policy->cpuinfo.transition_latency = 0; for (i=0; istate_count; i++) { - if ((perf->states[i].transition_latency * 1000) > - policy->cpuinfo.transition_latency) - policy->cpuinfo.transition_latency = - perf->states[i].transition_latency * 1000; + if ((perf->states[i].transition_latency * 1000) > policy->cpuinfo.transition_latency) + policy->cpuinfo.transition_latency = perf->states[i].transition_latency * 1000; } policy->governor = CPUFREQ_DEFAULT_GOVERNOR; - data->max_freq = perf->states[0].core_frequency * 1000; + /* The current speed is unknown and not detectable by ACPI... */ + policy->cur = acpi_cpufreq_guess_freq(data, policy->cpu); + /* table init */ - for (i=0; istate_count; i++) { - if (i>0 && perf->states[i].core_frequency == - perf->states[i-1].core_frequency) - continue; - - data->freq_table[valid_states].index = i; - data->freq_table[valid_states].frequency = - perf->states[i].core_frequency * 1000; - valid_states++; + for (i=0; i<=perf->state_count; i++) + { + data->freq_table[i].index = i; + if (istate_count) + data->freq_table[i].frequency = perf->states[i].core_frequency * 1000; + else + data->freq_table[i].frequency = CPUFREQ_TABLE_END; } - data->freq_table[valid_states].frequency = CPUFREQ_TABLE_END; result = cpufreq_frequency_table_cpuinfo(policy, data->freq_table); - if (result) + if (result) { goto err_freqfree; - - switch (data->cpu_feature) { - case ACPI_ADR_SPACE_SYSTEM_IO: - /* Current speed is unknown and not detectable by IO port */ - policy->cur = acpi_cpufreq_guess_freq(data, policy->cpu); - break; - case ACPI_ADR_SPACE_FIXED_HARDWARE: - acpi_cpufreq_driver.get = get_cur_freq_on_cpu; - get_cur_freq_on_cpu(cpu); - break; - default: - break; } /* notify BIOS that we exist */ acpi_processor_notify_smm(THIS_MODULE); - /* Check for APERF/MPERF support in hardware */ - if (c->x86_vendor == X86_VENDOR_INTEL && c->cpuid_level >= 6) { - unsigned int ecx; - ecx = cpuid_ecx(6); - if (ecx & CPUID_6_ECX_APERFMPERF_CAPABILITY) - acpi_cpufreq_driver.getavg = get_measured_perf; - } - - dprintk("CPU%u - ACPI performance management activated.\n", cpu); + printk(KERN_INFO "acpi-cpufreq: CPU%u - ACPI performance management activated.\n", + cpu); for (i = 0; i < perf->state_count; i++) dprintk(" %cP%d: %d MHz, %d mW, %d uS\n", - (i == perf->state ? '*' : ' '), i, + (i == perf->state?'*':' '), i, (u32) perf->states[i].core_frequency, (u32) perf->states[i].power, (u32) perf->states[i].transition_latency); cpufreq_frequency_table_get_attr(data->freq_table, policy->cpu); - + /* * the first call to ->target() should result in us actually * writing something to the appropriate registers. */ data->resume = 1; + + return (result); - return result; - -err_freqfree: + err_freqfree: kfree(data->freq_table); -err_unreg: + err_unreg: acpi_processor_unregister_performance(perf, cpu); -err_free: + err_free: kfree(data); - drv_data[cpu] = NULL; + acpi_io_data[cpu] = NULL; - return result; + return (result); } -static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy) + +static int +acpi_cpufreq_cpu_exit ( + struct cpufreq_policy *policy) { - struct acpi_cpufreq_data *data = drv_data[policy->cpu]; + struct cpufreq_acpi_io *data = acpi_io_data[policy->cpu]; + dprintk("acpi_cpufreq_cpu_exit\n"); if (data) { cpufreq_frequency_table_put_attr(policy->cpu); - drv_data[policy->cpu] = NULL; - acpi_processor_unregister_performance(data->acpi_data, - policy->cpu); + acpi_io_data[policy->cpu] = NULL; + acpi_processor_unregister_performance(data->acpi_data, policy->cpu); kfree(data); } - return 0; + return (0); } -static int acpi_cpufreq_resume(struct cpufreq_policy *policy) +static int +acpi_cpufreq_resume ( + struct cpufreq_policy *policy) { - struct acpi_cpufreq_data *data = drv_data[policy->cpu]; + struct cpufreq_acpi_io *data = acpi_io_data[policy->cpu]; + dprintk("acpi_cpufreq_resume\n"); data->resume = 1; - return 0; + return (0); } -static struct freq_attr *acpi_cpufreq_attr[] = { + +static struct freq_attr* acpi_cpufreq_attr[] = { &cpufreq_freq_attr_scaling_available_freqs, NULL, }; static struct cpufreq_driver acpi_cpufreq_driver = { - .verify = acpi_cpufreq_verify, - .target = acpi_cpufreq_target, - .init = acpi_cpufreq_cpu_init, - .exit = acpi_cpufreq_cpu_exit, - .resume = acpi_cpufreq_resume, - .name = "acpi-cpufreq", - .owner = THIS_MODULE, - .attr = acpi_cpufreq_attr, + .verify = acpi_cpufreq_verify, + .target = acpi_cpufreq_target, + .init = acpi_cpufreq_cpu_init, + .exit = acpi_cpufreq_cpu_exit, + .resume = acpi_cpufreq_resume, + .name = "acpi-cpufreq", + .owner = THIS_MODULE, + .attr = acpi_cpufreq_attr, }; -static int __init acpi_cpufreq_init(void) + +static int __init +acpi_cpufreq_init (void) { dprintk("acpi_cpufreq_init\n"); - acpi_cpufreq_early_init(); + acpi_cpufreq_early_init_acpi(); return cpufreq_register_driver(&acpi_cpufreq_driver); } -static void __exit acpi_cpufreq_exit(void) + +static void __exit +acpi_cpufreq_exit (void) { - unsigned int i; + unsigned int i; dprintk("acpi_cpufreq_exit\n"); cpufreq_unregister_driver(&acpi_cpufreq_driver); @@ -820,9 +627,7 @@ static void __exit acpi_cpufreq_exit(void) } module_param(acpi_pstate_strict, uint, 0644); -MODULE_PARM_DESC(acpi_pstate_strict, - "value 0 or non-zero. non-zero -> strict ACPI checks are " - "performed during frequency changes."); +MODULE_PARM_DESC(acpi_pstate_strict, "value 0 or non-zero. non-zero -> strict ACPI checks are performed during frequency changes."); late_initcall(acpi_cpufreq_init); module_exit(acpi_cpufreq_exit); diff --git a/trunk/arch/i386/kernel/cpu/cpufreq/gx-suspmod.c b/trunk/arch/i386/kernel/cpu/cpufreq/gx-suspmod.c index 6667e9cceb9f..92afa3bc84f1 100644 --- a/trunk/arch/i386/kernel/cpu/cpufreq/gx-suspmod.c +++ b/trunk/arch/i386/kernel/cpu/cpufreq/gx-suspmod.c @@ -447,6 +447,7 @@ static int __init cpufreq_gx_init(void) int ret; struct gxfreq_params *params; struct pci_dev *gx_pci; + u32 class_rev; /* Test if we have the right hardware */ if ((gx_pci = gx_detect_chipset()) == NULL) @@ -471,7 +472,8 @@ static int __init cpufreq_gx_init(void) pci_read_config_byte(params->cs55x0, PCI_PMER2, &(params->pci_pmer2)); pci_read_config_byte(params->cs55x0, PCI_MODON, &(params->on_duration)); pci_read_config_byte(params->cs55x0, PCI_MODOFF, &(params->off_duration)); - pci_read_config_byte(params->cs55x0, PCI_REVISION_ID, ¶ms->pci_rev); + pci_read_config_dword(params->cs55x0, PCI_CLASS_REVISION, &class_rev); + params->pci_rev = class_rev && 0xff; if ((ret = cpufreq_register_driver(&gx_suspmod_driver))) { kfree(params); diff --git a/trunk/arch/i386/kernel/cpu/cpufreq/longhaul.c b/trunk/arch/i386/kernel/cpu/cpufreq/longhaul.c index c548daad3476..7233abe5d695 100644 --- a/trunk/arch/i386/kernel/cpu/cpufreq/longhaul.c +++ b/trunk/arch/i386/kernel/cpu/cpufreq/longhaul.c @@ -410,7 +410,7 @@ static int __init longhaul_get_ranges(void) maxmult=longhaul_get_cpu_mult(); /* Starting with the 1.2GHz parts, theres a 200MHz bus. */ - if ((cpu_khz/maxmult) > 13400) + if ((cpu_khz/1000) > 1200) fsb = 200; else fsb = eblcr_fsb_table_v2[longhaul.bits.MaxMHzFSB]; @@ -583,10 +583,6 @@ static int enable_arbiter_disable(void) if (dev == NULL) { reg = 0x76; dev = pci_find_device(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_862X_0, NULL); - /* Find CN400 V-Link host bridge */ - if (dev == NULL) - dev = pci_find_device(PCI_VENDOR_ID_VIA, 0x7259, NULL); - } if (dev != NULL) { /* Enable access to port 0x22 */ @@ -738,7 +734,7 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy) return 0; err_acpi: - printk(KERN_ERR PFX "No ACPI support. Unsupported northbridge. Aborting.\n"); + printk(KERN_ERR PFX "No ACPI support. No VT8601 or VT8623 northbridge. Aborting.\n"); return -ENODEV; } diff --git a/trunk/arch/i386/kernel/cpu/cpufreq/p4-clockmod.c b/trunk/arch/i386/kernel/cpu/cpufreq/p4-clockmod.c index bec50170b75a..304d2eaa4a1b 100644 --- a/trunk/arch/i386/kernel/cpu/cpufreq/p4-clockmod.c +++ b/trunk/arch/i386/kernel/cpu/cpufreq/p4-clockmod.c @@ -163,27 +163,29 @@ static int cpufreq_p4_verify(struct cpufreq_policy *policy) static unsigned int cpufreq_p4_get_frequency(struct cpuinfo_x86 *c) { - if (c->x86 == 0x06) { - if (cpu_has(c, X86_FEATURE_EST)) - printk(KERN_WARNING PFX "Warning: EST-capable CPU detected. " - "The acpi-cpufreq module offers voltage scaling" - " in addition of frequency scaling. You should use " - "that instead of p4-clockmod, if possible.\n"); - switch (c->x86_model) { - case 0x0E: /* Core */ - case 0x0F: /* Core Duo */ - p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS; - return speedstep_get_processor_frequency(SPEEDSTEP_PROCESSOR_PCORE); - case 0x0D: /* Pentium M (Dothan) */ - p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS; - /* fall through */ - case 0x09: /* Pentium M (Banias) */ - return speedstep_get_processor_frequency(SPEEDSTEP_PROCESSOR_PM); - } + if ((c->x86 == 0x06) && (c->x86_model == 0x09)) { + /* Pentium M (Banias) */ + printk(KERN_WARNING PFX "Warning: Pentium M detected. " + "The speedstep_centrino module offers voltage scaling" + " in addition of frequency scaling. You should use " + "that instead of p4-clockmod, if possible.\n"); + return speedstep_get_processor_frequency(SPEEDSTEP_PROCESSOR_PM); + } + + if ((c->x86 == 0x06) && (c->x86_model == 0x0D)) { + /* Pentium M (Dothan) */ + printk(KERN_WARNING PFX "Warning: Pentium M detected. " + "The speedstep_centrino module offers voltage scaling" + " in addition of frequency scaling. You should use " + "that instead of p4-clockmod, if possible.\n"); + /* on P-4s, the TSC runs with constant frequency independent whether + * throttling is active or not. */ + p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS; + return speedstep_get_processor_frequency(SPEEDSTEP_PROCESSOR_PM); } if (c->x86 != 0xF) { - printk(KERN_WARNING PFX "Unknown p4-clockmod-capable CPU. Please send an e-mail to \n"); + printk(KERN_WARNING PFX "Unknown p4-clockmod-capable CPU. Please send an e-mail to \n"); return 0; } diff --git a/trunk/arch/i386/kernel/cpu/cpufreq/sc520_freq.c b/trunk/arch/i386/kernel/cpu/cpufreq/sc520_freq.c index b8fb4b521c62..ef457d50f4ac 100644 --- a/trunk/arch/i386/kernel/cpu/cpufreq/sc520_freq.c +++ b/trunk/arch/i386/kernel/cpu/cpufreq/sc520_freq.c @@ -153,7 +153,6 @@ static struct cpufreq_driver sc520_freq_driver = { static int __init sc520_freq_init(void) { struct cpuinfo_x86 *c = cpu_data; - int err; /* Test if we have the right hardware */ if(c->x86_vendor != X86_VENDOR_AMD || @@ -167,11 +166,7 @@ static int __init sc520_freq_init(void) return -ENOMEM; } - err = cpufreq_register_driver(&sc520_freq_driver); - if (err) - iounmap(cpuctl); - - return err; + return cpufreq_register_driver(&sc520_freq_driver); } diff --git a/trunk/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c b/trunk/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c index 5113e9231634..e8993baf3d14 100644 --- a/trunk/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c +++ b/trunk/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c @@ -36,7 +36,6 @@ #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-centrino", msg) -#define INTEL_MSR_RANGE (0xffff) struct cpu_id { @@ -380,7 +379,6 @@ static int centrino_cpu_early_init_acpi(void) } -#ifdef CONFIG_SMP /* * Some BIOSes do SW_ANY coordination internally, either set it up in hw * or do it in BIOS firmware and won't inform about it to OS. If not @@ -394,6 +392,7 @@ static int sw_any_bug_found(struct dmi_system_id *d) return 0; } + static struct dmi_system_id sw_any_bug_dmi_table[] = { { .callback = sw_any_bug_found, @@ -406,7 +405,7 @@ static struct dmi_system_id sw_any_bug_dmi_table[] = { }, { } }; -#endif + /* * centrino_cpu_init_acpi - register with ACPI P-States library @@ -464,9 +463,8 @@ static int centrino_cpu_init_acpi(struct cpufreq_policy *policy) } for (i=0; istate_count; i++) { - if ((p->states[i].control & INTEL_MSR_RANGE) != - (p->states[i].status & INTEL_MSR_RANGE)) { - dprintk("Different MSR bits in control (%llu) and status (%llu)\n", + if (p->states[i].control != p->states[i].status) { + dprintk("Different control (%llu) and status values (%llu)\n", p->states[i].control, p->states[i].status); result = -EINVAL; goto err_unreg; @@ -502,7 +500,7 @@ static int centrino_cpu_init_acpi(struct cpufreq_policy *policy) } for (i=0; istate_count; i++) { - centrino_model[cpu]->op_points[i].index = p->states[i].control & INTEL_MSR_RANGE; + centrino_model[cpu]->op_points[i].index = p->states[i].control; centrino_model[cpu]->op_points[i].frequency = p->states[i].core_frequency * 1000; dprintk("adding state %i with frequency %u and control value %04x\n", i, centrino_model[cpu]->op_points[i].frequency, centrino_model[cpu]->op_points[i].index); @@ -533,9 +531,6 @@ static int centrino_cpu_init_acpi(struct cpufreq_policy *policy) /* notify BIOS that we exist */ acpi_processor_notify_smm(THIS_MODULE); - printk("speedstep-centrino with X86_SPEEDSTEP_CENTRINO_ACPI" - "config is deprecated.\n " - "Use X86_ACPI_CPUFREQ (acpi-cpufreq instead.\n" ); return 0; diff --git a/trunk/arch/i386/kernel/cpu/cpufreq/speedstep-lib.c b/trunk/arch/i386/kernel/cpu/cpufreq/speedstep-lib.c index d59277c00911..4f46cac155c4 100644 --- a/trunk/arch/i386/kernel/cpu/cpufreq/speedstep-lib.c +++ b/trunk/arch/i386/kernel/cpu/cpufreq/speedstep-lib.c @@ -123,36 +123,6 @@ static unsigned int pentiumM_get_frequency(void) return (msr_tmp * 100 * 1000); } -static unsigned int pentium_core_get_frequency(void) -{ - u32 fsb = 0; - u32 msr_lo, msr_tmp; - - rdmsr(MSR_FSB_FREQ, msr_lo, msr_tmp); - /* see table B-2 of 25366920.pdf */ - switch (msr_lo & 0x07) { - case 5: - fsb = 100000; - break; - case 1: - fsb = 133333; - break; - case 3: - fsb = 166667; - break; - default: - printk(KERN_ERR "PCORE - MSR_FSB_FREQ undefined value"); - } - - rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_tmp); - dprintk("PCORE - MSR_IA32_EBL_CR_POWERON: 0x%x 0x%x\n", msr_lo, msr_tmp); - - msr_tmp = (msr_lo >> 22) & 0x1f; - dprintk("bits 22-26 are 0x%x, speed is %u\n", msr_tmp, (msr_tmp * fsb)); - - return (msr_tmp * fsb); -} - static unsigned int pentium4_get_frequency(void) { @@ -204,8 +174,6 @@ static unsigned int pentium4_get_frequency(void) unsigned int speedstep_get_processor_frequency(unsigned int processor) { switch (processor) { - case SPEEDSTEP_PROCESSOR_PCORE: - return pentium_core_get_frequency(); case SPEEDSTEP_PROCESSOR_PM: return pentiumM_get_frequency(); case SPEEDSTEP_PROCESSOR_P4D: diff --git a/trunk/arch/i386/kernel/cpu/cpufreq/speedstep-lib.h b/trunk/arch/i386/kernel/cpu/cpufreq/speedstep-lib.h index b11bcc608cac..b735429c50b4 100644 --- a/trunk/arch/i386/kernel/cpu/cpufreq/speedstep-lib.h +++ b/trunk/arch/i386/kernel/cpu/cpufreq/speedstep-lib.h @@ -22,7 +22,6 @@ * the speedstep_get_processor_frequency() call. */ #define SPEEDSTEP_PROCESSOR_PM 0xFFFFFF03 /* Pentium M */ #define SPEEDSTEP_PROCESSOR_P4D 0xFFFFFF04 /* desktop P4 */ -#define SPEEDSTEP_PROCESSOR_PCORE 0xFFFFFF05 /* Core */ /* speedstep states -- only two of them */ diff --git a/trunk/arch/i386/kernel/cpu/cpufreq/speedstep-smi.c b/trunk/arch/i386/kernel/cpu/cpufreq/speedstep-smi.c index ff0d89806114..c28333d53646 100644 --- a/trunk/arch/i386/kernel/cpu/cpufreq/speedstep-smi.c +++ b/trunk/arch/i386/kernel/cpu/cpufreq/speedstep-smi.c @@ -360,6 +360,9 @@ static int __init speedstep_init(void) case SPEEDSTEP_PROCESSOR_PIII_C: case SPEEDSTEP_PROCESSOR_PIII_C_EARLY: break; + case SPEEDSTEP_PROCESSOR_P4M: + printk(KERN_INFO "speedstep-smi: you're trying to use this cpufreq driver on a Pentium 4-based CPU. Most likely it will not work.\n"); + break; default: speedstep_processor = 0; } diff --git a/trunk/arch/i386/kernel/cpu/mcheck/therm_throt.c b/trunk/arch/i386/kernel/cpu/mcheck/therm_throt.c index 2d8703b7ce65..4f43047de406 100644 --- a/trunk/arch/i386/kernel/cpu/mcheck/therm_throt.c +++ b/trunk/arch/i386/kernel/cpu/mcheck/therm_throt.c @@ -110,15 +110,17 @@ int therm_throt_process(int curr) #ifdef CONFIG_SYSFS /* Add/Remove thermal_throttle interface for CPU device */ -static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev) +static __cpuinit int thermal_throttle_add_dev(struct sys_device * sys_dev) { - return sysfs_create_group(&sys_dev->kobj, &thermal_throttle_attr_group); + sysfs_create_group(&sys_dev->kobj, &thermal_throttle_attr_group); + return 0; } #ifdef CONFIG_HOTPLUG_CPU -static __cpuinit void thermal_throttle_remove_dev(struct sys_device *sys_dev) +static __cpuinit int thermal_throttle_remove_dev(struct sys_device * sys_dev) { - return sysfs_remove_group(&sys_dev->kobj, &thermal_throttle_attr_group); + sysfs_remove_group(&sys_dev->kobj, &thermal_throttle_attr_group); + return 0; } /* Mutex protecting device creation against CPU hotplug */ @@ -131,14 +133,12 @@ static __cpuinit int thermal_throttle_cpu_callback(struct notifier_block *nfb, { unsigned int cpu = (unsigned long)hcpu; struct sys_device *sys_dev; - int err; sys_dev = get_cpu_sysdev(cpu); mutex_lock(&therm_cpu_lock); switch (action) { case CPU_ONLINE: - err = thermal_throttle_add_dev(sys_dev); - WARN_ON(err); + thermal_throttle_add_dev(sys_dev); break; case CPU_DEAD: thermal_throttle_remove_dev(sys_dev); @@ -157,7 +157,6 @@ static struct notifier_block thermal_throttle_cpu_notifier = static __init int thermal_throttle_init_device(void) { unsigned int cpu = 0; - int err; if (!atomic_read(&therm_throt_en)) return 0; @@ -168,10 +167,8 @@ static __init int thermal_throttle_init_device(void) mutex_lock(&therm_cpu_lock); #endif /* connect live CPUs to sysfs */ - for_each_online_cpu(cpu) { - err = thermal_throttle_add_dev(get_cpu_sysdev(cpu)); - WARN_ON(err); - } + for_each_online_cpu(cpu) + thermal_throttle_add_dev(get_cpu_sysdev(cpu)); #ifdef CONFIG_HOTPLUG_CPU mutex_unlock(&therm_cpu_lock); #endif diff --git a/trunk/arch/i386/kernel/io_apic.c b/trunk/arch/i386/kernel/io_apic.c index 27bceaf5ce40..cd082c36ca03 100644 --- a/trunk/arch/i386/kernel/io_apic.c +++ b/trunk/arch/i386/kernel/io_apic.c @@ -2594,7 +2594,7 @@ static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask) } #endif -static struct irq_chip ht_irq_chip = { +static struct hw_interrupt_type ht_irq_chip = { .name = "PCI-HT", .mask = mask_ht_irq, .unmask = unmask_ht_irq, diff --git a/trunk/arch/i386/kernel/microcode.c b/trunk/arch/i386/kernel/microcode.c index c4d0291b519f..9b9479768d5e 100644 --- a/trunk/arch/i386/kernel/microcode.c +++ b/trunk/arch/i386/kernel/microcode.c @@ -656,18 +656,14 @@ static struct attribute_group mc_attr_group = { static int mc_sysdev_add(struct sys_device *sys_dev) { - int err, cpu = sys_dev->id; + int cpu = sys_dev->id; struct ucode_cpu_info *uci = ucode_cpu_info + cpu; if (!cpu_online(cpu)) return 0; - pr_debug("Microcode:CPU %d added\n", cpu); memset(uci, 0, sizeof(*uci)); - - err = sysfs_create_group(&sys_dev->kobj, &mc_attr_group); - if (err) - return err; + sysfs_create_group(&sys_dev->kobj, &mc_attr_group); microcode_init_cpu(cpu); return 0; diff --git a/trunk/arch/i386/kernel/process.c b/trunk/arch/i386/kernel/process.c index 57d375900afb..b0a07801d9df 100644 --- a/trunk/arch/i386/kernel/process.c +++ b/trunk/arch/i386/kernel/process.c @@ -236,28 +236,20 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait); * We execute MONITOR against need_resched and enter optimized wait state * through MWAIT. Whenever someone changes need_resched, we would be woken * up from MWAIT (without an IPI). - * - * New with Core Duo processors, MWAIT can take some hints based on CPU - * capability. */ -void mwait_idle_with_hints(unsigned long eax, unsigned long ecx) +static void mwait_idle(void) { - if (!need_resched()) { + local_irq_enable(); + + while (!need_resched()) { __monitor((void *)¤t_thread_info()->flags, 0, 0); smp_mb(); - if (!need_resched()) - __mwait(eax, ecx); + if (need_resched()) + break; + __mwait(0, 0); } } -/* Default MONITOR/MWAIT with no hints, used for default C1 state */ -static void mwait_idle(void) -{ - local_irq_enable(); - while (!need_resched()) - mwait_idle_with_hints(0, 0); -} - void __devinit select_idle_routine(const struct cpuinfo_x86 *c) { if (cpu_has(c, X86_FEATURE_MWAIT)) { diff --git a/trunk/arch/i386/kernel/setup.c b/trunk/arch/i386/kernel/setup.c index 519e63c3c130..000cf03751fe 100644 --- a/trunk/arch/i386/kernel/setup.c +++ b/trunk/arch/i386/kernel/setup.c @@ -1083,15 +1083,16 @@ static unsigned long __init setup_memory(void) void __init zone_sizes_init(void) { - unsigned long max_zone_pfns[MAX_NR_ZONES]; - memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); - max_zone_pfns[ZONE_DMA] = - virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; - max_zone_pfns[ZONE_NORMAL] = max_low_pfn; #ifdef CONFIG_HIGHMEM - max_zone_pfns[ZONE_HIGHMEM] = highend_pfn; + unsigned long max_zone_pfns[MAX_NR_ZONES] = { + virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT, + max_low_pfn, + highend_pfn}; add_active_range(0, 0, highend_pfn); #else + unsigned long max_zone_pfns[MAX_NR_ZONES] = { + virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT, + max_low_pfn}; add_active_range(0, 0, max_low_pfn); #endif diff --git a/trunk/arch/i386/kernel/syscall_table.S b/trunk/arch/i386/kernel/syscall_table.S index 2697e9210e92..7e639f78b0b9 100644 --- a/trunk/arch/i386/kernel/syscall_table.S +++ b/trunk/arch/i386/kernel/syscall_table.S @@ -318,4 +318,3 @@ ENTRY(sys_call_table) .long sys_vmsplice .long sys_move_pages .long sys_getcpu - .long sys_epoll_pwait diff --git a/trunk/arch/i386/lib/usercopy.c b/trunk/arch/i386/lib/usercopy.c index 258df6b4d7d7..08502fc6d0cb 100644 --- a/trunk/arch/i386/lib/usercopy.c +++ b/trunk/arch/i386/lib/usercopy.c @@ -179,7 +179,7 @@ __clear_user(void __user *to, unsigned long n) EXPORT_SYMBOL(__clear_user); /** - * strnlen_user: - Get the size of a string in user space. + * strlen_user: - Get the size of a string in user space. * @s: The string to measure. * @n: The maximum valid length * diff --git a/trunk/arch/i386/mach-voyager/voyager_basic.c b/trunk/arch/i386/mach-voyager/voyager_basic.c index 8fe7e4593d5f..c639d30d8bdc 100644 --- a/trunk/arch/i386/mach-voyager/voyager_basic.c +++ b/trunk/arch/i386/mach-voyager/voyager_basic.c @@ -44,7 +44,7 @@ struct voyager_SUS *voyager_SUS = NULL; #ifdef CONFIG_SMP static void -voyager_dump(int dummy1, struct tty_struct *dummy3) +voyager_dump(int dummy1, struct pt_regs *dummy2, struct tty_struct *dummy3) { /* get here via a sysrq */ voyager_smp_dump(); @@ -166,7 +166,7 @@ voyager_memory_detect(int region, __u32 *start, __u32 *length) * off the timer tick to the SMP code, since the VIC doesn't have an * internal timer (The QIC does, but that's another story). */ void -voyager_timer_interrupt(void) +voyager_timer_interrupt(struct pt_regs *regs) { if((jiffies & 0x3ff) == 0) { @@ -202,7 +202,7 @@ voyager_timer_interrupt(void) } } #ifdef CONFIG_SMP - smp_vic_timer_interrupt(); + smp_vic_timer_interrupt(regs); #endif } diff --git a/trunk/arch/i386/mach-voyager/voyager_smp.c b/trunk/arch/i386/mach-voyager/voyager_smp.c index f3fea2ad50fe..d42422fc4af3 100644 --- a/trunk/arch/i386/mach-voyager/voyager_smp.c +++ b/trunk/arch/i386/mach-voyager/voyager_smp.c @@ -85,8 +85,8 @@ static int ack_QIC_CPI(__u8 cpi); static void ack_special_QIC_CPI(__u8 cpi); static void ack_VIC_CPI(__u8 cpi); static void send_CPI_allbutself(__u8 cpi); -static void mask_vic_irq(unsigned int irq); -static void unmask_vic_irq(unsigned int irq); +static void enable_vic_irq(unsigned int irq); +static void disable_vic_irq(unsigned int irq); static unsigned int startup_vic_irq(unsigned int irq); static void enable_local_vic_irq(unsigned int irq); static void disable_local_vic_irq(unsigned int irq); @@ -205,12 +205,15 @@ ack_CPI(__u8 cpi) /* The VIC IRQ descriptors -- these look almost identical to the * 8259 IRQs except that masks and things must be kept per processor */ -static struct irq_chip vic_chip = { - .name = "VIC", - .startup = startup_vic_irq, - .mask = mask_vic_irq, - .unmask = unmask_vic_irq, - .set_affinity = set_vic_irq_affinity, +static struct hw_interrupt_type vic_irq_type = { + .typename = "VIC-level", + .startup = startup_vic_irq, + .shutdown = disable_vic_irq, + .enable = enable_vic_irq, + .disable = disable_vic_irq, + .ack = before_handle_vic_irq, + .end = after_handle_vic_irq, + .set_affinity = set_vic_irq_affinity, }; /* used to count up as CPUs are brought on line (starts at 0) */ @@ -1141,9 +1144,9 @@ smp_apic_timer_interrupt(struct pt_regs *regs) fastcall void smp_qic_timer_interrupt(struct pt_regs *regs) { - struct pt_regs *old_regs = set_irq_regs(regs); ack_QIC_CPI(QIC_TIMER_CPI); - wrapper_smp_local_timer_interrupt(); + struct pt_regs *old_regs = set_irq_regs(regs); + wrapper_smp_local_timer_interrupt(void); set_irq_regs(old_regs); } @@ -1267,10 +1270,12 @@ smp_send_stop(void) /* this function is triggered in time.c when a clock tick fires * we need to re-broadcast the tick to all CPUs */ void -smp_vic_timer_interrupt(void) +smp_vic_timer_interrupt(struct pt_regs *regs) { + struct pt_regs *old_regs = set_irq_regs(regs); send_CPI_allbutself(VIC_TIMER_CPI); smp_local_timer_interrupt(); + set_irq_regs(old_regs); } /* local (per CPU) timer interrupt. It does both profiling and @@ -1305,7 +1310,7 @@ smp_local_timer_interrupt(void) per_cpu(prof_counter, cpu); } - update_process_times(user_mode_vm(get_irq_regs())); + update_process_times(user_mode_vm(irq_regs)); } if( ((1<> PAGE_SHIFT; - max_zone_pfns[ZONE_NORMAL] = max_low_pfn; - max_zone_pfns[ZONE_HIGHMEM] = highend_pfn; + unsigned long max_zone_pfns[MAX_NR_ZONES] = { + virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT, + max_low_pfn, + highend_pfn + }; /* If SRAT has not registered memory, register it now */ if (find_max_pfn_with_active_regions() == 0) { diff --git a/trunk/arch/ia64/mm/contig.c b/trunk/arch/ia64/mm/contig.c index 82deaa3a7c48..daf977ff2920 100644 --- a/trunk/arch/ia64/mm/contig.c +++ b/trunk/arch/ia64/mm/contig.c @@ -233,7 +233,6 @@ paging_init (void) efi_memmap_walk(count_pages, &num_physpages); max_dma = virt_to_phys((void *) MAX_DMA_ADDRESS) >> PAGE_SHIFT; - memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); max_zone_pfns[ZONE_DMA] = max_dma; max_zone_pfns[ZONE_NORMAL] = max_low_pfn; diff --git a/trunk/arch/ia64/mm/discontig.c b/trunk/arch/ia64/mm/discontig.c index 96722cb1b49d..d497b6b0f5b2 100644 --- a/trunk/arch/ia64/mm/discontig.c +++ b/trunk/arch/ia64/mm/discontig.c @@ -709,7 +709,6 @@ void __init paging_init(void) max_pfn = mem_data[node].max_pfn; } - memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); max_zone_pfns[ZONE_DMA] = max_dma; max_zone_pfns[ZONE_NORMAL] = max_pfn; free_area_init_nodes(max_zone_pfns); diff --git a/trunk/arch/m32r/kernel/setup.c b/trunk/arch/m32r/kernel/setup.c index 0e7778be33cc..3f35ab3d2dc2 100644 --- a/trunk/arch/m32r/kernel/setup.c +++ b/trunk/arch/m32r/kernel/setup.c @@ -369,10 +369,10 @@ static void c_stop(struct seq_file *m, void *v) } struct seq_operations cpuinfo_op = { - .start = c_start, - .next = c_next, - .stop = c_stop, - .show = show_cpuinfo, + start: c_start, + next: c_next, + stop: c_stop, + show: show_cpuinfo, }; #endif /* CONFIG_PROC_FS */ diff --git a/trunk/arch/m32r/kernel/setup_mappi.c b/trunk/arch/m32r/kernel/setup_mappi.c index 6b2d77da0683..67dbbdc9d111 100644 --- a/trunk/arch/m32r/kernel/setup_mappi.c +++ b/trunk/arch/m32r/kernel/setup_mappi.c @@ -86,7 +86,7 @@ void __init init_IRQ(void) /* INT0 : LAN controller (RTL8019AS) */ irq_desc[M32R_IRQ_INT0].status = IRQ_DISABLED; irq_desc[M32R_IRQ_INT0].chip = &mappi_irq_type; - irq_desc[M32R_IRQ_INT0].action = NULL; + irq_desc[M32R_IRQ_INT0].action = 0; irq_desc[M32R_IRQ_INT0].depth = 1; icu_data[M32R_IRQ_INT0].icucr = M32R_ICUCR_IEN|M32R_ICUCR_ISMOD10; disable_mappi_irq(M32R_IRQ_INT0); @@ -95,7 +95,7 @@ void __init init_IRQ(void) /* MFT2 : system timer */ irq_desc[M32R_IRQ_MFT2].status = IRQ_DISABLED; irq_desc[M32R_IRQ_MFT2].chip = &mappi_irq_type; - irq_desc[M32R_IRQ_MFT2].action = NULL; + irq_desc[M32R_IRQ_MFT2].action = 0; irq_desc[M32R_IRQ_MFT2].depth = 1; icu_data[M32R_IRQ_MFT2].icucr = M32R_ICUCR_IEN; disable_mappi_irq(M32R_IRQ_MFT2); @@ -104,7 +104,7 @@ void __init init_IRQ(void) /* SIO0_R : uart receive data */ irq_desc[M32R_IRQ_SIO0_R].status = IRQ_DISABLED; irq_desc[M32R_IRQ_SIO0_R].chip = &mappi_irq_type; - irq_desc[M32R_IRQ_SIO0_R].action = NULL; + irq_desc[M32R_IRQ_SIO0_R].action = 0; irq_desc[M32R_IRQ_SIO0_R].depth = 1; icu_data[M32R_IRQ_SIO0_R].icucr = 0; disable_mappi_irq(M32R_IRQ_SIO0_R); @@ -112,7 +112,7 @@ void __init init_IRQ(void) /* SIO0_S : uart send data */ irq_desc[M32R_IRQ_SIO0_S].status = IRQ_DISABLED; irq_desc[M32R_IRQ_SIO0_S].chip = &mappi_irq_type; - irq_desc[M32R_IRQ_SIO0_S].action = NULL; + irq_desc[M32R_IRQ_SIO0_S].action = 0; irq_desc[M32R_IRQ_SIO0_S].depth = 1; icu_data[M32R_IRQ_SIO0_S].icucr = 0; disable_mappi_irq(M32R_IRQ_SIO0_S); @@ -120,7 +120,7 @@ void __init init_IRQ(void) /* SIO1_R : uart receive data */ irq_desc[M32R_IRQ_SIO1_R].status = IRQ_DISABLED; irq_desc[M32R_IRQ_SIO1_R].chip = &mappi_irq_type; - irq_desc[M32R_IRQ_SIO1_R].action = NULL; + irq_desc[M32R_IRQ_SIO1_R].action = 0; irq_desc[M32R_IRQ_SIO1_R].depth = 1; icu_data[M32R_IRQ_SIO1_R].icucr = 0; disable_mappi_irq(M32R_IRQ_SIO1_R); @@ -128,7 +128,7 @@ void __init init_IRQ(void) /* SIO1_S : uart send data */ irq_desc[M32R_IRQ_SIO1_S].status = IRQ_DISABLED; irq_desc[M32R_IRQ_SIO1_S].chip = &mappi_irq_type; - irq_desc[M32R_IRQ_SIO1_S].action = NULL; + irq_desc[M32R_IRQ_SIO1_S].action = 0; irq_desc[M32R_IRQ_SIO1_S].depth = 1; icu_data[M32R_IRQ_SIO1_S].icucr = 0; disable_mappi_irq(M32R_IRQ_SIO1_S); @@ -138,7 +138,7 @@ void __init init_IRQ(void) /* INT1 : pccard0 interrupt */ irq_desc[M32R_IRQ_INT1].status = IRQ_DISABLED; irq_desc[M32R_IRQ_INT1].chip = &mappi_irq_type; - irq_desc[M32R_IRQ_INT1].action = NULL; + irq_desc[M32R_IRQ_INT1].action = 0; irq_desc[M32R_IRQ_INT1].depth = 1; icu_data[M32R_IRQ_INT1].icucr = M32R_ICUCR_IEN | M32R_ICUCR_ISMOD00; disable_mappi_irq(M32R_IRQ_INT1); @@ -146,7 +146,7 @@ void __init init_IRQ(void) /* INT2 : pccard1 interrupt */ irq_desc[M32R_IRQ_INT2].status = IRQ_DISABLED; irq_desc[M32R_IRQ_INT2].chip = &mappi_irq_type; - irq_desc[M32R_IRQ_INT2].action = NULL; + irq_desc[M32R_IRQ_INT2].action = 0; irq_desc[M32R_IRQ_INT2].depth = 1; icu_data[M32R_IRQ_INT2].icucr = M32R_ICUCR_IEN | M32R_ICUCR_ISMOD00; disable_mappi_irq(M32R_IRQ_INT2); diff --git a/trunk/arch/m32r/kernel/signal.c b/trunk/arch/m32r/kernel/signal.c index b60cea4aebaa..a9174efe80cb 100644 --- a/trunk/arch/m32r/kernel/signal.c +++ b/trunk/arch/m32r/kernel/signal.c @@ -33,7 +33,7 @@ int do_signal(struct pt_regs *, sigset_t *); asmlinkage int -sys_rt_sigsuspend(sigset_t __user *unewset, size_t sigsetsize, +sys_rt_sigsuspend(sigset_t *unewset, size_t sigsetsize, unsigned long r2, unsigned long r3, unsigned long r4, unsigned long r5, unsigned long r6, struct pt_regs *regs) { @@ -78,8 +78,8 @@ sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss, struct rt_sigframe { int sig; - struct siginfo __user *pinfo; - void __user *puc; + struct siginfo *pinfo; + void *puc; struct siginfo info; struct ucontext uc; // struct _fpstate fpstate; diff --git a/trunk/arch/m32r/kernel/smp.c b/trunk/arch/m32r/kernel/smp.c index 360129174b2b..722e21f556dc 100644 --- a/trunk/arch/m32r/kernel/smp.c +++ b/trunk/arch/m32r/kernel/smp.c @@ -231,7 +231,7 @@ void smp_flush_tlb_all(void) local_irq_save(flags); __flush_tlb_all(); local_irq_restore(flags); - smp_call_function(flush_tlb_all_ipi, NULL, 1, 1); + smp_call_function(flush_tlb_all_ipi, 0, 1, 1); preempt_enable(); } diff --git a/trunk/arch/m32r/kernel/sys_m32r.c b/trunk/arch/m32r/kernel/sys_m32r.c index b4e7bcb43540..b567351f3c52 100644 --- a/trunk/arch/m32r/kernel/sys_m32r.c +++ b/trunk/arch/m32r/kernel/sys_m32r.c @@ -31,7 +31,7 @@ /* * sys_tas() - test-and-set */ -asmlinkage int sys_tas(int __user *addr) +asmlinkage int sys_tas(int *addr) { int oldval; @@ -90,7 +90,7 @@ sys_pipe(unsigned long r0, unsigned long r1, unsigned long r2, error = do_pipe(fd); if (!error) { - if (copy_to_user((void __user *)r0, fd, 2*sizeof(int))) + if (copy_to_user((void *)r0, (void *)fd, 2*sizeof(int))) error = -EFAULT; } return error; @@ -201,7 +201,7 @@ asmlinkage int sys_ipc(uint call, int first, int second, } } -asmlinkage int sys_uname(struct old_utsname __user * name) +asmlinkage int sys_uname(struct old_utsname * name) { int err; if (!name) diff --git a/trunk/arch/m32r/kernel/traps.c b/trunk/arch/m32r/kernel/traps.c index 97e0b1c0830e..c1daf2c40c7c 100644 --- a/trunk/arch/m32r/kernel/traps.c +++ b/trunk/arch/m32r/kernel/traps.c @@ -268,7 +268,7 @@ static __inline__ void do_trap(int trapnr, int signr, const char * str, #define DO_ERROR(trapnr, signr, str, name) \ asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ { \ - do_trap(trapnr, signr, NULL, regs, error_code, NULL); \ + do_trap(trapnr, signr, 0, regs, error_code, NULL); \ } #define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \ diff --git a/trunk/arch/m68k/kernel/m68k_ksyms.c b/trunk/arch/m68k/kernel/m68k_ksyms.c index 6fc69c74fe2e..f9636e84e6a4 100644 --- a/trunk/arch/m68k/kernel/m68k_ksyms.c +++ b/trunk/arch/m68k/kernel/m68k_ksyms.c @@ -1,10 +1,61 @@ #include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include #include +#include asmlinkage long long __ashldi3 (long long, int); asmlinkage long long __ashrdi3 (long long, int); asmlinkage long long __lshrdi3 (long long, int); asmlinkage long long __muldi3 (long long, long long); +extern char m68k_debug_device[]; + +/* platform dependent support */ + +EXPORT_SYMBOL(m68k_machtype); +EXPORT_SYMBOL(m68k_cputype); +EXPORT_SYMBOL(m68k_is040or060); +EXPORT_SYMBOL(m68k_realnum_memory); +EXPORT_SYMBOL(m68k_memory); +#ifndef CONFIG_SUN3 +EXPORT_SYMBOL(cache_push); +EXPORT_SYMBOL(cache_clear); +#ifndef CONFIG_SINGLE_MEMORY_CHUNK +EXPORT_SYMBOL(mm_vtop); +EXPORT_SYMBOL(mm_ptov); +EXPORT_SYMBOL(mm_end_of_chunk); +#else +EXPORT_SYMBOL(m68k_memoffset); +#endif /* !CONFIG_SINGLE_MEMORY_CHUNK */ +EXPORT_SYMBOL(__ioremap); +EXPORT_SYMBOL(iounmap); +EXPORT_SYMBOL(kernel_set_cachemode); +#endif /* !CONFIG_SUN3 */ +EXPORT_SYMBOL(m68k_debug_device); +EXPORT_SYMBOL(mach_hwclk); +EXPORT_SYMBOL(mach_get_ss); +EXPORT_SYMBOL(mach_get_rtc_pll); +EXPORT_SYMBOL(mach_set_rtc_pll); +#ifdef CONFIG_INPUT_M68K_BEEP_MODULE +EXPORT_SYMBOL(mach_beep); +#endif +EXPORT_SYMBOL(dump_fpu); +EXPORT_SYMBOL(dump_thread); +EXPORT_SYMBOL(kernel_thread); +#ifdef CONFIG_VME +EXPORT_SYMBOL(vme_brdtype); +#endif /* The following are special because they're not called explicitly (the C compiler generates them). Fortunately, diff --git a/trunk/arch/m68k/kernel/process.c b/trunk/arch/m68k/kernel/process.c index 99fc1226f7f8..45a46646c1b3 100644 --- a/trunk/arch/m68k/kernel/process.c +++ b/trunk/arch/m68k/kernel/process.c @@ -187,7 +187,6 @@ int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) set_fs (fs); return pid; } -EXPORT_SYMBOL(kernel_thread); void flush_thread(void) { @@ -222,13 +221,13 @@ asmlinkage int m68k_clone(struct pt_regs *regs) { unsigned long clone_flags; unsigned long newsp; - int __user *parent_tidptr, *child_tidptr; + int *parent_tidptr, *child_tidptr; /* syscall2 puts clone_flags in d1 and usp in d2 */ clone_flags = regs->d1; newsp = regs->d2; - parent_tidptr = (int __user *)regs->d3; - child_tidptr = (int __user *)regs->d4; + parent_tidptr = (int *)regs->d3; + child_tidptr = (int *)regs->d4; if (!newsp) newsp = rdusp(); return do_fork(clone_flags, newsp, regs, 0, @@ -312,7 +311,6 @@ int dump_fpu (struct pt_regs *regs, struct user_m68kfp_struct *fpu) : "memory"); return 1; } -EXPORT_SYMBOL(dump_fpu); /* * fill in the user structure for a core dump.. @@ -359,12 +357,11 @@ void dump_thread(struct pt_regs * regs, struct user * dump) /* dump floating point stuff */ dump->u_fpvalid = dump_fpu (regs, &dump->m68kfp); } -EXPORT_SYMBOL(dump_thread); /* * sys_execve() executes a new program. */ -asmlinkage int sys_execve(char __user *name, char __user * __user *argv, char __user * __user *envp) +asmlinkage int sys_execve(char *name, char **argv, char **envp) { int error; char * filename; diff --git a/trunk/arch/m68k/kernel/setup.c b/trunk/arch/m68k/kernel/setup.c index 9af3ee0e555d..42d5b85f3350 100644 --- a/trunk/arch/m68k/kernel/setup.c +++ b/trunk/arch/m68k/kernel/setup.c @@ -42,37 +42,27 @@ unsigned long m68k_machtype; unsigned long m68k_cputype; -EXPORT_SYMBOL(m68k_machtype); -EXPORT_SYMBOL(m68k_cputype); unsigned long m68k_fputype; unsigned long m68k_mmutype; #ifdef CONFIG_VME unsigned long vme_brdtype; -EXPORT_SYMBOL(vme_brdtype); #endif int m68k_is040or060; -EXPORT_SYMBOL(m68k_is040or060); extern int end; extern unsigned long availmem; int m68k_num_memory; int m68k_realnum_memory; -EXPORT_SYMBOL(m68k_realnum_memory); -#ifdef CONFIG_SINGLE_MEMORY_CHUNK unsigned long m68k_memoffset; -EXPORT_SYMBOL(m68k_memoffset); -#endif struct mem_info m68k_memory[NUM_MEMINFO]; -EXPORT_SYMBOL(m68k_memory); static struct mem_info m68k_ramdisk; static char m68k_command_line[CL_SIZE]; char m68k_debug_device[6] = ""; -EXPORT_SYMBOL(m68k_debug_device); void (*mach_sched_init) (irq_handler_t handler) __initdata = NULL; /* machine dependent irq functions */ @@ -82,14 +72,10 @@ int (*mach_get_hardware_list) (char *buffer); /* machine dependent timer functions */ unsigned long (*mach_gettimeoffset) (void); int (*mach_hwclk) (int, struct rtc_time*); -EXPORT_SYMBOL(mach_hwclk); int (*mach_set_clock_mmss) (unsigned long); unsigned int (*mach_get_ss)(void); int (*mach_get_rtc_pll)(struct rtc_pll_info *); int (*mach_set_rtc_pll)(struct rtc_pll_info *); -EXPORT_SYMBOL(mach_get_ss); -EXPORT_SYMBOL(mach_get_rtc_pll); -EXPORT_SYMBOL(mach_set_rtc_pll); void (*mach_reset)( void ); void (*mach_halt)( void ); void (*mach_power_off)( void ); @@ -103,7 +89,6 @@ void (*mach_l2_flush) (int); #endif #if defined(CONFIG_INPUT_M68K_BEEP) || defined(CONFIG_INPUT_M68K_BEEP_MODULE) void (*mach_beep)(unsigned int, unsigned int); -EXPORT_SYMBOL(mach_beep); #endif #if defined(CONFIG_ISA) && defined(MULTI_ISA) int isa_type; diff --git a/trunk/arch/m68k/kernel/traps.c b/trunk/arch/m68k/kernel/traps.c index 759fa244e6cd..4569406a2e1f 100644 --- a/trunk/arch/m68k/kernel/traps.c +++ b/trunk/arch/m68k/kernel/traps.c @@ -326,13 +326,13 @@ static inline int do_040writeback1(unsigned short wbs, unsigned long wba, switch (wbs & WBSIZ_040) { case BA_SIZE_BYTE: - res = put_user(wbd & 0xff, (char __user *)wba); + res = put_user(wbd & 0xff, (char *)wba); break; case BA_SIZE_WORD: - res = put_user(wbd & 0xffff, (short __user *)wba); + res = put_user(wbd & 0xffff, (short *)wba); break; case BA_SIZE_LONG: - res = put_user(wbd, (int __user *)wba); + res = put_user(wbd, (int *)wba); break; } diff --git a/trunk/arch/m68k/mm/kmap.c b/trunk/arch/m68k/mm/kmap.c index b54ef1726c55..f46f049d29ff 100644 --- a/trunk/arch/m68k/mm/kmap.c +++ b/trunk/arch/m68k/mm/kmap.c @@ -7,7 +7,6 @@ * used by other architectures /Roman Zippel */ -#include #include #include #include @@ -220,7 +219,6 @@ void __iomem *__ioremap(unsigned long physaddr, unsigned long size, int cachefla return (void __iomem *)retaddr; } -EXPORT_SYMBOL(__ioremap); /* * Unmap a ioremap()ed region again @@ -236,7 +234,6 @@ void iounmap(void __iomem *addr) free_io_area((__force void *)addr); #endif } -EXPORT_SYMBOL(iounmap); /* * __iounmap unmaps nearly everything, so be careful @@ -363,4 +360,3 @@ void kernel_set_cachemode(void *addr, unsigned long size, int cmode) flush_tlb_all(); } -EXPORT_SYMBOL(kernel_set_cachemode); diff --git a/trunk/arch/m68k/mm/memory.c b/trunk/arch/m68k/mm/memory.c index 0f88812822b1..a0c095e17222 100644 --- a/trunk/arch/m68k/mm/memory.c +++ b/trunk/arch/m68k/mm/memory.c @@ -4,7 +4,6 @@ * Copyright (C) 1995 Hamish Macdonald */ -#include #include #include #include @@ -158,8 +157,9 @@ unsigned long mm_vtop(unsigned long vaddr) return -1; } -EXPORT_SYMBOL(mm_vtop); +#endif +#ifndef CONFIG_SINGLE_MEMORY_CHUNK unsigned long mm_ptov (unsigned long paddr) { int i = 0; @@ -185,7 +185,6 @@ unsigned long mm_ptov (unsigned long paddr) #endif return -1; } -EXPORT_SYMBOL(mm_ptov); #endif /* invalidate page in both caches */ @@ -299,7 +298,6 @@ void cache_clear (unsigned long paddr, int len) mach_l2_flush(0); #endif } -EXPORT_SYMBOL(cache_clear); /* probably can be unexported */ /* @@ -352,7 +350,6 @@ void cache_push (unsigned long paddr, int len) mach_l2_flush(1); #endif } -EXPORT_SYMBOL(cache_push); /* probably can be unexported */ #ifndef CONFIG_SINGLE_MEMORY_CHUNK int mm_end_of_chunk (unsigned long addr, int len) @@ -364,5 +361,4 @@ int mm_end_of_chunk (unsigned long addr, int len) return 1; return 0; } -EXPORT_SYMBOL(mm_end_of_chunk); #endif diff --git a/trunk/arch/m68k/mm/sun3kmap.c b/trunk/arch/m68k/mm/sun3kmap.c index 1af24cb5bfe1..7f0d86f3fe73 100644 --- a/trunk/arch/m68k/mm/sun3kmap.c +++ b/trunk/arch/m68k/mm/sun3kmap.c @@ -8,7 +8,6 @@ * for more details. */ -#include #include #include #include @@ -60,7 +59,7 @@ static inline void do_pmeg_mapin(unsigned long phys, unsigned long virt, } } -void __iomem *sun3_ioremap(unsigned long phys, unsigned long size, +void *sun3_ioremap(unsigned long phys, unsigned long size, unsigned long type) { struct vm_struct *area; @@ -102,24 +101,22 @@ void __iomem *sun3_ioremap(unsigned long phys, unsigned long size, virt += seg_pages * PAGE_SIZE; } - return (void __iomem *)ret; + return (void *)ret; } -void __iomem *__ioremap(unsigned long phys, unsigned long size, int cache) +void *__ioremap(unsigned long phys, unsigned long size, int cache) { return sun3_ioremap(phys, size, SUN3_PAGE_TYPE_IO); } -EXPORT_SYMBOL(__ioremap); -void iounmap(void __iomem *addr) +void iounmap(void *addr) { vfree((void *)(PAGE_MASK & (unsigned long)addr)); } -EXPORT_SYMBOL(iounmap); /* sun3_map_test(addr, val) -- Reads a byte from addr, storing to val, * trapping the potential read fault. Returns 0 if the access faulted, diff --git a/trunk/arch/m68k/sun3/Makefile b/trunk/arch/m68k/sun3/Makefile index be1a8470d636..4d4f0695d985 100644 --- a/trunk/arch/m68k/sun3/Makefile +++ b/trunk/arch/m68k/sun3/Makefile @@ -2,6 +2,6 @@ # Makefile for Linux arch/m68k/sun3 source directory # -obj-y := sun3ints.o sun3dvma.o sbus.o idprom.o +obj-y := sun3_ksyms.o sun3ints.o sun3dvma.o sbus.o idprom.o obj-$(CONFIG_SUN3) += config.o mmu_emu.o leds.o dvma.o intersil.o diff --git a/trunk/arch/m68k/sun3/idprom.c b/trunk/arch/m68k/sun3/idprom.c index dca6ab6a4ede..02c1fee6fe74 100644 --- a/trunk/arch/m68k/sun3/idprom.c +++ b/trunk/arch/m68k/sun3/idprom.c @@ -6,7 +6,6 @@ * Sun3/3x models added by David Monro (davidm@psrg.cs.usyd.edu.au) */ -#include #include #include #include @@ -17,8 +16,6 @@ #include /* Fun with Sun released architectures. */ struct idprom *idprom; -EXPORT_SYMBOL(idprom); - static struct idprom idprom_buffer; /* Here is the master table of Sun machines which use some implementation diff --git a/trunk/arch/m68k/sun3/sun3_ksyms.c b/trunk/arch/m68k/sun3/sun3_ksyms.c new file mode 100644 index 000000000000..43e5a9af8abd --- /dev/null +++ b/trunk/arch/m68k/sun3/sun3_ksyms.c @@ -0,0 +1,13 @@ +#include +#include +#include +#include + +/* + * Add things here when you find the need for it. + */ +EXPORT_SYMBOL(dvma_map_align); +EXPORT_SYMBOL(dvma_unmap); +EXPORT_SYMBOL(dvma_malloc_align); +EXPORT_SYMBOL(dvma_free); +EXPORT_SYMBOL(idprom); diff --git a/trunk/arch/m68k/sun3/sun3dvma.c b/trunk/arch/m68k/sun3/sun3dvma.c index 8709677fa025..a2bc2da7f8f0 100644 --- a/trunk/arch/m68k/sun3/sun3dvma.c +++ b/trunk/arch/m68k/sun3/sun3dvma.c @@ -6,7 +6,6 @@ * Contains common routines for sun3/sun3x DVMA management. */ -#include #include #include #include @@ -313,7 +312,6 @@ inline unsigned long dvma_map_align(unsigned long kaddr, int len, int align) BUG(); return 0; } -EXPORT_SYMBOL(dvma_map_align); void dvma_unmap(void *baddr) { @@ -329,7 +327,7 @@ void dvma_unmap(void *baddr) return; } -EXPORT_SYMBOL(dvma_unmap); + void *dvma_malloc_align(unsigned long len, unsigned long align) { @@ -369,7 +367,6 @@ void *dvma_malloc_align(unsigned long len, unsigned long align) return (void *)vaddr; } -EXPORT_SYMBOL(dvma_malloc_align); void dvma_free(void *vaddr) { @@ -377,4 +374,3 @@ void dvma_free(void *vaddr) return; } -EXPORT_SYMBOL(dvma_free); diff --git a/trunk/arch/m68knommu/kernel/syscalltable.S b/trunk/arch/m68knommu/kernel/syscalltable.S index 4603f4f3c935..617e43ec95ae 100644 --- a/trunk/arch/m68knommu/kernel/syscalltable.S +++ b/trunk/arch/m68knommu/kernel/syscalltable.S @@ -296,39 +296,10 @@ ENTRY(sys_call_table) .long sys_mq_notify /* 275 */ .long sys_mq_getsetattr .long sys_waitid - .long sys_ni_syscall /* for sys_vserver */ - .long sys_add_key - .long sys_request_key /* 280 */ - .long sys_keyctl - .long sys_ioprio_set - .long sys_ioprio_get - .long sys_inotify_init - .long sys_inotify_add_watch /* 285 */ - .long sys_inotify_rm_watch - .long sys_migrate_pages - .long sys_openat - .long sys_mkdirat - .long sys_mknodat /* 290 */ - .long sys_fchownat - .long sys_futimesat - .long sys_fstatat64 - .long sys_unlinkat - .long sys_renameat /* 295 */ - .long sys_linkat - .long sys_symlinkat - .long sys_readlinkat - .long sys_fchmodat - .long sys_faccessat /* 300 */ - .long sys_ni_syscall /* Reserved for pselect6 */ - .long sys_ni_syscall /* Reserved for ppoll */ - .long sys_unshare - .long sys_set_robust_list - .long sys_get_robust_list /* 305 */ - .long sys_splice - .long sys_sync_file_range - .long sys_tee - .long sys_vmsplice - .long sys_move_pages /* 310 */ + .long sys_ni_syscall /* sys_setaltroot */ + .long sys_ni_syscall /* sys_add_key */ + .long sys_ni_syscall /* 280 */ /* sys_request_key */ + .long sys_ni_syscall /* sys_keyctl */ .rept NR_syscalls-(.-sys_call_table)/4 .long sys_ni_syscall diff --git a/trunk/arch/mips/Makefile b/trunk/arch/mips/Makefile index 641aa30b3638..2124350ab94d 100644 --- a/trunk/arch/mips/Makefile +++ b/trunk/arch/mips/Makefile @@ -91,17 +91,8 @@ cflags-y += -ffreestanding # carefully avoid to add it redundantly because gcc 3.3/3.4 complains # when fed the toolchain default! # -# Certain gcc versions upto gcc 4.1.1 (probably 4.2-subversion as of -# 2006-10-10 don't properly change the the predefined symbols if -EB / -EL -# are used, so we kludge that here. A bug has been filed at -# http://gcc.gnu.org/bugzilla/show_bug.cgi?id=29413. -# -undef-all += -UMIPSEB -U_MIPSEB -U__MIPSEB -U__MIPSEB__ -undef-all += -UMIPSEL -U_MIPSEL -U__MIPSEL -U__MIPSEL__ -predef-be += -DMIPSEB -D_MIPSEB -D__MIPSEB -D__MIPSEB__ -predef-le += -DMIPSEL -D_MIPSEL -D__MIPSEL -D__MIPSEL__ -cflags-$(CONFIG_CPU_BIG_ENDIAN) += $(shell $(CC) -dumpmachine |grep -q 'mips.*el-.*' && echo -EB $(undef-all) $(predef-be)) -cflags-$(CONFIG_CPU_LITTLE_ENDIAN) += $(shell $(CC) -dumpmachine |grep -q 'mips.*el-.*' || echo -EL $(undef-all) $(predef-le)) +cflags-$(CONFIG_CPU_BIG_ENDIAN) += $(shell $(CC) -dumpmachine |grep -q 'mips.*el-.*' && echo -EB -D__MIPSEB__) +cflags-$(CONFIG_CPU_LITTLE_ENDIAN) += $(shell $(CC) -dumpmachine |grep -q 'mips.*el-.*' || echo -EL -D__MIPSEL__) cflags-$(CONFIG_SB1XXX_CORELIS) += $(call cc-option,-mno-sched-prolog) \ -fno-omit-frame-pointer diff --git a/trunk/arch/mips/configs/bigsur_defconfig b/trunk/arch/mips/configs/bigsur_defconfig index ba3bf733d27d..c6a015940b41 100644 --- a/trunk/arch/mips/configs/bigsur_defconfig +++ b/trunk/arch/mips/configs/bigsur_defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.19-rc1 -# Wed Oct 11 01:41:41 2006 +# Linux kernel version: 2.6.18-rc1 +# Thu Jul 6 10:02:58 2006 # CONFIG_MIPS=y @@ -25,6 +25,8 @@ CONFIG_MIPS=y # CONFIG_MIPS_COBALT is not set # CONFIG_MACH_DECSTATION is not set # CONFIG_MIPS_EV64120 is not set +# CONFIG_MIPS_IVR is not set +# CONFIG_MIPS_ITE8172 is not set # CONFIG_MACH_JAZZ is not set # CONFIG_LASAT is not set # CONFIG_MIPS_ATLAS is not set @@ -81,7 +83,6 @@ CONFIG_RWSEM_GENERIC_SPINLOCK=y CONFIG_GENERIC_FIND_NEXT_BIT=y CONFIG_GENERIC_HWEIGHT=y CONFIG_GENERIC_CALIBRATE_DELAY=y -CONFIG_GENERIC_TIME=y CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y CONFIG_DMA_COHERENT=y CONFIG_CPU_BIG_ENDIAN=y @@ -131,8 +132,8 @@ CONFIG_PAGE_SIZE_4KB=y # CONFIG_PAGE_SIZE_64KB is not set # CONFIG_SIBYTE_DMA_PAGEOPS is not set CONFIG_MIPS_MT_DISABLED=y -# CONFIG_MIPS_MT_SMP is not set # CONFIG_MIPS_MT_SMTC is not set +# CONFIG_MIPS_MT_SMP is not set # CONFIG_MIPS_VPE_LOADER is not set CONFIG_CPU_HAS_LLSC=y CONFIG_CPU_HAS_SYNC=y @@ -184,11 +185,9 @@ CONFIG_LOCALVERSION="" CONFIG_LOCALVERSION_AUTO=y CONFIG_SWAP=y CONFIG_SYSVIPC=y -# CONFIG_IPC_NS is not set # CONFIG_POSIX_MQUEUE is not set # CONFIG_BSD_PROCESS_ACCT is not set -# CONFIG_TASKSTATS is not set -# CONFIG_UTS_NS is not set +CONFIG_SYSCTL=y # CONFIG_AUDIT is not set CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y @@ -196,9 +195,7 @@ CONFIG_IKCONFIG_PROC=y # CONFIG_RELAY is not set CONFIG_INITRAMFS_SOURCE="" # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_SYSCTL=y CONFIG_EMBEDDED=y -# CONFIG_SYSCTL_SYSCALL is not set CONFIG_KALLSYMS=y # CONFIG_KALLSYMS_ALL is not set # CONFIG_KALLSYMS_EXTRA_PASS is not set @@ -207,12 +204,12 @@ CONFIG_PRINTK=y CONFIG_BUG=y CONFIG_ELF_CORE=y CONFIG_BASE_FULL=y +CONFIG_RT_MUTEXES=y CONFIG_FUTEX=y CONFIG_EPOLL=y CONFIG_SHMEM=y CONFIG_SLAB=y CONFIG_VM_EVENT_COUNTERS=y -CONFIG_RT_MUTEXES=y # CONFIG_TINY_SHMEM is not set CONFIG_BASE_SMALL=0 # CONFIG_SLOB is not set @@ -231,7 +228,6 @@ CONFIG_STOP_MACHINE=y # # Block layer # -CONFIG_BLOCK=y # CONFIG_BLK_DEV_IO_TRACE is not set # @@ -253,17 +249,18 @@ CONFIG_DEFAULT_IOSCHED="anticipatory" CONFIG_HW_HAS_PCI=y CONFIG_PCI=y CONFIG_PCI_DOMAINS=y -# CONFIG_PCI_MULTITHREAD_PROBE is not set CONFIG_PCI_DEBUG=y CONFIG_MMU=y # # PCCARD (PCMCIA/CardBus) support # +# CONFIG_PCCARD is not set # # PCI Hotplug Support # +# CONFIG_HOTPLUG_PCI is not set # # Executable file formats @@ -274,7 +271,7 @@ CONFIG_BINFMT_ELF=y CONFIG_MIPS32_COMPAT=y CONFIG_COMPAT=y CONFIG_MIPS32_O32=y -CONFIG_MIPS32_N32=y +# CONFIG_MIPS32_N32 is not set CONFIG_BINFMT_ELF32=y # @@ -291,7 +288,6 @@ CONFIG_PACKET_MMAP=y CONFIG_UNIX=y CONFIG_XFRM=y CONFIG_XFRM_USER=m -# CONFIG_XFRM_SUB_POLICY is not set CONFIG_NET_KEY=y CONFIG_INET=y # CONFIG_IP_MULTICAST is not set @@ -312,12 +308,10 @@ CONFIG_IP_PNP_BOOTP=y # CONFIG_INET_TUNNEL is not set CONFIG_INET_XFRM_MODE_TRANSPORT=m CONFIG_INET_XFRM_MODE_TUNNEL=m -CONFIG_INET_XFRM_MODE_BEET=y CONFIG_INET_DIAG=y CONFIG_INET_TCP_DIAG=y # CONFIG_TCP_CONG_ADVANCED is not set -CONFIG_TCP_CONG_CUBIC=y -CONFIG_DEFAULT_TCP_CONG="cubic" +CONFIG_TCP_CONG_BIC=y # CONFIG_IPV6 is not set # CONFIG_INET6_XFRM_TUNNEL is not set # CONFIG_INET6_TUNNEL is not set @@ -347,6 +341,7 @@ CONFIG_NETWORK_SECMARK=y # CONFIG_ATALK is not set # CONFIG_X25 is not set # CONFIG_LAPB is not set +# CONFIG_NET_DIVERT is not set # CONFIG_ECONET is not set # CONFIG_WAN_ROUTER is not set @@ -373,6 +368,7 @@ CONFIG_NETWORK_SECMARK=y # CONFIG_STANDALONE=y CONFIG_PREVENT_FIRMWARE_BUILD=y +# CONFIG_FW_LOADER is not set # CONFIG_DEBUG_DRIVER is not set # CONFIG_SYS_HYPERVISOR is not set @@ -408,7 +404,7 @@ CONFIG_BLK_DEV_LOOP=m CONFIG_BLK_DEV_NBD=m # CONFIG_BLK_DEV_SX8 is not set # CONFIG_BLK_DEV_RAM is not set -CONFIG_BLK_DEV_INITRD=y +# CONFIG_BLK_DEV_INITRD is not set # CONFIG_CDROM_PKTCDVD is not set # CONFIG_ATA_OVER_ETH is not set @@ -416,7 +412,6 @@ CONFIG_BLK_DEV_INITRD=y # ATA/ATAPI/MFM/RLL support # CONFIG_IDE=y -CONFIG_IDE_MAX_HWIFS=4 CONFIG_BLK_DEV_IDE=y # @@ -434,40 +429,10 @@ CONFIG_BLK_DEV_IDEFLOPPY=y # IDE chipset support/bugfixes # CONFIG_IDE_GENERIC=y -CONFIG_BLK_DEV_IDEPCI=y -# CONFIG_IDEPCI_SHARE_IRQ is not set -# CONFIG_BLK_DEV_OFFBOARD is not set -CONFIG_BLK_DEV_GENERIC=y -# CONFIG_BLK_DEV_OPTI621 is not set -CONFIG_BLK_DEV_IDEDMA_PCI=y -# CONFIG_BLK_DEV_IDEDMA_FORCED is not set -# CONFIG_IDEDMA_PCI_AUTO is not set -# CONFIG_BLK_DEV_AEC62XX is not set -# CONFIG_BLK_DEV_ALI15X3 is not set -# CONFIG_BLK_DEV_AMD74XX is not set -CONFIG_BLK_DEV_CMD64X=y -# CONFIG_BLK_DEV_TRIFLEX is not set -# CONFIG_BLK_DEV_CY82C693 is not set -# CONFIG_BLK_DEV_CS5520 is not set -# CONFIG_BLK_DEV_CS5530 is not set -# CONFIG_BLK_DEV_HPT34X is not set -# CONFIG_BLK_DEV_HPT366 is not set -# CONFIG_BLK_DEV_JMICRON is not set -# CONFIG_BLK_DEV_SC1200 is not set -# CONFIG_BLK_DEV_PIIX is not set -# CONFIG_BLK_DEV_IT821X is not set -# CONFIG_BLK_DEV_NS87415 is not set -# CONFIG_BLK_DEV_PDC202XX_OLD is not set -# CONFIG_BLK_DEV_PDC202XX_NEW is not set -# CONFIG_BLK_DEV_SVWKS is not set -# CONFIG_BLK_DEV_SIIMAGE is not set -# CONFIG_BLK_DEV_SLC90E66 is not set -# CONFIG_BLK_DEV_TRM290 is not set -# CONFIG_BLK_DEV_VIA82CXXX is not set +# CONFIG_BLK_DEV_IDEPCI is not set # CONFIG_BLK_DEV_IDE_SWARM is not set # CONFIG_IDE_ARM is not set -CONFIG_BLK_DEV_IDEDMA=y -# CONFIG_IDEDMA_IVB is not set +# CONFIG_BLK_DEV_IDEDMA is not set # CONFIG_IDEDMA_AUTO is not set # CONFIG_BLK_DEV_HD is not set @@ -476,12 +441,6 @@ CONFIG_BLK_DEV_IDEDMA=y # # CONFIG_RAID_ATTRS is not set # CONFIG_SCSI is not set -# CONFIG_SCSI_NETLINK is not set - -# -# Serial ATA (prod) and Parallel ATA (experimental) drivers -# -# CONFIG_ATA is not set # # Multi-device support (RAID and LVM) @@ -557,7 +516,6 @@ CONFIG_NET_SB1250_MAC=y # CONFIG_SK98LIN is not set # CONFIG_TIGON3 is not set # CONFIG_BNX2 is not set -# CONFIG_QLA3XXX is not set # # Ethernet (10000 Mbit) @@ -692,6 +650,7 @@ CONFIG_I2C_CHARDEV=y # CONFIG_I2C_ALGOBIT is not set # CONFIG_I2C_ALGOPCF is not set # CONFIG_I2C_ALGOPCA is not set +CONFIG_I2C_ALGO_SIBYTE=y # # I2C Hardware Bus support @@ -753,12 +712,12 @@ CONFIG_I2C_DEBUG_CHIP=y # # Misc devices # -# CONFIG_TIFM_CORE is not set # # Multimedia devices # # CONFIG_VIDEO_DEV is not set +CONFIG_VIDEO_V4L2=y # # Digital Video Broadcasting Devices @@ -770,7 +729,6 @@ CONFIG_I2C_DEBUG_CHIP=y # # CONFIG_FIRMWARE_EDID is not set # CONFIG_FB is not set -# CONFIG_BACKLIGHT_LCD_SUPPORT is not set # # Sound @@ -853,7 +811,6 @@ CONFIG_FS_MBCACHE=y # CONFIG_JFS_FS is not set CONFIG_FS_POSIX_ACL=y # CONFIG_XFS_FS is not set -# CONFIG_GFS2_FS is not set # CONFIG_OCFS2_FS is not set # CONFIG_MINIX_FS is not set # CONFIG_ROMFS_FS is not set @@ -883,10 +840,8 @@ CONFIG_DNOTIFY=y # CONFIG_PROC_FS=y CONFIG_PROC_KCORE=y -CONFIG_PROC_SYSCTL=y CONFIG_SYSFS=y -CONFIG_TMPFS=y -# CONFIG_TMPFS_POSIX_ACL is not set +# CONFIG_TMPFS is not set # CONFIG_HUGETLB_PAGE is not set CONFIG_RAMFS=y # CONFIG_CONFIGFS_FS is not set @@ -896,7 +851,6 @@ CONFIG_RAMFS=y # # CONFIG_ADFS_FS is not set # CONFIG_AFFS_FS is not set -# CONFIG_ECRYPT_FS is not set # CONFIG_HFS_FS is not set # CONFIG_HFSPLUS_FS is not set # CONFIG_BEFS_FS is not set @@ -927,6 +881,7 @@ CONFIG_SUNRPC=y # CONFIG_RPCSEC_GSS_SPKM3 is not set # CONFIG_SMB_FS is not set # CONFIG_CIFS is not set +# CONFIG_CIFS_DEBUG2 is not set # CONFIG_NCP_FS is not set # CONFIG_CODA_FS is not set # CONFIG_AFS_FS is not set @@ -943,10 +898,6 @@ CONFIG_MSDOS_PARTITION=y # # CONFIG_NLS is not set -# -# Distributed Lock Manager -# - # # Profiling support # @@ -956,8 +907,7 @@ CONFIG_MSDOS_PARTITION=y # Kernel hacking # CONFIG_TRACE_IRQFLAGS_SUPPORT=y -# CONFIG_PRINTK_TIME is not set -CONFIG_ENABLE_MUST_CHECK=y +CONFIG_PRINTK_TIME=y CONFIG_MAGIC_SYSRQ=y # CONFIG_UNUSED_SYMBOLS is not set CONFIG_DEBUG_KERNEL=y @@ -970,15 +920,12 @@ CONFIG_DETECT_SOFTLOCKUP=y # CONFIG_DEBUG_SPINLOCK is not set CONFIG_DEBUG_MUTEXES=y # CONFIG_DEBUG_RWSEMS is not set -# CONFIG_DEBUG_LOCK_ALLOC is not set -# CONFIG_PROVE_LOCKING is not set # CONFIG_DEBUG_SPINLOCK_SLEEP is not set # CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set # CONFIG_DEBUG_KOBJECT is not set # CONFIG_DEBUG_INFO is not set # CONFIG_DEBUG_FS is not set # CONFIG_DEBUG_VM is not set -# CONFIG_DEBUG_LIST is not set CONFIG_FORCED_INLINING=y # CONFIG_RCU_TORTURE_TEST is not set CONFIG_CROSSCOMPILE=y @@ -999,10 +946,6 @@ CONFIG_KEYS_DEBUG_PROC_KEYS=y # Cryptographic options # CONFIG_CRYPTO=y -CONFIG_CRYPTO_ALGAPI=y -CONFIG_CRYPTO_BLKCIPHER=m -CONFIG_CRYPTO_HASH=y -CONFIG_CRYPTO_MANAGER=m CONFIG_CRYPTO_HMAC=y CONFIG_CRYPTO_NULL=y CONFIG_CRYPTO_MD4=y @@ -1012,12 +955,9 @@ CONFIG_CRYPTO_SHA256=y CONFIG_CRYPTO_SHA512=y CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_TGR192=m -CONFIG_CRYPTO_ECB=m -CONFIG_CRYPTO_CBC=m CONFIG_CRYPTO_DES=y CONFIG_CRYPTO_BLOWFISH=y CONFIG_CRYPTO_TWOFISH=y -CONFIG_CRYPTO_TWOFISH_COMMON=y CONFIG_CRYPTO_SERPENT=y CONFIG_CRYPTO_AES=m # CONFIG_CRYPTO_CAST5 is not set diff --git a/trunk/arch/mips/jazz/setup.c b/trunk/arch/mips/jazz/setup.c index d848f1a07786..6dc4135d6e11 100644 --- a/trunk/arch/mips/jazz/setup.c +++ b/trunk/arch/mips/jazz/setup.c @@ -37,7 +37,7 @@ extern void jazz_machine_restart(char *command); extern void jazz_machine_halt(void); extern void jazz_machine_power_off(void); -void __init plat_timer_setup(struct irqaction *irq) +void __init plat_time_init(struct irqaction *irq) { /* set the clock to 100 Hz */ r4030_write_reg32(JAZZ_TIMER_INTERVAL, 9); diff --git a/trunk/arch/mips/kernel/smp.c b/trunk/arch/mips/kernel/smp.c index db80957ada89..1af3612a1ce8 100644 --- a/trunk/arch/mips/kernel/smp.c +++ b/trunk/arch/mips/kernel/smp.c @@ -310,7 +310,7 @@ static void flush_tlb_all_ipi(void *info) void flush_tlb_all(void) { - on_each_cpu(flush_tlb_all_ipi, NULL, 1, 1); + on_each_cpu(flush_tlb_all_ipi, 0, 1, 1); } static void flush_tlb_mm_ipi(void *mm) diff --git a/trunk/arch/mips/sgi-ip27/ip27-klnuma.c b/trunk/arch/mips/sgi-ip27/ip27-klnuma.c index f9f404a8ddad..d777b7d1a9fe 100644 --- a/trunk/arch/mips/sgi-ip27/ip27-klnuma.c +++ b/trunk/arch/mips/sgi-ip27/ip27-klnuma.c @@ -26,7 +26,7 @@ static cpumask_t ktext_repmask; * kernel. For example, we should never put a copy on a headless node, * and we should respect the topology of the machine. */ -void __init setup_replication_mask(void) +void __init setup_replication_mask() { cnodeid_t cnode; diff --git a/trunk/arch/mips/sibyte/bcm1480/smp.c b/trunk/arch/mips/sibyte/bcm1480/smp.c index bf328277c775..6eac36d1b8c8 100644 --- a/trunk/arch/mips/sibyte/bcm1480/smp.c +++ b/trunk/arch/mips/sibyte/bcm1480/smp.c @@ -34,21 +34,21 @@ extern void smp_call_function_interrupt(void); * independent of board/firmware */ -static volatile void *mailbox_0_set_regs[] = { +static void *mailbox_0_set_regs[] = { IOADDR(A_BCM1480_IMR_CPU0_BASE + R_BCM1480_IMR_MAILBOX_0_SET_CPU), IOADDR(A_BCM1480_IMR_CPU1_BASE + R_BCM1480_IMR_MAILBOX_0_SET_CPU), IOADDR(A_BCM1480_IMR_CPU2_BASE + R_BCM1480_IMR_MAILBOX_0_SET_CPU), IOADDR(A_BCM1480_IMR_CPU3_BASE + R_BCM1480_IMR_MAILBOX_0_SET_CPU), }; -static volatile void *mailbox_0_clear_regs[] = { +static void *mailbox_0_clear_regs[] = { IOADDR(A_BCM1480_IMR_CPU0_BASE + R_BCM1480_IMR_MAILBOX_0_CLR_CPU), IOADDR(A_BCM1480_IMR_CPU1_BASE + R_BCM1480_IMR_MAILBOX_0_CLR_CPU), IOADDR(A_BCM1480_IMR_CPU2_BASE + R_BCM1480_IMR_MAILBOX_0_CLR_CPU), IOADDR(A_BCM1480_IMR_CPU3_BASE + R_BCM1480_IMR_MAILBOX_0_CLR_CPU), }; -static volatile void *mailbox_0_regs[] = { +static void *mailbox_0_regs[] = { IOADDR(A_BCM1480_IMR_CPU0_BASE + R_BCM1480_IMR_MAILBOX_0_CPU), IOADDR(A_BCM1480_IMR_CPU1_BASE + R_BCM1480_IMR_MAILBOX_0_CPU), IOADDR(A_BCM1480_IMR_CPU2_BASE + R_BCM1480_IMR_MAILBOX_0_CPU), diff --git a/trunk/arch/parisc/kernel/parisc_ksyms.c b/trunk/arch/parisc/kernel/parisc_ksyms.c index 8f6a0b312f7a..6d57553d8ef8 100644 --- a/trunk/arch/parisc/kernel/parisc_ksyms.c +++ b/trunk/arch/parisc/kernel/parisc_ksyms.c @@ -69,6 +69,10 @@ EXPORT_SYMBOL(memcpy_toio); EXPORT_SYMBOL(memcpy_fromio); EXPORT_SYMBOL(memset_io); +#include +EXPORT_SYMBOL(sys_lseek); +EXPORT_SYMBOL(sys_write); + #include EXPORT_SYMBOL(__up); EXPORT_SYMBOL(__down_interruptible); diff --git a/trunk/arch/powerpc/mm/mem.c b/trunk/arch/powerpc/mm/mem.c index d1c0758c5611..16fe027bbc12 100644 --- a/trunk/arch/powerpc/mm/mem.c +++ b/trunk/arch/powerpc/mm/mem.c @@ -307,12 +307,11 @@ void __init paging_init(void) top_of_ram, total_ram); printk(KERN_DEBUG "Memory hole size: %ldMB\n", (top_of_ram - total_ram) >> 20); - memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); #ifdef CONFIG_HIGHMEM - max_zone_pfns[ZONE_DMA] = total_lowmem >> PAGE_SHIFT; - max_zone_pfns[ZONE_HIGHMEM] = top_of_ram >> PAGE_SHIFT; + max_zone_pfns[0] = total_lowmem >> PAGE_SHIFT; + max_zone_pfns[1] = top_of_ram >> PAGE_SHIFT; #else - max_zone_pfns[ZONE_DMA] = top_of_ram >> PAGE_SHIFT; + max_zone_pfns[0] = top_of_ram >> PAGE_SHIFT; #endif free_area_init_nodes(max_zone_pfns); } diff --git a/trunk/arch/powerpc/mm/numa.c b/trunk/arch/powerpc/mm/numa.c index 9da01dc8cfd9..43c272075e1a 100644 --- a/trunk/arch/powerpc/mm/numa.c +++ b/trunk/arch/powerpc/mm/numa.c @@ -617,9 +617,9 @@ void __init do_init_bootmem(void) void __init paging_init(void) { - unsigned long max_zone_pfns[MAX_NR_ZONES]; - memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); - max_zone_pfns[ZONE_DMA] = lmb_end_of_DRAM() >> PAGE_SHIFT; + unsigned long max_zone_pfns[MAX_NR_ZONES] = { + lmb_end_of_DRAM() >> PAGE_SHIFT + }; free_area_init_nodes(max_zone_pfns); } diff --git a/trunk/arch/ppc/mm/init.c b/trunk/arch/ppc/mm/init.c index c374e53ae03a..410200046af1 100644 --- a/trunk/arch/ppc/mm/init.c +++ b/trunk/arch/ppc/mm/init.c @@ -374,12 +374,11 @@ void __init paging_init(void) end_pfn = start_pfn + (total_memory >> PAGE_SHIFT); add_active_range(0, start_pfn, end_pfn); - memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); #ifdef CONFIG_HIGHMEM - max_zone_pfns[ZONE_DMA] = total_lowmem >> PAGE_SHIFT; - max_zone_pfns[ZONE_HIGHMEM] = total_memory >> PAGE_SHIFT; + max_zone_pfns[0] = total_lowmem >> PAGE_SHIFT; + max_zone_pfns[1] = total_memory >> PAGE_SHIFT; #else - max_zone_pfns[ZONE_DMA] = total_memory >> PAGE_SHIFT; + max_zone_pfns[0] = total_memory >> PAGE_SHIFT; #endif /* CONFIG_HIGHMEM */ free_area_init_nodes(max_zone_pfns); } diff --git a/trunk/arch/s390/appldata/appldata_base.c b/trunk/arch/s390/appldata/appldata_base.c index 45c9fa7d7545..2b1e6c9a6e0e 100644 --- a/trunk/arch/s390/appldata/appldata_base.c +++ b/trunk/arch/s390/appldata/appldata_base.c @@ -109,7 +109,7 @@ static LIST_HEAD(appldata_ops_list); * * schedule work and reschedule timer */ -static void appldata_timer_function(unsigned long data) +static void appldata_timer_function(unsigned long data, struct pt_regs *regs) { P_DEBUG(" -= Timer =-\n"); P_DEBUG("CPU: %i, expire_count: %i\n", smp_processor_id(), diff --git a/trunk/arch/s390/kernel/s390_ext.c b/trunk/arch/s390/kernel/s390_ext.c index 4faf96f8a834..c49ab8c784d2 100644 --- a/trunk/arch/s390/kernel/s390_ext.c +++ b/trunk/arch/s390/kernel/s390_ext.c @@ -117,8 +117,8 @@ void do_extint(struct pt_regs *regs, unsigned short code) int index; struct pt_regs *old_regs; - old_regs = set_irq_regs(regs); irq_enter(); + old_regs = set_irq_regs(regs); asm volatile ("mc 0,0"); if (S390_lowcore.int_clock >= S390_lowcore.jiffy_timer) /** @@ -134,8 +134,8 @@ void do_extint(struct pt_regs *regs, unsigned short code) p->handler(code); } } - irq_exit(); set_irq_regs(old_regs); + irq_exit(); } EXPORT_SYMBOL(register_external_interrupt); diff --git a/trunk/arch/s390/kernel/s390_ksyms.c b/trunk/arch/s390/kernel/s390_ksyms.c index 90b5ef529eb7..9f19e833a562 100644 --- a/trunk/arch/s390/kernel/s390_ksyms.c +++ b/trunk/arch/s390/kernel/s390_ksyms.c @@ -51,3 +51,4 @@ EXPORT_SYMBOL(csum_fold); EXPORT_SYMBOL(console_mode); EXPORT_SYMBOL(console_devno); EXPORT_SYMBOL(console_irq); +EXPORT_SYMBOL(sys_wait4); diff --git a/trunk/arch/s390/kernel/stacktrace.c b/trunk/arch/s390/kernel/stacktrace.c index 0d14a4789bf2..d9428a0fc8fb 100644 --- a/trunk/arch/s390/kernel/stacktrace.c +++ b/trunk/arch/s390/kernel/stacktrace.c @@ -62,26 +62,27 @@ static inline unsigned long save_context_stack(struct stack_trace *trace, void save_stack_trace(struct stack_trace *trace, struct task_struct *task) { register unsigned long sp asm ("15"); - unsigned long orig_sp, new_sp; + unsigned long orig_sp; - orig_sp = sp & PSW_ADDR_INSN; + sp &= PSW_ADDR_INSN; + orig_sp = sp; - new_sp = save_context_stack(trace, &trace->skip, orig_sp, + sp = save_context_stack(trace, &trace->skip, sp, S390_lowcore.panic_stack - PAGE_SIZE, S390_lowcore.panic_stack); - if ((new_sp != orig_sp) && !trace->all_contexts) + if ((sp != orig_sp) && !trace->all_contexts) return; - new_sp = save_context_stack(trace, &trace->skip, new_sp, + sp = save_context_stack(trace, &trace->skip, sp, S390_lowcore.async_stack - ASYNC_SIZE, S390_lowcore.async_stack); - if ((new_sp != orig_sp) && !trace->all_contexts) + if ((sp != orig_sp) && !trace->all_contexts) return; if (task) - save_context_stack(trace, &trace->skip, new_sp, + save_context_stack(trace, &trace->skip, sp, (unsigned long) task_stack_page(task), (unsigned long) task_stack_page(task) + THREAD_SIZE); else - save_context_stack(trace, &trace->skip, new_sp, + save_context_stack(trace, &trace->skip, sp, S390_lowcore.thread_info, S390_lowcore.thread_info + THREAD_SIZE); return; diff --git a/trunk/arch/s390/kernel/vtime.c b/trunk/arch/s390/kernel/vtime.c index 21baaf5496d6..1d7d3938b2b1 100644 --- a/trunk/arch/s390/kernel/vtime.c +++ b/trunk/arch/s390/kernel/vtime.c @@ -209,11 +209,11 @@ static void list_add_sorted(struct vtimer_list *timer, struct list_head *head) * Do the callback functions of expired vtimer events. * Called from within the interrupt handler. */ -static void do_callbacks(struct list_head *cb_list) +static void do_callbacks(struct list_head *cb_list, struct pt_regs *regs) { struct vtimer_queue *vt_list; struct vtimer_list *event, *tmp; - void (*fn)(unsigned long); + void (*fn)(unsigned long, struct pt_regs*); unsigned long data; if (list_empty(cb_list)) @@ -224,7 +224,7 @@ static void do_callbacks(struct list_head *cb_list) list_for_each_entry_safe(event, tmp, cb_list, entry) { fn = event->function; data = event->data; - fn(data); + fn(data, regs); if (!event->interval) /* delete one shot timer */ @@ -275,7 +275,7 @@ static void do_cpu_timer_interrupt(__u16 error_code) list_move_tail(&event->entry, &cb_list); } spin_unlock(&vt_list->lock); - do_callbacks(&cb_list); + do_callbacks(&cb_list, get_irq_regs()); /* next event is first in list */ spin_lock(&vt_list->lock); diff --git a/trunk/arch/sh/Kconfig b/trunk/arch/sh/Kconfig index 6a461d4caeff..f6a0c4436168 100644 --- a/trunk/arch/sh/Kconfig +++ b/trunk/arch/sh/Kconfig @@ -45,9 +45,6 @@ config GENERIC_CALIBRATE_DELAY config GENERIC_IOMAP bool -config GENERIC_TIME - def_bool n - config ARCH_MAY_HAVE_PC_FDC bool @@ -360,7 +357,6 @@ config CPU_HAS_SR_RB endmenu menu "Timer support" -depends on !GENERIC_TIME config SH_TMU bool "TMU timer support" diff --git a/trunk/arch/sh/boards/hp6xx/hp6xx_apm.c b/trunk/arch/sh/boards/hp6xx/hp6xx_apm.c index 219179114f0f..75f91aaae077 100644 --- a/trunk/arch/sh/boards/hp6xx/hp6xx_apm.c +++ b/trunk/arch/sh/boards/hp6xx/hp6xx_apm.c @@ -83,7 +83,7 @@ static int hp6x0_apm_get_info(char *buf, char **start, off_t fpos, int length) return p - buf; } -static irqreturn_t hp6x0_apm_interrupt(int irq, void *dev) +static irqreturn_t hp6x0_apm_interrupt(int irq, void *dev, struct pt_regs *regs) { if (!apm_suspended) apm_queue_event(APM_USER_SUSPEND); @@ -96,7 +96,7 @@ static int __init hp6x0_apm_init(void) int ret; ret = request_irq(HP680_BTN_IRQ, hp6x0_apm_interrupt, - IRQF_DISABLED, MODNAME, 0); + SA_INTERRUPT, MODNAME, 0); if (unlikely(ret < 0)) { printk(KERN_ERR MODNAME ": IRQ %d request failed\n", HP680_BTN_IRQ); diff --git a/trunk/arch/sh/boards/landisk/landisk_pwb.c b/trunk/arch/sh/boards/landisk/landisk_pwb.c index e62524978160..0b7bee1a9ca5 100644 --- a/trunk/arch/sh/boards/landisk/landisk_pwb.c +++ b/trunk/arch/sh/boards/landisk/landisk_pwb.c @@ -135,7 +135,7 @@ static int swdrv_write(struct file *filp, const char *buff, size_t count, return count; } -static irqreturn_t sw_interrupt(int irq, void *dev_id) +static irqreturn_t sw_interrupt(int irq, void *dev_id, struct pt_regs *regs) { landisk_btn = (0x0ff & (~ctrl_inb(PA_STATUS))); disable_irq(IRQ_BUTTON); diff --git a/trunk/arch/sh/boards/mpc1211/setup.c b/trunk/arch/sh/boards/mpc1211/setup.c index 7c3d1d304157..01c10fa5c058 100644 --- a/trunk/arch/sh/boards/mpc1211/setup.c +++ b/trunk/arch/sh/boards/mpc1211/setup.c @@ -69,6 +69,7 @@ static void __init pci_write_config(unsigned long busNo, static unsigned char m_irq_mask = 0xfb; static unsigned char s_irq_mask = 0xff; +volatile unsigned long irq_err_count; static void disable_mpc1211_irq(unsigned int irq) { @@ -117,7 +118,7 @@ static void mask_and_ack_mpc1211(unsigned int irq) if(irq < 8) { if(m_irq_mask & (1< #include #include +#include +#include #ifdef CONFIG_SH_R7780MP static int mask_pos[] = {12, 11, 9, 14, 15, 8, 13, 6, 5, 4, 3, 2, 0, 0, 1, 0}; @@ -18,26 +20,71 @@ static int mask_pos[] = {12, 11, 9, 14, 15, 8, 13, 6, 5, 4, 3, 2, 0, 0, 1, 0}; static int mask_pos[] = {15, 14, 13, 12, 11, 10, 9, 8, 7, 5, 6, 4, 0, 1, 2, 0}; #endif -static void enable_r7780rp_irq(unsigned int irq) +static void enable_r7780rp_irq(unsigned int irq); +static void disable_r7780rp_irq(unsigned int irq); + +/* shutdown is same as "disable" */ +#define shutdown_r7780rp_irq disable_r7780rp_irq + +static void ack_r7780rp_irq(unsigned int irq); +static void end_r7780rp_irq(unsigned int irq); + +static unsigned int startup_r7780rp_irq(unsigned int irq) { - /* Set priority in IPR back to original value */ - ctrl_outw(ctrl_inw(IRLCNTR1) | (1 << mask_pos[irq]), IRLCNTR1); + enable_r7780rp_irq(irq); + return 0; /* never anything pending */ } static void disable_r7780rp_irq(unsigned int irq) { + unsigned short val; + unsigned short mask = 0xffff ^ (0x0001 << mask_pos[irq]); + /* Set the priority in IPR to 0 */ - ctrl_outw(ctrl_inw(IRLCNTR1) & (0xffff ^ (1 << mask_pos[irq])), - IRLCNTR1); + val = ctrl_inw(IRLCNTR1); + val &= mask; + ctrl_outw(val, IRLCNTR1); } -static struct irq_chip r7780rp_irq_chip __read_mostly = { - .name = "r7780rp", - .mask = disable_r7780rp_irq, - .unmask = enable_r7780rp_irq, - .mask_ack = disable_r7780rp_irq, +static void enable_r7780rp_irq(unsigned int irq) +{ + unsigned short val; + unsigned short value = (0x0001 << mask_pos[irq]); + + /* Set priority in IPR back to original value */ + val = ctrl_inw(IRLCNTR1); + val |= value; + ctrl_outw(val, IRLCNTR1); +} + +static void ack_r7780rp_irq(unsigned int irq) +{ + disable_r7780rp_irq(irq); +} + +static void end_r7780rp_irq(unsigned int irq) +{ + if (!(irq_desc[irq].status & (IRQ_DISABLED|IRQ_INPROGRESS))) + enable_r7780rp_irq(irq); +} + +static struct hw_interrupt_type r7780rp_irq_type = { + .typename = "R7780RP-IRQ", + .startup = startup_r7780rp_irq, + .shutdown = shutdown_r7780rp_irq, + .enable = enable_r7780rp_irq, + .disable = disable_r7780rp_irq, + .ack = ack_r7780rp_irq, + .end = end_r7780rp_irq, }; +static void make_r7780rp_irq(unsigned int irq) +{ + disable_irq_nosync(irq); + irq_desc[irq].chip = &r7780rp_irq_type; + disable_r7780rp_irq(irq); +} + /* * Initialize IRQ setting */ @@ -45,10 +92,24 @@ void __init init_r7780rp_IRQ(void) { int i; - for (i = 0; i < 15; i++) { - disable_irq_nosync(i); - set_irq_chip_and_handler(i, &r7780rp_irq_chip, - handle_level_irq); - enable_r7780rp_irq(i); - } + /* IRL0=PCI Slot #A + * IRL1=PCI Slot #B + * IRL2=PCI Slot #C + * IRL3=PCI Slot #D + * IRL4=CF Card + * IRL5=CF Card Insert + * IRL6=M66596 + * IRL7=SD Card + * IRL8=Touch Panel + * IRL9=SCI + * IRL10=Serial + * IRL11=Extention #A + * IRL11=Extention #B + * IRL12=Debug LAN + * IRL13=Push Switch + * IRL14=ZiggBee IO + */ + + for (i=0; i<15; i++) + make_r7780rp_irq(i); } diff --git a/trunk/arch/sh/boards/snapgear/setup.c b/trunk/arch/sh/boards/snapgear/setup.c index 540d0bf16446..f5e98c56b530 100644 --- a/trunk/arch/sh/boards/snapgear/setup.c +++ b/trunk/arch/sh/boards/snapgear/setup.c @@ -33,7 +33,7 @@ extern void pcibios_init(void); * EraseConfig handling functions */ -static irqreturn_t eraseconfig_interrupt(int irq, void *dev_id) +static irqreturn_t eraseconfig_interrupt(int irq, void *dev_id, struct pt_regs *regs) { volatile char dummy __attribute__((unused)) = * (volatile char *) 0xb8000000; diff --git a/trunk/arch/sh/cchips/hd6446x/hd64461/setup.c b/trunk/arch/sh/cchips/hd6446x/hd64461/setup.c index 4d49b5cbcc13..38f1e8171a3a 100644 --- a/trunk/arch/sh/cchips/hd6446x/hd64461/setup.c +++ b/trunk/arch/sh/cchips/hd6446x/hd64461/setup.c @@ -71,7 +71,7 @@ static struct hw_interrupt_type hd64461_irq_type = { .end = end_hd64461_irq, }; -static irqreturn_t hd64461_interrupt(int irq, void *dev_id) +static irqreturn_t hd64461_interrupt(int irq, void *dev_id, struct pt_regs *regs) { printk(KERN_INFO "HD64461: spurious interrupt, nirr: 0x%x nimr: 0x%x\n", diff --git a/trunk/arch/sh/cchips/hd6446x/hd64465/gpio.c b/trunk/arch/sh/cchips/hd6446x/hd64465/gpio.c index 43431855ec86..72320d02d69a 100644 --- a/trunk/arch/sh/cchips/hd6446x/hd64465/gpio.c +++ b/trunk/arch/sh/cchips/hd6446x/hd64465/gpio.c @@ -85,7 +85,7 @@ static struct { void *dev; } handlers[GPIO_NPORTS * 8]; -static irqreturn_t hd64465_gpio_interrupt(int irq, void *dev) +static irqreturn_t hd64465_gpio_interrupt(int irq, void *dev, struct pt_regs *regs) { unsigned short port, pin, isr, mask, portpin; diff --git a/trunk/arch/sh/cchips/hd6446x/hd64465/setup.c b/trunk/arch/sh/cchips/hd6446x/hd64465/setup.c index d126e1f30dee..30573d3e1966 100644 --- a/trunk/arch/sh/cchips/hd6446x/hd64465/setup.c +++ b/trunk/arch/sh/cchips/hd6446x/hd64465/setup.c @@ -84,7 +84,7 @@ static struct hw_interrupt_type hd64465_irq_type = { }; -static irqreturn_t hd64465_interrupt(int irq, void *dev_id) +static irqreturn_t hd64465_interrupt(int irq, void *dev_id, struct pt_regs *regs) { printk(KERN_INFO "HD64465: spurious interrupt, nirr: 0x%x nimr: 0x%x\n", diff --git a/trunk/arch/sh/cchips/voyagergx/irq.c b/trunk/arch/sh/cchips/voyagergx/irq.c index bf1b28feca06..392c8b12ce36 100644 --- a/trunk/arch/sh/cchips/voyagergx/irq.c +++ b/trunk/arch/sh/cchips/voyagergx/irq.c @@ -88,7 +88,8 @@ static struct hw_interrupt_type voyagergx_irq_type = { .end = end_voyagergx_irq, }; -static irqreturn_t voyagergx_interrupt(int irq, void *dev_id) +static irqreturn_t voyagergx_interrupt(int irq, void *dev_id, + struct pt_regs *regs) { printk(KERN_INFO "VoyagerGX: spurious interrupt, status: 0x%x\n", diff --git a/trunk/arch/sh/drivers/dma/dma-g2.c b/trunk/arch/sh/drivers/dma/dma-g2.c index 0caf11bb7e27..9cb070924180 100644 --- a/trunk/arch/sh/drivers/dma/dma-g2.c +++ b/trunk/arch/sh/drivers/dma/dma-g2.c @@ -51,7 +51,7 @@ static volatile struct g2_dma_info *g2_dma = (volatile struct g2_dma_info *)0xa0 ((g2_dma->channel[i].size - \ g2_dma->status[i].size) & 0x0fffffff) -static irqreturn_t g2_dma_interrupt(int irq, void *dev_id) +static irqreturn_t g2_dma_interrupt(int irq, void *dev_id, struct pt_regs *regs) { int i; diff --git a/trunk/arch/sh/drivers/dma/dma-pvr2.c b/trunk/arch/sh/drivers/dma/dma-pvr2.c index 838fad566eaf..c1b6bc23c107 100644 --- a/trunk/arch/sh/drivers/dma/dma-pvr2.c +++ b/trunk/arch/sh/drivers/dma/dma-pvr2.c @@ -21,7 +21,7 @@ static unsigned int xfer_complete; static int count; -static irqreturn_t pvr2_dma_interrupt(int irq, void *dev_id) +static irqreturn_t pvr2_dma_interrupt(int irq, void *dev_id, struct pt_regs *regs) { if (get_dma_residue(PVR2_CASCADE_CHAN)) { printk(KERN_WARNING "DMA: SH DMAC did not complete transfer " diff --git a/trunk/arch/sh/drivers/dma/dma-sh.c b/trunk/arch/sh/drivers/dma/dma-sh.c index d8ece20bb2cf..cbbe8bce3d67 100644 --- a/trunk/arch/sh/drivers/dma/dma-sh.c +++ b/trunk/arch/sh/drivers/dma/dma-sh.c @@ -60,9 +60,9 @@ static inline unsigned int calc_xmit_shift(struct dma_channel *chan) * Besides that it needs to waken any waiting process, which should handle * setting up the next transfer. */ -static irqreturn_t dma_tei(int irq, void *dev_id) +static irqreturn_t dma_tei(int irq, void *dev_id, struct pt_regs *regs) { - struct dma_channel *chan = dev_id; + struct dma_channel *chan = (struct dma_channel *)dev_id; u32 chcr; chcr = ctrl_inl(CHCR[chan->chan]); @@ -228,7 +228,7 @@ static inline int dmaor_reset(void) } #if defined(CONFIG_CPU_SH4) -static irqreturn_t dma_err(int irq, void *dummy) +static irqreturn_t dma_err(int irq, void *dev_id, struct pt_regs *regs) { dmaor_reset(); disable_irq(irq); diff --git a/trunk/arch/sh/drivers/pci/pci-sh7751.c b/trunk/arch/sh/drivers/pci/pci-sh7751.c index 85e1ee2e2e7b..dbe837884983 100644 --- a/trunk/arch/sh/drivers/pci/pci-sh7751.c +++ b/trunk/arch/sh/drivers/pci/pci-sh7751.c @@ -155,7 +155,7 @@ int __init sh7751_pcic_init(struct sh4_pci_address_map *map) */ pr_debug("PCI: Mapping IO address 0x%x - 0x%x to base 0x%x\n", PCIBIOS_MIN_IO, (64 << 10), - SH7751_PCI_IO_BASE + PCIBIOS_MIN_IO); + SH4_PCI_IO_BASE + PCIBIOS_MIN_IO); /* * XXX: For now, leave this board-specific. In the event we have other @@ -163,7 +163,7 @@ int __init sh7751_pcic_init(struct sh4_pci_address_map *map) */ #ifdef CONFIG_SH_BIGSUR bigsur_port_map(PCIBIOS_MIN_IO, (64 << 10), - SH7751_PCI_IO_BASE + PCIBIOS_MIN_IO, 0); + SH4_PCI_IO_BASE + PCIBIOS_MIN_IO, 0); #endif /* Make sure the MSB's of IO window are set to access PCI space diff --git a/trunk/arch/sh/drivers/pci/pci-st40.c b/trunk/arch/sh/drivers/pci/pci-st40.c index efecb3d5995c..4ab5ea6b35fb 100644 --- a/trunk/arch/sh/drivers/pci/pci-st40.c +++ b/trunk/arch/sh/drivers/pci/pci-st40.c @@ -161,7 +161,7 @@ static char * pci_commands[16]={ "Memory Write-and-Invalidate" }; -static irqreturn_t st40_pci_irq(int irq, void *dev_instance) +static irqreturn_t st40_pci_irq(int irq, void *dev_instance, struct pt_regs *regs) { unsigned pci_int, pci_air, pci_cir, pci_aint; static int count=0; diff --git a/trunk/arch/sh/kernel/cpu/irq/intc2.c b/trunk/arch/sh/kernel/cpu/irq/intc2.c index d4b2bb7e08c7..e30e4b7aa70e 100644 --- a/trunk/arch/sh/kernel/cpu/irq/intc2.c +++ b/trunk/arch/sh/kernel/cpu/irq/intc2.c @@ -10,32 +10,93 @@ * These are the "new Hitachi style" interrupts, as present on the * Hitachi 7751, the STM ST40 STB1, SH7760, and SH7780. */ + #include #include #include #include #include +#include + +struct intc2_data { + unsigned char msk_offset; + unsigned char msk_shift; + + int (*clear_irq) (int); +}; + +static struct intc2_data intc2_data[NR_INTC2_IRQS]; + +static void enable_intc2_irq(unsigned int irq); +static void disable_intc2_irq(unsigned int irq); + +/* shutdown is same as "disable" */ +#define shutdown_intc2_irq disable_intc2_irq + +static void mask_and_ack_intc2(unsigned int); +static void end_intc2_irq(unsigned int irq); + +static unsigned int startup_intc2_irq(unsigned int irq) +{ + enable_intc2_irq(irq); + return 0; /* never anything pending */ +} + +static struct hw_interrupt_type intc2_irq_type = { + .typename = "INTC2-IRQ", + .startup = startup_intc2_irq, + .shutdown = shutdown_intc2_irq, + .enable = enable_intc2_irq, + .disable = disable_intc2_irq, + .ack = mask_and_ack_intc2, + .end = end_intc2_irq +}; static void disable_intc2_irq(unsigned int irq) { - struct intc2_data *p = get_irq_chip_data(irq); - ctrl_outl(1 << p->msk_shift, - INTC2_BASE + INTC2_INTMSK_OFFSET + p->msk_offset); + int irq_offset = irq - INTC2_FIRST_IRQ; + int msk_shift, msk_offset; + + /* Sanity check */ + if (unlikely(irq_offset < 0 || irq_offset >= NR_INTC2_IRQS)) + return; + + msk_shift = intc2_data[irq_offset].msk_shift; + msk_offset = intc2_data[irq_offset].msk_offset; + + ctrl_outl(1 << msk_shift, + INTC2_BASE + INTC2_INTMSK_OFFSET + msk_offset); } static void enable_intc2_irq(unsigned int irq) { - struct intc2_data *p = get_irq_chip_data(irq); - ctrl_outl(1 << p->msk_shift, - INTC2_BASE + INTC2_INTMSKCLR_OFFSET + p->msk_offset); + int irq_offset = irq - INTC2_FIRST_IRQ; + int msk_shift, msk_offset; + + /* Sanity check */ + if (unlikely(irq_offset < 0 || irq_offset >= NR_INTC2_IRQS)) + return; + + msk_shift = intc2_data[irq_offset].msk_shift; + msk_offset = intc2_data[irq_offset].msk_offset; + + ctrl_outl(1 << msk_shift, + INTC2_BASE + INTC2_INTMSKCLR_OFFSET + msk_offset); } -static struct irq_chip intc2_irq_chip = { - .typename = "intc2", - .mask = disable_intc2_irq, - .unmask = enable_intc2_irq, - .mask_ack = disable_intc2_irq, -}; +static void mask_and_ack_intc2(unsigned int irq) +{ + disable_intc2_irq(irq); +} + +static void end_intc2_irq(unsigned int irq) +{ + if (!(irq_desc[irq].status & (IRQ_DISABLED|IRQ_INPROGRESS))) + enable_intc2_irq(irq); + + if (unlikely(intc2_data[irq - INTC2_FIRST_IRQ].clear_irq)) + intc2_data[irq - INTC2_FIRST_IRQ].clear_irq(irq); +} /* * Setup an INTC2 style interrupt. @@ -47,30 +108,46 @@ static struct irq_chip intc2_irq_chip = { * | | | | * make_intc2_irq(84, 0, 16, 0, 13); */ -void make_intc2_irq(struct intc2_data *p) +void make_intc2_irq(unsigned int irq, + unsigned int ipr_offset, unsigned int ipr_shift, + unsigned int msk_offset, unsigned int msk_shift, + unsigned int priority) { + int irq_offset = irq - INTC2_FIRST_IRQ; unsigned int flags; unsigned long ipr; - disable_irq_nosync(p->irq); + if (unlikely(irq_offset < 0 || irq_offset >= NR_INTC2_IRQS)) + return; + + disable_irq_nosync(irq); + + /* Fill the data we need */ + intc2_data[irq_offset].msk_offset = msk_offset; + intc2_data[irq_offset].msk_shift = msk_shift; + intc2_data[irq_offset].clear_irq = NULL; /* Set the priority level */ local_irq_save(flags); - ipr = ctrl_inl(INTC2_BASE + INTC2_INTPRI_OFFSET + p->ipr_offset); - ipr &= ~(0xf << p->ipr_shift); - ipr |= p->priority << p->ipr_shift; - ctrl_outl(ipr, INTC2_BASE + INTC2_INTPRI_OFFSET + p->ipr_offset); + ipr = ctrl_inl(INTC2_BASE + INTC2_INTPRI_OFFSET + ipr_offset); + ipr &= ~(0xf << ipr_shift); + ipr |= priority << ipr_shift; + ctrl_outl(ipr, INTC2_BASE + INTC2_INTPRI_OFFSET + ipr_offset); local_irq_restore(flags); - set_irq_chip_and_handler(p->irq, &intc2_irq_chip, handle_level_irq); - set_irq_chip_data(p->irq, p); + irq_desc[irq].chip = &intc2_irq_type; - enable_intc2_irq(p->irq); + disable_intc2_irq(irq); } -static struct intc2_data intc2_irq_table[] = { +static struct intc2_init { + unsigned short irq; + unsigned char ipr_offset, ipr_shift; + unsigned char msk_offset, msk_shift; + unsigned char priority; +} intc2_init_data[] __initdata = { #if defined(CONFIG_CPU_SUBTYPE_ST40) {64, 0, 0, 0, 0, 13}, /* PCI serr */ {65, 0, 4, 0, 1, 13}, /* PCI err */ @@ -189,6 +266,19 @@ void __init init_IRQ_intc2(void) { int i; - for (i = 0; i < ARRAY_SIZE(intc2_irq_table); i++) - make_intc2_irq(intc2_irq_table + i); + for (i = 0; i < ARRAY_SIZE(intc2_init_data); i++) { + struct intc2_init *p = intc2_init_data + i; + make_intc2_irq(p->irq, p->ipr_offset, p->ipr_shift, + p-> msk_offset, p->msk_shift, p->priority); + } +} + +/* Adds a termination callback to the interrupt */ +void intc2_add_clear_irq(int irq, int (*fn)(int)) +{ + if (unlikely(irq < INTC2_FIRST_IRQ)) + return; + + intc2_data[irq - INTC2_FIRST_IRQ].clear_irq = fn; } + diff --git a/trunk/arch/sh/kernel/cpu/irq/ipr.c b/trunk/arch/sh/kernel/cpu/irq/ipr.c index 8944abdf6e1c..f785822cd5de 100644 --- a/trunk/arch/sh/kernel/cpu/irq/ipr.c +++ b/trunk/arch/sh/kernel/cpu/irq/ipr.c @@ -1,10 +1,11 @@ /* - * Interrupt handling for IPR-based IRQ. + * arch/sh/kernel/cpu/irq/ipr.c * * Copyright (C) 1999 Niibe Yutaka & Takeshi Yaegashi * Copyright (C) 2000 Kazumoto Kojima - * Copyright (C) 2003 Takashi Kusuda - * Copyright (C) 2006 Paul Mundt + * Copyright (C) 2003 Takashi Kusuda + * + * Interrupt handling for IPR-based IRQ. * * Supported system: * On-chip supporting modules (TMU, RTC, etc.). @@ -12,13 +13,12 @@ * Hitachi SolutionEngine external I/O: * MS7709SE01, MS7709ASE01, and MS7750SE01 * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. */ + #include #include #include + #include #include #include @@ -28,45 +28,93 @@ struct ipr_data { int shift; /* Shifts of the 16-bit data */ int priority; /* The priority */ }; +static struct ipr_data ipr_data[NR_IRQS]; + +static void enable_ipr_irq(unsigned int irq); +static void disable_ipr_irq(unsigned int irq); + +/* shutdown is same as "disable" */ +#define shutdown_ipr_irq disable_ipr_irq + +static void mask_and_ack_ipr(unsigned int); +static void end_ipr_irq(unsigned int irq); + +static unsigned int startup_ipr_irq(unsigned int irq) +{ + enable_ipr_irq(irq); + return 0; /* never anything pending */ +} + +static struct hw_interrupt_type ipr_irq_type = { + .typename = "IPR-IRQ", + .startup = startup_ipr_irq, + .shutdown = shutdown_ipr_irq, + .enable = enable_ipr_irq, + .disable = disable_ipr_irq, + .ack = mask_and_ack_ipr, + .end = end_ipr_irq +}; static void disable_ipr_irq(unsigned int irq) { - struct ipr_data *p = get_irq_chip_data(irq); + unsigned long val; + unsigned int addr = ipr_data[irq].addr; + unsigned short mask = 0xffff ^ (0x0f << ipr_data[irq].shift); + /* Set the priority in IPR to 0 */ - ctrl_outw(ctrl_inw(p->addr) & (0xffff ^ (0xf << p->shift)), p->addr); + val = ctrl_inw(addr); + val &= mask; + ctrl_outw(val, addr); } static void enable_ipr_irq(unsigned int irq) { - struct ipr_data *p = get_irq_chip_data(irq); + unsigned long val; + unsigned int addr = ipr_data[irq].addr; + int priority = ipr_data[irq].priority; + unsigned short value = (priority << ipr_data[irq].shift); + /* Set priority in IPR back to original value */ - ctrl_outw(ctrl_inw(p->addr) | (p->priority << p->shift), p->addr); + val = ctrl_inw(addr); + val |= value; + ctrl_outw(val, addr); } -static struct irq_chip ipr_irq_chip = { - .name = "ipr", - .mask = disable_ipr_irq, - .unmask = enable_ipr_irq, - .mask_ack = disable_ipr_irq, -}; - -void make_ipr_irq(unsigned int irq, unsigned int addr, int pos, int priority) +static void mask_and_ack_ipr(unsigned int irq) { - struct ipr_data ipr_data; + disable_ipr_irq(irq); - disable_irq_nosync(irq); +#if defined(CONFIG_CPU_SUBTYPE_SH7707) || defined(CONFIG_CPU_SUBTYPE_SH7709) || \ + defined(CONFIG_CPU_SUBTYPE_SH7706) || \ + defined(CONFIG_CPU_SUBTYPE_SH7300) || defined(CONFIG_CPU_SUBTYPE_SH7705) + /* This is needed when we use edge triggered setting */ + /* XXX: Is it really needed? */ + if (IRQ0_IRQ <= irq && irq <= IRQ5_IRQ) { + /* Clear external interrupt request */ + int a = ctrl_inb(INTC_IRR0); + a &= ~(1 << (irq - IRQ0_IRQ)); + ctrl_outb(a, INTC_IRR0); + } +#endif +} - ipr_data.addr = addr; - ipr_data.shift = pos*4; /* POSition (0-3) x 4 means shift */ - ipr_data.priority = priority; +static void end_ipr_irq(unsigned int irq) +{ + if (!(irq_desc[irq].status & (IRQ_DISABLED|IRQ_INPROGRESS))) + enable_ipr_irq(irq); +} - set_irq_chip_and_handler(irq, &ipr_irq_chip, handle_level_irq); - set_irq_chip_data(irq, &ipr_data); +void make_ipr_irq(unsigned int irq, unsigned int addr, int pos, int priority) +{ + disable_irq_nosync(irq); + ipr_data[irq].addr = addr; + ipr_data[irq].shift = pos*4; /* POSition (0-3) x 4 means shift */ + ipr_data[irq].priority = priority; - enable_ipr_irq(irq); + irq_desc[irq].chip = &ipr_irq_type; + disable_ipr_irq(irq); } -/* XXX: This needs to die a horrible death.. */ void __init init_IRQ(void) { #ifndef CONFIG_CPU_SUBTYPE_SH7780 diff --git a/trunk/arch/sh/kernel/cpu/sh3/ex.S b/trunk/arch/sh/kernel/cpu/sh3/ex.S index 6be46f0686b7..44daf44833f9 100644 --- a/trunk/arch/sh/kernel/cpu/sh3/ex.S +++ b/trunk/arch/sh/kernel/cpu/sh3/ex.S @@ -49,3 +49,198 @@ ENTRY(nmi_slot) #endif ENTRY(user_break_point_trap) .long break_point_trap /* 1E0 */ +ENTRY(interrupt_table) + ! external hardware + .long do_IRQ ! 0000 /* 200 */ + .long do_IRQ ! 0001 + .long do_IRQ ! 0010 + .long do_IRQ ! 0011 + .long do_IRQ ! 0100 + .long do_IRQ ! 0101 + .long do_IRQ ! 0110 + .long do_IRQ ! 0111 + .long do_IRQ ! 1000 /* 300 */ + .long do_IRQ ! 1001 + .long do_IRQ ! 1010 + .long do_IRQ ! 1011 + .long do_IRQ ! 1100 + .long do_IRQ ! 1101 + .long do_IRQ ! 1110 + .long exception_error + ! Internal hardware + .long do_IRQ ! TMU0 tuni0 /* 400 */ + .long do_IRQ ! TMU1 tuni1 + .long do_IRQ ! TMU2 tuni2 + .long do_IRQ ! ticpi2 + .long do_IRQ ! RTC ati + .long do_IRQ ! pri + .long do_IRQ ! cui + .long do_IRQ ! SCI eri + .long do_IRQ ! rxi /* 500 */ + .long do_IRQ ! txi + .long do_IRQ ! tei + .long do_IRQ ! WDT iti /* 560 */ + .long do_IRQ ! REF rcmi + .long do_IRQ ! rovi + .long do_IRQ + .long do_IRQ /* 5E0 */ +#if defined(CONFIG_CPU_SUBTYPE_SH7707) || \ + defined(CONFIG_CPU_SUBTYPE_SH7709) || \ + defined(CONFIG_CPU_SUBTYPE_SH7706) || \ + defined(CONFIG_CPU_SUBTYPE_SH7300) || \ + defined(CONFIG_CPU_SUBTYPE_SH7705) || \ + defined(CONFIG_CPU_SUBTYPE_SH7710) + .long do_IRQ ! 32 IRQ irq0 /* 600 */ + .long do_IRQ ! 33 irq1 + .long do_IRQ ! 34 irq2 + .long do_IRQ ! 35 irq3 + .long do_IRQ ! 36 irq4 + .long do_IRQ ! 37 irq5 + .long do_IRQ ! 38 + .long do_IRQ ! 39 + .long do_IRQ ! 40 PINT pint0-7 /* 700 */ + .long do_IRQ ! 41 pint8-15 + .long do_IRQ ! 42 + .long do_IRQ ! 43 + .long do_IRQ ! 44 + .long do_IRQ ! 45 + .long do_IRQ ! 46 + .long do_IRQ ! 47 + .long do_IRQ ! 48 DMAC dei0 /* 800 */ + .long do_IRQ ! 49 dei1 + .long do_IRQ ! 50 dei2 + .long do_IRQ ! 51 dei3 + .long do_IRQ ! 52 IrDA eri1 + .long do_IRQ ! 53 rxi1 + .long do_IRQ ! 54 bri1 + .long do_IRQ ! 55 txi1 + .long do_IRQ ! 56 SCIF eri2 + .long do_IRQ ! 57 rxi2 + .long do_IRQ ! 58 bri2 + .long do_IRQ ! 59 txi2 + .long do_IRQ ! 60 ADC adi /* 980 */ +#if defined(CONFIG_CPU_SUBTYPE_SH7705) + .long exception_none ! 61 /* 9A0 */ + .long exception_none ! 62 + .long exception_none ! 63 + .long exception_none ! 64 /* A00 */ + .long do_IRQ ! 65 USB usi0 + .long do_IRQ ! 66 usi1 + .long exception_none ! 67 + .long exception_none ! 68 + .long exception_none ! 69 + .long exception_none ! 70 + .long exception_none ! 71 + .long exception_none ! 72 /* B00 */ + .long exception_none ! 73 + .long exception_none ! 74 + .long exception_none ! 75 + .long exception_none ! 76 + .long exception_none ! 77 + .long exception_none ! 78 + .long exception_none ! 79 + .long do_IRQ ! 80 TPU0 tpi0 /* C00 */ + .long do_IRQ ! 81 TPU1 tpi1 + .long exception_none ! 82 + .long exception_none ! 83 + .long do_IRQ ! 84 TPU2 tpi2 + .long do_IRQ ! 85 TPU3 tpi3 /* CA0 */ +#endif +#if defined(CONFIG_CPU_SUBTYPE_SH7707) || defined(CONFIG_CPU_SUBTYPE_SH7300) + .long do_IRQ ! 61 LCDC lcdi /* 9A0 */ + .long do_IRQ ! 62 PCC pcc0i + .long do_IRQ ! 63 pcc1i /* 9E0 */ +#endif +#if defined(CONFIG_CPU_SUBTYPE_SH7710) + .long exception_none ! 61 /* 9A0 */ + .long exception_none ! 62 + .long exception_none ! 63 + .long exception_none ! 64 /* A00 */ + .long exception_none ! 65 + .long exception_none ! 66 + .long exception_none ! 67 + .long exception_none ! 68 + .long exception_none ! 69 + .long exception_none ! 70 + .long exception_none ! 71 + .long exception_none ! 72 /* B00 */ + .long exception_none ! 73 + .long exception_none ! 74 + .long exception_none ! 75 + .long do_IRQ ! 76 DMAC2 dei4 /* B80 */ + .long do_IRQ ! 77 DMAC2 dei5 + .long exception_none ! 78 + .long do_IRQ ! 79 IPSEC ipseci /* BE0 */ + .long do_IRQ ! 80 EDMAC eint0 /* C00 */ + .long do_IRQ ! 81 EDMAC eint1 + .long do_IRQ ! 82 EDMAC eint2 + .long exception_none ! 83 /* C60 */ + .long exception_none ! 84 + .long exception_none ! 85 + .long exception_none ! 86 + .long exception_none ! 87 + .long exception_none ! 88 /* D00 */ + .long exception_none ! 89 + .long exception_none ! 90 + .long exception_none ! 91 + .long exception_none ! 92 + .long exception_none ! 93 + .long exception_none ! 94 + .long exception_none ! 95 + .long do_IRQ ! 96 SIOF eri0 /* E00 */ + .long do_IRQ ! 97 txi0 + .long do_IRQ ! 98 rxi0 + .long do_IRQ ! 99 cci0 + .long do_IRQ ! 100 eri1 /* E80 */ + .long do_IRQ ! 101 txi1 + .long do_IRQ ! 102 rxi2 + .long do_IRQ ! 103 cci3 +#endif +#if defined(CONFIG_CPU_SUBTYPE_SH7300) + .long do_IRQ ! 64 + .long do_IRQ ! 65 + .long do_IRQ ! 66 + .long do_IRQ ! 67 + .long do_IRQ ! 68 + .long do_IRQ ! 69 + .long do_IRQ ! 70 + .long do_IRQ ! 71 + .long do_IRQ ! 72 + .long do_IRQ ! 73 + .long do_IRQ ! 74 + .long do_IRQ ! 75 + .long do_IRQ ! 76 + .long do_IRQ ! 77 + .long do_IRQ ! 78 + .long do_IRQ ! 79 + .long do_IRQ ! 80 SCIF0(SH7300) + .long do_IRQ ! 81 + .long do_IRQ ! 82 + .long do_IRQ ! 83 + .long do_IRQ ! 84 + .long do_IRQ ! 85 + .long do_IRQ ! 86 + .long do_IRQ ! 87 + .long do_IRQ ! 88 + .long do_IRQ ! 89 + .long do_IRQ ! 90 + .long do_IRQ ! 91 + .long do_IRQ ! 92 + .long do_IRQ ! 93 + .long do_IRQ ! 94 + .long do_IRQ ! 95 + .long do_IRQ ! 96 + .long do_IRQ ! 97 + .long do_IRQ ! 98 + .long do_IRQ ! 99 + .long do_IRQ ! 100 + .long do_IRQ ! 101 + .long do_IRQ ! 102 + .long do_IRQ ! 103 + .long do_IRQ ! 104 + .long do_IRQ ! 105 + .long do_IRQ ! 106 + .long do_IRQ ! 107 + .long do_IRQ ! 108 +#endif +#endif diff --git a/trunk/arch/sh/kernel/cpu/sh4/ex.S b/trunk/arch/sh/kernel/cpu/sh4/ex.S index 3f4cd043e900..7146893a6cca 100644 --- a/trunk/arch/sh/kernel/cpu/sh4/ex.S +++ b/trunk/arch/sh/kernel/cpu/sh4/ex.S @@ -53,3 +53,503 @@ ENTRY(nmi_slot) #endif ENTRY(user_break_point_trap) .long break_point_trap /* 1E0 */ +ENTRY(interrupt_table) + ! external hardware + .long do_IRQ ! 0000 /* 200 */ + .long do_IRQ ! 0001 + .long do_IRQ ! 0010 + .long do_IRQ ! 0011 + .long do_IRQ ! 0100 + .long do_IRQ ! 0101 + .long do_IRQ ! 0110 + .long do_IRQ ! 0111 + .long do_IRQ ! 1000 /* 300 */ + .long do_IRQ ! 1001 + .long do_IRQ ! 1010 + .long do_IRQ ! 1011 + .long do_IRQ ! 1100 + .long do_IRQ ! 1101 + .long do_IRQ ! 1110 + .long exception_error + ! Internal hardware +#ifndef CONFIG_CPU_SUBTYPE_SH7780 + .long do_IRQ ! TMU0 tuni0 /* 400 */ + .long do_IRQ ! TMU1 tuni1 + .long do_IRQ ! TMU2 tuni2 + .long do_IRQ ! ticpi2 +#if defined(CONFIG_CPU_SUBTYPE_SH7760) + .long exception_error + .long exception_error + .long exception_error + .long exception_error + .long exception_error /* 500 */ + .long exception_error + .long exception_error +#else + .long do_IRQ ! RTC ati + .long do_IRQ ! pri + .long do_IRQ ! cui + .long do_IRQ ! SCI eri + .long do_IRQ ! rxi /* 500 */ + .long do_IRQ ! txi + .long do_IRQ ! tei +#endif + .long do_IRQ ! WDT iti /* 560 */ + .long do_IRQ ! REF rcmi + .long do_IRQ ! rovi + .long do_IRQ + .long do_IRQ /* 5E0 */ + .long do_IRQ ! 32 Hitachi UDI /* 600 */ + .long do_IRQ ! 33 GPIO + .long do_IRQ ! 34 DMAC dmte0 + .long do_IRQ ! 35 dmte1 + .long do_IRQ ! 36 dmte2 + .long do_IRQ ! 37 dmte3 + .long do_IRQ ! 38 dmae + .long exception_error ! 39 /* 6E0 */ +#if defined(CONFIG_CPU_SUBTYPE_SH7760) + .long exception_error /* 700 */ + .long exception_error + .long exception_error + .long exception_error /* 760 */ +#else + .long do_IRQ ! 40 SCIF eri /* 700 */ + .long do_IRQ ! 41 rxi + .long do_IRQ ! 42 bri + .long do_IRQ ! 43 txi +#endif +#if CONFIG_NR_ONCHIP_DMA_CHANNELS == 8 + .long do_IRQ ! 44 DMAC dmte4 /* 780 */ + .long do_IRQ ! 45 dmte5 + .long do_IRQ ! 46 dmte6 + .long do_IRQ ! 47 dmte7 /* 7E0 */ +#elif defined(CONFIG_CPU_SUBTYPE_SH7343) + .long do_IRQ ! 44 IIC1 ali /* 780 */ + .long do_IRQ ! 45 tacki + .long do_IRQ ! 46 waiti + .long do_IRQ ! 47 dtei /* 7E0 */ + .long do_IRQ ! 48 DMAC dei0 /* 800 */ + .long do_IRQ ! 49 dei1 /* 820 */ +#else + .long exception_error ! 44 /* 780 */ + .long exception_error ! 45 + .long exception_error ! 46 + .long exception_error ! 47 +#endif +#if defined(CONFIG_SH_FPU) + .long do_fpu_state_restore ! 48 /* 800 */ + .long do_fpu_state_restore ! 49 /* 820 */ +#elif !defined(CONFIG_CPU_SUBTYPE_SH7343) && \ + !defined(CONFIG_CPU_SUBTYPE_SH73180) + .long exception_error + .long exception_error +#endif +#if defined(CONFIG_CPU_SUBTYPE_SH7751) + .long exception_error /* 840 */ + .long exception_error + .long exception_error + .long exception_error + .long exception_error + .long exception_error + .long exception_error /* 900 */ + .long exception_error + .long exception_error + .long exception_error + .long exception_error + .long exception_error + .long exception_error + .long exception_error + .long do_IRQ ! PCI serr /* A00 */ + .long do_IRQ ! dma3 + .long do_IRQ ! dma2 + .long do_IRQ ! dma1 + .long do_IRQ ! dma0 + .long do_IRQ ! pwon + .long do_IRQ ! pwdwn + .long do_IRQ ! err + .long do_IRQ ! TMU3 tuni3 /* B00 */ + .long exception_error + .long exception_error + .long exception_error + .long do_IRQ ! TMU4 tuni4 /* B80 */ +#elif defined(CONFIG_CPU_SUBTYPE_SH7760) + .long do_IRQ ! IRQ irq6 /* 840 */ + .long do_IRQ ! irq7 + .long do_IRQ ! SCIF eri0 + .long do_IRQ ! rxi0 + .long do_IRQ ! bri0 + .long do_IRQ ! txi0 + .long do_IRQ ! HCAN2 cani0 /* 900 */ + .long do_IRQ ! cani1 + .long do_IRQ ! SSI ssii0 + .long do_IRQ ! ssii1 + .long do_IRQ ! HAC haci0 + .long do_IRQ ! haci1 + .long do_IRQ ! IIC iici0 + .long do_IRQ ! iici1 + .long do_IRQ ! USB usbi /* A00 */ + .long do_IRQ ! LCDC vint + .long exception_error + .long exception_error + .long do_IRQ ! DMABRG dmabrgi0 + .long do_IRQ ! dmabrgi1 + .long do_IRQ ! dmabrgi2 + .long exception_error + .long do_IRQ ! SCIF eri1 /* B00 */ + .long do_IRQ ! rxi1 + .long do_IRQ ! bri1 + .long do_IRQ ! txi1 + .long do_IRQ ! eri2 + .long do_IRQ ! rxi2 + .long do_IRQ ! bri2 + .long do_IRQ ! txi2 + .long do_IRQ ! SIM simeri /* C00 */ + .long do_IRQ ! simrxi + .long do_IRQ ! simtxi + .long do_IRQ ! simtei + .long do_IRQ ! HSPI spii + .long exception_error + .long exception_error + .long exception_error + .long do_IRQ ! MMCIF mmci0 /* D00 */ + .long do_IRQ ! mmci1 + .long do_IRQ ! mmci2 + .long do_IRQ ! mmci3 + .long exception_error + .long exception_error + .long exception_error + .long exception_error + .long exception_error /* E00 */ + .long exception_error + .long exception_error + .long exception_error + .long do_IRQ ! MFI mfii + .long exception_error + .long exception_error + .long exception_error + .long exception_error /* F00 */ + .long exception_error + .long exception_error + .long exception_error + .long do_IRQ ! ADC adi + .long do_IRQ ! CMT cmti /* FA0 */ +#elif defined(CONFIG_CPU_SUBTYPE_SH73180) || defined(CONFIG_CPU_SUBTYPE_SH7343) + .long do_IRQ ! 50 0x840 + .long do_IRQ ! 51 0x860 + .long do_IRQ ! 52 0x880 + .long do_IRQ ! 53 0x8a0 + .long do_IRQ ! 54 0x8c0 + .long do_IRQ ! 55 0x8e0 + .long do_IRQ ! 56 0x900 + .long do_IRQ ! 57 0x920 + .long do_IRQ ! 58 0x940 + .long do_IRQ ! 59 0x960 + .long do_IRQ ! 60 0x980 + .long do_IRQ ! 61 0x9a0 + .long do_IRQ ! 62 0x9c0 + .long do_IRQ ! 63 0x9e0 + .long do_IRQ ! 64 0xa00 + .long do_IRQ ! 65 0xa20 + .long do_IRQ ! 66 0xa40 + .long do_IRQ ! 67 0xa60 + .long do_IRQ ! 68 0xa80 + .long do_IRQ ! 69 0xaa0 + .long do_IRQ ! 70 0xac0 + .long do_IRQ ! 71 0xae0 + .long do_IRQ ! 72 0xb00 + .long do_IRQ ! 73 0xb20 + .long do_IRQ ! 74 0xb40 + .long do_IRQ ! 75 0xb60 + .long do_IRQ ! 76 0xb80 + .long do_IRQ ! 77 0xba0 + .long do_IRQ ! 78 0xbc0 + .long do_IRQ ! 79 0xbe0 + .long do_IRQ ! 80 0xc00 + .long do_IRQ ! 81 0xc20 + .long do_IRQ ! 82 0xc40 + .long do_IRQ ! 83 0xc60 + .long do_IRQ ! 84 0xc80 + .long do_IRQ ! 85 0xca0 + .long do_IRQ ! 86 0xcc0 + .long do_IRQ ! 87 0xce0 + .long do_IRQ ! 88 0xd00 + .long do_IRQ ! 89 0xd20 + .long do_IRQ ! 90 0xd40 + .long do_IRQ ! 91 0xd60 + .long do_IRQ ! 92 0xd80 + .long do_IRQ ! 93 0xda0 + .long do_IRQ ! 94 0xdc0 + .long do_IRQ ! 95 0xde0 + .long do_IRQ ! 96 0xe00 + .long do_IRQ ! 97 0xe20 + .long do_IRQ ! 98 0xe40 + .long do_IRQ ! 99 0xe60 + .long do_IRQ ! 100 0xe80 + .long do_IRQ ! 101 0xea0 + .long do_IRQ ! 102 0xec0 + .long do_IRQ ! 103 0xee0 + .long do_IRQ ! 104 0xf00 + .long do_IRQ ! 105 0xf20 + .long do_IRQ ! 106 0xf40 + .long do_IRQ ! 107 0xf60 + .long do_IRQ ! 108 0xf80 +#elif defined(CONFIG_CPU_SUBTYPE_ST40STB1) + .long exception_error ! 50 0x840 + .long exception_error ! 51 0x860 + .long exception_error ! 52 0x880 + .long exception_error ! 53 0x8a0 + .long exception_error ! 54 0x8c0 + .long exception_error ! 55 0x8e0 + .long exception_error ! 56 0x900 + .long exception_error ! 57 0x920 + .long exception_error ! 58 0x940 + .long exception_error ! 59 0x960 + .long exception_error ! 60 0x980 + .long exception_error ! 61 0x9a0 + .long exception_error ! 62 0x9c0 + .long exception_error ! 63 0x9e0 + .long do_IRQ ! 64 0xa00 PCI serr + .long do_IRQ ! 65 0xa20 err + .long do_IRQ ! 66 0xa40 ad + .long do_IRQ ! 67 0xa60 pwr_dwn + .long exception_error ! 68 0xa80 + .long exception_error ! 69 0xaa0 + .long exception_error ! 70 0xac0 + .long exception_error ! 71 0xae0 + .long do_IRQ ! 72 0xb00 DMA INT0 + .long do_IRQ ! 73 0xb20 INT1 + .long do_IRQ ! 74 0xb40 INT2 + .long do_IRQ ! 75 0xb60 INT3 + .long do_IRQ ! 76 0xb80 INT4 + .long exception_error ! 77 0xba0 + .long do_IRQ ! 78 0xbc0 DMA ERR + .long exception_error ! 79 0xbe0 + .long do_IRQ ! 80 0xc00 PIO0 + .long do_IRQ ! 81 0xc20 PIO1 + .long do_IRQ ! 82 0xc40 PIO2 + .long exception_error ! 83 0xc60 + .long exception_error ! 84 0xc80 + .long exception_error ! 85 0xca0 + .long exception_error ! 86 0xcc0 + .long exception_error ! 87 0xce0 + .long exception_error ! 88 0xd00 + .long exception_error ! 89 0xd20 + .long exception_error ! 90 0xd40 + .long exception_error ! 91 0xd60 + .long exception_error ! 92 0xd80 + .long exception_error ! 93 0xda0 + .long exception_error ! 94 0xdc0 + .long exception_error ! 95 0xde0 + .long exception_error ! 96 0xe00 + .long exception_error ! 97 0xe20 + .long exception_error ! 98 0xe40 + .long exception_error ! 99 0xe60 + .long exception_error ! 100 0xe80 + .long exception_error ! 101 0xea0 + .long exception_error ! 102 0xec0 + .long exception_error ! 103 0xee0 + .long exception_error ! 104 0xf00 + .long exception_error ! 105 0xf20 + .long exception_error ! 106 0xf40 + .long exception_error ! 107 0xf60 + .long exception_error ! 108 0xf80 + .long exception_error ! 109 0xfa0 + .long exception_error ! 110 0xfc0 + .long exception_error ! 111 0xfe0 + .long do_IRQ ! 112 0x1000 Mailbox + .long exception_error ! 113 0x1020 + .long exception_error ! 114 0x1040 + .long exception_error ! 115 0x1060 + .long exception_error ! 116 0x1080 + .long exception_error ! 117 0x10a0 + .long exception_error ! 118 0x10c0 + .long exception_error ! 119 0x10e0 + .long exception_error ! 120 0x1100 + .long exception_error ! 121 0x1120 + .long exception_error ! 122 0x1140 + .long exception_error ! 123 0x1160 + .long exception_error ! 124 0x1180 + .long exception_error ! 125 0x11a0 + .long exception_error ! 126 0x11c0 + .long exception_error ! 127 0x11e0 + .long exception_error ! 128 0x1200 + .long exception_error ! 129 0x1220 + .long exception_error ! 130 0x1240 + .long exception_error ! 131 0x1260 + .long exception_error ! 132 0x1280 + .long exception_error ! 133 0x12a0 + .long exception_error ! 134 0x12c0 + .long exception_error ! 135 0x12e0 + .long exception_error ! 136 0x1300 + .long exception_error ! 137 0x1320 + .long exception_error ! 138 0x1340 + .long exception_error ! 139 0x1360 + .long do_IRQ ! 140 0x1380 EMPI INV_ADDR + .long exception_error ! 141 0x13a0 + .long exception_error ! 142 0x13c0 + .long exception_error ! 143 0x13e0 +#elif defined(CONFIG_CPU_SUBTYPE_SH7770) + .long do_IRQ ! 50 0x840 + .long do_IRQ ! 51 0x860 + .long do_IRQ ! 52 0x880 + .long do_IRQ ! 53 0x8a0 + .long do_IRQ ! 54 0x8c0 + .long do_IRQ ! 55 0x8e0 + .long do_IRQ ! 56 0x900 + .long do_IRQ ! 57 0x920 + .long do_IRQ ! 58 0x940 + .long do_IRQ ! 59 0x960 + .long do_IRQ ! 60 0x980 + .long do_IRQ ! 61 0x9a0 + .long do_IRQ ! 62 0x9c0 + .long do_IRQ ! 63 0x9e0 + .long do_IRQ ! 64 0xa00 + .long do_IRQ ! 65 0xa20 + .long do_IRQ ! 66 0xa4d + .long do_IRQ ! 67 0xa60 + .long do_IRQ ! 68 0xa80 + .long do_IRQ ! 69 0xaa0 + .long do_IRQ ! 70 0xac0 + .long do_IRQ ! 71 0xae0 + .long do_IRQ ! 72 0xb00 + .long do_IRQ ! 73 0xb20 + .long do_IRQ ! 74 0xb40 + .long do_IRQ ! 75 0xb60 + .long do_IRQ ! 76 0xb80 + .long do_IRQ ! 77 0xba0 + .long do_IRQ ! 78 0xbc0 + .long do_IRQ ! 79 0xbe0 + .long do_IRQ ! 80 0xc00 + .long do_IRQ ! 81 0xc20 + .long do_IRQ ! 82 0xc40 + .long do_IRQ ! 83 0xc60 + .long do_IRQ ! 84 0xc80 + .long do_IRQ ! 85 0xca0 + .long do_IRQ ! 86 0xcc0 + .long do_IRQ ! 87 0xce0 + .long do_IRQ ! 88 0xd00 + .long do_IRQ ! 89 0xd20 + .long do_IRQ ! 90 0xd40 + .long do_IRQ ! 91 0xd60 + .long do_IRQ ! 92 0xd80 + .long do_IRQ ! 93 0xda0 + .long do_IRQ ! 94 0xdc0 + .long do_IRQ ! 95 0xde0 + .long do_IRQ ! 96 0xe00 + .long do_IRQ ! 97 0xe20 + .long do_IRQ ! 98 0xe40 + .long do_IRQ ! 99 0xe60 + .long do_IRQ ! 100 0xe80 + .long do_IRQ ! 101 0xea0 + .long do_IRQ ! 102 0xec0 + .long do_IRQ ! 103 0xee0 + .long do_IRQ ! 104 0xf00 + .long do_IRQ ! 105 0xf20 + .long do_IRQ ! 106 0xf40 + .long do_IRQ ! 107 0xf60 + .long do_IRQ ! 108 0xf80 +#endif +#else + .long exception_error /* 400 */ + .long exception_error + .long exception_error + .long exception_error + .long do_IRQ ! RTC ati + .long do_IRQ ! pri + .long do_IRQ ! cui + .long exception_error + .long exception_error /* 500 */ + .long exception_error + .long exception_error + .long do_IRQ ! WDT iti /* 560 */ + .long do_IRQ ! TMU-ch0 + .long do_IRQ ! TMU-ch1 + .long do_IRQ ! TMU-ch2 + .long do_IRQ ! ticpi2 /* 5E0 */ + .long do_IRQ ! 32 Hitachi UDI /* 600 */ + .long exception_error + .long do_IRQ ! 34 DMAC dmte0 + .long do_IRQ ! 35 dmte1 + .long do_IRQ ! 36 dmte2 + .long do_IRQ ! 37 dmte3 + .long do_IRQ ! 38 dmae + .long exception_error ! 39 /* 6E0 */ + .long do_IRQ ! 40 SCIF-ch0 eri /* 700 */ + .long do_IRQ ! 41 rxi + .long do_IRQ ! 42 bri + .long do_IRQ ! 43 txi + .long do_IRQ ! 44 DMAC dmte4 /* 780 */ + .long do_IRQ ! 45 dmte5 + .long do_IRQ ! 46 dmte6 + .long do_IRQ ! 47 dmte7 /* 7E0 */ +#if defined(CONFIG_SH_FPU) + .long do_fpu_state_restore ! 48 /* 800 */ + .long do_fpu_state_restore ! 49 /* 820 */ +#else + .long exception_error + .long exception_error +#endif + .long exception_error /* 840 */ + .long exception_error + .long exception_error + .long exception_error + .long exception_error + .long exception_error + .long do_IRQ ! 56 CMT /* 900 */ + .long exception_error + .long exception_error + .long exception_error + .long do_IRQ ! 60 HAC + .long exception_error + .long exception_error + .long exception_error + .long do_IRQ ! PCI serr /* A00 */ + .long do_IRQ ! INTA + .long do_IRQ ! INTB + .long do_IRQ ! INTC + .long do_IRQ ! INTD + .long do_IRQ ! err + .long do_IRQ ! pwd3 + .long do_IRQ ! pwd2 + .long do_IRQ ! pwd1 /* B00 */ + .long do_IRQ ! pwd0 + .long exception_error + .long exception_error + .long do_IRQ ! SCIF-ch1 eri /* B80 */ + .long do_IRQ ! rxi + .long do_IRQ ! bri + .long do_IRQ ! txi + .long do_IRQ ! SIOF /* C00 */ + .long exception_error + .long exception_error + .long exception_error + .long do_IRQ ! HSPI /* C80 */ + .long exception_error + .long exception_error + .long exception_error + .long do_IRQ ! MMCIF fatat /* D00 */ + .long do_IRQ ! tran + .long do_IRQ ! err + .long do_IRQ ! frdy + .long do_IRQ ! DMAC dmint8 /* D80 */ + .long do_IRQ ! dmint9 + .long do_IRQ ! dmint10 + .long do_IRQ ! dmint11 + .long do_IRQ ! TMU-ch3 /* E00 */ + .long do_IRQ ! TMU-ch4 + .long do_IRQ ! TMU-ch5 + .long exception_error + .long do_IRQ ! SSI + .long exception_error + .long exception_error + .long exception_error + .long do_IRQ ! FLCTL flste /* F00 */ + .long do_IRQ ! fltend + .long do_IRQ ! fltrq0 + .long do_IRQ ! fltrq1 + .long do_IRQ ! GPIO gpioi0 /* F80 */ + .long do_IRQ ! gpioi1 + .long do_IRQ ! gpioi2 + .long do_IRQ ! gpioi3 +#endif + diff --git a/trunk/arch/sh/kernel/entry.S b/trunk/arch/sh/kernel/entry.S index 39aaefb2d83f..97c571fbcdf1 100644 --- a/trunk/arch/sh/kernel/entry.S +++ b/trunk/arch/sh/kernel/entry.S @@ -1,8 +1,9 @@ -/* +/* $Id: entry.S,v 1.37 2004/06/11 13:02:46 doyu Exp $ + * * linux/arch/sh/entry.S * * Copyright (C) 1999, 2000, 2002 Niibe Yutaka - * Copyright (C) 2003 - 2006 Paul Mundt + * Copyright (C) 2003 Paul Mundt * * This file is subject to the terms and conditions of the GNU General Public * License. See the file "COPYING" in the main directory of this archive @@ -77,6 +78,7 @@ OFF_TRA = (16*4+6*4) #define k3 r3 #define k4 r4 +#define k_ex_code r2_bank /* r2_bank1 */ #define g_imask r6 /* r6_bank1 */ #define k_g_imask r6_bank /* r6_bank1 */ #define current r7 /* r7_bank1 */ @@ -689,7 +691,7 @@ interrupt: 0: #endif /* defined(CONFIG_KGDB_NMI) */ bra handle_exception - mov #-1, k2 ! interrupt exception marker + mov.l @k2, k2 .align 2 1: .long EXPEVT @@ -715,7 +717,8 @@ ENTRY(handle_exception) add current, k1 mov k1, r15 ! change to kernel stack ! -1: mov.l 2f, k1 +1: mov #-1, k4 + mov.l 2f, k1 ! #ifdef CONFIG_SH_DSP mov.l r2, @-r15 ! Save r2, we need another reg @@ -760,8 +763,6 @@ skip_save: #endif ! Save the user registers on the stack. mov.l k2, @-r15 ! EXPEVT - - mov #-1, k4 mov.l k4, @-r15 ! set TRA (default: -1) ! sts.l macl, @-r15 @@ -796,21 +797,8 @@ skip_save: mov.l r2, @-r15 mov.l r1, @-r15 mov.l r0, @-r15 - - /* - * This gets a bit tricky.. in the INTEVT case we don't want to use - * the VBR offset as a destination in the jump call table, since all - * of the destinations are the same. In this case, (interrupt) sets - * a marker in r2 (now r2_bank since SR.RB changed), which we check - * to determine the exception type. For all other exceptions, we - * forcibly read EXPEVT from memory and fix up the jump address, in - * the interrupt exception case we jump to do_IRQ() and defer the - * INTEVT read until there. As a bonus, we can also clean up the SR.RB - * checks that do_IRQ() was doing.. - */ - stc r2_bank, r8 - cmp/pz r8 - bf interrupt_exception + ! Then, dispatch to the handler, according to the exception code. + stc k_ex_code, r8 shlr2 r8 shlr r8 mov.l 4f, r9 @@ -818,8 +806,6 @@ skip_save: mov.l @r9, r9 jmp @r9 nop - rts - nop .align 2 1: .long 0x00001000 ! DSP=1 @@ -827,17 +813,8 @@ skip_save: 3: .long 0xcfffffff ! RB=0, BL=0 4: .long exception_handling_table -interrupt_exception: - mov.l 1f, r9 - jmp @r9 - nop - rts - nop - - .align 2 -1: .long do_IRQ - .align 2 ENTRY(exception_none) rts nop + diff --git a/trunk/arch/sh/kernel/irq.c b/trunk/arch/sh/kernel/irq.c index acf2602569c4..c7ebd6aec951 100644 --- a/trunk/arch/sh/kernel/irq.c +++ b/trunk/arch/sh/kernel/irq.c @@ -11,15 +11,12 @@ #include #include #include -#include #include #include #include #include #include -atomic_t irq_err_count; - /* * 'what should we do if we get a hw irq event on an illegal vector'. * each architecture has to answer this themselves, it doesn't deserve @@ -27,7 +24,6 @@ atomic_t irq_err_count; */ void ack_bad_irq(unsigned int irq) { - atomic_inc(&irq_err_count); printk("unexpected IRQ trap at vector %02x\n", irq); } @@ -51,10 +47,8 @@ int show_interrupts(struct seq_file *p, void *v) if (!action) goto unlock; seq_printf(p, "%3d: ",i); - for_each_online_cpu(j) - seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]); - seq_printf(p, " %14s", irq_desc[i].chip->name); - seq_printf(p, "-%s", handle_irq_name(irq_desc[i].handle_irq)); + seq_printf(p, "%10u ", kstat_irqs(i)); + seq_printf(p, " %14s", irq_desc[i].chip->typename); seq_printf(p, " %s", action->name); for (action=action->next; action; action = action->next) @@ -62,9 +56,7 @@ int show_interrupts(struct seq_file *p, void *v) seq_putc(p, '\n'); unlock: spin_unlock_irqrestore(&irq_desc[i].lock, flags); - } else if (i == NR_IRQS) - seq_printf(p, "Err: %10u\n", atomic_read(&irq_err_count)); - + } return 0; } #endif @@ -86,8 +78,7 @@ asmlinkage int do_IRQ(unsigned long r4, unsigned long r5, unsigned long r6, unsigned long r7, struct pt_regs regs) { - struct pt_regs *old_regs = set_irq_regs(®s); - int irq; + int irq = r4; #ifdef CONFIG_4KSTACKS union irq_ctx *curctx, *irqctx; #endif @@ -111,9 +102,20 @@ asmlinkage int do_IRQ(unsigned long r4, unsigned long r5, #endif #ifdef CONFIG_CPU_HAS_INTEVT - irq = (ctrl_inl(INTEVT) >> 5) - 16; + __asm__ __volatile__ ( +#ifdef CONFIG_CPU_HAS_SR_RB + "stc r2_bank, %0\n\t" #else - irq = r4; + "mov.l @%1, %0\n\t" +#endif + "shlr2 %0\n\t" + "shlr2 %0\n\t" + "shlr %0\n\t" + "add #-16, %0\n\t" + : "=z" (irq), "=r" (r4) + : "1" (INTEVT) + : "memory" + ); #endif irq = irq_demux(irq); @@ -137,25 +139,25 @@ asmlinkage int do_IRQ(unsigned long r4, unsigned long r5, __asm__ __volatile__ ( "mov %0, r4 \n" + "mov %1, r5 \n" "mov r15, r9 \n" - "jsr @%1 \n" + "jsr @%2 \n" /* swith to the irq stack */ - " mov %2, r15 \n" + " mov %3, r15 \n" /* restore the stack (ring zero) */ "mov r9, r15 \n" : /* no outputs */ - : "r" (irq), "r" (generic_handle_irq), "r" (isp) + : "r" (irq), "r" (®s), "r" (__do_IRQ), "r" (isp) /* XXX: A somewhat excessive clobber list? -PFM */ : "memory", "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "t", "pr" ); } else #endif - generic_handle_irq(irq); + __do_IRQ(irq, ®s); irq_exit(); - set_irq_regs(old_regs); return 1; } diff --git a/trunk/arch/sh/kernel/process.c b/trunk/arch/sh/kernel/process.c index 91516dca4a85..0b1d5dd7a93b 100644 --- a/trunk/arch/sh/kernel/process.c +++ b/trunk/arch/sh/kernel/process.c @@ -5,7 +5,6 @@ * Copyright (C) 1995 Linus Torvalds * * SuperH version: Copyright (C) 1999, 2000 Niibe Yutaka & Kaz Kojima - * Copyright (C) 2006 Lineo Solutions Inc. support SH4A UBC */ /* @@ -291,24 +290,6 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long usp, static void ubc_set_tracing(int asid, unsigned long pc) { -#if defined(CONFIG_CPU_SH4A) - unsigned long val; - - val = (UBC_CBR_ID_INST | UBC_CBR_RW_READ | UBC_CBR_CE); - val |= (UBC_CBR_AIE | UBC_CBR_AIV_SET(asid)); - - ctrl_outl(val, UBC_CBR0); - ctrl_outl(pc, UBC_CAR0); - ctrl_outl(0x0, UBC_CAMR0); - ctrl_outl(0x0, UBC_CBCR); - - val = (UBC_CRR_RES | UBC_CRR_PCB | UBC_CRR_BIE); - ctrl_outl(val, UBC_CRR0); - - /* Read UBC register that we writed last. For chekking UBC Register changed */ - val = ctrl_inl(UBC_CRR0); - -#else /* CONFIG_CPU_SH4A */ ctrl_outl(pc, UBC_BARA); #ifdef CONFIG_MMU @@ -326,7 +307,6 @@ ubc_set_tracing(int asid, unsigned long pc) ctrl_outw(BBR_INST | BBR_READ, UBC_BBRA); ctrl_outw(BRCR_PCBA, UBC_BRCR); } -#endif /* CONFIG_CPU_SH4A */ } /* @@ -379,13 +359,8 @@ struct task_struct *__switch_to(struct task_struct *prev, struct task_struct *ne #endif ubc_set_tracing(asid, next->thread.ubc_pc); } else { -#if defined(CONFIG_CPU_SH4A) - ctrl_outl(UBC_CBR_INIT, UBC_CBR0); - ctrl_outl(UBC_CRR_INIT, UBC_CRR0); -#else ctrl_outw(0, UBC_BBRA); ctrl_outw(0, UBC_BBRB); -#endif } return prev; @@ -485,13 +460,8 @@ asmlinkage void break_point_trap(unsigned long r4, unsigned long r5, struct pt_regs regs) { /* Clear tracing. */ -#if defined(CONFIG_CPU_SH4A) - ctrl_outl(UBC_CBR_INIT, UBC_CBR0); - ctrl_outl(UBC_CRR_INIT, UBC_CRR0); -#else ctrl_outw(0, UBC_BBRA); ctrl_outw(0, UBC_BBRB); -#endif current->thread.ubc_pc = 0; ubc_usercnt -= 1; diff --git a/trunk/arch/sh/kernel/time.c b/trunk/arch/sh/kernel/time.c index 57e708d7b52d..450c68f1df05 100644 --- a/trunk/arch/sh/kernel/time.c +++ b/trunk/arch/sh/kernel/time.c @@ -47,7 +47,6 @@ unsigned long long __attribute__ ((weak)) sched_clock(void) return (unsigned long long)jiffies * (1000000000 / HZ); } -#ifndef CONFIG_GENERIC_TIME void do_gettimeofday(struct timeval *tv) { unsigned long seq; @@ -100,7 +99,6 @@ int do_settimeofday(struct timespec *tv) return 0; } EXPORT_SYMBOL(do_settimeofday); -#endif /* !CONFIG_GENERIC_TIME */ /* last time the RTC clock got updated */ static long last_rtc_update; @@ -109,14 +107,13 @@ static long last_rtc_update; * handle_timer_tick() needs to keep up the real-time clock, * as well as call the "do_timer()" routine every clocktick */ -void handle_timer_tick(void) +void handle_timer_tick(struct pt_regs *regs) { do_timer(1); #ifndef CONFIG_SMP - update_process_times(user_mode(get_irq_regs())); + update_process_times(user_mode(regs)); #endif - if (current->pid) - profile_tick(CPU_PROFILING); + profile_tick(CPU_PROFILING, regs); #ifdef CONFIG_HEARTBEAT if (sh_mv.mv_heartbeat != NULL) diff --git a/trunk/arch/sh/kernel/timers/timer-tmu.c b/trunk/arch/sh/kernel/timers/timer-tmu.c index 24927015dc31..205816fcf0da 100644 --- a/trunk/arch/sh/kernel/timers/timer-tmu.c +++ b/trunk/arch/sh/kernel/timers/timer-tmu.c @@ -80,7 +80,8 @@ static unsigned long tmu_timer_get_offset(void) return count; } -static irqreturn_t tmu_timer_interrupt(int irq, void *dummy) +static irqreturn_t tmu_timer_interrupt(int irq, void *dev_id, + struct pt_regs *regs) { unsigned long timer_status; @@ -97,7 +98,7 @@ static irqreturn_t tmu_timer_interrupt(int irq, void *dummy) * locally disabled. -arca */ write_seqlock(&xtime_lock); - handle_timer_tick(); + handle_timer_tick(regs); write_sequnlock(&xtime_lock); return IRQ_HANDLED; @@ -110,6 +111,60 @@ static struct irqaction tmu_irq = { .mask = CPU_MASK_NONE, }; +/* + * Hah! We'll see if this works (switching from usecs to nsecs). + */ +static unsigned long tmu_timer_get_frequency(void) +{ + u32 freq; + struct timespec ts1, ts2; + unsigned long diff_nsec; + unsigned long factor; + + /* Setup the timer: We don't want to generate interrupts, just + * have it count down at its natural rate. + */ + ctrl_outb(0, TMU_TSTR); +#if !defined(CONFIG_CPU_SUBTYPE_SH7300) && !defined(CONFIG_CPU_SUBTYPE_SH7760) + ctrl_outb(TMU_TOCR_INIT, TMU_TOCR); +#endif + ctrl_outw(TMU0_TCR_CALIB, TMU0_TCR); + ctrl_outl(0xffffffff, TMU0_TCOR); + ctrl_outl(0xffffffff, TMU0_TCNT); + + rtc_sh_get_time(&ts2); + + do { + rtc_sh_get_time(&ts1); + } while (ts1.tv_nsec == ts2.tv_nsec && ts1.tv_sec == ts2.tv_sec); + + /* actually start the timer */ + ctrl_outb(TMU_TSTR_INIT, TMU_TSTR); + + do { + rtc_sh_get_time(&ts2); + } while (ts1.tv_nsec == ts2.tv_nsec && ts1.tv_sec == ts2.tv_sec); + + freq = 0xffffffff - ctrl_inl(TMU0_TCNT); + if (ts2.tv_nsec < ts1.tv_nsec) { + ts2.tv_nsec += 1000000000; + ts2.tv_sec--; + } + + diff_nsec = (ts2.tv_sec - ts1.tv_sec) * 1000000000 + (ts2.tv_nsec - ts1.tv_nsec); + + /* this should work well if the RTC has a precision of n Hz, where + * n is an integer. I don't think we have to worry about the other + * cases. */ + factor = (1000000000 + diff_nsec/2) / diff_nsec; + + if (factor * diff_nsec > 1100000000 || + factor * diff_nsec < 900000000) + panic("weird RTC (diff_nsec %ld)", diff_nsec); + + return freq * factor; +} + static void tmu_clk_init(struct clk *clk) { u8 divisor = TMU0_TCR_INIT & 0x7; @@ -177,12 +232,12 @@ struct sys_timer_ops tmu_timer_ops = { .init = tmu_timer_init, .start = tmu_timer_start, .stop = tmu_timer_stop, -#ifndef CONFIG_GENERIC_TIME + .get_frequency = tmu_timer_get_frequency, .get_offset = tmu_timer_get_offset, -#endif }; struct sys_timer tmu_timer = { .name = "tmu", .ops = &tmu_timer_ops, }; + diff --git a/trunk/arch/sh/mm/consistent.c b/trunk/arch/sh/mm/consistent.c index 38c82d890ffd..c81e6b67ad30 100644 --- a/trunk/arch/sh/mm/consistent.c +++ b/trunk/arch/sh/mm/consistent.c @@ -28,7 +28,6 @@ void *consistent_alloc(gfp_t gfp, size_t size, dma_addr_t *handle) split_page(page, order); ret = page_address(page); - memset(ret, 0, size); *handle = virt_to_phys(ret); /* diff --git a/trunk/arch/sparc/kernel/pcic.c b/trunk/arch/sparc/kernel/pcic.c index 207f1b6eef53..b4e50ae323bf 100644 --- a/trunk/arch/sparc/kernel/pcic.c +++ b/trunk/arch/sparc/kernel/pcic.c @@ -34,7 +34,6 @@ #include #include #include -#include unsigned int pcic_pin_to_irq(unsigned int pin, char *name); diff --git a/trunk/arch/sparc/kernel/setup.c b/trunk/arch/sparc/kernel/setup.c index f5ee1ac834bc..0251cab4708b 100644 --- a/trunk/arch/sparc/kernel/setup.c +++ b/trunk/arch/sparc/kernel/setup.c @@ -121,6 +121,16 @@ static struct console prom_debug_console = { .index = -1, }; +int obp_system_intr(void) +{ + if (boot_flags & BOOTME_DEBUG) { + printk("OBP: system interrupted\n"); + prom_halt(); + return 1; + } + return 0; +} + /* * Process kernel command line switches that are specific to the * SPARC or that require special low-level processing. diff --git a/trunk/arch/sparc/kernel/vmlinux.lds.S b/trunk/arch/sparc/kernel/vmlinux.lds.S index 1dd78c84888a..346c19a949fd 100644 --- a/trunk/arch/sparc/kernel/vmlinux.lds.S +++ b/trunk/arch/sparc/kernel/vmlinux.lds.S @@ -36,11 +36,11 @@ SECTIONS . = ALIGN(4096); __init_begin = .; - _sinittext = .; .init.text : { + _sinittext = .; *(.init.text) + _einittext = .; } - _einittext = .; __init_text_end = .; .init.data : { *(.init.data) } . = ALIGN(16); diff --git a/trunk/arch/sparc/mm/srmmu.c b/trunk/arch/sparc/mm/srmmu.c index 0df7121cef07..b27a506309ee 100644 --- a/trunk/arch/sparc/mm/srmmu.c +++ b/trunk/arch/sparc/mm/srmmu.c @@ -402,7 +402,7 @@ void srmmu_nocache_calcsize(void) srmmu_nocache_end = SRMMU_NOCACHE_VADDR + srmmu_nocache_size; } -void __init srmmu_nocache_init(void) +void srmmu_nocache_init(void) { unsigned int bitmap_bits; pgd_t *pgd; diff --git a/trunk/arch/sparc64/kernel/setup.c b/trunk/arch/sparc64/kernel/setup.c index cc8ad480a204..958287448cfe 100644 --- a/trunk/arch/sparc64/kernel/setup.c +++ b/trunk/arch/sparc64/kernel/setup.c @@ -91,6 +91,16 @@ void kernel_enter_debugger(void) { } +int obp_system_intr(void) +{ + if (boot_flags & BOOTME_DEBUG) { + printk("OBP: system interrupted\n"); + prom_halt(); + return 1; + } + return 0; +} + /* * Process kernel command line switches that are specific to the * SPARC or that require special low-level processing. diff --git a/trunk/arch/um/Kconfig b/trunk/arch/um/Kconfig index 78fb619bdb73..d75307589d74 100644 --- a/trunk/arch/um/Kconfig +++ b/trunk/arch/um/Kconfig @@ -25,19 +25,6 @@ config PCI config PCMCIA bool -# Yet to do! -config TRACE_IRQFLAGS_SUPPORT - bool - default n - -config LOCKDEP_SUPPORT - bool - default y - -config STACKTRACE_SUPPORT - bool - default y - config GENERIC_CALIBRATE_DELAY bool default y @@ -50,15 +37,13 @@ config IRQ_RELEASE_METHOD menu "UML-specific options" config MODE_TT - bool "Tracing thread support (DEPRECATED)" + bool "Tracing thread support" default n help This option controls whether tracing thread support is compiled - into UML. This option is largely obsolete, given that skas0 provides + into UML. This option is largely obsolete, given that skas0 provides skas security and performance without needing to patch the host. - It is safe to say 'N' here; saying 'Y' may cause additional problems - with the resulting binary even if you run UML in SKAS mode, and running - in TT mode is strongly *NOT RECOMMENDED*. + It is safe to say 'N' here. config STATIC_LINK bool "Force a static link" @@ -71,9 +56,6 @@ config STATIC_LINK for use in a chroot jail. So, if you intend to run UML inside a chroot, and you disable CONFIG_MODE_TT, you probably want to say Y here. - Additionally, this option enables using higher memory spaces (up to - 2.75G) for UML - disabling CONFIG_MODE_TT and enabling this option leads - to best results for this. config KERNEL_HALF_GIGS int "Kernel address space size (in .5G units)" @@ -90,13 +72,10 @@ config MODE_SKAS default y help This option controls whether skas (separate kernel address space) - support is compiled in. - Unless you have specific needs to use TT mode (which applies almost only - to developers), you should say Y here. - SKAS mode will make use of the SKAS3 patch if it is applied on the host - (and your UML will run in SKAS3 mode), but if no SKAS patch is applied - on the host it will run in SKAS0 mode, which is anyway faster than TT - mode. + support is compiled in. If you have applied the skas patch to the + host, then you certainly want to say Y here (and consider saying N + to CONFIG_MODE_TT). Otherwise, it is safe to say Y. Disabling this + option will shrink the UML binary slightly. source "arch/um/Kconfig.arch" source "mm/Kconfig" diff --git a/trunk/arch/um/Kconfig.i386 b/trunk/arch/um/Kconfig.i386 index f191a550a079..f6eb72d117b9 100644 --- a/trunk/arch/um/Kconfig.i386 +++ b/trunk/arch/um/Kconfig.i386 @@ -16,42 +16,23 @@ config SEMAPHORE_SLEEPERS bool default y -choice - prompt "Host memory split" - default HOST_VMSPLIT_3G - ---help--- - This is needed when the host kernel on which you run has a non-default - (like 2G/2G) memory split, instead of the customary 3G/1G. If you did - not recompile your own kernel but use the default distro's one, you can - safely accept the "Default split" option. - - It can be enabled on recent (>=2.6.16-rc2) vanilla kernels via - CONFIG_VM_SPLIT_*, or on previous kernels with special patches (-ck - patchset by Con Kolivas, or other ones) - option names match closely the - host CONFIG_VM_SPLIT_* ones. - - A lower setting (where 1G/3G is lowest and 3G/1G is higher) will - tolerate even more "normal" host kernels, but an higher setting will be - stricter. - - So, if you do not know what to do here, say 'Default split'. - - config HOST_VMSPLIT_3G - bool "Default split (3G/1G user/kernel host split)" - config HOST_VMSPLIT_3G_OPT - bool "3G/1G user/kernel host split (for full 1G low memory)" - config HOST_VMSPLIT_2G - bool "2G/2G user/kernel host split" - config HOST_VMSPLIT_1G - bool "1G/3G user/kernel host split" -endchoice +config HOST_2G_2G + bool "2G/2G host address space split" + default n + help + This is needed when the host on which you run has a 2G/2G memory + split, instead of the customary 3G/1G. + + Note that to enable such a host + configuration, which makes sense only in some cases, you need special + host patches. + + So, if you do not know what to do here, say 'N'. config TOP_ADDR - hex - default 0xB0000000 if HOST_VMSPLIT_3G_OPT - default 0x78000000 if HOST_VMSPLIT_2G - default 0x40000000 if HOST_VMSPLIT_1G - default 0xC0000000 + hex + default 0xc0000000 if !HOST_2G_2G + default 0x80000000 if HOST_2G_2G config 3_LEVEL_PGTABLES bool "Three-level pagetables (EXPERIMENTAL)" diff --git a/trunk/arch/um/Makefile-x86_64 b/trunk/arch/um/Makefile-x86_64 index d278682dd799..11154b6773ec 100644 --- a/trunk/arch/um/Makefile-x86_64 +++ b/trunk/arch/um/Makefile-x86_64 @@ -1,10 +1,10 @@ # Copyright 2003 - 2004 Pathscale, Inc # Released under the GPL -core-y += arch/um/sys-x86_64/ arch/x86_64/crypto/ +core-y += arch/um/sys-x86_64/ START := 0x60000000 -_extra_flags_ = -fno-builtin -m64 +_extra_flags_ = -fno-builtin -m64 -mcmodel=kernel #We #undef __x86_64__ for kernelspace, not for userspace where #it's needed for headers to work! diff --git a/trunk/arch/um/include/common-offsets.h b/trunk/arch/um/include/common-offsets.h index 461175f8b1d9..356390d1f8b9 100644 --- a/trunk/arch/um/include/common-offsets.h +++ b/trunk/arch/um/include/common-offsets.h @@ -1,16 +1,9 @@ /* for use by sys-$SUBARCH/kernel-offsets.c */ -DEFINE(KERNEL_MADV_REMOVE, MADV_REMOVE); -#ifdef CONFIG_MODE_TT -OFFSET(HOST_TASK_EXTERN_PID, task_struct, thread.mode.tt.extern_pid); -#endif - OFFSET(HOST_TASK_REGS, task_struct, thread.regs); OFFSET(HOST_TASK_PID, task_struct, pid); - DEFINE(UM_KERN_PAGE_SIZE, PAGE_SIZE); DEFINE(UM_NSEC_PER_SEC, NSEC_PER_SEC); - DEFINE_STR(UM_KERN_EMERG, KERN_EMERG); DEFINE_STR(UM_KERN_ALERT, KERN_ALERT); DEFINE_STR(UM_KERN_CRIT, KERN_CRIT); @@ -19,10 +12,6 @@ DEFINE_STR(UM_KERN_WARNING, KERN_WARNING); DEFINE_STR(UM_KERN_NOTICE, KERN_NOTICE); DEFINE_STR(UM_KERN_INFO, KERN_INFO); DEFINE_STR(UM_KERN_DEBUG, KERN_DEBUG); - DEFINE(UM_ELF_CLASS, ELF_CLASS); DEFINE(UM_ELFCLASS32, ELFCLASS32); DEFINE(UM_ELFCLASS64, ELFCLASS64); - -/* For crypto assembler code. */ -DEFINE(crypto_tfm_ctx_offset, offsetof(struct crypto_tfm, __crt_ctx)); diff --git a/trunk/arch/um/include/kern_util.h b/trunk/arch/um/include/kern_util.h index cec9fcc57bf5..59cfa9e0cad0 100644 --- a/trunk/arch/um/include/kern_util.h +++ b/trunk/arch/um/include/kern_util.h @@ -6,6 +6,7 @@ #ifndef __KERN_UTIL_H__ #define __KERN_UTIL_H__ +#include "linux/threads.h" #include "sysdep/ptrace.h" #include "sysdep/faultinfo.h" diff --git a/trunk/arch/um/include/longjmp.h b/trunk/arch/um/include/longjmp.h index e860bc5848e0..e93c6d3e893b 100644 --- a/trunk/arch/um/include/longjmp.h +++ b/trunk/arch/um/include/longjmp.h @@ -12,8 +12,7 @@ extern void longjmp(jmp_buf, int); } while(0) #define UML_SETJMP(buf) ({ \ - int n; \ - volatile int enable; \ + int n, enable; \ enable = get_signals(); \ n = setjmp(*buf); \ if(n != 0) \ diff --git a/trunk/arch/um/include/os.h b/trunk/arch/um/include/os.h index 6516f6dca96d..120ca21a513a 100644 --- a/trunk/arch/um/include/os.h +++ b/trunk/arch/um/include/os.h @@ -201,7 +201,6 @@ extern int os_getpgrp(void); #ifdef UML_CONFIG_MODE_TT extern void init_new_thread_stack(void *sig_stack, void (*usr1_handler)(int)); -extern void stop(void); #endif extern void init_new_thread_signals(void); extern int run_kernel_thread(int (*fn)(void *), void *arg, void **jmp_ptr); diff --git a/trunk/arch/um/include/sysdep-i386/kernel-offsets.h b/trunk/arch/um/include/sysdep-i386/kernel-offsets.h index 97ec9d894d75..2c13de321f2f 100644 --- a/trunk/arch/um/include/sysdep-i386/kernel-offsets.h +++ b/trunk/arch/um/include/sysdep-i386/kernel-offsets.h @@ -1,7 +1,6 @@ #include #include #include -#include #include #define DEFINE(sym, val) \ @@ -18,5 +17,9 @@ void foo(void) { OFFSET(HOST_TASK_DEBUGREGS, task_struct, thread.arch.debugregs); + DEFINE(KERNEL_MADV_REMOVE, MADV_REMOVE); +#ifdef CONFIG_MODE_TT + OFFSET(HOST_TASK_EXTERN_PID, task_struct, thread.mode.tt.extern_pid); +#endif #include } diff --git a/trunk/arch/um/include/sysdep-x86_64/kernel-offsets.h b/trunk/arch/um/include/sysdep-x86_64/kernel-offsets.h index a307237b7964..91d129fb3930 100644 --- a/trunk/arch/um/include/sysdep-x86_64/kernel-offsets.h +++ b/trunk/arch/um/include/sysdep-x86_64/kernel-offsets.h @@ -2,7 +2,6 @@ #include #include #include -#include #include #include @@ -19,5 +18,9 @@ void foo(void) { + DEFINE(KERNEL_MADV_REMOVE, MADV_REMOVE); +#ifdef CONFIG_MODE_TT + OFFSET(HOST_TASK_EXTERN_PID, task_struct, thread.mode.tt.extern_pid); +#endif #include } diff --git a/trunk/arch/um/kernel/skas/mmu.c b/trunk/arch/um/kernel/skas/mmu.c index 2c6d090a2e87..c17eddcf89b3 100644 --- a/trunk/arch/um/kernel/skas/mmu.c +++ b/trunk/arch/um/kernel/skas/mmu.c @@ -60,7 +60,10 @@ static int init_stub_pte(struct mm_struct *mm, unsigned long proc, #endif *pte = mk_pte(virt_to_page(kernel), __pgprot(_PAGE_PRESENT)); - *pte = pte_mkread(*pte); + /* This is wrong for the code page, but it doesn't matter since the + * stub is mapped by hand with the correct permissions. + */ + *pte = pte_mkwrite(*pte); return(0); out_pmd: diff --git a/trunk/arch/um/kernel/tt/uaccess_user.c b/trunk/arch/um/kernel/tt/uaccess_user.c index ed1abcf4d057..6c92bbccb49c 100644 --- a/trunk/arch/um/kernel/tt/uaccess_user.c +++ b/trunk/arch/um/kernel/tt/uaccess_user.c @@ -4,13 +4,13 @@ * Licensed under the GPL */ +#include #include #include "user_util.h" #include "uml_uaccess.h" #include "task.h" #include "kern_util.h" #include "os.h" -#include "longjmp.h" int __do_copy_from_user(void *to, const void *from, int n, void **fault_addr, void **fault_catcher) @@ -80,10 +80,10 @@ int __do_strnlen_user(const char *str, unsigned long n, struct tt_regs save = TASK_REGS(get_current())->tt; int ret; unsigned long *faddrp = (unsigned long *)fault_addr; - jmp_buf jbuf; + sigjmp_buf jbuf; *fault_catcher = &jbuf; - if(UML_SETJMP(&jbuf) == 0) + if(sigsetjmp(jbuf, 1) == 0) ret = strlen(str) + 1; else ret = *faddrp - (unsigned long) str; diff --git a/trunk/arch/um/os-Linux/tt.c b/trunk/arch/um/os-Linux/tt.c index 3dc3a02d6263..5461a065bbb9 100644 --- a/trunk/arch/um/os-Linux/tt.c +++ b/trunk/arch/um/os-Linux/tt.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git a/trunk/arch/um/os-Linux/util.c b/trunk/arch/um/os-Linux/util.c index 56b8a50e8bc2..3f5b1514e8a7 100644 --- a/trunk/arch/um/os-Linux/util.c +++ b/trunk/arch/um/os-Linux/util.c @@ -80,18 +80,11 @@ void setup_machinename(char *machine_out) struct utsname host; uname(&host); -#ifdef UML_CONFIG_UML_X86 -# ifndef UML_CONFIG_64BIT +#if defined(UML_CONFIG_UML_X86) && !defined(UML_CONFIG_64BIT) if (!strcmp(host.machine, "x86_64")) { strcpy(machine_out, "i686"); return; } -# else - if (!strcmp(host.machine, "i686")) { - strcpy(machine_out, "x86_64"); - return; - } -# endif #endif strcpy(machine_out, host.machine); } diff --git a/trunk/arch/um/sys-x86_64/ksyms.c b/trunk/arch/um/sys-x86_64/ksyms.c index 12c593607c59..859273808203 100644 --- a/trunk/arch/um/sys-x86_64/ksyms.c +++ b/trunk/arch/um/sys-x86_64/ksyms.c @@ -14,3 +14,6 @@ EXPORT_SYMBOL(__up_wakeup); /*XXX: we need them because they would be exported by x86_64 */ EXPORT_SYMBOL(__memcpy); + +/* Networking helper routines. */ +EXPORT_SYMBOL(ip_compute_csum); diff --git a/trunk/arch/um/sys-x86_64/stub_segv.c b/trunk/arch/um/sys-x86_64/stub_segv.c index 652fa34c2cd3..1c967026c957 100644 --- a/trunk/arch/um/sys-x86_64/stub_segv.c +++ b/trunk/arch/um/sys-x86_64/stub_segv.c @@ -5,6 +5,7 @@ #include #include +#include #include #include "uml-config.h" #include "sysdep/sigcontext.h" diff --git a/trunk/arch/x86_64/kernel/cpufreq/Kconfig b/trunk/arch/x86_64/kernel/cpufreq/Kconfig index 3abcfa3e1ed7..81f1562e5393 100644 --- a/trunk/arch/x86_64/kernel/cpufreq/Kconfig +++ b/trunk/arch/x86_64/kernel/cpufreq/Kconfig @@ -27,13 +27,10 @@ config X86_POWERNOW_K8_ACPI default y config X86_SPEEDSTEP_CENTRINO - tristate "Intel Enhanced SpeedStep (deprecated)" + tristate "Intel Enhanced SpeedStep" select CPU_FREQ_TABLE depends on ACPI_PROCESSOR help - This is deprecated and this functionality is now merged into - acpi_cpufreq (X86_ACPI_CPUFREQ). Use that driver instead of - speedstep_centrino. This adds the CPUFreq driver for Enhanced SpeedStep enabled mobile CPUs. This means Intel Pentium M (Centrino) CPUs or 64bit enabled Intel Xeons. @@ -53,7 +50,6 @@ config X86_ACPI_CPUFREQ help This driver adds a CPUFreq driver which utilizes the ACPI Processor Performance States. - This driver also supports Intel Enhanced Speedstep. For details, take a look at . diff --git a/trunk/arch/x86_64/kernel/cpufreq/Makefile b/trunk/arch/x86_64/kernel/cpufreq/Makefile index 753ce1dd418e..d8b593879224 100644 --- a/trunk/arch/x86_64/kernel/cpufreq/Makefile +++ b/trunk/arch/x86_64/kernel/cpufreq/Makefile @@ -5,8 +5,8 @@ SRCDIR := ../../../i386/kernel/cpu/cpufreq obj-$(CONFIG_X86_POWERNOW_K8) += powernow-k8.o -obj-$(CONFIG_X86_ACPI_CPUFREQ) += acpi-cpufreq.o obj-$(CONFIG_X86_SPEEDSTEP_CENTRINO) += speedstep-centrino.o +obj-$(CONFIG_X86_ACPI_CPUFREQ) += acpi-cpufreq.o obj-$(CONFIG_X86_P4_CLOCKMOD) += p4-clockmod.o obj-$(CONFIG_X86_SPEEDSTEP_LIB) += speedstep-lib.o diff --git a/trunk/arch/x86_64/kernel/io_apic.c b/trunk/arch/x86_64/kernel/io_apic.c index 44b55f833875..771bcf77daf2 100644 --- a/trunk/arch/x86_64/kernel/io_apic.c +++ b/trunk/arch/x86_64/kernel/io_apic.c @@ -660,7 +660,7 @@ static int __assign_irq_vector(int irq, cpumask_t mask, cpumask_t *result) } if (old_vector >= 0) { int old_cpu; - for_each_cpu_mask(old_cpu, irq_domain[irq]) + for_each_cpu_mask(old_cpu, domain) per_cpu(vector_irq, old_cpu)[old_vector] = -1; } for_each_cpu_mask(new_cpu, domain) @@ -1897,7 +1897,7 @@ static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask) } #endif -static struct irq_chip ht_irq_chip = { +static struct hw_interrupt_type ht_irq_chip = { .name = "PCI-HT", .mask = mask_ht_irq, .unmask = unmask_ht_irq, diff --git a/trunk/arch/x86_64/kernel/process.c b/trunk/arch/x86_64/kernel/process.c index 49f7fac6229e..5e95b257ee26 100644 --- a/trunk/arch/x86_64/kernel/process.c +++ b/trunk/arch/x86_64/kernel/process.c @@ -238,28 +238,20 @@ void cpu_idle (void) * We execute MONITOR against need_resched and enter optimized wait state * through MWAIT. Whenever someone changes need_resched, we would be woken * up from MWAIT (without an IPI). - * - * New with Core Duo processors, MWAIT can take some hints based on CPU - * capability. */ -void mwait_idle_with_hints(unsigned long eax, unsigned long ecx) +static void mwait_idle(void) { - if (!need_resched()) { + local_irq_enable(); + + while (!need_resched()) { __monitor((void *)¤t_thread_info()->flags, 0, 0); smp_mb(); - if (!need_resched()) - __mwait(eax, ecx); + if (need_resched()) + break; + __mwait(0, 0); } } -/* Default MONITOR/MWAIT with no hints, used for default C1 state */ -static void mwait_idle(void) -{ - local_irq_enable(); - while (!need_resched()) - mwait_idle_with_hints(0,0); -} - void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) { static int printed; diff --git a/trunk/arch/x86_64/kernel/vsmp.c b/trunk/arch/x86_64/kernel/vsmp.c index 414caf0c5f9a..044e852bd25e 100644 --- a/trunk/arch/x86_64/kernel/vsmp.c +++ b/trunk/arch/x86_64/kernel/vsmp.c @@ -14,7 +14,6 @@ #include #include #include -#include static int __init vsmp_init(void) { diff --git a/trunk/arch/x86_64/mm/init.c b/trunk/arch/x86_64/mm/init.c index 971dc1181e69..19c72520a868 100644 --- a/trunk/arch/x86_64/mm/init.c +++ b/trunk/arch/x86_64/mm/init.c @@ -406,12 +406,9 @@ void __cpuinit zap_low_mappings(int cpu) #ifndef CONFIG_NUMA void __init paging_init(void) { - unsigned long max_zone_pfns[MAX_NR_ZONES]; - memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); - max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN; - max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN; - max_zone_pfns[ZONE_NORMAL] = end_pfn; - + unsigned long max_zone_pfns[MAX_NR_ZONES] = {MAX_DMA_PFN, + MAX_DMA32_PFN, + end_pfn}; memory_present(0, 0, end_pfn); sparse_init(); free_area_init_nodes(max_zone_pfns); diff --git a/trunk/arch/x86_64/mm/numa.c b/trunk/arch/x86_64/mm/numa.c index 2ee2e003606c..829a008bd39b 100644 --- a/trunk/arch/x86_64/mm/numa.c +++ b/trunk/arch/x86_64/mm/numa.c @@ -338,11 +338,9 @@ static void __init arch_sparse_init(void) void __init paging_init(void) { int i; - unsigned long max_zone_pfns[MAX_NR_ZONES]; - memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); - max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN; - max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN; - max_zone_pfns[ZONE_NORMAL] = end_pfn; + unsigned long max_zone_pfns[MAX_NR_ZONES] = { MAX_DMA_PFN, + MAX_DMA32_PFN, + end_pfn}; arch_sparse_init(); diff --git a/trunk/block/elevator.c b/trunk/block/elevator.c index 8ccd163254b8..487dd3da8853 100644 --- a/trunk/block/elevator.c +++ b/trunk/block/elevator.c @@ -93,18 +93,21 @@ static inline int elv_try_merge(struct request *__rq, struct bio *bio) static struct elevator_type *elevator_find(const char *name) { - struct elevator_type *e; + struct elevator_type *e = NULL; struct list_head *entry; list_for_each(entry, &elv_list) { + struct elevator_type *__e; - e = list_entry(entry, struct elevator_type, list); + __e = list_entry(entry, struct elevator_type, list); - if (!strcmp(e->elevator_name, name)) - return e; + if (!strcmp(__e->elevator_name, name)) { + e = __e; + break; + } } - return NULL; + return e; } static void elevator_put(struct elevator_type *e) @@ -1085,7 +1088,7 @@ ssize_t elv_iosched_show(request_queue_t *q, char *name) struct list_head *entry; int len = 0; - spin_lock_irq(&elv_list_lock); + spin_lock_irq(q->queue_lock); list_for_each(entry, &elv_list) { struct elevator_type *__e; @@ -1095,7 +1098,7 @@ ssize_t elv_iosched_show(request_queue_t *q, char *name) else len += sprintf(name+len, "%s ", __e->elevator_name); } - spin_unlock_irq(&elv_list_lock); + spin_unlock_irq(q->queue_lock); len += sprintf(len+name, "\n"); return len; diff --git a/trunk/drivers/acpi/asus_acpi.c b/trunk/drivers/acpi/asus_acpi.c index c7ac9297a204..e9ee4c52a5f6 100644 --- a/trunk/drivers/acpi/asus_acpi.c +++ b/trunk/drivers/acpi/asus_acpi.c @@ -138,7 +138,6 @@ struct asus_hotk { S2x, //S200 (J1 reported), Victor MP-XP7210 W1N, //W1000N W5A, //W5A - W3V, //W3030V xxN, //M2400N, M3700N, M5200N, M6800N, S1300N, S5200N //(Centrino) END_MODEL @@ -377,17 +376,6 @@ static struct model_data model_conf[END_MODEL] = { .display_get = "\\ADVG"}, { - .name = "W3V", - .mt_mled = "MLED", - .mt_wled = "WLED", - .mt_lcd_switch = xxN_PREFIX "_Q10", - .lcd_status = "\\BKLT", - .brightness_set = "SPLV", - .brightness_get = "GPLV", - .display_set = "SDSP", - .display_get = "\\INFB"}, - - { .name = "xxN", .mt_mled = "MLED", /* WLED present, but not controlled by ACPI */ @@ -567,11 +555,11 @@ static int write_led(const char __user * buffer, unsigned long count, char *ledname, int ledmask, int invert) { - int rv, value; + int value; int led_out = 0; - rv = parse_arg(buffer, count, &value); - if (rv > 0) + count = parse_arg(buffer, count, &value); + if (count > 0) led_out = value ? 1 : 0; hotk->status = @@ -584,7 +572,7 @@ write_led(const char __user * buffer, unsigned long count, printk(KERN_WARNING "Asus ACPI: LED (%s) write failed\n", ledname); - return rv; + return count; } /* @@ -619,18 +607,20 @@ static int proc_write_ledd(struct file *file, const char __user * buffer, unsigned long count, void *data) { - int rv, value; + int value; - rv = parse_arg(buffer, count, &value); - if (rv > 0) { + count = parse_arg(buffer, count, &value); + if (count > 0) { if (!write_acpi_int (hotk->handle, hotk->methods->mt_ledd, value, NULL)) printk(KERN_WARNING "Asus ACPI: LED display write failed\n"); else hotk->ledd_status = (u32) value; - } - return rv; + } else if (count < 0) + printk(KERN_WARNING "Asus ACPI: Error reading user input\n"); + + return count; } /* @@ -771,12 +761,12 @@ static int proc_write_lcd(struct file *file, const char __user * buffer, unsigned long count, void *data) { - int rv, value; + int value; - rv = parse_arg(buffer, count, &value); - if (rv > 0) + count = parse_arg(buffer, count, &value); + if (count > 0) set_lcd_state(value); - return rv; + return count; } static int read_brightness(void) @@ -840,15 +830,18 @@ static int proc_write_brn(struct file *file, const char __user * buffer, unsigned long count, void *data) { - int rv, value; + int value; - rv = parse_arg(buffer, count, &value); - if (rv > 0) { + count = parse_arg(buffer, count, &value); + if (count > 0) { value = (0 < value) ? ((15 < value) ? 15 : value) : 0; /* 0 <= value <= 15 */ set_brightness(value); + } else if (count < 0) { + printk(KERN_WARNING "Asus ACPI: Error reading user input\n"); } - return rv; + + return count; } static void set_display(int value) @@ -887,12 +880,15 @@ static int proc_write_disp(struct file *file, const char __user * buffer, unsigned long count, void *data) { - int rv, value; + int value; - rv = parse_arg(buffer, count, &value); - if (rv > 0) + count = parse_arg(buffer, count, &value); + if (count > 0) set_display(value); - return rv; + else if (count < 0) + printk(KERN_WARNING "Asus ACPI: Error reading user input\n"); + + return count; } typedef int (proc_readfunc) (char *page, char **start, off_t off, int count, @@ -1101,8 +1097,6 @@ static int asus_model_match(char *model) return A4G; else if (strncmp(model, "W1N", 3) == 0) return W1N; - else if (strncmp(model, "W3V", 3) == 0) - return W3V; else if (strncmp(model, "W5A", 3) == 0) return W5A; else @@ -1206,10 +1200,9 @@ static int asus_hotk_get_info(void) hotk->methods->mt_wled = NULL; /* L5D's WLED is not controlled by ACPI */ else if (strncmp(string, "M2N", 3) == 0 || - strncmp(string, "W3V", 3) == 0 || strncmp(string, "S1N", 3) == 0) hotk->methods->mt_wled = "WLED"; - /* M2N, S1N and W3V have a usable WLED */ + /* M2N and S1N have a usable WLED */ else if (asus_info) { if (strncmp(asus_info->oem_table_id, "L1", 2) == 0) hotk->methods->mled_status = NULL; diff --git a/trunk/drivers/acpi/battery.c b/trunk/drivers/acpi/battery.c index 026e40755cdd..9810e2a55d0a 100644 --- a/trunk/drivers/acpi/battery.c +++ b/trunk/drivers/acpi/battery.c @@ -64,7 +64,6 @@ extern void *acpi_unlock_battery_dir(struct proc_dir_entry *acpi_battery_dir); static int acpi_battery_add(struct acpi_device *device); static int acpi_battery_remove(struct acpi_device *device, int type); -static int acpi_battery_resume(struct acpi_device *device, int status); static struct acpi_driver acpi_battery_driver = { .name = ACPI_BATTERY_DRIVER_NAME, @@ -72,7 +71,6 @@ static struct acpi_driver acpi_battery_driver = { .ids = ACPI_BATTERY_HID, .ops = { .add = acpi_battery_add, - .resume = acpi_battery_resume, .remove = acpi_battery_remove, }, }; @@ -755,18 +753,6 @@ static int acpi_battery_remove(struct acpi_device *device, int type) return 0; } -/* this is needed to learn about changes made in suspended state */ -static int acpi_battery_resume(struct acpi_device *device, int state) -{ - struct acpi_battery *battery; - - if (!device) - return -EINVAL; - - battery = device->driver_data; - return acpi_battery_check(battery); -} - static int __init acpi_battery_init(void) { int result; diff --git a/trunk/drivers/acpi/ec.c b/trunk/drivers/acpi/ec.c index e6d4b084dca2..e5d796362854 100644 --- a/trunk/drivers/acpi/ec.c +++ b/trunk/drivers/acpi/ec.c @@ -45,143 +45,206 @@ ACPI_MODULE_NAME("acpi_ec") #define ACPI_EC_DRIVER_NAME "ACPI Embedded Controller Driver" #define ACPI_EC_DEVICE_NAME "Embedded Controller" #define ACPI_EC_FILE_INFO "info" - -/* EC status register */ #define ACPI_EC_FLAG_OBF 0x01 /* Output buffer full */ #define ACPI_EC_FLAG_IBF 0x02 /* Input buffer full */ #define ACPI_EC_FLAG_BURST 0x10 /* burst mode */ #define ACPI_EC_FLAG_SCI 0x20 /* EC-SCI occurred */ - -/* EC commands */ +#define ACPI_EC_EVENT_OBF 0x01 /* Output buffer full */ +#define ACPI_EC_EVENT_IBE 0x02 /* Input buffer empty */ +#define ACPI_EC_DELAY 50 /* Wait 50ms max. during EC ops */ +#define ACPI_EC_UDELAY_GLK 1000 /* Wait 1ms max. to get global lock */ +#define ACPI_EC_UDELAY 100 /* Poll @ 100us increments */ +#define ACPI_EC_UDELAY_COUNT 1000 /* Wait 10ms max. during EC ops */ #define ACPI_EC_COMMAND_READ 0x80 #define ACPI_EC_COMMAND_WRITE 0x81 #define ACPI_EC_BURST_ENABLE 0x82 #define ACPI_EC_BURST_DISABLE 0x83 #define ACPI_EC_COMMAND_QUERY 0x84 - -/* EC events */ -enum { - ACPI_EC_EVENT_OBF_1 = 1, /* Output buffer full */ - ACPI_EC_EVENT_IBF_0, /* Input buffer empty */ -}; - -#define ACPI_EC_DELAY 50 /* Wait 50ms max. during EC ops */ -#define ACPI_EC_UDELAY_GLK 1000 /* Wait 1ms max. to get global lock */ -#define ACPI_EC_UDELAY 100 /* Poll @ 100us increments */ -#define ACPI_EC_UDELAY_COUNT 1000 /* Wait 10ms max. during EC ops */ - -enum { - EC_INTR = 1, /* Output buffer full */ - EC_POLL, /* Input buffer empty */ -}; - +#define EC_POLL 0xFF +#define EC_INTR 0x00 static int acpi_ec_remove(struct acpi_device *device, int type); static int acpi_ec_start(struct acpi_device *device); static int acpi_ec_stop(struct acpi_device *device, int type); -static int acpi_ec_add(struct acpi_device *device); +static int acpi_ec_intr_add(struct acpi_device *device); +static int acpi_ec_poll_add(struct acpi_device *device); static struct acpi_driver acpi_ec_driver = { .name = ACPI_EC_DRIVER_NAME, .class = ACPI_EC_CLASS, .ids = ACPI_EC_HID, .ops = { - .add = acpi_ec_add, + .add = acpi_ec_intr_add, .remove = acpi_ec_remove, .start = acpi_ec_start, .stop = acpi_ec_stop, }, }; +union acpi_ec { + struct { + u32 mode; + acpi_handle handle; + unsigned long uid; + unsigned long gpe_bit; + struct acpi_generic_address status_addr; + struct acpi_generic_address command_addr; + struct acpi_generic_address data_addr; + unsigned long global_lock; + } common; + + struct { + u32 mode; + acpi_handle handle; + unsigned long uid; + unsigned long gpe_bit; + struct acpi_generic_address status_addr; + struct acpi_generic_address command_addr; + struct acpi_generic_address data_addr; + unsigned long global_lock; + unsigned int expect_event; + atomic_t leaving_burst; /* 0 : No, 1 : Yes, 2: abort */ + atomic_t pending_gpe; + struct semaphore sem; + wait_queue_head_t wait; + } intr; + + struct { + u32 mode; + acpi_handle handle; + unsigned long uid; + unsigned long gpe_bit; + struct acpi_generic_address status_addr; + struct acpi_generic_address command_addr; + struct acpi_generic_address data_addr; + unsigned long global_lock; + struct semaphore sem; + } poll; +}; +static int acpi_ec_poll_wait(union acpi_ec *ec, u8 event); +static int acpi_ec_intr_wait(union acpi_ec *ec, unsigned int event); +static int acpi_ec_poll_read(union acpi_ec *ec, u8 address, u32 * data); +static int acpi_ec_intr_read(union acpi_ec *ec, u8 address, u32 * data); +static int acpi_ec_poll_write(union acpi_ec *ec, u8 address, u8 data); +static int acpi_ec_intr_write(union acpi_ec *ec, u8 address, u8 data); +static int acpi_ec_poll_query(union acpi_ec *ec, u32 * data); +static int acpi_ec_intr_query(union acpi_ec *ec, u32 * data); +static void acpi_ec_gpe_poll_query(void *ec_cxt); +static void acpi_ec_gpe_intr_query(void *ec_cxt); +static u32 acpi_ec_gpe_poll_handler(void *data); +static u32 acpi_ec_gpe_intr_handler(void *data); +static acpi_status __init +acpi_fake_ecdt_poll_callback(acpi_handle handle, + u32 Level, void *context, void **retval); + +static acpi_status __init +acpi_fake_ecdt_intr_callback(acpi_handle handle, + u32 Level, void *context, void **retval); + +static int __init acpi_ec_poll_get_real_ecdt(void); +static int __init acpi_ec_intr_get_real_ecdt(void); /* If we find an EC via the ECDT, we need to keep a ptr to its context */ -struct acpi_ec { - acpi_handle handle; - unsigned long uid; - unsigned long gpe_bit; - unsigned long command_addr; - unsigned long data_addr; - unsigned long global_lock; - struct semaphore sem; - unsigned int expect_event; - atomic_t leaving_burst; /* 0 : No, 1 : Yes, 2: abort */ - wait_queue_head_t wait; -} *ec_ecdt; +static union acpi_ec *ec_ecdt; /* External interfaces use first EC only, so remember */ static struct acpi_device *first_ec; -static int acpi_ec_mode = EC_INTR; +static int acpi_ec_poll_mode = EC_INTR; /* -------------------------------------------------------------------------- Transaction Management -------------------------------------------------------------------------- */ -static inline u8 acpi_ec_read_status(struct acpi_ec *ec) +static u32 acpi_ec_read_status(union acpi_ec *ec) { - return inb(ec->command_addr); -} + u32 status = 0; -static inline u8 acpi_ec_read_data(struct acpi_ec *ec) -{ - return inb(ec->data_addr); + acpi_hw_low_level_read(8, &status, &ec->common.status_addr); + return status; } -static inline void acpi_ec_write_cmd(struct acpi_ec *ec, u8 command) +static int acpi_ec_wait(union acpi_ec *ec, u8 event) { - outb(command, ec->command_addr); + if (acpi_ec_poll_mode) + return acpi_ec_poll_wait(ec, event); + else + return acpi_ec_intr_wait(ec, event); } -static inline void acpi_ec_write_data(struct acpi_ec *ec, u8 data) +static int acpi_ec_poll_wait(union acpi_ec *ec, u8 event) { - outb(data, ec->data_addr); -} + u32 acpi_ec_status = 0; + u32 i = ACPI_EC_UDELAY_COUNT; -static int acpi_ec_check_status(u8 status, u8 event) -{ + if (!ec) + return -EINVAL; + + /* Poll the EC status register waiting for the event to occur. */ switch (event) { - case ACPI_EC_EVENT_OBF_1: - if (status & ACPI_EC_FLAG_OBF) - return 1; + case ACPI_EC_EVENT_OBF: + do { + acpi_hw_low_level_read(8, &acpi_ec_status, + &ec->common.status_addr); + if (acpi_ec_status & ACPI_EC_FLAG_OBF) + return 0; + udelay(ACPI_EC_UDELAY); + } while (--i > 0); break; - case ACPI_EC_EVENT_IBF_0: - if (!(status & ACPI_EC_FLAG_IBF)) - return 1; + case ACPI_EC_EVENT_IBE: + do { + acpi_hw_low_level_read(8, &acpi_ec_status, + &ec->common.status_addr); + if (!(acpi_ec_status & ACPI_EC_FLAG_IBF)) + return 0; + udelay(ACPI_EC_UDELAY); + } while (--i > 0); break; default: - break; + return -EINVAL; } - return 0; + return -ETIME; } - -static int acpi_ec_wait(struct acpi_ec *ec, u8 event) +static int acpi_ec_intr_wait(union acpi_ec *ec, unsigned int event) { - int i = (acpi_ec_mode == EC_POLL) ? ACPI_EC_UDELAY_COUNT : 0; - long time_left; + int result = 0; - ec->expect_event = event; - if (acpi_ec_check_status(acpi_ec_read_status(ec), event)) { - ec->expect_event = 0; - return 0; + + ec->intr.expect_event = event; + smp_mb(); + + switch (event) { + case ACPI_EC_EVENT_IBE: + if (~acpi_ec_read_status(ec) & ACPI_EC_FLAG_IBF) { + ec->intr.expect_event = 0; + return 0; + } + break; + default: + break; } - do { - if (acpi_ec_mode == EC_POLL) { - udelay(ACPI_EC_UDELAY); - } else { - time_left = wait_event_timeout(ec->wait, - !ec->expect_event, + result = wait_event_timeout(ec->intr.wait, + !ec->intr.expect_event, msecs_to_jiffies(ACPI_EC_DELAY)); - if (time_left > 0) { - ec->expect_event = 0; - return 0; - } - } - if (acpi_ec_check_status(acpi_ec_read_status(ec), event)) { - ec->expect_event = 0; + + ec->intr.expect_event = 0; + smp_mb(); + + /* + * Verify that the event in question has actually happened by + * querying EC status. Do the check even if operation timed-out + * to make sure that we did not miss interrupt. + */ + switch (event) { + case ACPI_EC_EVENT_OBF: + if (acpi_ec_read_status(ec) & ACPI_EC_FLAG_OBF) return 0; - } - } while (--i > 0); + break; - ec->expect_event = 0; + case ACPI_EC_EVENT_IBE: + if (~acpi_ec_read_status(ec) & ACPI_EC_FLAG_IBF) + return 0; + break; + } return -ETIME; } @@ -191,150 +254,272 @@ static int acpi_ec_wait(struct acpi_ec *ec, u8 event) * Note: samsung nv5000 doesn't work with ec burst mode. * http://bugzilla.kernel.org/show_bug.cgi?id=4980 */ -int acpi_ec_enter_burst_mode(struct acpi_ec *ec) +int acpi_ec_enter_burst_mode(union acpi_ec *ec) { - u8 tmp = 0; - u8 status = 0; + u32 tmp = 0; + int status = 0; status = acpi_ec_read_status(ec); if (status != -EINVAL && !(status & ACPI_EC_FLAG_BURST)) { - status = acpi_ec_wait(ec, ACPI_EC_EVENT_IBF_0); + status = acpi_ec_wait(ec, ACPI_EC_EVENT_IBE); if (status) goto end; - acpi_ec_write_cmd(ec, ACPI_EC_BURST_ENABLE); - status = acpi_ec_wait(ec, ACPI_EC_EVENT_OBF_1); - tmp = acpi_ec_read_data(ec); + acpi_hw_low_level_write(8, ACPI_EC_BURST_ENABLE, + &ec->common.command_addr); + status = acpi_ec_wait(ec, ACPI_EC_EVENT_OBF); + acpi_hw_low_level_read(8, &tmp, &ec->common.data_addr); if (tmp != 0x90) { /* Burst ACK byte */ return -EINVAL; } } - atomic_set(&ec->leaving_burst, 0); + atomic_set(&ec->intr.leaving_burst, 0); return 0; - end: - ACPI_EXCEPTION((AE_INFO, status, "EC wait, burst mode")); + end: + ACPI_EXCEPTION ((AE_INFO, status, "EC wait, burst mode"); return -1; } -int acpi_ec_leave_burst_mode(struct acpi_ec *ec) +int acpi_ec_leave_burst_mode(union acpi_ec *ec) { - u8 status = 0; + int status = 0; status = acpi_ec_read_status(ec); if (status != -EINVAL && (status & ACPI_EC_FLAG_BURST)){ - status = acpi_ec_wait(ec, ACPI_EC_EVENT_IBF_0); + status = acpi_ec_wait(ec, ACPI_EC_FLAG_IBF); if(status) goto end; - acpi_ec_write_cmd(ec, ACPI_EC_BURST_DISABLE); - acpi_ec_wait(ec, ACPI_EC_EVENT_IBF_0); - } - atomic_set(&ec->leaving_burst, 1); + acpi_hw_low_level_write(8, ACPI_EC_BURST_DISABLE, &ec->common.command_addr); + acpi_ec_wait(ec, ACPI_EC_FLAG_IBF); + } + atomic_set(&ec->intr.leaving_burst, 1); return 0; - end: - ACPI_EXCEPTION((AE_INFO, status, "EC leave burst mode")); +end: + ACPI_EXCEPTION((AE_INFO, status, "EC leave burst mode"); return -1; } #endif /* ACPI_FUTURE_USAGE */ -static int acpi_ec_transaction_unlocked(struct acpi_ec *ec, u8 command, - const u8 *wdata, unsigned wdata_len, - u8 *rdata, unsigned rdata_len) +static int acpi_ec_read(union acpi_ec *ec, u8 address, u32 * data) +{ + if (acpi_ec_poll_mode) + return acpi_ec_poll_read(ec, address, data); + else + return acpi_ec_intr_read(ec, address, data); +} +static int acpi_ec_write(union acpi_ec *ec, u8 address, u8 data) +{ + if (acpi_ec_poll_mode) + return acpi_ec_poll_write(ec, address, data); + else + return acpi_ec_intr_write(ec, address, data); +} +static int acpi_ec_poll_read(union acpi_ec *ec, u8 address, u32 * data) { - int result; + acpi_status status = AE_OK; + int result = 0; + u32 glk = 0; - acpi_ec_write_cmd(ec, command); - for (; wdata_len > 0; wdata_len --) { - result = acpi_ec_wait(ec, ACPI_EC_EVENT_IBF_0); - if (result) - return result; - acpi_ec_write_data(ec, *(wdata++)); + if (!ec || !data) + return -EINVAL; + + *data = 0; + + if (ec->common.global_lock) { + status = acpi_acquire_global_lock(ACPI_EC_UDELAY_GLK, &glk); + if (ACPI_FAILURE(status)) + return -ENODEV; } - if (command == ACPI_EC_COMMAND_WRITE) { - result = acpi_ec_wait(ec, ACPI_EC_EVENT_IBF_0); - if (result) - return result; + if (down_interruptible(&ec->poll.sem)) { + result = -ERESTARTSYS; + goto end_nosem; } + + acpi_hw_low_level_write(8, ACPI_EC_COMMAND_READ, + &ec->common.command_addr); + result = acpi_ec_wait(ec, ACPI_EC_EVENT_IBE); + if (result) + goto end; + + acpi_hw_low_level_write(8, address, &ec->common.data_addr); + result = acpi_ec_wait(ec, ACPI_EC_EVENT_OBF); + if (result) + goto end; + + acpi_hw_low_level_read(8, data, &ec->common.data_addr); + + ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Read [%02x] from address [%02x]\n", + *data, address)); + + end: + up(&ec->poll.sem); +end_nosem: + if (ec->common.global_lock) + acpi_release_global_lock(glk); + + return result; +} + +static int acpi_ec_poll_write(union acpi_ec *ec, u8 address, u8 data) +{ + int result = 0; + acpi_status status = AE_OK; + u32 glk = 0; - for (; rdata_len > 0; rdata_len --) { - result = acpi_ec_wait(ec, ACPI_EC_EVENT_OBF_1); - if (result) - return result; - *(rdata++) = acpi_ec_read_data(ec); + if (!ec) + return -EINVAL; + + if (ec->common.global_lock) { + status = acpi_acquire_global_lock(ACPI_EC_UDELAY_GLK, &glk); + if (ACPI_FAILURE(status)) + return -ENODEV; + } + + if (down_interruptible(&ec->poll.sem)) { + result = -ERESTARTSYS; + goto end_nosem; } + + acpi_hw_low_level_write(8, ACPI_EC_COMMAND_WRITE, + &ec->common.command_addr); + result = acpi_ec_wait(ec, ACPI_EC_EVENT_IBE); + if (result) + goto end; - return 0; + acpi_hw_low_level_write(8, address, &ec->common.data_addr); + result = acpi_ec_wait(ec, ACPI_EC_EVENT_IBE); + if (result) + goto end; + + acpi_hw_low_level_write(8, data, &ec->common.data_addr); + result = acpi_ec_wait(ec, ACPI_EC_EVENT_IBE); + if (result) + goto end; + + ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Wrote [%02x] to address [%02x]\n", + data, address)); + + end: + up(&ec->poll.sem); +end_nosem: + if (ec->common.global_lock) + acpi_release_global_lock(glk); + + return result; } -static int acpi_ec_transaction(struct acpi_ec *ec, u8 command, - const u8 *wdata, unsigned wdata_len, - u8 *rdata, unsigned rdata_len) +static int acpi_ec_intr_read(union acpi_ec *ec, u8 address, u32 * data) { - int status; + int status = 0; u32 glk; - if (!ec || (wdata_len && !wdata) || (rdata_len && !rdata)) + + if (!ec || !data) return -EINVAL; - if (rdata) - memset(rdata, 0, rdata_len); + *data = 0; - if (ec->global_lock) { + if (ec->common.global_lock) { status = acpi_acquire_global_lock(ACPI_EC_UDELAY_GLK, &glk); if (ACPI_FAILURE(status)) return -ENODEV; } - down(&ec->sem); - status = acpi_ec_wait(ec, ACPI_EC_EVENT_IBF_0); + WARN_ON(in_interrupt()); + down(&ec->intr.sem); + + status = acpi_ec_wait(ec, ACPI_EC_EVENT_IBE); if (status) { printk(KERN_DEBUG PREFIX "read EC, IB not empty\n"); goto end; } + acpi_hw_low_level_write(8, ACPI_EC_COMMAND_READ, + &ec->common.command_addr); + status = acpi_ec_wait(ec, ACPI_EC_EVENT_IBE); + if (status) { + printk(KERN_DEBUG PREFIX "read EC, IB not empty\n"); + } - status = acpi_ec_transaction_unlocked(ec, command, - wdata, wdata_len, - rdata, rdata_len); + acpi_hw_low_level_write(8, address, &ec->common.data_addr); + status = acpi_ec_wait(ec, ACPI_EC_EVENT_OBF); + if (status) { + printk(KERN_DEBUG PREFIX "read EC, OB not full\n"); + goto end; + } + acpi_hw_low_level_read(8, data, &ec->common.data_addr); + ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Read [%02x] from address [%02x]\n", + *data, address)); -end: - up(&ec->sem); + end: + up(&ec->intr.sem); - if (ec->global_lock) + if (ec->common.global_lock) acpi_release_global_lock(glk); return status; } -static int acpi_ec_read(struct acpi_ec *ec, u8 address, u8 *data) +static int acpi_ec_intr_write(union acpi_ec *ec, u8 address, u8 data) { - int result; - u8 d; + int status = 0; + u32 glk; - result = acpi_ec_transaction(ec, ACPI_EC_COMMAND_READ, - &address, 1, &d, 1); - *data = d; - return result; -} -static int acpi_ec_write(struct acpi_ec *ec, u8 address, u8 data) -{ - u8 wdata[2] = { address, data }; - return acpi_ec_transaction(ec, ACPI_EC_COMMAND_WRITE, - wdata, 2, NULL, 0); + if (!ec) + return -EINVAL; + + if (ec->common.global_lock) { + status = acpi_acquire_global_lock(ACPI_EC_UDELAY_GLK, &glk); + if (ACPI_FAILURE(status)) + return -ENODEV; + } + + WARN_ON(in_interrupt()); + down(&ec->intr.sem); + + status = acpi_ec_wait(ec, ACPI_EC_EVENT_IBE); + if (status) { + printk(KERN_DEBUG PREFIX "write EC, IB not empty\n"); + } + acpi_hw_low_level_write(8, ACPI_EC_COMMAND_WRITE, + &ec->common.command_addr); + status = acpi_ec_wait(ec, ACPI_EC_EVENT_IBE); + if (status) { + printk(KERN_DEBUG PREFIX "write EC, IB not empty\n"); + } + + acpi_hw_low_level_write(8, address, &ec->common.data_addr); + status = acpi_ec_wait(ec, ACPI_EC_EVENT_IBE); + if (status) { + printk(KERN_DEBUG PREFIX "write EC, IB not empty\n"); + } + + acpi_hw_low_level_write(8, data, &ec->common.data_addr); + + ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Wrote [%02x] to address [%02x]\n", + data, address)); + + up(&ec->intr.sem); + + if (ec->common.global_lock) + acpi_release_global_lock(glk); + + return status; } /* * Externally callable EC access functions. For now, assume 1 EC only */ -int ec_read(u8 addr, u8 *val) +int ec_read(u8 addr, u8 * val) { - struct acpi_ec *ec; + union acpi_ec *ec; int err; - u8 temp_data; + u32 temp_data; if (!first_ec) return -ENODEV; @@ -354,7 +539,7 @@ EXPORT_SYMBOL(ec_read); int ec_write(u8 addr, u8 val) { - struct acpi_ec *ec; + union acpi_ec *ec; int err; if (!first_ec) @@ -369,106 +554,255 @@ int ec_write(u8 addr, u8 val) EXPORT_SYMBOL(ec_write); -extern int ec_transaction(u8 command, - const u8 *wdata, unsigned wdata_len, - u8 *rdata, unsigned rdata_len) +static int acpi_ec_query(union acpi_ec *ec, u32 * data) +{ + if (acpi_ec_poll_mode) + return acpi_ec_poll_query(ec, data); + else + return acpi_ec_intr_query(ec, data); +} +static int acpi_ec_poll_query(union acpi_ec *ec, u32 * data) { - struct acpi_ec *ec; + int result = 0; + acpi_status status = AE_OK; + u32 glk = 0; - if (!first_ec) - return -ENODEV; - ec = acpi_driver_data(first_ec); + if (!ec || !data) + return -EINVAL; - return acpi_ec_transaction(ec, command, wdata, - wdata_len, rdata, rdata_len); -} + *data = 0; + + if (ec->common.global_lock) { + status = acpi_acquire_global_lock(ACPI_EC_UDELAY_GLK, &glk); + if (ACPI_FAILURE(status)) + return -ENODEV; + } -EXPORT_SYMBOL(ec_transaction); + /* + * Query the EC to find out which _Qxx method we need to evaluate. + * Note that successful completion of the query causes the ACPI_EC_SCI + * bit to be cleared (and thus clearing the interrupt source). + */ + if (down_interruptible(&ec->poll.sem)) { + result = -ERESTARTSYS; + goto end_nosem; + } + + acpi_hw_low_level_write(8, ACPI_EC_COMMAND_QUERY, + &ec->common.command_addr); + result = acpi_ec_wait(ec, ACPI_EC_EVENT_OBF); + if (result) + goto end; + + acpi_hw_low_level_read(8, data, &ec->common.data_addr); + if (!*data) + result = -ENODATA; + + end: + up(&ec->poll.sem); +end_nosem: + if (ec->common.global_lock) + acpi_release_global_lock(glk); -static int acpi_ec_query(struct acpi_ec *ec, u8 *data) + return result; +} +static int acpi_ec_intr_query(union acpi_ec *ec, u32 * data) { - int result; - u8 d; + int status = 0; + u32 glk; + + + if (!ec || !data) + return -EINVAL; + *data = 0; + + if (ec->common.global_lock) { + status = acpi_acquire_global_lock(ACPI_EC_UDELAY_GLK, &glk); + if (ACPI_FAILURE(status)) + return -ENODEV; + } - if (!ec || !data) - return -EINVAL; + down(&ec->intr.sem); - /* - * Query the EC to find out which _Qxx method we need to evaluate. - * Note that successful completion of the query causes the ACPI_EC_SCI - * bit to be cleared (and thus clearing the interrupt source). - */ + status = acpi_ec_wait(ec, ACPI_EC_EVENT_IBE); + if (status) { + printk(KERN_DEBUG PREFIX "query EC, IB not empty\n"); + goto end; + } + /* + * Query the EC to find out which _Qxx method we need to evaluate. + * Note that successful completion of the query causes the ACPI_EC_SCI + * bit to be cleared (and thus clearing the interrupt source). + */ + acpi_hw_low_level_write(8, ACPI_EC_COMMAND_QUERY, + &ec->common.command_addr); + status = acpi_ec_wait(ec, ACPI_EC_EVENT_OBF); + if (status) { + printk(KERN_DEBUG PREFIX "query EC, OB not full\n"); + goto end; + } - result = acpi_ec_transaction(ec, ACPI_EC_COMMAND_QUERY, NULL, 0, &d, 1); - if (result) - return result; + acpi_hw_low_level_read(8, data, &ec->common.data_addr); + if (!*data) + status = -ENODATA; - if (!d) - return -ENODATA; + end: + up(&ec->intr.sem); + + if (ec->common.global_lock) + acpi_release_global_lock(glk); - *data = d; - return 0; + return status; } /* -------------------------------------------------------------------------- Event Management -------------------------------------------------------------------------- */ -struct acpi_ec_query_data { +union acpi_ec_query_data { acpi_handle handle; u8 data; }; static void acpi_ec_gpe_query(void *ec_cxt) { - struct acpi_ec *ec = (struct acpi_ec *)ec_cxt; - u8 value = 0; - static char object_name[8]; + if (acpi_ec_poll_mode) + acpi_ec_gpe_poll_query(ec_cxt); + else + acpi_ec_gpe_intr_query(ec_cxt); +} - if (!ec) - goto end; +static void acpi_ec_gpe_poll_query(void *ec_cxt) +{ + union acpi_ec *ec = (union acpi_ec *)ec_cxt; + u32 value = 0; + static char object_name[5] = { '_', 'Q', '0', '0', '\0' }; + const char hex[] = { '0', '1', '2', '3', '4', '5', '6', '7', + '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' + }; - value = acpi_ec_read_status(ec); + if (!ec_cxt) + goto end; + + if (down_interruptible (&ec->poll.sem)) { + return; + } + acpi_hw_low_level_read(8, &value, &ec->common.command_addr); + up(&ec->poll.sem); + + /* TBD: Implement asynch events! + * NOTE: All we care about are EC-SCI's. Other EC events are + * handled via polling (yuck!). This is because some systems + * treat EC-SCIs as level (versus EDGE!) triggered, preventing + * a purely interrupt-driven approach (grumble, grumble). + */ if (!(value & ACPI_EC_FLAG_SCI)) goto end; if (acpi_ec_query(ec, &value)) goto end; - snprintf(object_name, 8, "_Q%2.2X", value); + object_name[2] = hex[((value >> 4) & 0x0F)]; + object_name[3] = hex[(value & 0x0F)]; + + ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Evaluating %s\n", object_name)); - ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Evaluating %s", object_name)); + acpi_evaluate_object(ec->common.handle, object_name, NULL, NULL); - acpi_evaluate_object(ec->handle, object_name, NULL, NULL); + end: + acpi_enable_gpe(NULL, ec->common.gpe_bit, ACPI_NOT_ISR); +} +static void acpi_ec_gpe_intr_query(void *ec_cxt) +{ + union acpi_ec *ec = (union acpi_ec *)ec_cxt; + u32 value; + int result = -ENODATA; + static char object_name[5] = { '_', 'Q', '0', '0', '\0' }; + const char hex[] = { '0', '1', '2', '3', '4', '5', '6', '7', + '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' + }; + + if (acpi_ec_read_status(ec) & ACPI_EC_FLAG_SCI) + result = acpi_ec_query(ec, &value); + + if (result) + goto end; + + object_name[2] = hex[((value >> 4) & 0x0F)]; + object_name[3] = hex[(value & 0x0F)]; + + ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Evaluating %s\n", object_name)); + + acpi_evaluate_object(ec->common.handle, object_name, NULL, NULL); end: - acpi_enable_gpe(NULL, ec->gpe_bit, ACPI_NOT_ISR); + atomic_dec(&ec->intr.pending_gpe); + return; } static u32 acpi_ec_gpe_handler(void *data) +{ + if (acpi_ec_poll_mode) + return acpi_ec_gpe_poll_handler(data); + else + return acpi_ec_gpe_intr_handler(data); +} +static u32 acpi_ec_gpe_poll_handler(void *data) { acpi_status status = AE_OK; - u8 value; - struct acpi_ec *ec = (struct acpi_ec *)data; + union acpi_ec *ec = (union acpi_ec *)data; + + if (!ec) + return ACPI_INTERRUPT_NOT_HANDLED; + + acpi_disable_gpe(NULL, ec->common.gpe_bit, ACPI_ISR); + + status = acpi_os_execute(OSL_EC_POLL_HANDLER, acpi_ec_gpe_query, ec); + + if (status == AE_OK) + return ACPI_INTERRUPT_HANDLED; + else + return ACPI_INTERRUPT_NOT_HANDLED; +} +static u32 acpi_ec_gpe_intr_handler(void *data) +{ + acpi_status status = AE_OK; + u32 value; + union acpi_ec *ec = (union acpi_ec *)data; + + if (!ec) + return ACPI_INTERRUPT_NOT_HANDLED; - acpi_clear_gpe(NULL, ec->gpe_bit, ACPI_ISR); + acpi_clear_gpe(NULL, ec->common.gpe_bit, ACPI_ISR); value = acpi_ec_read_status(ec); - if (acpi_ec_mode == EC_INTR) { - if (acpi_ec_check_status(value, ec->expect_event)) { - ec->expect_event = 0; - wake_up(&ec->wait); - } + switch (ec->intr.expect_event) { + case ACPI_EC_EVENT_OBF: + if (!(value & ACPI_EC_FLAG_OBF)) + break; + ec->intr.expect_event = 0; + wake_up(&ec->intr.wait); + break; + case ACPI_EC_EVENT_IBE: + if ((value & ACPI_EC_FLAG_IBF)) + break; + ec->intr.expect_event = 0; + wake_up(&ec->intr.wait); + break; + default: + break; } if (value & ACPI_EC_FLAG_SCI) { - status = acpi_os_execute(OSL_EC_BURST_HANDLER, acpi_ec_gpe_query, ec); + atomic_add(1, &ec->intr.pending_gpe); + status = acpi_os_execute(OSL_EC_BURST_HANDLER, + acpi_ec_gpe_query, ec); return status == AE_OK ? ACPI_INTERRUPT_HANDLED : ACPI_INTERRUPT_NOT_HANDLED; } - acpi_enable_gpe(NULL, ec->gpe_bit, ACPI_ISR); + acpi_enable_gpe(NULL, ec->common.gpe_bit, ACPI_ISR); return status == AE_OK ? ACPI_INTERRUPT_HANDLED : ACPI_INTERRUPT_NOT_HANDLED; } @@ -499,7 +833,7 @@ acpi_ec_space_handler(u32 function, void *handler_context, void *region_context) { int result = 0; - struct acpi_ec *ec = NULL; + union acpi_ec *ec = NULL; u64 temp = *value; acpi_integer f_v = 0; int i = 0; @@ -509,16 +843,18 @@ acpi_ec_space_handler(u32 function, return AE_BAD_PARAMETER; if (bit_width != 8 && acpi_strict) { + printk(KERN_WARNING PREFIX + "acpi_ec_space_handler: bit_width should be 8\n"); return AE_BAD_PARAMETER; } - ec = (struct acpi_ec *)handler_context; + ec = (union acpi_ec *)handler_context; next_byte: switch (function) { case ACPI_READ: temp = 0; - result = acpi_ec_read(ec, (u8) address, (u8 *) &temp); + result = acpi_ec_read(ec, (u8) address, (u32 *) & temp); break; case ACPI_WRITE: result = acpi_ec_write(ec, (u8) address, (u8) temp); @@ -569,20 +905,20 @@ static struct proc_dir_entry *acpi_ec_dir; static int acpi_ec_read_info(struct seq_file *seq, void *offset) { - struct acpi_ec *ec = (struct acpi_ec *)seq->private; + union acpi_ec *ec = (union acpi_ec *)seq->private; if (!ec) goto end; seq_printf(seq, "gpe bit: 0x%02x\n", - (u32) ec->gpe_bit); + (u32) ec->common.gpe_bit); seq_printf(seq, "ports: 0x%02x, 0x%02x\n", - (u32) ec->command_addr, - (u32) ec->data_addr); + (u32) ec->common.status_addr.address, + (u32) ec->common.data_addr.address); seq_printf(seq, "use global lock: %s\n", - ec->global_lock ? "yes" : "no"); - acpi_enable_gpe(NULL, ec->gpe_bit, ACPI_NOT_ISR); + ec->common.global_lock ? "yes" : "no"); + acpi_enable_gpe(NULL, ec->common.gpe_bit, ACPI_NOT_ISR); end: return 0; @@ -593,7 +929,7 @@ static int acpi_ec_info_open_fs(struct inode *inode, struct file *file) return single_open(file, acpi_ec_read_info, PDE(inode)->data); } -static struct file_operations acpi_ec_info_ops = { +static const struct file_operations acpi_ec_info_ops = { .open = acpi_ec_info_open_fs, .read = seq_read, .llseek = seq_lseek, @@ -642,35 +978,101 @@ static int acpi_ec_remove_fs(struct acpi_device *device) Driver Interface -------------------------------------------------------------------------- */ -static int acpi_ec_add(struct acpi_device *device) +static int acpi_ec_poll_add(struct acpi_device *device) { int result = 0; acpi_status status = AE_OK; - struct acpi_ec *ec = NULL; + union acpi_ec *ec = NULL; if (!device) return -EINVAL; - ec = kmalloc(sizeof(struct acpi_ec), GFP_KERNEL); + ec = kmalloc(sizeof(union acpi_ec), GFP_KERNEL); if (!ec) return -ENOMEM; - memset(ec, 0, sizeof(struct acpi_ec)); - - ec->handle = device->handle; - ec->uid = -1; - init_MUTEX(&ec->sem); - if (acpi_ec_mode == EC_INTR) { - atomic_set(&ec->leaving_burst, 1); - init_waitqueue_head(&ec->wait); + memset(ec, 0, sizeof(union acpi_ec)); + + ec->common.handle = device->handle; + ec->common.uid = -1; + init_MUTEX(&ec->poll.sem); + strcpy(acpi_device_name(device), ACPI_EC_DEVICE_NAME); + strcpy(acpi_device_class(device), ACPI_EC_CLASS); + acpi_driver_data(device) = ec; + + /* Use the global lock for all EC transactions? */ + acpi_evaluate_integer(ec->common.handle, "_GLK", NULL, + &ec->common.global_lock); + + /* XXX we don't test uids, because on some boxes ecdt uid = 0, see: + http://bugzilla.kernel.org/show_bug.cgi?id=6111 */ + if (ec_ecdt) { + acpi_remove_address_space_handler(ACPI_ROOT_OBJECT, + ACPI_ADR_SPACE_EC, + &acpi_ec_space_handler); + + acpi_remove_gpe_handler(NULL, ec_ecdt->common.gpe_bit, + &acpi_ec_gpe_handler); + + kfree(ec_ecdt); } + + /* Get GPE bit assignment (EC events). */ + /* TODO: Add support for _GPE returning a package */ + status = + acpi_evaluate_integer(ec->common.handle, "_GPE", NULL, + &ec->common.gpe_bit); + if (ACPI_FAILURE(status)) { + ACPI_EXCEPTION((AE_INFO, status, "Obtaining GPE bit")); + result = -ENODEV; + goto end; + } + + result = acpi_ec_add_fs(device); + if (result) + goto end; + + printk(KERN_INFO PREFIX "%s [%s] (gpe %d) polling mode.\n", + acpi_device_name(device), acpi_device_bid(device), + (u32) ec->common.gpe_bit); + + if (!first_ec) + first_ec = device; + + end: + if (result) + kfree(ec); + + return result; +} +static int acpi_ec_intr_add(struct acpi_device *device) +{ + int result = 0; + acpi_status status = AE_OK; + union acpi_ec *ec = NULL; + + + if (!device) + return -EINVAL; + + ec = kmalloc(sizeof(union acpi_ec), GFP_KERNEL); + if (!ec) + return -ENOMEM; + memset(ec, 0, sizeof(union acpi_ec)); + + ec->common.handle = device->handle; + ec->common.uid = -1; + atomic_set(&ec->intr.pending_gpe, 0); + atomic_set(&ec->intr.leaving_burst, 1); + init_MUTEX(&ec->intr.sem); + init_waitqueue_head(&ec->intr.wait); strcpy(acpi_device_name(device), ACPI_EC_DEVICE_NAME); strcpy(acpi_device_class(device), ACPI_EC_CLASS); acpi_driver_data(device) = ec; /* Use the global lock for all EC transactions? */ - acpi_evaluate_integer(ec->handle, "_GLK", NULL, - &ec->global_lock); + acpi_evaluate_integer(ec->common.handle, "_GLK", NULL, + &ec->common.global_lock); /* XXX we don't test uids, because on some boxes ecdt uid = 0, see: http://bugzilla.kernel.org/show_bug.cgi?id=6111 */ @@ -679,7 +1081,7 @@ static int acpi_ec_add(struct acpi_device *device) ACPI_ADR_SPACE_EC, &acpi_ec_space_handler); - acpi_remove_gpe_handler(NULL, ec_ecdt->gpe_bit, + acpi_remove_gpe_handler(NULL, ec_ecdt->common.gpe_bit, &acpi_ec_gpe_handler); kfree(ec_ecdt); @@ -688,10 +1090,10 @@ static int acpi_ec_add(struct acpi_device *device) /* Get GPE bit assignment (EC events). */ /* TODO: Add support for _GPE returning a package */ status = - acpi_evaluate_integer(ec->handle, "_GPE", NULL, - &ec->gpe_bit); + acpi_evaluate_integer(ec->common.handle, "_GPE", NULL, + &ec->common.gpe_bit); if (ACPI_FAILURE(status)) { - ACPI_EXCEPTION((AE_INFO, status, "Obtaining GPE bit assignment")); + printk(KERN_ERR PREFIX "Obtaining GPE bit assignment\n"); result = -ENODEV; goto end; } @@ -700,14 +1102,14 @@ static int acpi_ec_add(struct acpi_device *device) if (result) goto end; - ACPI_DEBUG_PRINT((ACPI_DB_INFO, "%s [%s] (gpe %d) interrupt mode.", + printk(KERN_INFO PREFIX "%s [%s] (gpe %d) interrupt mode.\n", acpi_device_name(device), acpi_device_bid(device), - (u32) ec->gpe_bit)); + (u32) ec->common.gpe_bit); if (!first_ec) first_ec = device; - end: + end: if (result) kfree(ec); @@ -716,7 +1118,7 @@ static int acpi_ec_add(struct acpi_device *device) static int acpi_ec_remove(struct acpi_device *device, int type) { - struct acpi_ec *ec = NULL; + union acpi_ec *ec = NULL; if (!device) @@ -734,7 +1136,8 @@ static int acpi_ec_remove(struct acpi_device *device, int type) static acpi_status acpi_ec_io_ports(struct acpi_resource *resource, void *context) { - struct acpi_ec *ec = (struct acpi_ec *)context; + union acpi_ec *ec = (union acpi_ec *)context; + struct acpi_generic_address *addr; if (resource->type != ACPI_RESOURCE_TYPE_IO) { return AE_OK; @@ -745,21 +1148,26 @@ acpi_ec_io_ports(struct acpi_resource *resource, void *context) * the second address region returned is the status/command * port. */ - if (ec->data_addr == 0) { - ec->data_addr = resource->data.io.minimum; - } else if (ec->command_addr == 0) { - ec->command_addr = resource->data.io.minimum; + if (ec->common.data_addr.register_bit_width == 0) { + addr = &ec->common.data_addr; + } else if (ec->common.command_addr.register_bit_width == 0) { + addr = &ec->common.command_addr; } else { return AE_CTRL_TERMINATE; } + addr->address_space_id = ACPI_ADR_SPACE_SYSTEM_IO; + addr->register_bit_width = 8; + addr->register_bit_offset = 0; + addr->address = resource->data.io.minimum; + return AE_OK; } static int acpi_ec_start(struct acpi_device *device) { acpi_status status = AE_OK; - struct acpi_ec *ec = NULL; + union acpi_ec *ec = NULL; if (!device) @@ -773,35 +1181,39 @@ static int acpi_ec_start(struct acpi_device *device) /* * Get I/O port addresses. Convert to GAS format. */ - status = acpi_walk_resources(ec->handle, METHOD_NAME__CRS, + status = acpi_walk_resources(ec->common.handle, METHOD_NAME__CRS, acpi_ec_io_ports, ec); - if (ACPI_FAILURE(status) || ec->command_addr == 0) { - ACPI_EXCEPTION((AE_INFO, status, - "Error getting I/O port addresses")); + if (ACPI_FAILURE(status) + || ec->common.command_addr.register_bit_width == 0) { + printk(KERN_ERR PREFIX "Error getting I/O port addresses\n"); return -ENODEV; } - ACPI_DEBUG_PRINT((ACPI_DB_INFO, "gpe=0x%02lx, ports=0x%2lx,0x%2lx", - ec->gpe_bit, ec->command_addr, ec->data_addr)); + ec->common.status_addr = ec->common.command_addr; + + ACPI_DEBUG_PRINT((ACPI_DB_INFO, "gpe=0x%02x, ports=0x%2x,0x%2x\n", + (u32) ec->common.gpe_bit, + (u32) ec->common.command_addr.address, + (u32) ec->common.data_addr.address)); /* * Install GPE handler */ - status = acpi_install_gpe_handler(NULL, ec->gpe_bit, + status = acpi_install_gpe_handler(NULL, ec->common.gpe_bit, ACPI_GPE_EDGE_TRIGGERED, &acpi_ec_gpe_handler, ec); if (ACPI_FAILURE(status)) { return -ENODEV; } - acpi_set_gpe_type(NULL, ec->gpe_bit, ACPI_GPE_TYPE_RUNTIME); - acpi_enable_gpe(NULL, ec->gpe_bit, ACPI_NOT_ISR); + acpi_set_gpe_type(NULL, ec->common.gpe_bit, ACPI_GPE_TYPE_RUNTIME); + acpi_enable_gpe(NULL, ec->common.gpe_bit, ACPI_NOT_ISR); - status = acpi_install_address_space_handler(ec->handle, + status = acpi_install_address_space_handler(ec->common.handle, ACPI_ADR_SPACE_EC, &acpi_ec_space_handler, &acpi_ec_space_setup, ec); if (ACPI_FAILURE(status)) { - acpi_remove_gpe_handler(NULL, ec->gpe_bit, + acpi_remove_gpe_handler(NULL, ec->common.gpe_bit, &acpi_ec_gpe_handler); return -ENODEV; } @@ -812,7 +1224,7 @@ static int acpi_ec_start(struct acpi_device *device) static int acpi_ec_stop(struct acpi_device *device, int type) { acpi_status status = AE_OK; - struct acpi_ec *ec = NULL; + union acpi_ec *ec = NULL; if (!device) @@ -820,14 +1232,14 @@ static int acpi_ec_stop(struct acpi_device *device, int type) ec = acpi_driver_data(device); - status = acpi_remove_address_space_handler(ec->handle, + status = acpi_remove_address_space_handler(ec->common.handle, ACPI_ADR_SPACE_EC, &acpi_ec_space_handler); if (ACPI_FAILURE(status)) return -ENODEV; status = - acpi_remove_gpe_handler(NULL, ec->gpe_bit, + acpi_remove_gpe_handler(NULL, ec->common.gpe_bit, &acpi_ec_gpe_handler); if (ACPI_FAILURE(status)) return -ENODEV; @@ -839,30 +1251,76 @@ static acpi_status __init acpi_fake_ecdt_callback(acpi_handle handle, u32 Level, void *context, void **retval) { + + if (acpi_ec_poll_mode) + return acpi_fake_ecdt_poll_callback(handle, + Level, context, retval); + else + return acpi_fake_ecdt_intr_callback(handle, + Level, context, retval); +} + +static acpi_status __init +acpi_fake_ecdt_poll_callback(acpi_handle handle, + u32 Level, void *context, void **retval) +{ acpi_status status; - init_MUTEX(&ec_ecdt->sem); - if (acpi_ec_mode == EC_INTR) { - init_waitqueue_head(&ec_ecdt->wait); - } status = acpi_walk_resources(handle, METHOD_NAME__CRS, acpi_ec_io_ports, ec_ecdt); if (ACPI_FAILURE(status)) return status; + ec_ecdt->common.status_addr = ec_ecdt->common.command_addr; + + ec_ecdt->common.uid = -1; + acpi_evaluate_integer(handle, "_UID", NULL, &ec_ecdt->common.uid); + + status = + acpi_evaluate_integer(handle, "_GPE", NULL, + &ec_ecdt->common.gpe_bit); + if (ACPI_FAILURE(status)) + return status; + init_MUTEX(&ec_ecdt->poll.sem); + ec_ecdt->common.global_lock = TRUE; + ec_ecdt->common.handle = handle; + + printk(KERN_INFO PREFIX "GPE=0x%02x, ports=0x%2x, 0x%2x\n", + (u32) ec_ecdt->common.gpe_bit, + (u32) ec_ecdt->common.command_addr.address, + (u32) ec_ecdt->common.data_addr.address); + + return AE_CTRL_TERMINATE; +} + +static acpi_status __init +acpi_fake_ecdt_intr_callback(acpi_handle handle, + u32 Level, void *context, void **retval) +{ + acpi_status status; + + init_MUTEX(&ec_ecdt->intr.sem); + init_waitqueue_head(&ec_ecdt->intr.wait); + status = acpi_walk_resources(handle, METHOD_NAME__CRS, + acpi_ec_io_ports, ec_ecdt); + if (ACPI_FAILURE(status)) + return status; + ec_ecdt->common.status_addr = ec_ecdt->common.command_addr; - ec_ecdt->uid = -1; - acpi_evaluate_integer(handle, "_UID", NULL, &ec_ecdt->uid); + ec_ecdt->common.uid = -1; + acpi_evaluate_integer(handle, "_UID", NULL, &ec_ecdt->common.uid); status = acpi_evaluate_integer(handle, "_GPE", NULL, - &ec_ecdt->gpe_bit); + &ec_ecdt->common.gpe_bit); if (ACPI_FAILURE(status)) return status; - ec_ecdt->global_lock = TRUE; - ec_ecdt->handle = handle; + ec_ecdt->common.global_lock = TRUE; + ec_ecdt->common.handle = handle; - ACPI_DEBUG_PRINT((ACPI_DB_INFO, "GPE=0x%02lx, ports=0x%2lx, 0x%2lx", - ec_ecdt->gpe_bit, ec_ecdt->command_addr, ec_ecdt->data_addr)); + printk(KERN_INFO PREFIX "GPE=0x%02x, ports=0x%2x, 0x%2x\n", + (u32) ec_ecdt->common.gpe_bit, + (u32) ec_ecdt->common.command_addr.address, + (u32) ec_ecdt->common.data_addr.address); return AE_CTRL_TERMINATE; } @@ -882,14 +1340,14 @@ static int __init acpi_ec_fake_ecdt(void) acpi_status status; int ret = 0; - ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Try to make an fake ECDT")); + printk(KERN_INFO PREFIX "Try to make an fake ECDT\n"); - ec_ecdt = kmalloc(sizeof(struct acpi_ec), GFP_KERNEL); + ec_ecdt = kmalloc(sizeof(union acpi_ec), GFP_KERNEL); if (!ec_ecdt) { ret = -ENOMEM; goto error; } - memset(ec_ecdt, 0, sizeof(struct acpi_ec)); + memset(ec_ecdt, 0, sizeof(union acpi_ec)); status = acpi_get_devices(ACPI_EC_HID, acpi_fake_ecdt_callback, NULL, NULL); @@ -897,15 +1355,23 @@ static int __init acpi_ec_fake_ecdt(void) kfree(ec_ecdt); ec_ecdt = NULL; ret = -ENODEV; - ACPI_EXCEPTION((AE_INFO, status, "Can't make an fake ECDT")); goto error; } return 0; - error: + error: + printk(KERN_ERR PREFIX "Can't make an fake ECDT\n"); return ret; } static int __init acpi_ec_get_real_ecdt(void) +{ + if (acpi_ec_poll_mode) + return acpi_ec_poll_get_real_ecdt(); + else + return acpi_ec_intr_get_real_ecdt(); +} + +static int __init acpi_ec_poll_get_real_ecdt(void) { acpi_status status; struct acpi_table_ecdt *ecdt_ptr; @@ -916,36 +1382,80 @@ static int __init acpi_ec_get_real_ecdt(void) if (ACPI_FAILURE(status)) return -ENODEV; - ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Found ECDT")); + printk(KERN_INFO PREFIX "Found ECDT\n"); /* * Generate a temporary ec context to use until the namespace is scanned */ - ec_ecdt = kmalloc(sizeof(struct acpi_ec), GFP_KERNEL); + ec_ecdt = kmalloc(sizeof(union acpi_ec), GFP_KERNEL); if (!ec_ecdt) return -ENOMEM; - memset(ec_ecdt, 0, sizeof(struct acpi_ec)); + memset(ec_ecdt, 0, sizeof(union acpi_ec)); + + ec_ecdt->common.command_addr = ecdt_ptr->ec_control; + ec_ecdt->common.status_addr = ecdt_ptr->ec_control; + ec_ecdt->common.data_addr = ecdt_ptr->ec_data; + ec_ecdt->common.gpe_bit = ecdt_ptr->gpe_bit; + init_MUTEX(&ec_ecdt->poll.sem); + /* use the GL just to be safe */ + ec_ecdt->common.global_lock = TRUE; + ec_ecdt->common.uid = ecdt_ptr->uid; - init_MUTEX(&ec_ecdt->sem); - if (acpi_ec_mode == EC_INTR) { - init_waitqueue_head(&ec_ecdt->wait); + status = + acpi_get_handle(NULL, ecdt_ptr->ec_id, &ec_ecdt->common.handle); + if (ACPI_FAILURE(status)) { + goto error; } - ec_ecdt->command_addr = ecdt_ptr->ec_control.address; - ec_ecdt->data_addr = ecdt_ptr->ec_data.address; - ec_ecdt->gpe_bit = ecdt_ptr->gpe_bit; + + return 0; + error: + printk(KERN_ERR PREFIX "Could not use ECDT\n"); + kfree(ec_ecdt); + ec_ecdt = NULL; + + return -ENODEV; +} + +static int __init acpi_ec_intr_get_real_ecdt(void) +{ + acpi_status status; + struct acpi_table_ecdt *ecdt_ptr; + + status = acpi_get_firmware_table("ECDT", 1, ACPI_LOGICAL_ADDRESSING, + (struct acpi_table_header **) + &ecdt_ptr); + if (ACPI_FAILURE(status)) + return -ENODEV; + + printk(KERN_INFO PREFIX "Found ECDT\n"); + + /* + * Generate a temporary ec context to use until the namespace is scanned + */ + ec_ecdt = kmalloc(sizeof(union acpi_ec), GFP_KERNEL); + if (!ec_ecdt) + return -ENOMEM; + memset(ec_ecdt, 0, sizeof(union acpi_ec)); + + init_MUTEX(&ec_ecdt->intr.sem); + init_waitqueue_head(&ec_ecdt->intr.wait); + ec_ecdt->common.command_addr = ecdt_ptr->ec_control; + ec_ecdt->common.status_addr = ecdt_ptr->ec_control; + ec_ecdt->common.data_addr = ecdt_ptr->ec_data; + ec_ecdt->common.gpe_bit = ecdt_ptr->gpe_bit; /* use the GL just to be safe */ - ec_ecdt->global_lock = TRUE; - ec_ecdt->uid = ecdt_ptr->uid; + ec_ecdt->common.global_lock = TRUE; + ec_ecdt->common.uid = ecdt_ptr->uid; status = - acpi_get_handle(NULL, ecdt_ptr->ec_id, &ec_ecdt->handle); + acpi_get_handle(NULL, ecdt_ptr->ec_id, &ec_ecdt->common.handle); if (ACPI_FAILURE(status)) { goto error; } return 0; - error: - ACPI_EXCEPTION((AE_INFO, status, "Could not use ECDT")); + error: + printk(KERN_ERR PREFIX "Could not use ECDT\n"); kfree(ec_ecdt); ec_ecdt = NULL; @@ -970,14 +1480,14 @@ int __init acpi_ec_ecdt_probe(void) /* * Install GPE handler */ - status = acpi_install_gpe_handler(NULL, ec_ecdt->gpe_bit, + status = acpi_install_gpe_handler(NULL, ec_ecdt->common.gpe_bit, ACPI_GPE_EDGE_TRIGGERED, &acpi_ec_gpe_handler, ec_ecdt); if (ACPI_FAILURE(status)) { goto error; } - acpi_set_gpe_type(NULL, ec_ecdt->gpe_bit, ACPI_GPE_TYPE_RUNTIME); - acpi_enable_gpe(NULL, ec_ecdt->gpe_bit, ACPI_NOT_ISR); + acpi_set_gpe_type(NULL, ec_ecdt->common.gpe_bit, ACPI_GPE_TYPE_RUNTIME); + acpi_enable_gpe(NULL, ec_ecdt->common.gpe_bit, ACPI_NOT_ISR); status = acpi_install_address_space_handler(ACPI_ROOT_OBJECT, ACPI_ADR_SPACE_EC, @@ -985,7 +1495,7 @@ int __init acpi_ec_ecdt_probe(void) &acpi_ec_space_setup, ec_ecdt); if (ACPI_FAILURE(status)) { - acpi_remove_gpe_handler(NULL, ec_ecdt->gpe_bit, + acpi_remove_gpe_handler(NULL, ec_ecdt->common.gpe_bit, &acpi_ec_gpe_handler); goto error; } @@ -993,7 +1503,7 @@ int __init acpi_ec_ecdt_probe(void) return 0; error: - ACPI_EXCEPTION((AE_INFO, status, "Could not use ECDT")); + printk(KERN_ERR PREFIX "Could not use ECDT\n"); kfree(ec_ecdt); ec_ecdt = NULL; @@ -1052,13 +1562,13 @@ static int __init acpi_ec_set_intr_mode(char *str) return 0; if (intr) { - acpi_ec_mode = EC_INTR; + acpi_ec_poll_mode = EC_INTR; + acpi_ec_driver.ops.add = acpi_ec_intr_add; } else { - acpi_ec_mode = EC_POLL; + acpi_ec_poll_mode = EC_POLL; + acpi_ec_driver.ops.add = acpi_ec_poll_add; } - acpi_ec_driver.ops.add = acpi_ec_add; - ACPI_DEBUG_PRINT((ACPI_DB_INFO, "EC %s mode.\n", intr ? "interrupt" : "polling")); - + printk(KERN_INFO PREFIX "EC %s mode.\n", intr ? "interrupt" : "polling"); return 1; } diff --git a/trunk/drivers/acpi/events/evmisc.c b/trunk/drivers/acpi/events/evmisc.c index ee2a10bf9077..6eef4efddcf6 100644 --- a/trunk/drivers/acpi/events/evmisc.c +++ b/trunk/drivers/acpi/events/evmisc.c @@ -342,8 +342,20 @@ static u32 acpi_ev_global_lock_handler(void *context) if (acquired) { /* Got the lock, now wake all threads waiting for it */ + acpi_gbl_global_lock_acquired = TRUE; - acpi_ev_global_lock_thread(context); + + /* Run the Global Lock thread which will signal all waiting threads */ + + status = + acpi_os_execute(OSL_GLOBAL_LOCK_HANDLER, + acpi_ev_global_lock_thread, context); + if (ACPI_FAILURE(status)) { + ACPI_EXCEPTION((AE_INFO, status, + "Could not queue Global Lock thread")); + + return (ACPI_INTERRUPT_NOT_HANDLED); + } } return (ACPI_INTERRUPT_HANDLED); diff --git a/trunk/drivers/acpi/events/evrgnini.c b/trunk/drivers/acpi/events/evrgnini.c index 203d1359190a..5b3c7a85eb9a 100644 --- a/trunk/drivers/acpi/events/evrgnini.c +++ b/trunk/drivers/acpi/events/evrgnini.c @@ -225,12 +225,13 @@ acpi_ev_pci_config_region_setup(acpi_handle handle, if (! (ACPI_STRNCMP (object_hID.value, PCI_ROOT_HID_STRING, - sizeof(PCI_ROOT_HID_STRING))) - || - !(ACPI_STRNCMP - (object_hID.value, - PCI_EXPRESS_ROOT_HID_STRING, - sizeof(PCI_EXPRESS_ROOT_HID_STRING)))) { + sizeof(PCI_ROOT_HID_STRING)) + || + !(ACPI_STRNCMP + (object_hID.value, + PCI_EXPRESS_ROOT_HID_STRING, + sizeof(PCI_EXPRESS_ROOT_HID_STRING))))) + { /* Install a handler for this PCI root bridge */ diff --git a/trunk/drivers/acpi/ibm_acpi.c b/trunk/drivers/acpi/ibm_acpi.c index 003a9876c968..15fc12482ba0 100644 --- a/trunk/drivers/acpi/ibm_acpi.c +++ b/trunk/drivers/acpi/ibm_acpi.c @@ -1702,11 +1702,13 @@ static struct ibm_struct ibms[] = { .name = "brightness", .read = brightness_read, .write = brightness_write, + .experimental = 1, }, { .name = "volume", .read = volume_read, .write = volume_write, + .experimental = 1, }, { .name = "fan", diff --git a/trunk/drivers/acpi/motherboard.c b/trunk/drivers/acpi/motherboard.c index 2e17ec75af03..ec6b7f9ede34 100644 --- a/trunk/drivers/acpi/motherboard.c +++ b/trunk/drivers/acpi/motherboard.c @@ -48,12 +48,6 @@ ACPI_MODULE_NAME("acpi_motherboard") * the io ports if they really know they can use it, while * still preventing hotplug PCI devices from using it. */ - -/* - * When CONFIG_PNP is enabled, pnp/system.c binds to PNP0C01 - * and PNP0C02, redundant with acpi_reserve_io_ranges(). - * But acpi_reserve_io_ranges() is necessary for !CONFIG_PNP. - */ static acpi_status acpi_reserve_io_ranges(struct acpi_resource *res, void *data) { struct resource *requested_res = NULL; diff --git a/trunk/drivers/acpi/osl.c b/trunk/drivers/acpi/osl.c index c84286cbbe25..068fe4f100b0 100644 --- a/trunk/drivers/acpi/osl.c +++ b/trunk/drivers/acpi/osl.c @@ -73,7 +73,6 @@ static unsigned int acpi_irq_irq; static acpi_osd_handler acpi_irq_handler; static void *acpi_irq_context; static struct workqueue_struct *kacpid_wq; -static struct workqueue_struct *kacpi_notify_wq; acpi_status acpi_os_initialize(void) { @@ -92,9 +91,8 @@ acpi_status acpi_os_initialize1(void) return AE_NULL_ENTRY; } kacpid_wq = create_singlethread_workqueue("kacpid"); - kacpi_notify_wq = create_singlethread_workqueue("kacpi_notify"); BUG_ON(!kacpid_wq); - BUG_ON(!kacpi_notify_wq); + return AE_OK; } @@ -106,7 +104,6 @@ acpi_status acpi_os_terminate(void) } destroy_workqueue(kacpid_wq); - destroy_workqueue(kacpi_notify_wq); return AE_OK; } @@ -569,7 +566,10 @@ void acpi_os_derive_pci_id(acpi_handle rhandle, /* upper bound */ static void acpi_os_execute_deferred(void *context) { - struct acpi_os_dpc *dpc = (struct acpi_os_dpc *)context; + struct acpi_os_dpc *dpc = NULL; + + + dpc = (struct acpi_os_dpc *)context; if (!dpc) { printk(KERN_ERR PREFIX "Invalid (NULL) context\n"); return; @@ -604,12 +604,14 @@ acpi_status acpi_os_execute(acpi_execute_type type, struct acpi_os_dpc *dpc; struct work_struct *task; + ACPI_FUNCTION_TRACE("os_queue_for_execution"); + ACPI_DEBUG_PRINT((ACPI_DB_EXEC, "Scheduling function [%p(%p)] for deferred execution.\n", function, context)); if (!function) - return AE_BAD_PARAMETER; + return_ACPI_STATUS(AE_BAD_PARAMETER); /* * Allocate/initialize DPC structure. Note that this memory will be @@ -622,20 +624,26 @@ acpi_status acpi_os_execute(acpi_execute_type type, * from the same memory. */ - dpc = kmalloc(sizeof(struct acpi_os_dpc) + - sizeof(struct work_struct), GFP_ATOMIC); + dpc = + kmalloc(sizeof(struct acpi_os_dpc) + sizeof(struct work_struct), + GFP_ATOMIC); if (!dpc) - return AE_NO_MEMORY; + return_ACPI_STATUS(AE_NO_MEMORY); + dpc->function = function; dpc->context = context; + task = (void *)(dpc + 1); INIT_WORK(task, acpi_os_execute_deferred, (void *)dpc); - if (!queue_work((type == OSL_NOTIFY_HANDLER)? - kacpi_notify_wq : kacpid_wq, task)) { - status = AE_ERROR; + + if (!queue_work(kacpid_wq, task)) { + ACPI_DEBUG_PRINT((ACPI_DB_ERROR, + "Call to queue_work() failed.\n")); kfree(dpc); + status = AE_ERROR; } - return status; + + return_ACPI_STATUS(status); } EXPORT_SYMBOL(acpi_os_execute); diff --git a/trunk/drivers/acpi/pci_link.c b/trunk/drivers/acpi/pci_link.c index d53bd9878ca2..7f3e7e77e794 100644 --- a/trunk/drivers/acpi/pci_link.c +++ b/trunk/drivers/acpi/pci_link.c @@ -307,7 +307,7 @@ static int acpi_pci_link_set(struct acpi_pci_link *link, int irq) if (!link || !irq) return -EINVAL; - resource = kmalloc(sizeof(*resource) + 1, irqs_disabled() ? GFP_ATOMIC: GFP_KERNEL); + resource = kmalloc(sizeof(*resource) + 1, GFP_ATOMIC); if (!resource) return -ENOMEM; diff --git a/trunk/drivers/acpi/power.c b/trunk/drivers/acpi/power.c index fe67a8af520e..fec225d1b6b7 100644 --- a/trunk/drivers/acpi/power.c +++ b/trunk/drivers/acpi/power.c @@ -216,8 +216,10 @@ static int acpi_power_off_device(acpi_handle handle) { int result = 0; acpi_status status = AE_OK; + struct acpi_device *device = NULL; struct acpi_power_resource *resource = NULL; + result = acpi_power_get_context(handle, &resource); if (result) return result; @@ -228,13 +230,13 @@ static int acpi_power_off_device(acpi_handle handle) if (resource->references) { ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Resource [%s] is still in use, dereferencing\n", - resource->device->pnp.bus_id)); + device->pnp.bus_id)); return 0; } if (resource->state == ACPI_POWER_RESOURCE_STATE_OFF) { ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Resource [%s] already off\n", - resource->device->pnp.bus_id)); + device->pnp.bus_id)); return 0; } @@ -249,7 +251,8 @@ static int acpi_power_off_device(acpi_handle handle) return -ENOEXEC; /* Update the power resource's _device_ power state */ - resource->device->power.state = ACPI_STATE_D3; + device = resource->device; + device->power.state = ACPI_STATE_D3; ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Resource [%s] turned off\n", resource->name)); diff --git a/trunk/drivers/acpi/processor_core.c b/trunk/drivers/acpi/processor_core.c index 1908e0d20222..b13d64415b7a 100644 --- a/trunk/drivers/acpi/processor_core.c +++ b/trunk/drivers/acpi/processor_core.c @@ -519,7 +519,7 @@ static int acpi_processor_get_info(struct acpi_processor *pr) static void *processor_device_array[NR_CPUS]; -static int __cpuinit acpi_processor_start(struct acpi_device *device) +static int acpi_processor_start(struct acpi_device *device) { int result = 0; acpi_status status = AE_OK; diff --git a/trunk/drivers/acpi/processor_idle.c b/trunk/drivers/acpi/processor_idle.c index 526387dc3799..0a395fca843b 100644 --- a/trunk/drivers/acpi/processor_idle.c +++ b/trunk/drivers/acpi/processor_idle.c @@ -219,23 +219,6 @@ static void acpi_safe_halt(void) static atomic_t c3_cpu_count; -/* Common C-state entry for C2, C3, .. */ -static void acpi_cstate_enter(struct acpi_processor_cx *cstate) -{ - if (cstate->space_id == ACPI_CSTATE_FFH) { - /* Call into architectural FFH based C-state */ - acpi_processor_ffh_cstate_enter(cstate); - } else { - int unused; - /* IO port based C-state */ - inb(cstate->address); - /* Dummy wait op - must do something useless after P_LVL2 read - because chipsets cannot guarantee that STPCLK# signal - gets asserted in time to freeze execution properly. */ - unused = inl(acpi_fadt.xpm_tmr_blk.address); - } -} - static void acpi_processor_idle(void) { struct acpi_processor *pr = NULL; @@ -378,7 +361,11 @@ static void acpi_processor_idle(void) /* Get start time (ticks) */ t1 = inl(acpi_fadt.xpm_tmr_blk.address); /* Invoke C2 */ - acpi_cstate_enter(cx); + inb(cx->address); + /* Dummy wait op - must do something useless after P_LVL2 read + because chipsets cannot guarantee that STPCLK# signal + gets asserted in time to freeze execution properly. */ + t2 = inl(acpi_fadt.xpm_tmr_blk.address); /* Get end time (ticks) */ t2 = inl(acpi_fadt.xpm_tmr_blk.address); @@ -414,7 +401,9 @@ static void acpi_processor_idle(void) /* Get start time (ticks) */ t1 = inl(acpi_fadt.xpm_tmr_blk.address); /* Invoke C3 */ - acpi_cstate_enter(cx); + inb(cx->address); + /* Dummy wait op (see above) */ + t2 = inl(acpi_fadt.xpm_tmr_blk.address); /* Get end time (ticks) */ t2 = inl(acpi_fadt.xpm_tmr_blk.address); if (pr->flags.bm_check) { @@ -639,16 +628,20 @@ static int acpi_processor_get_power_info_fadt(struct acpi_processor *pr) return 0; } -static int acpi_processor_get_power_info_default(struct acpi_processor *pr) +static int acpi_processor_get_power_info_default_c1(struct acpi_processor *pr) { - if (!pr->power.states[ACPI_STATE_C1].valid) { - /* set the first C-State to C1 */ - /* all processors need to support C1 */ - pr->power.states[ACPI_STATE_C1].type = ACPI_STATE_C1; - pr->power.states[ACPI_STATE_C1].valid = 1; - } - /* the C0 state only exists as a filler in our array */ + + /* Zero initialize all the C-states info. */ + memset(pr->power.states, 0, sizeof(pr->power.states)); + + /* set the first C-State to C1 */ + pr->power.states[ACPI_STATE_C1].type = ACPI_STATE_C1; + + /* the C0 state only exists as a filler in our array, + * and all processors need to support C1 */ pr->power.states[ACPI_STATE_C0].valid = 1; + pr->power.states[ACPI_STATE_C1].valid = 1; + return 0; } @@ -665,7 +658,12 @@ static int acpi_processor_get_power_info_cst(struct acpi_processor *pr) if (nocst) return -ENODEV; - current_count = 0; + current_count = 1; + + /* Zero initialize C2 onwards and prepare for fresh CST lookup */ + for (i = 2; i < ACPI_PROCESSOR_MAX_POWER; i++) + memset(&(pr->power.states[i]), 0, + sizeof(struct acpi_processor_cx)); status = acpi_evaluate_object(pr->handle, "_CST", NULL, &buffer); if (ACPI_FAILURE(status)) { @@ -720,39 +718,22 @@ static int acpi_processor_get_power_info_cst(struct acpi_processor *pr) (reg->space_id != ACPI_ADR_SPACE_FIXED_HARDWARE)) continue; + cx.address = (reg->space_id == ACPI_ADR_SPACE_FIXED_HARDWARE) ? + 0 : reg->address; + /* There should be an easy way to extract an integer... */ obj = (union acpi_object *)&(element->package.elements[1]); if (obj->type != ACPI_TYPE_INTEGER) continue; cx.type = obj->integer.value; - /* - * Some buggy BIOSes won't list C1 in _CST - - * Let acpi_processor_get_power_info_default() handle them later - */ - if (i == 1 && cx.type != ACPI_STATE_C1) - current_count++; - - cx.address = reg->address; - cx.index = current_count + 1; - - cx.space_id = ACPI_CSTATE_SYSTEMIO; - if (reg->space_id == ACPI_ADR_SPACE_FIXED_HARDWARE) { - if (acpi_processor_ffh_cstate_probe - (pr->id, &cx, reg) == 0) { - cx.space_id = ACPI_CSTATE_FFH; - } else if (cx.type != ACPI_STATE_C1) { - /* - * C1 is a special case where FIXED_HARDWARE - * can be handled in non-MWAIT way as well. - * In that case, save this _CST entry info. - * That is, we retain space_id of SYSTEM_IO for - * halt based C1. - * Otherwise, ignore this info and continue. - */ - continue; - } - } + + if ((cx.type != ACPI_STATE_C1) && + (reg->space_id != ACPI_ADR_SPACE_SYSTEM_IO)) + continue; + + if ((cx.type < ACPI_STATE_C2) || (cx.type > ACPI_STATE_C3)) + continue; obj = (union acpi_object *)&(element->package.elements[2]); if (obj->type != ACPI_TYPE_INTEGER) @@ -957,18 +938,12 @@ static int acpi_processor_get_power_info(struct acpi_processor *pr) /* NOTE: the idle thread may not be running while calling * this function */ - /* Zero initialize all the C-states info. */ - memset(pr->power.states, 0, sizeof(pr->power.states)); - + /* Adding C1 state */ + acpi_processor_get_power_info_default_c1(pr); result = acpi_processor_get_power_info_cst(pr); if (result == -ENODEV) acpi_processor_get_power_info_fadt(pr); - if (result) - return result; - - acpi_processor_get_power_info_default(pr); - pr->power.count = acpi_processor_power_verify(pr); /* @@ -1130,7 +1105,7 @@ static struct notifier_block acpi_processor_latency_notifier = { .notifier_call = acpi_processor_latency_notify, }; -int __cpuinit acpi_processor_power_init(struct acpi_processor *pr, +int acpi_processor_power_init(struct acpi_processor *pr, struct acpi_device *device) { acpi_status status = 0; diff --git a/trunk/drivers/acpi/sbs.c b/trunk/drivers/acpi/sbs.c index 8908a975e575..62bef0b3b614 100644 --- a/trunk/drivers/acpi/sbs.c +++ b/trunk/drivers/acpi/sbs.c @@ -98,11 +98,11 @@ static int update_info_mode = UPDATE_INFO_MODE; static int update_time = UPDATE_TIME; static int update_time2 = UPDATE_TIME2; -module_param(capacity_mode, int, 0); -module_param(update_mode, int, 0); -module_param(update_info_mode, int, 0); -module_param(update_time, int, 0); -module_param(update_time2, int, 0); +module_param(capacity_mode, int, CAPACITY_UNIT); +module_param(update_mode, int, UPDATE_MODE); +module_param(update_info_mode, int, UPDATE_INFO_MODE); +module_param(update_time, int, UPDATE_TIME); +module_param(update_time2, int, UPDATE_TIME2); static int acpi_sbs_add(struct acpi_device *device); static int acpi_sbs_remove(struct acpi_device *device, int type); @@ -1685,16 +1685,10 @@ static int acpi_sbs_add(struct acpi_device *device) int acpi_sbs_remove(struct acpi_device *device, int type) { - struct acpi_sbs *sbs = NULL; + struct acpi_sbs *sbs = (struct acpi_sbs *)acpi_driver_data(device); int id; - if (!device) { - return -EINVAL; - } - - sbs = (struct acpi_sbs *)acpi_driver_data(device); - - if (!sbs) { + if (!device || !sbs) { return -EINVAL; } diff --git a/trunk/drivers/acpi/tables/tbget.c b/trunk/drivers/acpi/tables/tbget.c index 11e2d4454e05..7856db759af0 100644 --- a/trunk/drivers/acpi/tables/tbget.c +++ b/trunk/drivers/acpi/tables/tbget.c @@ -324,7 +324,7 @@ acpi_tb_get_this_table(struct acpi_pointer *address, if (header->length < sizeof(struct acpi_table_header)) { ACPI_ERROR((AE_INFO, - "Table length (%X) is smaller than minimum (%zX)", + "Table length (%X) is smaller than minimum (%X)", header->length, sizeof(struct acpi_table_header))); return_ACPI_STATUS(AE_INVALID_TABLE_LENGTH); diff --git a/trunk/drivers/acpi/tables/tbrsdt.c b/trunk/drivers/acpi/tables/tbrsdt.c index 86a5fca9b739..0ad3dbb9ebca 100644 --- a/trunk/drivers/acpi/tables/tbrsdt.c +++ b/trunk/drivers/acpi/tables/tbrsdt.c @@ -187,7 +187,7 @@ acpi_status acpi_tb_validate_rsdt(struct acpi_table_header *table_ptr) if (table_ptr->length < sizeof(struct acpi_table_header)) { ACPI_ERROR((AE_INFO, - "RSDT/XSDT length (%X) is smaller than minimum (%zX)", + "RSDT/XSDT length (%X) is smaller than minimum (%X)", table_ptr->length, sizeof(struct acpi_table_header))); diff --git a/trunk/drivers/ata/libata-core.c b/trunk/drivers/ata/libata-core.c index 83728a9457ad..77138a39eb04 100644 --- a/trunk/drivers/ata/libata-core.c +++ b/trunk/drivers/ata/libata-core.c @@ -870,11 +870,7 @@ static unsigned int ata_id_xfermask(const u16 *id) * the PIO timing number for the maximum. Turn it into * a mask. */ - u8 mode = id[ATA_ID_OLD_PIO_MODES] & 0xFF; - if (mode < 5) /* Valid PIO range */ - pio_mask = (2 << mode) - 1; - else - pio_mask = 1; + pio_mask = (2 << (id[ATA_ID_OLD_PIO_MODES] & 0xFF)) - 1 ; /* But wait.. there's more. Design your standards by * committee and you too can get a free iordy field to diff --git a/trunk/drivers/ata/libata-scsi.c b/trunk/drivers/ata/libata-scsi.c index 7af2a4ba4990..b0d0cc41f3e8 100644 --- a/trunk/drivers/ata/libata-scsi.c +++ b/trunk/drivers/ata/libata-scsi.c @@ -164,10 +164,10 @@ int ata_cmd_ioctl(struct scsi_device *scsidev, void __user *arg) { int rc = 0; u8 scsi_cmd[MAX_COMMAND_SIZE]; - u8 args[4], *argbuf = NULL, *sensebuf = NULL; + u8 args[4], *argbuf = NULL; int argsize = 0; + struct scsi_sense_hdr sshdr; enum dma_data_direction data_dir; - int cmd_result; if (arg == NULL) return -EINVAL; @@ -175,10 +175,6 @@ int ata_cmd_ioctl(struct scsi_device *scsidev, void __user *arg) if (copy_from_user(args, arg, sizeof(args))) return -EFAULT; - sensebuf = kzalloc(SCSI_SENSE_BUFFERSIZE, GFP_NOIO); - if (!sensebuf) - return -ENOMEM; - memset(scsi_cmd, 0, sizeof(scsi_cmd)); if (args[3]) { @@ -195,7 +191,7 @@ int ata_cmd_ioctl(struct scsi_device *scsidev, void __user *arg) data_dir = DMA_FROM_DEVICE; } else { scsi_cmd[1] = (3 << 1); /* Non-data */ - scsi_cmd[2] = 0x20; /* cc but no off.line or data xfer */ + /* scsi_cmd[2] is already 0 -- no off.line, cc, or data xfer */ data_dir = DMA_NONE; } @@ -214,46 +210,18 @@ int ata_cmd_ioctl(struct scsi_device *scsidev, void __user *arg) /* Good values for timeout and retries? Values below from scsi_ioctl_send_command() for default case... */ - cmd_result = scsi_execute(scsidev, scsi_cmd, data_dir, argbuf, argsize, - sensebuf, (10*HZ), 5, 0); - - if (driver_byte(cmd_result) == DRIVER_SENSE) {/* sense data available */ - u8 *desc = sensebuf + 8; - cmd_result &= ~(0xFF<<24); /* DRIVER_SENSE is not an error */ - - /* If we set cc then ATA pass-through will cause a - * check condition even if no error. Filter that. */ - if (cmd_result & SAM_STAT_CHECK_CONDITION) { - struct scsi_sense_hdr sshdr; - scsi_normalize_sense(sensebuf, SCSI_SENSE_BUFFERSIZE, - &sshdr); - if (sshdr.sense_key==0 && - sshdr.asc==0 && sshdr.ascq==0) - cmd_result &= ~SAM_STAT_CHECK_CONDITION; - } - - /* Send userspace a few ATA registers (same as drivers/ide) */ - if (sensebuf[0] == 0x72 && /* format is "descriptor" */ - desc[0] == 0x09 ) { /* code is "ATA Descriptor" */ - args[0] = desc[13]; /* status */ - args[1] = desc[3]; /* error */ - args[2] = desc[5]; /* sector count (0:7) */ - if (copy_to_user(arg, args, sizeof(args))) - rc = -EFAULT; - } - } - - - if (cmd_result) { + if (scsi_execute_req(scsidev, scsi_cmd, data_dir, argbuf, argsize, + &sshdr, (10*HZ), 5)) { rc = -EIO; goto error; } + /* Need code to retrieve data from check condition? */ + if ((argbuf) && copy_to_user(arg + sizeof(args), argbuf, argsize)) rc = -EFAULT; error: - kfree(sensebuf); kfree(argbuf); return rc; } diff --git a/trunk/drivers/ata/pata_qdi.c b/trunk/drivers/ata/pata_qdi.c index 2c3cc0ccc606..7977f471d5e9 100644 --- a/trunk/drivers/ata/pata_qdi.c +++ b/trunk/drivers/ata/pata_qdi.c @@ -141,7 +141,7 @@ static void qdi_data_xfer(struct ata_device *adev, unsigned char *buf, unsigned memcpy(&pad, buf + buflen - slop, slop); outl(le32_to_cpu(pad), ap->ioaddr.data_addr); } else { - pad = cpu_to_le32(inl(ap->ioaddr.data_addr)); + pad = cpu_to_le16(inl(ap->ioaddr.data_addr)); memcpy(buf + buflen - slop, &pad, slop); } } diff --git a/trunk/drivers/ata/sata_promise.c b/trunk/drivers/ata/sata_promise.c index 72eda5160fad..8bcdfa64667c 100644 --- a/trunk/drivers/ata/sata_promise.c +++ b/trunk/drivers/ata/sata_promise.c @@ -260,7 +260,6 @@ static const struct pci_device_id pdc_ata_pci_tbl[] = { #if 0 { PCI_VDEVICE(PROMISE, 0x3570), board_20771 }, #endif - { PCI_VDEVICE(PROMISE, 0x3577), board_20771 }, { } /* terminate list */ }; diff --git a/trunk/drivers/block/DAC960.h b/trunk/drivers/block/DAC960.h index 6148073532b2..cec539e601fe 100644 --- a/trunk/drivers/block/DAC960.h +++ b/trunk/drivers/block/DAC960.h @@ -4379,8 +4379,8 @@ static inline void DAC960_P_To_PD_TranslateEnquiry(void *Enquiry) static inline void DAC960_P_To_PD_TranslateDeviceState(void *DeviceState) { memcpy(DeviceState + 2, DeviceState + 3, 1); - memmove(DeviceState + 4, DeviceState + 5, 2); - memmove(DeviceState + 6, DeviceState + 8, 4); + memcpy(DeviceState + 4, DeviceState + 5, 2); + memcpy(DeviceState + 6, DeviceState + 8, 4); } static inline diff --git a/trunk/drivers/block/amiflop.c b/trunk/drivers/block/amiflop.c index 5d6562171533..5d254b714509 100644 --- a/trunk/drivers/block/amiflop.c +++ b/trunk/drivers/block/amiflop.c @@ -1709,13 +1709,10 @@ static struct kobject *floppy_find(dev_t dev, int *part, void *data) return get_disk(unit[drive].gendisk); } -static int __init amiga_floppy_init(void) +int __init amiga_floppy_init(void) { int i, ret; - if (!MACH_IS_AMIGA) - return -ENXIO; - if (!AMIGAHW_PRESENT(AMI_FLOPPY)) return -ENXIO; @@ -1812,9 +1809,15 @@ static int __init amiga_floppy_init(void) return ret; } -module_init(amiga_floppy_init); #ifdef MODULE +int init_module(void) +{ + if (!MACH_IS_AMIGA) + return -ENXIO; + return amiga_floppy_init(); +} + #if 0 /* not safe to unload */ void cleanup_module(void) { diff --git a/trunk/drivers/block/xd.c b/trunk/drivers/block/xd.c index 0d97b7eb818a..10cc38783bdf 100644 --- a/trunk/drivers/block/xd.c +++ b/trunk/drivers/block/xd.c @@ -48,9 +48,9 @@ #include #include #include -#include #include +#include #include #include diff --git a/trunk/drivers/block/z2ram.c b/trunk/drivers/block/z2ram.c index 7cc2685ca84a..82ddbdd7bd4b 100644 --- a/trunk/drivers/block/z2ram.c +++ b/trunk/drivers/block/z2ram.c @@ -329,7 +329,7 @@ static struct kobject *z2_find(dev_t dev, int *part, void *data) static struct request_queue *z2_queue; -static int __init +int __init z2_init(void) { int ret; @@ -370,7 +370,26 @@ z2_init(void) return ret; } -static void __exit z2_exit(void) +#if defined(MODULE) + +MODULE_LICENSE("GPL"); + +int +init_module( void ) +{ + int error; + + error = z2_init(); + if ( error == 0 ) + { + printk( KERN_INFO DEVICE_NAME ": loaded as module\n" ); + } + + return error; +} + +void +cleanup_module( void ) { int i, j; blk_unregister_region(MKDEV(Z2RAM_MAJOR, 0), 256); @@ -406,7 +425,4 @@ static void __exit z2_exit(void) return; } - -module_init(z2_init); -module_exit(z2_exit); -MODULE_LICENSE("GPL"); +#endif diff --git a/trunk/drivers/char/ip2/i2lib.c b/trunk/drivers/char/ip2/i2lib.c index 54d93f0345e8..fc944d375be7 100644 --- a/trunk/drivers/char/ip2/i2lib.c +++ b/trunk/drivers/char/ip2/i2lib.c @@ -1007,7 +1007,7 @@ i2InputAvailable(i2ChanStrPtr pCh) // applications that one cannot break out of. //****************************************************************************** static int -i2Output(i2ChanStrPtr pCh, const char *pSource, int count) +i2Output(i2ChanStrPtr pCh, const char *pSource, int count, int user ) { i2eBordStrPtr pB; unsigned char *pInsert; @@ -1020,7 +1020,7 @@ i2Output(i2ChanStrPtr pCh, const char *pSource, int count) int bailout = 10; - ip2trace (CHANN, ITRC_OUTPUT, ITRC_ENTER, 2, count, 0 ); + ip2trace (CHANN, ITRC_OUTPUT, ITRC_ENTER, 2, count, user ); // Ensure channel structure seems real if ( !i2Validate ( pCh ) ) @@ -1087,7 +1087,12 @@ i2Output(i2ChanStrPtr pCh, const char *pSource, int count) DATA_COUNT_OF(pInsert) = amountToMove; // Move the data - memcpy( (char*)(DATA_OF(pInsert)), pSource, amountToMove ); + if ( user ) { + rc = copy_from_user((char*)(DATA_OF(pInsert)), pSource, + amountToMove ); + } else { + memcpy( (char*)(DATA_OF(pInsert)), pSource, amountToMove ); + } // Adjust pointers and indices pSource += amountToMove; pCh->Obuf_char_count += amountToMove; diff --git a/trunk/drivers/char/ip2/i2lib.h b/trunk/drivers/char/ip2/i2lib.h index e559e9bac06d..952e113ccd8a 100644 --- a/trunk/drivers/char/ip2/i2lib.h +++ b/trunk/drivers/char/ip2/i2lib.h @@ -332,7 +332,7 @@ static int i2QueueCommands(int, i2ChanStrPtr, int, int, cmdSyntaxPtr,...); static int i2GetStatus(i2ChanStrPtr, int); static int i2Input(i2ChanStrPtr); static int i2InputFlush(i2ChanStrPtr); -static int i2Output(i2ChanStrPtr, const char *, int); +static int i2Output(i2ChanStrPtr, const char *, int, int); static int i2OutputFree(i2ChanStrPtr); static int i2ServiceBoard(i2eBordStrPtr); static void i2DrainOutput(i2ChanStrPtr, int); diff --git a/trunk/drivers/char/ip2/ip2main.c b/trunk/drivers/char/ip2/ip2main.c index a3f32d46d2f8..858ba5432c99 100644 --- a/trunk/drivers/char/ip2/ip2main.c +++ b/trunk/drivers/char/ip2/ip2main.c @@ -1704,7 +1704,7 @@ ip2_write( PTTY tty, const unsigned char *pData, int count) /* This is the actual move bit. Make sure it does what we need!!!!! */ WRITE_LOCK_IRQSAVE(&pCh->Pbuf_spinlock,flags); - bytesSent = i2Output( pCh, pData, count); + bytesSent = i2Output( pCh, pData, count, 0 ); WRITE_UNLOCK_IRQRESTORE(&pCh->Pbuf_spinlock,flags); ip2trace (CHANN, ITRC_WRITE, ITRC_RETURN, 1, bytesSent ); @@ -1764,7 +1764,7 @@ ip2_flush_chars( PTTY tty ) // // We may need to restart i2Output if it does not fullfill this request // - strip = i2Output( pCh, pCh->Pbuf, pCh->Pbuf_stuff); + strip = i2Output( pCh, pCh->Pbuf, pCh->Pbuf_stuff, 0 ); if ( strip != pCh->Pbuf_stuff ) { memmove( pCh->Pbuf, &pCh->Pbuf[strip], pCh->Pbuf_stuff - strip ); } diff --git a/trunk/drivers/char/ipmi/ipmi_msghandler.c b/trunk/drivers/char/ipmi/ipmi_msghandler.c index 34a4fd13fa81..2455e8d478ac 100644 --- a/trunk/drivers/char/ipmi/ipmi_msghandler.c +++ b/trunk/drivers/char/ipmi/ipmi_msghandler.c @@ -1928,8 +1928,13 @@ static ssize_t guid_show(struct device *dev, struct device_attribute *attr, (long long) bmc->guid[8]); } -static void remove_files(struct bmc_device *bmc) +static void +cleanup_bmc_device(struct kref *ref) { + struct bmc_device *bmc; + + bmc = container_of(ref, struct bmc_device, refcount); + device_remove_file(&bmc->dev->dev, &bmc->device_id_attr); device_remove_file(&bmc->dev->dev, @@ -1946,23 +1951,12 @@ static void remove_files(struct bmc_device *bmc) &bmc->manufacturer_id_attr); device_remove_file(&bmc->dev->dev, &bmc->product_id_attr); - if (bmc->id.aux_firmware_revision_set) device_remove_file(&bmc->dev->dev, &bmc->aux_firmware_rev_attr); if (bmc->guid_set) device_remove_file(&bmc->dev->dev, &bmc->guid_attr); -} - -static void -cleanup_bmc_device(struct kref *ref) -{ - struct bmc_device *bmc; - - bmc = container_of(ref, struct bmc_device, refcount); - - remove_files(bmc); platform_device_unregister(bmc->dev); kfree(bmc); } @@ -1983,79 +1977,6 @@ static void ipmi_bmc_unregister(ipmi_smi_t intf) mutex_unlock(&ipmidriver_mutex); } -static int create_files(struct bmc_device *bmc) -{ - int err; - - err = device_create_file(&bmc->dev->dev, - &bmc->device_id_attr); - if (err) goto out; - err = device_create_file(&bmc->dev->dev, - &bmc->provides_dev_sdrs_attr); - if (err) goto out_devid; - err = device_create_file(&bmc->dev->dev, - &bmc->revision_attr); - if (err) goto out_sdrs; - err = device_create_file(&bmc->dev->dev, - &bmc->firmware_rev_attr); - if (err) goto out_rev; - err = device_create_file(&bmc->dev->dev, - &bmc->version_attr); - if (err) goto out_firm; - err = device_create_file(&bmc->dev->dev, - &bmc->add_dev_support_attr); - if (err) goto out_version; - err = device_create_file(&bmc->dev->dev, - &bmc->manufacturer_id_attr); - if (err) goto out_add_dev; - err = device_create_file(&bmc->dev->dev, - &bmc->product_id_attr); - if (err) goto out_manu; - if (bmc->id.aux_firmware_revision_set) { - err = device_create_file(&bmc->dev->dev, - &bmc->aux_firmware_rev_attr); - if (err) goto out_prod_id; - } - if (bmc->guid_set) { - err = device_create_file(&bmc->dev->dev, - &bmc->guid_attr); - if (err) goto out_aux_firm; - } - - return 0; - -out_aux_firm: - if (bmc->id.aux_firmware_revision_set) - device_remove_file(&bmc->dev->dev, - &bmc->aux_firmware_rev_attr); -out_prod_id: - device_remove_file(&bmc->dev->dev, - &bmc->product_id_attr); -out_manu: - device_remove_file(&bmc->dev->dev, - &bmc->manufacturer_id_attr); -out_add_dev: - device_remove_file(&bmc->dev->dev, - &bmc->add_dev_support_attr); -out_version: - device_remove_file(&bmc->dev->dev, - &bmc->version_attr); -out_firm: - device_remove_file(&bmc->dev->dev, - &bmc->firmware_rev_attr); -out_rev: - device_remove_file(&bmc->dev->dev, - &bmc->revision_attr); -out_sdrs: - device_remove_file(&bmc->dev->dev, - &bmc->provides_dev_sdrs_attr); -out_devid: - device_remove_file(&bmc->dev->dev, - &bmc->device_id_attr); -out: - return err; -} - static int ipmi_bmc_register(ipmi_smi_t intf) { int rv; @@ -2130,6 +2051,7 @@ static int ipmi_bmc_register(ipmi_smi_t intf) bmc->provides_dev_sdrs_attr.attr.mode = S_IRUGO; bmc->provides_dev_sdrs_attr.show = provides_dev_sdrs_show; + bmc->revision_attr.attr.name = "revision"; bmc->revision_attr.attr.owner = THIS_MODULE; bmc->revision_attr.attr.mode = S_IRUGO; @@ -2171,14 +2093,28 @@ static int ipmi_bmc_register(ipmi_smi_t intf) bmc->aux_firmware_rev_attr.attr.mode = S_IRUGO; bmc->aux_firmware_rev_attr.show = aux_firmware_rev_show; - rv = create_files(bmc); - if (rv) { - mutex_lock(&ipmidriver_mutex); - platform_device_unregister(bmc->dev); - mutex_unlock(&ipmidriver_mutex); - - return rv; - } + device_create_file(&bmc->dev->dev, + &bmc->device_id_attr); + device_create_file(&bmc->dev->dev, + &bmc->provides_dev_sdrs_attr); + device_create_file(&bmc->dev->dev, + &bmc->revision_attr); + device_create_file(&bmc->dev->dev, + &bmc->firmware_rev_attr); + device_create_file(&bmc->dev->dev, + &bmc->version_attr); + device_create_file(&bmc->dev->dev, + &bmc->add_dev_support_attr); + device_create_file(&bmc->dev->dev, + &bmc->manufacturer_id_attr); + device_create_file(&bmc->dev->dev, + &bmc->product_id_attr); + if (bmc->id.aux_firmware_revision_set) + device_create_file(&bmc->dev->dev, + &bmc->aux_firmware_rev_attr); + if (bmc->guid_set) + device_create_file(&bmc->dev->dev, + &bmc->guid_attr); printk(KERN_INFO "ipmi: Found new BMC (man_id: 0x%6.6x, " diff --git a/trunk/drivers/char/mem.c b/trunk/drivers/char/mem.c index 55473371b7c6..6511012cbdcd 100644 --- a/trunk/drivers/char/mem.c +++ b/trunk/drivers/char/mem.c @@ -26,7 +26,6 @@ #include #include #include -#include #include #include @@ -293,8 +292,8 @@ static int mmap_kmem(struct file * file, struct vm_area_struct * vma) { unsigned long pfn; - /* Turn a pfn offset into an absolute pfn */ - pfn = PFN_DOWN(virt_to_phys((void *)PAGE_OFFSET)) + vma->vm_pgoff; + /* Turn a kernel-virtual address into a physical page frame */ + pfn = __pa((u64)vma->vm_pgoff << PAGE_SHIFT) >> PAGE_SHIFT; /* * RED-PEN: on some architectures there is more mapped memory diff --git a/trunk/drivers/char/random.c b/trunk/drivers/char/random.c index 07f47a0208a7..eb6b13f4211a 100644 --- a/trunk/drivers/char/random.c +++ b/trunk/drivers/char/random.c @@ -645,6 +645,7 @@ void add_input_randomness(unsigned int type, unsigned int code, add_timer_randomness(&input_timer_state, (type << 4) ^ code ^ (code >> 4) ^ value); } +EXPORT_SYMBOL_GPL(add_input_randomness); void add_interrupt_randomness(int irq) { diff --git a/trunk/drivers/char/serial167.c b/trunk/drivers/char/serial167.c index 3af7f0958c5d..461bfe0234c9 100644 --- a/trunk/drivers/char/serial167.c +++ b/trunk/drivers/char/serial167.c @@ -839,7 +839,7 @@ shutdown(struct cyclades_port * info) local_irq_save(flags); if (info->xmit_buf){ free_page((unsigned long) info->xmit_buf); - info->xmit_buf = NULL; + info->xmit_buf = 0; } base_addr[CyCAR] = (u_char)channel; @@ -1354,7 +1354,7 @@ cy_unthrottle(struct tty_struct * tty) static int get_serial_info(struct cyclades_port * info, - struct serial_struct __user * retinfo) + struct serial_struct * retinfo) { struct serial_struct tmp; @@ -1376,7 +1376,7 @@ get_serial_info(struct cyclades_port * info, static int set_serial_info(struct cyclades_port * info, - struct serial_struct __user * new_info) + struct serial_struct * new_info) { struct serial_struct new_serial; struct cyclades_port old_info; @@ -1503,7 +1503,7 @@ send_break( struct cyclades_port * info, int duration) } /* send_break */ static int -get_mon_info(struct cyclades_port * info, struct cyclades_monitor __user * mon) +get_mon_info(struct cyclades_port * info, struct cyclades_monitor * mon) { if (copy_to_user(mon, &info->mon, sizeof(struct cyclades_monitor))) @@ -1516,7 +1516,7 @@ get_mon_info(struct cyclades_port * info, struct cyclades_monitor __user * mon) } static int -set_threshold(struct cyclades_port * info, unsigned long __user *arg) +set_threshold(struct cyclades_port * info, unsigned long *arg) { volatile unsigned char *base_addr = (u_char *)BASE_ADDR; unsigned long value; @@ -1533,7 +1533,7 @@ set_threshold(struct cyclades_port * info, unsigned long __user *arg) } static int -get_threshold(struct cyclades_port * info, unsigned long __user *value) +get_threshold(struct cyclades_port * info, unsigned long *value) { volatile unsigned char *base_addr = (u_char *)BASE_ADDR; int channel; @@ -1546,7 +1546,7 @@ get_threshold(struct cyclades_port * info, unsigned long __user *value) } static int -set_default_threshold(struct cyclades_port * info, unsigned long __user *arg) +set_default_threshold(struct cyclades_port * info, unsigned long *arg) { unsigned long value; @@ -1558,13 +1558,13 @@ set_default_threshold(struct cyclades_port * info, unsigned long __user *arg) } static int -get_default_threshold(struct cyclades_port * info, unsigned long __user *value) +get_default_threshold(struct cyclades_port * info, unsigned long *value) { return put_user(info->default_threshold,value); } static int -set_timeout(struct cyclades_port * info, unsigned long __user *arg) +set_timeout(struct cyclades_port * info, unsigned long *arg) { volatile unsigned char *base_addr = (u_char *)BASE_ADDR; int channel; @@ -1581,7 +1581,7 @@ set_timeout(struct cyclades_port * info, unsigned long __user *arg) } static int -get_timeout(struct cyclades_port * info, unsigned long __user *value) +get_timeout(struct cyclades_port * info, unsigned long *value) { volatile unsigned char *base_addr = (u_char *)BASE_ADDR; int channel; @@ -1601,7 +1601,7 @@ set_default_timeout(struct cyclades_port * info, unsigned long value) } static int -get_default_timeout(struct cyclades_port * info, unsigned long __user *value) +get_default_timeout(struct cyclades_port * info, unsigned long *value) { return put_user(info->default_timeout,value); } @@ -1613,7 +1613,6 @@ cy_ioctl(struct tty_struct *tty, struct file * file, unsigned long val; struct cyclades_port * info = (struct cyclades_port *)tty->driver_data; int ret_val = 0; - void __user *argp = (void __user *)arg; #ifdef SERIAL_DEBUG_OTHER printk("cy_ioctl %s, cmd = %x arg = %lx\n", tty->name, cmd, arg); /* */ @@ -1621,28 +1620,28 @@ cy_ioctl(struct tty_struct *tty, struct file * file, switch (cmd) { case CYGETMON: - ret_val = get_mon_info(info, argp); + ret_val = get_mon_info(info, (struct cyclades_monitor *)arg); break; case CYGETTHRESH: - ret_val = get_threshold(info, argp); + ret_val = get_threshold(info, (unsigned long *)arg); break; case CYSETTHRESH: - ret_val = set_threshold(info, argp); + ret_val = set_threshold(info, (unsigned long *)arg); break; case CYGETDEFTHRESH: - ret_val = get_default_threshold(info, argp); + ret_val = get_default_threshold(info, (unsigned long *)arg); break; case CYSETDEFTHRESH: - ret_val = set_default_threshold(info, argp); + ret_val = set_default_threshold(info, (unsigned long *)arg); break; case CYGETTIMEOUT: - ret_val = get_timeout(info, argp); + ret_val = get_timeout(info, (unsigned long *)arg); break; case CYSETTIMEOUT: - ret_val = set_timeout(info, argp); + ret_val = set_timeout(info, (unsigned long *)arg); break; case CYGETDEFTIMEOUT: - ret_val = get_default_timeout(info, argp); + ret_val = get_default_timeout(info, (unsigned long *)arg); break; case CYSETDEFTIMEOUT: ret_val = set_default_timeout(info, (unsigned long)arg); @@ -1665,20 +1664,21 @@ cy_ioctl(struct tty_struct *tty, struct file * file, /* The following commands are incompletely implemented!!! */ case TIOCGSOFTCAR: - ret_val = put_user(C_CLOCAL(tty) ? 1 : 0, (unsigned long __user *) argp); + ret_val = put_user(C_CLOCAL(tty) ? 1 : 0, (unsigned long *) arg); break; case TIOCSSOFTCAR: - ret_val = get_user(val, (unsigned long __user *) argp); + ret_val = get_user(val, (unsigned long *) arg); if (ret_val) break; tty->termios->c_cflag = ((tty->termios->c_cflag & ~CLOCAL) | (val ? CLOCAL : 0)); break; case TIOCGSERIAL: - ret_val = get_serial_info(info, argp); + ret_val = get_serial_info(info, (struct serial_struct *) arg); break; case TIOCSSERIAL: - ret_val = set_serial_info(info, argp); + ret_val = set_serial_info(info, + (struct serial_struct *) arg); break; default: ret_val = -ENOIOCTLCMD; @@ -1773,7 +1773,7 @@ cy_close(struct tty_struct * tty, struct file * filp) tty->driver->flush_buffer(tty); tty_ldisc_flush(tty); info->event = 0; - info->tty = NULL; + info->tty = 0; if (info->blocked_open) { if (info->close_delay) { msleep_interruptible(jiffies_to_msecs(info->close_delay)); @@ -2250,7 +2250,7 @@ scrn[1] = '\0'; info->card = index; info->line = port_num; info->flags = STD_COM_FLAGS; - info->tty = NULL; + info->tty = 0; info->xmit_fifo_size = 12; info->cor1 = CyPARITY_NONE|Cy_8_BITS; info->cor2 = CyETC; diff --git a/trunk/drivers/char/tpm/tpm.c b/trunk/drivers/char/tpm/tpm.c index 6ad2d3bb945c..a082a2e34252 100644 --- a/trunk/drivers/char/tpm/tpm.c +++ b/trunk/drivers/char/tpm/tpm.c @@ -1153,14 +1153,7 @@ struct tpm_chip *tpm_register_hardware(struct device *dev, const struct tpm_vend spin_unlock(&driver_lock); - if (sysfs_create_group(&dev->kobj, chip->vendor.attr_group)) { - list_del(&chip->list); - put_device(dev); - clear_bit(chip->dev_num, dev_mask); - kfree(chip); - kfree(devname); - return NULL; - } + sysfs_create_group(&dev->kobj, chip->vendor.attr_group); chip->bios_dir = tpm_bios_log_setup(devname); diff --git a/trunk/drivers/char/tpm/tpm_atmel.c b/trunk/drivers/char/tpm/tpm_atmel.c index 1ab0896070be..ad8ffe49256f 100644 --- a/trunk/drivers/char/tpm/tpm_atmel.c +++ b/trunk/drivers/char/tpm/tpm_atmel.c @@ -184,9 +184,7 @@ static int __init init_atmel(void) unsigned long base; struct tpm_chip *chip; - rc = driver_register(&atml_drv); - if (rc) - return rc; + driver_register(&atml_drv); if ((iobase = atmel_get_base_addr(&base, ®ion_size)) == NULL) { rc = -ENODEV; @@ -197,8 +195,10 @@ static int __init init_atmel(void) (atmel_request_region (tpm_atmel.base, region_size, "tpm_atmel0") == NULL) ? 0 : 1; - pdev = platform_device_register_simple("tpm_atmel", -1, NULL, 0); - if (IS_ERR(pdev)) { + + if (IS_ERR + (pdev = + platform_device_register_simple("tpm_atmel", -1, NULL, 0))) { rc = PTR_ERR(pdev); goto err_rel_reg; } diff --git a/trunk/drivers/char/tpm/tpm_nsc.c b/trunk/drivers/char/tpm/tpm_nsc.c index 608f73071bef..26287aace87d 100644 --- a/trunk/drivers/char/tpm/tpm_nsc.c +++ b/trunk/drivers/char/tpm/tpm_nsc.c @@ -284,7 +284,7 @@ static struct device_driver nsc_drv = { static int __init init_nsc(void) { int rc = 0; - int lo, hi, err; + int lo, hi; int nscAddrBase = TPM_ADDR; struct tpm_chip *chip; unsigned long base; @@ -297,9 +297,7 @@ static int __init init_nsc(void) return -ENODEV; } - err = driver_register(&nsc_drv); - if (err) - return err; + driver_register(&nsc_drv); hi = tpm_read_index(nscAddrBase, TPM_NSC_BASE0_HI); lo = tpm_read_index(nscAddrBase, TPM_NSC_BASE0_LO); diff --git a/trunk/drivers/cpufreq/cpufreq.c b/trunk/drivers/cpufreq/cpufreq.c index 0c18ac2fe7c2..86e69b7f9122 100644 --- a/trunk/drivers/cpufreq/cpufreq.c +++ b/trunk/drivers/cpufreq/cpufreq.c @@ -29,8 +29,7 @@ #include #include -#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_CORE, \ - "cpufreq-core", msg) +#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_CORE, "cpufreq-core", msg) /** * The "cpufreq driver" - the arch- or hardware-dependent low @@ -42,8 +41,7 @@ static struct cpufreq_policy *cpufreq_cpu_data[NR_CPUS]; static DEFINE_SPINLOCK(cpufreq_driver_lock); /* internal prototypes */ -static int __cpufreq_governor(struct cpufreq_policy *policy, - unsigned int event); +static int __cpufreq_governor(struct cpufreq_policy *policy, unsigned int event); static void handle_update(void *data); /** @@ -153,8 +151,7 @@ static void cpufreq_debug_disable_ratelimit(void) spin_unlock_irqrestore(&disable_ratelimit_lock, flags); } -void cpufreq_debug_printk(unsigned int type, const char *prefix, - const char *fmt, ...) +void cpufreq_debug_printk(unsigned int type, const char *prefix, const char *fmt, ...) { char s[256]; va_list args; @@ -164,8 +161,7 @@ void cpufreq_debug_printk(unsigned int type, const char *prefix, WARN_ON(!prefix); if (type & debug) { spin_lock_irqsave(&disable_ratelimit_lock, flags); - if (!disable_ratelimit && debug_ratelimit - && !printk_ratelimit()) { + if (!disable_ratelimit && debug_ratelimit && !printk_ratelimit()) { spin_unlock_irqrestore(&disable_ratelimit_lock, flags); return; } @@ -186,12 +182,10 @@ EXPORT_SYMBOL(cpufreq_debug_printk); module_param(debug, uint, 0644); -MODULE_PARM_DESC(debug, "CPUfreq debugging: add 1 to debug core," - " 2 to debug drivers, and 4 to debug governors."); +MODULE_PARM_DESC(debug, "CPUfreq debugging: add 1 to debug core, 2 to debug drivers, and 4 to debug governors."); module_param(debug_ratelimit, uint, 0644); -MODULE_PARM_DESC(debug_ratelimit, "CPUfreq debugging:" - " set to 0 to disable ratelimiting."); +MODULE_PARM_DESC(debug_ratelimit, "CPUfreq debugging: set to 0 to disable ratelimiting."); #else /* !CONFIG_CPU_FREQ_DEBUG */ @@ -225,23 +219,17 @@ static void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci) if (!l_p_j_ref_freq) { l_p_j_ref = loops_per_jiffy; l_p_j_ref_freq = ci->old; - dprintk("saving %lu as reference value for loops_per_jiffy;" - "freq is %u kHz\n", l_p_j_ref, l_p_j_ref_freq); + dprintk("saving %lu as reference value for loops_per_jiffy; freq is %u kHz\n", l_p_j_ref, l_p_j_ref_freq); } if ((val == CPUFREQ_PRECHANGE && ci->old < ci->new) || (val == CPUFREQ_POSTCHANGE && ci->old > ci->new) || (val == CPUFREQ_RESUMECHANGE || val == CPUFREQ_SUSPENDCHANGE)) { - loops_per_jiffy = cpufreq_scale(l_p_j_ref, l_p_j_ref_freq, - ci->new); - dprintk("scaling loops_per_jiffy to %lu" - "for frequency %u kHz\n", loops_per_jiffy, ci->new); + loops_per_jiffy = cpufreq_scale(l_p_j_ref, l_p_j_ref_freq, ci->new); + dprintk("scaling loops_per_jiffy to %lu for frequency %u kHz\n", loops_per_jiffy, ci->new); } } #else -static inline void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci) -{ - return; -} +static inline void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci) { return; } #endif @@ -328,8 +316,7 @@ static int cpufreq_parse_governor (char *str_governor, unsigned int *policy, if (!strnicmp(str_governor, "performance", CPUFREQ_NAME_LEN)) { *policy = CPUFREQ_POLICY_PERFORMANCE; err = 0; - } else if (!strnicmp(str_governor, "powersave", - CPUFREQ_NAME_LEN)) { + } else if (!strnicmp(str_governor, "powersave", CPUFREQ_NAME_LEN)) { *policy = CPUFREQ_POLICY_POWERSAVE; err = 0; } @@ -341,8 +328,7 @@ static int cpufreq_parse_governor (char *str_governor, unsigned int *policy, t = __find_governor(str_governor); if (t == NULL) { - char *name = kasprintf(GFP_KERNEL, "cpufreq_%s", - str_governor); + char *name = kasprintf(GFP_KERNEL, "cpufreq_%s", str_governor); if (name) { int ret; @@ -375,8 +361,7 @@ extern struct sysdev_class cpu_sysdev_class; /** - * cpufreq_per_cpu_attr_read() / show_##file_name() - - * print out cpufreq information + * cpufreq_per_cpu_attr_read() / show_##file_name() - print out cpufreq information * * Write out information from cpufreq_driver->policy[cpu]; object must be * "unsigned int". @@ -395,8 +380,7 @@ show_one(scaling_min_freq, min); show_one(scaling_max_freq, max); show_one(scaling_cur_freq, cur); -static int __cpufreq_set_policy(struct cpufreq_policy *data, - struct cpufreq_policy *policy); +static int __cpufreq_set_policy(struct cpufreq_policy *data, struct cpufreq_policy *policy); /** * cpufreq_per_cpu_attr_write() / store_##file_name() - sysfs write access @@ -432,8 +416,7 @@ store_one(scaling_max_freq,max); /** * show_cpuinfo_cur_freq - current CPU frequency as detected by hardware */ -static ssize_t show_cpuinfo_cur_freq (struct cpufreq_policy * policy, - char *buf) +static ssize_t show_cpuinfo_cur_freq (struct cpufreq_policy * policy, char *buf) { unsigned int cur_freq = cpufreq_get(policy->cpu); if (!cur_freq) @@ -445,8 +428,7 @@ static ssize_t show_cpuinfo_cur_freq (struct cpufreq_policy * policy, /** * show_scaling_governor - show the current policy for the specified CPU */ -static ssize_t show_scaling_governor (struct cpufreq_policy * policy, - char *buf) +static ssize_t show_scaling_governor (struct cpufreq_policy * policy, char *buf) { if(policy->policy == CPUFREQ_POLICY_POWERSAVE) return sprintf(buf, "powersave\n"); @@ -476,8 +458,7 @@ static ssize_t store_scaling_governor (struct cpufreq_policy * policy, if (ret != 1) return -EINVAL; - if (cpufreq_parse_governor(str_governor, &new_policy.policy, - &new_policy.governor)) + if (cpufreq_parse_governor(str_governor, &new_policy.policy, &new_policy.governor)) return -EINVAL; lock_cpu_hotplug(); @@ -493,10 +474,7 @@ static ssize_t store_scaling_governor (struct cpufreq_policy * policy, unlock_cpu_hotplug(); - if (ret) - return ret; - else - return count; + return ret ? ret : count; } /** @@ -510,7 +488,7 @@ static ssize_t show_scaling_driver (struct cpufreq_policy * policy, char *buf) /** * show_scaling_available_governors - show the available CPUfreq governors */ -static ssize_t show_scaling_available_governors (struct cpufreq_policy *policy, +static ssize_t show_scaling_available_governors (struct cpufreq_policy * policy, char *buf) { ssize_t i = 0; @@ -596,11 +574,7 @@ static ssize_t show(struct kobject * kobj, struct attribute * attr ,char * buf) policy = cpufreq_cpu_get(policy->cpu); if (!policy) return -EINVAL; - if (fattr->show) - ret = fattr->show(policy, buf); - else - ret = -EIO; - + ret = fattr->show ? fattr->show(policy,buf) : -EIO; cpufreq_cpu_put(policy); return ret; } @@ -614,11 +588,7 @@ static ssize_t store(struct kobject * kobj, struct attribute * attr, policy = cpufreq_cpu_get(policy->cpu); if (!policy) return -EINVAL; - if (fattr->store) - ret = fattr->store(policy, buf, count); - else - ret = -EIO; - + ret = fattr->store ? fattr->store(policy,buf,count) : -EIO; cpufreq_cpu_put(policy); return ret; } @@ -941,8 +911,7 @@ static void handle_update(void *data) * We adjust to current frequency first, and need to clean up later. So either call * to cpufreq_update_policy() or schedule handle_update()). */ -static void cpufreq_out_of_sync(unsigned int cpu, unsigned int old_freq, - unsigned int new_freq) +static void cpufreq_out_of_sync(unsigned int cpu, unsigned int old_freq, unsigned int new_freq) { struct cpufreq_freqs freqs; @@ -967,16 +936,16 @@ static void cpufreq_out_of_sync(unsigned int cpu, unsigned int old_freq, unsigned int cpufreq_quick_get(unsigned int cpu) { struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); - unsigned int ret_freq = 0; + unsigned int ret = 0; if (policy) { mutex_lock(&policy->lock); - ret_freq = policy->cur; + ret = policy->cur; mutex_unlock(&policy->lock); cpufreq_cpu_put(policy); } - return (ret_freq); + return (ret); } EXPORT_SYMBOL(cpufreq_quick_get); @@ -990,7 +959,7 @@ EXPORT_SYMBOL(cpufreq_quick_get); unsigned int cpufreq_get(unsigned int cpu) { struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); - unsigned int ret_freq = 0; + unsigned int ret = 0; if (!policy) return 0; @@ -1000,14 +969,12 @@ unsigned int cpufreq_get(unsigned int cpu) mutex_lock(&policy->lock); - ret_freq = cpufreq_driver->get(cpu); + ret = cpufreq_driver->get(cpu); - if (ret_freq && policy->cur && - !(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) { - /* verify no discrepancy between actual and - saved value exists */ - if (unlikely(ret_freq != policy->cur)) { - cpufreq_out_of_sync(cpu, policy->cur, ret_freq); + if (ret && policy->cur && !(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) { + /* verify no discrepancy between actual and saved value exists */ + if (unlikely(ret != policy->cur)) { + cpufreq_out_of_sync(cpu, policy->cur, ret); schedule_work(&policy->update); } } @@ -1017,7 +984,7 @@ unsigned int cpufreq_get(unsigned int cpu) out: cpufreq_cpu_put(policy); - return (ret_freq); + return (ret); } EXPORT_SYMBOL(cpufreq_get); @@ -1029,7 +996,7 @@ EXPORT_SYMBOL(cpufreq_get); static int cpufreq_suspend(struct sys_device * sysdev, pm_message_t pmsg) { int cpu = sysdev->id; - int ret = 0; + unsigned int ret = 0; unsigned int cur_freq = 0; struct cpufreq_policy *cpu_policy; @@ -1111,7 +1078,7 @@ static int cpufreq_suspend(struct sys_device * sysdev, pm_message_t pmsg) static int cpufreq_resume(struct sys_device * sysdev) { int cpu = sysdev->id; - int ret = 0; + unsigned int ret = 0; struct cpufreq_policy *cpu_policy; dprintk("resuming cpu %u\n", cpu); @@ -1307,45 +1274,22 @@ int cpufreq_driver_target(struct cpufreq_policy *policy, } EXPORT_SYMBOL_GPL(cpufreq_driver_target); -int cpufreq_driver_getavg(struct cpufreq_policy *policy) -{ - int ret = 0; - - policy = cpufreq_cpu_get(policy->cpu); - if (!policy) - return -EINVAL; - - mutex_lock(&policy->lock); - - if (cpu_online(policy->cpu) && cpufreq_driver->getavg) - ret = cpufreq_driver->getavg(policy->cpu); - - mutex_unlock(&policy->lock); - - cpufreq_cpu_put(policy); - return ret; -} -EXPORT_SYMBOL_GPL(cpufreq_driver_getavg); - /* * Locking: Must be called with the lock_cpu_hotplug() lock held * when "event" is CPUFREQ_GOV_LIMITS */ -static int __cpufreq_governor(struct cpufreq_policy *policy, - unsigned int event) +static int __cpufreq_governor(struct cpufreq_policy *policy, unsigned int event) { int ret; if (!try_module_get(policy->governor->owner)) return -EINVAL; - dprintk("__cpufreq_governor for CPU %u, event %u\n", - policy->cpu, event); + dprintk("__cpufreq_governor for CPU %u, event %u\n", policy->cpu, event); ret = policy->governor->governor(policy, event); - /* we keep one module reference alive for - each CPU governed by this CPU */ + /* we keep one module reference alive for each CPU governed by this CPU */ if ((event != CPUFREQ_GOV_START) || ret) module_put(policy->governor->owner); if ((event == CPUFREQ_GOV_STOP) && !ret) @@ -1421,12 +1365,9 @@ EXPORT_SYMBOL(cpufreq_get_policy); /* - * data : current policy. - * policy : policy to be set. * Locking: Must be called with the lock_cpu_hotplug() lock held */ -static int __cpufreq_set_policy(struct cpufreq_policy *data, - struct cpufreq_policy *policy) +static int __cpufreq_set_policy(struct cpufreq_policy *data, struct cpufreq_policy *policy) { int ret = 0; @@ -1434,8 +1375,7 @@ static int __cpufreq_set_policy(struct cpufreq_policy *data, dprintk("setting new policy for CPU %u: %u - %u kHz\n", policy->cpu, policy->min, policy->max); - memcpy(&policy->cpuinfo, &data->cpuinfo, - sizeof(struct cpufreq_cpuinfo)); + memcpy(&policy->cpuinfo, &data->cpuinfo, sizeof(struct cpufreq_cpuinfo)); if (policy->min > data->min && policy->min > policy->max) { ret = -EINVAL; @@ -1468,8 +1408,7 @@ static int __cpufreq_set_policy(struct cpufreq_policy *data, data->min = policy->min; data->max = policy->max; - dprintk("new min and max freqs are %u - %u kHz\n", - data->min, data->max); + dprintk("new min and max freqs are %u - %u kHz\n", data->min, data->max); if (cpufreq_driver->setpolicy) { data->policy = policy->policy; @@ -1490,12 +1429,10 @@ static int __cpufreq_set_policy(struct cpufreq_policy *data, data->governor = policy->governor; if (__cpufreq_governor(data, CPUFREQ_GOV_START)) { /* new governor failed, so re-start old one */ - dprintk("starting governor %s failed\n", - data->governor->name); + dprintk("starting governor %s failed\n", data->governor->name); if (old_gov) { data->governor = old_gov; - __cpufreq_governor(data, - CPUFREQ_GOV_START); + __cpufreq_governor(data, CPUFREQ_GOV_START); } ret = -EINVAL; goto error_out; @@ -1585,8 +1522,7 @@ int cpufreq_update_policy(unsigned int cpu) data->cur = policy.cur; } else { if (data->cur != policy.cur) - cpufreq_out_of_sync(cpu, data->cur, - policy.cur); + cpufreq_out_of_sync(cpu, data->cur, policy.cur); } } @@ -1690,10 +1626,8 @@ int cpufreq_register_driver(struct cpufreq_driver *driver_data) /* if all ->init() calls failed, unregister */ if (ret) { - dprintk("no CPU initialized for driver %s\n", - driver_data->name); - sysdev_driver_unregister(&cpu_sysdev_class, - &cpufreq_sysdev_driver); + dprintk("no CPU initialized for driver %s\n", driver_data->name); + sysdev_driver_unregister(&cpu_sysdev_class, &cpufreq_sysdev_driver); spin_lock_irqsave(&cpufreq_driver_lock, flags); cpufreq_driver = NULL; diff --git a/trunk/drivers/cpufreq/cpufreq_conservative.c b/trunk/drivers/cpufreq/cpufreq_conservative.c index 29905b4bf8c8..c4c578defabf 100644 --- a/trunk/drivers/cpufreq/cpufreq_conservative.c +++ b/trunk/drivers/cpufreq/cpufreq_conservative.c @@ -44,17 +44,15 @@ * latency of the processor. The governor will work on any processor with * transition latency <= 10mS, using appropriate sampling * rate. - * For CPUs with transition latency > 10mS (mostly drivers - * with CPUFREQ_ETERNAL), this governor will not work. + * For CPUs with transition latency > 10mS (mostly drivers with CPUFREQ_ETERNAL) + * this governor will not work. * All times here are in uS. */ static unsigned int def_sampling_rate; #define MIN_SAMPLING_RATE_RATIO (2) /* for correct statistics, we need at least 10 ticks between each measure */ -#define MIN_STAT_SAMPLING_RATE \ - (MIN_SAMPLING_RATE_RATIO * jiffies_to_usecs(10)) -#define MIN_SAMPLING_RATE \ - (def_sampling_rate / MIN_SAMPLING_RATE_RATIO) +#define MIN_STAT_SAMPLING_RATE (MIN_SAMPLING_RATE_RATIO * jiffies_to_usecs(10)) +#define MIN_SAMPLING_RATE (def_sampling_rate / MIN_SAMPLING_RATE_RATIO) #define MAX_SAMPLING_RATE (500 * def_sampling_rate) #define DEF_SAMPLING_RATE_LATENCY_MULTIPLIER (1000) #define DEF_SAMPLING_DOWN_FACTOR (1) @@ -105,16 +103,11 @@ static struct dbs_tuners dbs_tuners_ins = { static inline unsigned int get_cpu_idle_time(unsigned int cpu) { - unsigned int add_nice = 0, ret; - - if (dbs_tuners_ins.ignore_nice) - add_nice = kstat_cpu(cpu).cpustat.nice; - - ret = kstat_cpu(cpu).cpustat.idle + + return kstat_cpu(cpu).cpustat.idle + kstat_cpu(cpu).cpustat.iowait + - add_nice; - - return ret; + ( dbs_tuners_ins.ignore_nice ? + kstat_cpu(cpu).cpustat.nice : + 0); } /************************** sysfs interface ************************/ @@ -460,7 +453,6 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, unsigned int cpu = policy->cpu; struct cpu_dbs_info_s *this_dbs_info; unsigned int j; - int rc; this_dbs_info = &per_cpu(cpu_dbs_info, cpu); @@ -477,13 +469,6 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, break; mutex_lock(&dbs_mutex); - - rc = sysfs_create_group(&policy->kobj, &dbs_attr_group); - if (rc) { - mutex_unlock(&dbs_mutex); - return rc; - } - for_each_cpu_mask(j, policy->cpus) { struct cpu_dbs_info_s *j_dbs_info; j_dbs_info = &per_cpu(cpu_dbs_info, j); @@ -496,7 +481,7 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, this_dbs_info->enable = 1; this_dbs_info->down_skip = 0; this_dbs_info->requested_freq = policy->cur; - + sysfs_create_group(&policy->kobj, &dbs_attr_group); dbs_enable++; /* * Start the timerschedule work, when this governor diff --git a/trunk/drivers/cpufreq/cpufreq_ondemand.c b/trunk/drivers/cpufreq/cpufreq_ondemand.c index 048ec8b1f406..bf8aa45d4f01 100644 --- a/trunk/drivers/cpufreq/cpufreq_ondemand.c +++ b/trunk/drivers/cpufreq/cpufreq_ondemand.c @@ -41,10 +41,8 @@ static unsigned int def_sampling_rate; #define MIN_SAMPLING_RATE_RATIO (2) /* for correct statistics, we need at least 10 ticks between each measure */ -#define MIN_STAT_SAMPLING_RATE \ - (MIN_SAMPLING_RATE_RATIO * jiffies_to_usecs(10)) -#define MIN_SAMPLING_RATE \ - (def_sampling_rate / MIN_SAMPLING_RATE_RATIO) +#define MIN_STAT_SAMPLING_RATE (MIN_SAMPLING_RATE_RATIO * jiffies_to_usecs(10)) +#define MIN_SAMPLING_RATE (def_sampling_rate / MIN_SAMPLING_RATE_RATIO) #define MAX_SAMPLING_RATE (500 * def_sampling_rate) #define DEF_SAMPLING_RATE_LATENCY_MULTIPLIER (1000) #define TRANSITION_LATENCY_LIMIT (10 * 1000) @@ -204,8 +202,7 @@ static ssize_t store_sampling_rate(struct cpufreq_policy *unused, ret = sscanf(buf, "%u", &input); mutex_lock(&dbs_mutex); - if (ret != 1 || input > MAX_SAMPLING_RATE - || input < MIN_SAMPLING_RATE) { + if (ret != 1 || input > MAX_SAMPLING_RATE || input < MIN_SAMPLING_RATE) { mutex_unlock(&dbs_mutex); return -EINVAL; } @@ -396,15 +393,8 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info) * policy. To be safe, we focus 10 points under the threshold. */ if (load < (dbs_tuners_ins.up_threshold - 10)) { - unsigned int freq_next, freq_cur; - - freq_cur = cpufreq_driver_getavg(policy); - if (!freq_cur) - freq_cur = policy->cur; - - freq_next = (freq_cur * load) / + unsigned int freq_next = (policy->cur * load) / (dbs_tuners_ins.up_threshold - 10); - if (!dbs_tuners_ins.powersave_bias) { __cpufreq_driver_target(policy, freq_next, CPUFREQ_RELATION_L); @@ -476,7 +466,6 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, unsigned int cpu = policy->cpu; struct cpu_dbs_info_s *this_dbs_info; unsigned int j; - int rc; this_dbs_info = &per_cpu(cpu_dbs_info, cpu); @@ -499,23 +488,12 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, if (dbs_enable == 1) { kondemand_wq = create_workqueue("kondemand"); if (!kondemand_wq) { - printk(KERN_ERR - "Creation of kondemand failed\n"); + printk(KERN_ERR "Creation of kondemand failed\n"); dbs_enable--; mutex_unlock(&dbs_mutex); return -ENOSPC; } } - - rc = sysfs_create_group(&policy->kobj, &dbs_attr_group); - if (rc) { - if (dbs_enable == 1) - destroy_workqueue(kondemand_wq); - dbs_enable--; - mutex_unlock(&dbs_mutex); - return rc; - } - for_each_cpu_mask(j, policy->cpus) { struct cpu_dbs_info_s *j_dbs_info; j_dbs_info = &per_cpu(cpu_dbs_info, j); @@ -525,6 +503,7 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, j_dbs_info->prev_cpu_wall = get_jiffies_64(); } this_dbs_info->enable = 1; + sysfs_create_group(&policy->kobj, &dbs_attr_group); /* * Start the timerschedule work, when this governor * is used for first time diff --git a/trunk/drivers/cpufreq/cpufreq_performance.c b/trunk/drivers/cpufreq/cpufreq_performance.c index e8e1451ef1c1..de91e3371ef8 100644 --- a/trunk/drivers/cpufreq/cpufreq_performance.c +++ b/trunk/drivers/cpufreq/cpufreq_performance.c @@ -15,8 +15,7 @@ #include #include -#define dprintk(msg...) \ - cpufreq_debug_printk(CPUFREQ_DEBUG_GOVERNOR, "performance", msg) +#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_GOVERNOR, "performance", msg) static int cpufreq_governor_performance(struct cpufreq_policy *policy, @@ -25,10 +24,8 @@ static int cpufreq_governor_performance(struct cpufreq_policy *policy, switch (event) { case CPUFREQ_GOV_START: case CPUFREQ_GOV_LIMITS: - dprintk("setting to %u kHz because of event %u\n", - policy->max, event); - __cpufreq_driver_target(policy, policy->max, - CPUFREQ_RELATION_H); + dprintk("setting to %u kHz because of event %u\n", policy->max, event); + __cpufreq_driver_target(policy, policy->max, CPUFREQ_RELATION_H); break; default: break; diff --git a/trunk/drivers/cpufreq/cpufreq_powersave.c b/trunk/drivers/cpufreq/cpufreq_powersave.c index 13fe06b94b0a..0a2596044e65 100644 --- a/trunk/drivers/cpufreq/cpufreq_powersave.c +++ b/trunk/drivers/cpufreq/cpufreq_powersave.c @@ -15,8 +15,7 @@ #include #include -#define dprintk(msg...) \ - cpufreq_debug_printk(CPUFREQ_DEBUG_GOVERNOR, "powersave", msg) +#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_GOVERNOR, "powersave", msg) static int cpufreq_governor_powersave(struct cpufreq_policy *policy, unsigned int event) @@ -24,10 +23,8 @@ static int cpufreq_governor_powersave(struct cpufreq_policy *policy, switch (event) { case CPUFREQ_GOV_START: case CPUFREQ_GOV_LIMITS: - dprintk("setting to %u kHz because of event %u\n", - policy->min, event); - __cpufreq_driver_target(policy, policy->min, - CPUFREQ_RELATION_L); + dprintk("setting to %u kHz because of event %u\n", policy->min, event); + __cpufreq_driver_target(policy, policy->min, CPUFREQ_RELATION_L); break; default: break; diff --git a/trunk/drivers/cpufreq/cpufreq_stats.c b/trunk/drivers/cpufreq/cpufreq_stats.c index 6742b1adf2c8..c2ecc599dc5f 100644 --- a/trunk/drivers/cpufreq/cpufreq_stats.c +++ b/trunk/drivers/cpufreq/cpufreq_stats.c @@ -351,8 +351,8 @@ __init cpufreq_stats_init(void) register_hotcpu_notifier(&cpufreq_stat_cpu_notifier); for_each_online_cpu(cpu) { - cpufreq_stat_cpu_callback(&cpufreq_stat_cpu_notifier, - CPU_ONLINE, (void *)(long)cpu); + cpufreq_stat_cpu_callback(&cpufreq_stat_cpu_notifier, CPU_ONLINE, + (void *)(long)cpu); } return 0; } @@ -368,15 +368,14 @@ __exit cpufreq_stats_exit(void) unregister_hotcpu_notifier(&cpufreq_stat_cpu_notifier); lock_cpu_hotplug(); for_each_online_cpu(cpu) { - cpufreq_stat_cpu_callback(&cpufreq_stat_cpu_notifier, - CPU_DEAD, (void *)(long)cpu); + cpufreq_stat_cpu_callback(&cpufreq_stat_cpu_notifier, CPU_DEAD, + (void *)(long)cpu); } unlock_cpu_hotplug(); } MODULE_AUTHOR ("Zou Nan hai "); -MODULE_DESCRIPTION ("'cpufreq_stats' - A driver to export cpufreq stats" - "through sysfs filesystem"); +MODULE_DESCRIPTION ("'cpufreq_stats' - A driver to export cpufreq stats through sysfs filesystem"); MODULE_LICENSE ("GPL"); module_init(cpufreq_stats_init); diff --git a/trunk/drivers/cpufreq/cpufreq_userspace.c b/trunk/drivers/cpufreq/cpufreq_userspace.c index 2a4eb0bfaf30..a06c204589cd 100644 --- a/trunk/drivers/cpufreq/cpufreq_userspace.c +++ b/trunk/drivers/cpufreq/cpufreq_userspace.c @@ -131,26 +131,19 @@ static int cpufreq_governor_userspace(struct cpufreq_policy *policy, unsigned int event) { unsigned int cpu = policy->cpu; - int rc = 0; - switch (event) { case CPUFREQ_GOV_START: if (!cpu_online(cpu)) return -EINVAL; BUG_ON(!policy->cur); mutex_lock(&userspace_mutex); - rc = sysfs_create_file (&policy->kobj, - &freq_attr_scaling_setspeed.attr); - if (rc) - goto start_out; - cpu_is_managed[cpu] = 1; cpu_min_freq[cpu] = policy->min; cpu_max_freq[cpu] = policy->max; cpu_cur_freq[cpu] = policy->cur; cpu_set_freq[cpu] = policy->cur; + sysfs_create_file (&policy->kobj, &freq_attr_scaling_setspeed.attr); dprintk("managing cpu %u started (%u - %u kHz, currently %u kHz)\n", cpu, cpu_min_freq[cpu], cpu_max_freq[cpu], cpu_cur_freq[cpu]); -start_out: mutex_unlock(&userspace_mutex); break; case CPUFREQ_GOV_STOP: @@ -187,7 +180,7 @@ static int cpufreq_governor_userspace(struct cpufreq_policy *policy, mutex_unlock(&userspace_mutex); break; } - return rc; + return 0; } diff --git a/trunk/drivers/cpufreq/freq_table.c b/trunk/drivers/cpufreq/freq_table.c index e7490925fdcf..551f4ccf87fd 100644 --- a/trunk/drivers/cpufreq/freq_table.c +++ b/trunk/drivers/cpufreq/freq_table.c @@ -9,8 +9,7 @@ #include #include -#define dprintk(msg...) \ - cpufreq_debug_printk(CPUFREQ_DEBUG_CORE, "freq-table", msg) +#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_CORE, "freq-table", msg) /********************************************************************* * FREQUENCY TABLE HELPERS * @@ -30,8 +29,7 @@ int cpufreq_frequency_table_cpuinfo(struct cpufreq_policy *policy, continue; } - dprintk("table entry %u: %u kHz, %u index\n", - i, freq, table[i].index); + dprintk("table entry %u: %u kHz, %u index\n", i, freq, table[i].index); if (freq < min_freq) min_freq = freq; if (freq > max_freq) @@ -56,14 +54,13 @@ int cpufreq_frequency_table_verify(struct cpufreq_policy *policy, unsigned int i; unsigned int count = 0; - dprintk("request for verification of policy (%u - %u kHz) for cpu %u\n", - policy->min, policy->max, policy->cpu); + dprintk("request for verification of policy (%u - %u kHz) for cpu %u\n", policy->min, policy->max, policy->cpu); if (!cpu_online(policy->cpu)) return -EINVAL; - cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq, - policy->cpuinfo.max_freq); + cpufreq_verify_within_limits(policy, + policy->cpuinfo.min_freq, policy->cpuinfo.max_freq); for (i=0; (table[i].frequency != CPUFREQ_TABLE_END); i++) { unsigned int freq = table[i].frequency; @@ -78,11 +75,10 @@ int cpufreq_frequency_table_verify(struct cpufreq_policy *policy, if (!count) policy->max = next_larger; - cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq, - policy->cpuinfo.max_freq); + cpufreq_verify_within_limits(policy, + policy->cpuinfo.min_freq, policy->cpuinfo.max_freq); - dprintk("verification lead to (%u - %u kHz) for cpu %u\n", - policy->min, policy->max, policy->cpu); + dprintk("verification lead to (%u - %u kHz) for cpu %u\n", policy->min, policy->max, policy->cpu); return 0; } @@ -105,8 +101,7 @@ int cpufreq_frequency_table_target(struct cpufreq_policy *policy, }; unsigned int i; - dprintk("request for target %u kHz (relation: %u) for cpu %u\n", - target_freq, relation, policy->cpu); + dprintk("request for target %u kHz (relation: %u) for cpu %u\n", target_freq, relation, policy->cpu); switch (relation) { case CPUFREQ_RELATION_H: @@ -197,10 +192,7 @@ static ssize_t show_available_freqs (struct cpufreq_policy *policy, char *buf) } struct freq_attr cpufreq_freq_attr_scaling_available_freqs = { - .attr = { .name = "scaling_available_frequencies", - .mode = 0444, - .owner=THIS_MODULE - }, + .attr = { .name = "scaling_available_frequencies", .mode = 0444, .owner=THIS_MODULE }, .show = show_available_freqs, }; EXPORT_SYMBOL_GPL(cpufreq_freq_attr_scaling_available_freqs); diff --git a/trunk/drivers/eisa/eisa-bus.c b/trunk/drivers/eisa/eisa-bus.c index d944647c82c2..3a365e159d89 100644 --- a/trunk/drivers/eisa/eisa-bus.c +++ b/trunk/drivers/eisa/eisa-bus.c @@ -226,26 +226,14 @@ static int __init eisa_init_device (struct eisa_root_device *root, static int __init eisa_register_device (struct eisa_device *edev) { - int rc = device_register (&edev->dev); - if (rc) - return rc; + if (device_register (&edev->dev)) + return -1; - rc = device_create_file (&edev->dev, &dev_attr_signature); - if (rc) goto err_devreg; - rc = device_create_file (&edev->dev, &dev_attr_enabled); - if (rc) goto err_sig; - rc = device_create_file (&edev->dev, &dev_attr_modalias); - if (rc) goto err_enab; + device_create_file (&edev->dev, &dev_attr_signature); + device_create_file (&edev->dev, &dev_attr_enabled); + device_create_file (&edev->dev, &dev_attr_modalias); return 0; - -err_enab: - device_remove_file (&edev->dev, &dev_attr_enabled); -err_sig: - device_remove_file (&edev->dev, &dev_attr_signature); -err_devreg: - device_unregister(&edev->dev); - return rc; } static int __init eisa_request_resources (struct eisa_root_device *root, diff --git a/trunk/drivers/firmware/dell_rbu.c b/trunk/drivers/firmware/dell_rbu.c index 08b161798443..fc17599c905e 100644 --- a/trunk/drivers/firmware/dell_rbu.c +++ b/trunk/drivers/firmware/dell_rbu.c @@ -249,7 +249,7 @@ static int packetize_data(void *data, size_t length) if ((rc = create_packet(temp, packet_length))) return rc; - pr_debug("%p:%td\n", temp, (end - temp)); + pr_debug("%p:%lu\n", temp, (end - temp)); temp += packet_length; } @@ -718,27 +718,14 @@ static int __init dcdrbu_init(void) return -EIO; } - rc = sysfs_create_bin_file(&rbu_device->dev.kobj, &rbu_data_attr); - if (rc) - goto out_devreg; - rc = sysfs_create_bin_file(&rbu_device->dev.kobj, &rbu_image_type_attr); - if (rc) - goto out_data; - rc = sysfs_create_bin_file(&rbu_device->dev.kobj, + sysfs_create_bin_file(&rbu_device->dev.kobj, &rbu_data_attr); + sysfs_create_bin_file(&rbu_device->dev.kobj, &rbu_image_type_attr); + sysfs_create_bin_file(&rbu_device->dev.kobj, &rbu_packet_size_attr); - if (rc) - goto out_imtype; rbu_data.entry_created = 0; - return 0; - -out_imtype: - sysfs_remove_bin_file(&rbu_device->dev.kobj, &rbu_image_type_attr); -out_data: - sysfs_remove_bin_file(&rbu_device->dev.kobj, &rbu_data_attr); -out_devreg: - platform_device_unregister(rbu_device); return rc; + } static __exit void dcdrbu_exit(void) diff --git a/trunk/drivers/firmware/efivars.c b/trunk/drivers/firmware/efivars.c index 5ab5e393b882..8ebce1c03ad7 100644 --- a/trunk/drivers/firmware/efivars.c +++ b/trunk/drivers/firmware/efivars.c @@ -639,12 +639,7 @@ efivar_create_sysfs_entry(unsigned long variable_name_size, kobject_set_name(&new_efivar->kobj, "%s", short_name); kobj_set_kset_s(new_efivar, vars_subsys); - i = kobject_register(&new_efivar->kobj); - if (i) { - kfree(short_name); - kfree(new_efivar); - return 1; - } + kobject_register(&new_efivar->kobj); kfree(short_name); short_name = NULL; diff --git a/trunk/drivers/ide/ide-cd.c b/trunk/drivers/ide/ide-cd.c index bddfebdf91d8..69bbb6206a00 100644 --- a/trunk/drivers/ide/ide-cd.c +++ b/trunk/drivers/ide/ide-cd.c @@ -597,7 +597,7 @@ static void cdrom_prepare_request(ide_drive_t *drive, struct request *rq) struct cdrom_info *cd = drive->driver_data; ide_init_drive_cmd(rq); - rq->cmd_type = REQ_TYPE_ATA_PC; + rq->cmd_type = REQ_TYPE_BLOCK_PC; rq->rq_disk = cd->disk; } @@ -716,7 +716,7 @@ static int cdrom_decode_status(ide_drive_t *drive, int good_stat, int *stat_ret) ide_error(drive, "request sense failure", stat); return 1; - } else if (blk_pc_request(rq) || rq->cmd_type == REQ_TYPE_ATA_PC) { + } else if (blk_pc_request(rq)) { /* All other functions, except for READ. */ unsigned long flags; @@ -2023,8 +2023,7 @@ ide_do_rw_cdrom (ide_drive_t *drive, struct request *rq, sector_t block) } info->last_block = block; return action; - } else if (rq->cmd_type == REQ_TYPE_SENSE || - rq->cmd_type == REQ_TYPE_ATA_PC) { + } else if (rq->cmd_type == REQ_TYPE_SENSE) { return cdrom_do_packet_command(drive); } else if (blk_pc_request(rq)) { return cdrom_do_block_pc(drive, rq); diff --git a/trunk/drivers/ide/pci/generic.c b/trunk/drivers/ide/pci/generic.c index 5b77a5bcbf0c..965c43659e35 100644 --- a/trunk/drivers/ide/pci/generic.c +++ b/trunk/drivers/ide/pci/generic.c @@ -237,12 +237,10 @@ static int __devinit generic_init_one(struct pci_dev *dev, const struct pci_devi if (dev->vendor == PCI_VENDOR_ID_JMICRON && PCI_FUNC(dev->devfn) != 1) goto out; - if (dev->vendor != PCI_VENDOR_ID_JMICRON) { - pci_read_config_word(dev, PCI_COMMAND, &command); - if (!(command & PCI_COMMAND_IO)) { - printk(KERN_INFO "Skipping disabled %s IDE controller.\n", d->name); - goto out; - } + pci_read_config_word(dev, PCI_COMMAND, &command); + if (!(command & PCI_COMMAND_IO)) { + printk(KERN_INFO "Skipping disabled %s IDE controller.\n", d->name); + goto out; } ret = ide_setup_pci_device(dev, d); out: diff --git a/trunk/drivers/input/misc/hp_sdc_rtc.c b/trunk/drivers/input/misc/hp_sdc_rtc.c index ab4da79ee560..1be963961c15 100644 --- a/trunk/drivers/input/misc/hp_sdc_rtc.c +++ b/trunk/drivers/input/misc/hp_sdc_rtc.c @@ -60,7 +60,7 @@ static struct fasync_struct *hp_sdc_rtc_async_queue; static DECLARE_WAIT_QUEUE_HEAD(hp_sdc_rtc_wait); -static ssize_t hp_sdc_rtc_read(struct file *file, char __user *buf, +static ssize_t hp_sdc_rtc_read(struct file *file, char *buf, size_t count, loff_t *ppos); static int hp_sdc_rtc_ioctl(struct inode *inode, struct file *file, @@ -385,14 +385,14 @@ static int hp_sdc_rtc_set_i8042timer (struct timeval *setto, uint8_t setcmd) return 0; } -static ssize_t hp_sdc_rtc_read(struct file *file, char __user *buf, +static ssize_t hp_sdc_rtc_read(struct file *file, char *buf, size_t count, loff_t *ppos) { ssize_t retval; if (count < sizeof(unsigned long)) return -EINVAL; - retval = put_user(68, (unsigned long __user *)buf); + retval = put_user(68, (unsigned long *)buf); return retval; } @@ -696,7 +696,7 @@ static int __init hp_sdc_rtc_init(void) if ((ret = hp_sdc_request_timer_irq(&hp_sdc_rtc_isr))) return ret; misc_register(&hp_sdc_rtc_dev); - create_proc_read_entry ("driver/rtc", 0, NULL, + create_proc_read_entry ("driver/rtc", 0, 0, hp_sdc_rtc_read_proc, NULL); printk(KERN_INFO "HP i8042 SDC + MSM-58321 RTC support loaded " diff --git a/trunk/drivers/input/misc/wistron_btns.c b/trunk/drivers/input/misc/wistron_btns.c index 7b9d1c1da41a..4639537336fc 100644 --- a/trunk/drivers/input/misc/wistron_btns.c +++ b/trunk/drivers/input/misc/wistron_btns.c @@ -17,7 +17,7 @@ * with this program; if not, write to the Free Software Foundation, Inc., * 59 Temple Place Suite 330, Boston, MA 02111-1307, USA. */ -#include +#include #include #include #include diff --git a/trunk/drivers/input/serio/hil_mlc.c b/trunk/drivers/input/serio/hil_mlc.c index 49e11e2c1d5d..bdfde046b741 100644 --- a/trunk/drivers/input/serio/hil_mlc.c +++ b/trunk/drivers/input/serio/hil_mlc.c @@ -391,23 +391,23 @@ static int hilse_operate(hil_mlc *mlc, int repoll) { } #define FUNC(funct, funct_arg, zero_rc, neg_rc, pos_rc) \ -{ HILSE_FUNC, { .func = funct }, funct_arg, zero_rc, neg_rc, pos_rc }, +{ HILSE_FUNC, { func: &funct }, funct_arg, zero_rc, neg_rc, pos_rc }, #define OUT(pack) \ -{ HILSE_OUT, { .packet = pack }, 0, HILSEN_NEXT, HILSEN_DOZE, 0 }, +{ HILSE_OUT, { packet: pack }, 0, HILSEN_NEXT, HILSEN_DOZE, 0 }, #define CTS \ -{ HILSE_CTS, { .packet = 0 }, 0, HILSEN_NEXT | HILSEN_SCHED | HILSEN_BREAK, HILSEN_DOZE, 0 }, +{ HILSE_CTS, { packet: 0 }, 0, HILSEN_NEXT | HILSEN_SCHED | HILSEN_BREAK, HILSEN_DOZE, 0 }, #define EXPECT(comp, to, got, got_wrong, timed_out) \ -{ HILSE_EXPECT, { .packet = comp }, to, got, got_wrong, timed_out }, +{ HILSE_EXPECT, { packet: comp }, to, got, got_wrong, timed_out }, #define EXPECT_LAST(comp, to, got, got_wrong, timed_out) \ -{ HILSE_EXPECT_LAST, { .packet = comp }, to, got, got_wrong, timed_out }, +{ HILSE_EXPECT_LAST, { packet: comp }, to, got, got_wrong, timed_out }, #define EXPECT_DISC(comp, to, got, got_wrong, timed_out) \ -{ HILSE_EXPECT_DISC, { .packet = comp }, to, got, got_wrong, timed_out }, +{ HILSE_EXPECT_DISC, { packet: comp }, to, got, got_wrong, timed_out }, #define IN(to, got, got_error, timed_out) \ -{ HILSE_IN, { .packet = 0 }, to, got, got_error, timed_out }, +{ HILSE_IN, { packet: 0 }, to, got, got_error, timed_out }, #define OUT_DISC(pack) \ -{ HILSE_OUT_DISC, { .packet = pack }, 0, 0, 0, 0 }, +{ HILSE_OUT_DISC, { packet: pack }, 0, 0, 0, 0 }, #define OUT_LAST(pack) \ -{ HILSE_OUT_LAST, { .packet = pack }, 0, 0, 0, 0 }, +{ HILSE_OUT_LAST, { packet: pack }, 0, 0, 0, 0 }, struct hilse_node hil_mlc_se[HILSEN_END] = { diff --git a/trunk/drivers/input/serio/hp_sdc.c b/trunk/drivers/input/serio/hp_sdc.c index 9907ad3bea23..ba7b920347e3 100644 --- a/trunk/drivers/input/serio/hp_sdc.c +++ b/trunk/drivers/input/serio/hp_sdc.c @@ -310,7 +310,7 @@ static void hp_sdc_tasklet(unsigned long foo) { * in tasklet/bh context. */ if (curr->act.irqhook) - curr->act.irqhook(0, NULL, 0, 0); + curr->act.irqhook(0, 0, 0, 0); } curr->actidx = curr->idx; curr->idx++; @@ -525,7 +525,7 @@ unsigned long hp_sdc_put(void) { up(curr->act.semaphore); } else if (act & HP_SDC_ACT_CALLBACK) { - curr->act.irqhook(0,NULL,0,0); + curr->act.irqhook(0,0,0,0); } if (curr->idx >= curr->endidx) { /* This transaction is over. */ if (act & HP_SDC_ACT_DEALLOC) kfree(curr); diff --git a/trunk/drivers/isdn/pcbit/layer2.c b/trunk/drivers/isdn/pcbit/layer2.c index 937fd2120381..13e7d219d1c7 100644 --- a/trunk/drivers/isdn/pcbit/layer2.c +++ b/trunk/drivers/isdn/pcbit/layer2.c @@ -311,7 +311,6 @@ pcbit_deliver(void *data) dev->read_queue = frame->next; spin_unlock_irqrestore(&dev->lock, flags); - msg = 0; SET_MSG_CPU(msg, 0); SET_MSG_PROC(msg, 0); SET_MSG_CMD(msg, frame->skb->data[2]); diff --git a/trunk/drivers/isdn/sc/init.c b/trunk/drivers/isdn/sc/init.c index 06c9872e8c6a..222ca7c08baa 100644 --- a/trunk/drivers/isdn/sc/init.c +++ b/trunk/drivers/isdn/sc/init.c @@ -98,14 +98,13 @@ static int __init sc_init(void) * Confirm the I/O Address with a test */ if(io[b] == 0) { - pr_debug("I/O Address invalid.\n"); + pr_debug("I/O Address 0x%x is in use.\n"); continue; } outb(0x18, io[b] + 0x400 * EXP_PAGE0); if(inb(io[b] + 0x400 * EXP_PAGE0) != 0x18) { - pr_debug("I/O Base 0x%x fails test\n", - io[b] + 0x400 * EXP_PAGE0); + pr_debug("I/O Base 0x%x fails test\n"); continue; } } @@ -159,8 +158,8 @@ static int __init sc_init(void) outb(0xFF, io[b] + RESET_OFFSET); msleep_interruptible(10000); } - pr_debug("RAM Base for board %d is 0x%lx, %s probe\n", b, - ram[b], ram[b] == 0 ? "will" : "won't"); + pr_debug("RAM Base for board %d is 0x%x, %s probe\n", b, ram[b], + ram[b] == 0 ? "will" : "won't"); if(ram[b]) { /* @@ -169,7 +168,7 @@ static int __init sc_init(void) * board model */ if(request_region(ram[b], SRAM_PAGESIZE, "sc test")) { - pr_debug("request_region for RAM base 0x%lx succeeded\n", ram[b]); + pr_debug("request_region for RAM base 0x%x succeeded\n", ram[b]); model = identify_board(ram[b], io[b]); release_region(ram[b], SRAM_PAGESIZE); } @@ -205,7 +204,7 @@ static int __init sc_init(void) * Nope, there was no place in RAM for the * board, or it couldn't be identified */ - pr_debug("Failed to find an adapter at 0x%lx\n", ram[b]); + pr_debug("Failed to find an adapter at 0x%x\n", ram[b]); continue; } @@ -452,7 +451,7 @@ static int identify_board(unsigned long rambase, unsigned int iobase) HWConfig_pl hwci; int x; - pr_debug("Attempting to identify adapter @ 0x%lx io 0x%x\n", + pr_debug("Attempting to identify adapter @ 0x%x io 0x%x\n", rambase, iobase); /* @@ -491,7 +490,7 @@ static int identify_board(unsigned long rambase, unsigned int iobase) outb(PRI_BASEPG_VAL, pgport); msleep_interruptible(1000); sig = readl(rambase + SIG_OFFSET); - pr_debug("Looking for a signature, got 0x%lx\n", sig); + pr_debug("Looking for a signature, got 0x%x\n", sig); if(sig == SIGNATURE) return PRI_BOARD; @@ -501,7 +500,7 @@ static int identify_board(unsigned long rambase, unsigned int iobase) outb(BRI_BASEPG_VAL, pgport); msleep_interruptible(1000); sig = readl(rambase + SIG_OFFSET); - pr_debug("Looking for a signature, got 0x%lx\n", sig); + pr_debug("Looking for a signature, got 0x%x\n", sig); if(sig == SIGNATURE) return BRI_BOARD; @@ -511,7 +510,7 @@ static int identify_board(unsigned long rambase, unsigned int iobase) * Try to spot a card */ sig = readl(rambase + SIG_OFFSET); - pr_debug("Looking for a signature, got 0x%lx\n", sig); + pr_debug("Looking for a signature, got 0x%x\n", sig); if(sig != SIGNATURE) return -1; @@ -541,7 +540,7 @@ static int identify_board(unsigned long rambase, unsigned int iobase) memcpy_fromio(&rcvmsg, &(dpm->rsp_queue[dpm->rsp_tail]), MSG_LEN); pr_debug("Got HWConfig response, status = 0x%x\n", rcvmsg.rsp_status); memcpy(&hwci, &(rcvmsg.msg_data.HWCresponse), sizeof(HWConfig_pl)); - pr_debug("Hardware Config: Interface: %s, RAM Size: %ld, Serial: %s\n" + pr_debug("Hardware Config: Interface: %s, RAM Size: %d, Serial: %s\n" " Part: %s, Rev: %s\n", hwci.st_u_sense ? "S/T" : "U", hwci.ram_size, hwci.serial_no, hwci.part_no, hwci.rev_no); diff --git a/trunk/drivers/isdn/sc/packet.c b/trunk/drivers/isdn/sc/packet.c index 1e04676b016b..f50defc38ae5 100644 --- a/trunk/drivers/isdn/sc/packet.c +++ b/trunk/drivers/isdn/sc/packet.c @@ -44,7 +44,7 @@ int sndpkt(int devId, int channel, struct sk_buff *data) return -ENODEV; } - pr_debug("%s: sndpkt: frst = 0x%lx nxt = %d f = %d n = %d\n", + pr_debug("%s: sndpkt: frst = 0x%x nxt = %d f = %d n = %d\n", sc_adapter[card]->devicename, sc_adapter[card]->channel[channel].first_sendbuf, sc_adapter[card]->channel[channel].next_sendbuf, @@ -66,7 +66,7 @@ int sndpkt(int devId, int channel, struct sk_buff *data) ReqLnkWrite.buff_offset = sc_adapter[card]->channel[channel].next_sendbuf * BUFFER_SIZE + sc_adapter[card]->channel[channel].first_sendbuf; ReqLnkWrite.msg_len = data->len; /* sk_buff size */ - pr_debug("%s: writing %d bytes to buffer offset 0x%lx\n", + pr_debug("%s: writing %d bytes to buffer offset 0x%x\n", sc_adapter[card]->devicename, ReqLnkWrite.msg_len, ReqLnkWrite.buff_offset); memcpy_toshmem(card, (char *)ReqLnkWrite.buff_offset, data->data, ReqLnkWrite.msg_len); @@ -74,7 +74,7 @@ int sndpkt(int devId, int channel, struct sk_buff *data) /* * sendmessage */ - pr_debug("%s: sndpkt size=%d, buf_offset=0x%lx buf_indx=%d\n", + pr_debug("%s: sndpkt size=%d, buf_offset=0x%x buf_indx=%d\n", sc_adapter[card]->devicename, ReqLnkWrite.msg_len, ReqLnkWrite.buff_offset, sc_adapter[card]->channel[channel].next_sendbuf); @@ -124,7 +124,7 @@ void rcvpkt(int card, RspMessage *rcvmsg) return; } skb_put(skb, rcvmsg->msg_data.response.msg_len); - pr_debug("%s: getting data from offset: 0x%lx\n", + pr_debug("%s: getting data from offset: 0x%x\n", sc_adapter[card]->devicename, rcvmsg->msg_data.response.buff_offset); memcpy_fromshmem(card, @@ -143,7 +143,7 @@ void rcvpkt(int card, RspMessage *rcvmsg) /* memset_shmem(card, rcvmsg->msg_data.response.buff_offset, 0, BUFFER_SIZE); */ newll.buff_offset = rcvmsg->msg_data.response.buff_offset; newll.msg_len = BUFFER_SIZE; - pr_debug("%s: recycled buffer at offset 0x%lx size %d\n", + pr_debug("%s: recycled buffer at offset 0x%x size %d\n", sc_adapter[card]->devicename, newll.buff_offset, newll.msg_len); sendmessage(card, CEPID, ceReqTypeLnk, ceReqClass1, ceReqLnkRead, @@ -186,7 +186,7 @@ int setup_buffers(int card, int c) sc_adapter[card]->channel[c-1].num_sendbufs = nBuffers / 2; sc_adapter[card]->channel[c-1].free_sendbufs = nBuffers / 2; sc_adapter[card]->channel[c-1].next_sendbuf = 0; - pr_debug("%s: send buffer setup complete: first=0x%lx n=%d f=%d, nxt=%d\n", + pr_debug("%s: send buffer setup complete: first=0x%x n=%d f=%d, nxt=%d\n", sc_adapter[card]->devicename, sc_adapter[card]->channel[c-1].first_sendbuf, sc_adapter[card]->channel[c-1].num_sendbufs, @@ -203,7 +203,7 @@ int setup_buffers(int card, int c) ((sc_adapter[card]->channel[c-1].first_sendbuf + (nBuffers / 2) * buffer_size) + (buffer_size * i)); RcvBuffOffset.msg_len = buffer_size; - pr_debug("%s: adding RcvBuffer #%d offset=0x%lx sz=%d bufsz:%d\n", + pr_debug("%s: adding RcvBuffer #%d offset=0x%x sz=%d bufsz:%d\n", sc_adapter[card]->devicename, i + 1, RcvBuffOffset.buff_offset, RcvBuffOffset.msg_len,buffer_size); diff --git a/trunk/drivers/isdn/sc/shmem.c b/trunk/drivers/isdn/sc/shmem.c index 6f58862992db..24854826ca45 100644 --- a/trunk/drivers/isdn/sc/shmem.c +++ b/trunk/drivers/isdn/sc/shmem.c @@ -61,7 +61,7 @@ void memcpy_toshmem(int card, void *dest, const void *src, size_t n) spin_unlock_irqrestore(&sc_adapter[card]->lock, flags); pr_debug("%s: set page to %#x\n",sc_adapter[card]->devicename, ((sc_adapter[card]->shmem_magic + ch * SRAM_PAGESIZE)>>14)|0x80); - pr_debug("%s: copying %d bytes from %#lx to %#lx\n", + pr_debug("%s: copying %d bytes from %#x to %#x\n", sc_adapter[card]->devicename, n, (unsigned long) src, sc_adapter[card]->rambase + ((unsigned long) dest %0x4000)); diff --git a/trunk/drivers/mca/mca-bus.c b/trunk/drivers/mca/mca-bus.c index da862e4632dd..09baa43b2599 100644 --- a/trunk/drivers/mca/mca-bus.c +++ b/trunk/drivers/mca/mca-bus.c @@ -100,7 +100,6 @@ static DEVICE_ATTR(pos, S_IRUGO, mca_show_pos, NULL); int __init mca_register_device(int bus, struct mca_device *mca_dev) { struct mca_bus *mca_bus = mca_root_busses[bus]; - int rc; mca_dev->dev.parent = &mca_bus->dev; mca_dev->dev.bus = &mca_bus_type; @@ -109,23 +108,13 @@ int __init mca_register_device(int bus, struct mca_device *mca_dev) mca_dev->dev.dma_mask = &mca_dev->dma_mask; mca_dev->dev.coherent_dma_mask = mca_dev->dma_mask; - rc = device_register(&mca_dev->dev); - if (rc) - goto err_out; + if (device_register(&mca_dev->dev)) + return 0; - rc = device_create_file(&mca_dev->dev, &dev_attr_id); - if (rc) goto err_out_devreg; - rc = device_create_file(&mca_dev->dev, &dev_attr_pos); - if (rc) goto err_out_id; + device_create_file(&mca_dev->dev, &dev_attr_id); + device_create_file(&mca_dev->dev, &dev_attr_pos); return 1; - -err_out_id: - device_remove_file(&mca_dev->dev, &dev_attr_id); -err_out_devreg: - device_unregister(&mca_dev->dev); -err_out: - return 0; } /* */ @@ -141,16 +130,13 @@ struct mca_bus * __devinit mca_attach_bus(int bus) return NULL; } - mca_bus = kzalloc(sizeof(struct mca_bus), GFP_KERNEL); + mca_bus = kmalloc(sizeof(struct mca_bus), GFP_KERNEL); if (!mca_bus) return NULL; - + memset(mca_bus, 0, sizeof(struct mca_bus)); sprintf(mca_bus->dev.bus_id,"mca%d",bus); sprintf(mca_bus->name,"Host %s MCA Bridge", bus ? "Secondary" : "Primary"); - if (device_register(&mca_bus->dev)) { - kfree(mca_bus); - return NULL; - } + device_register(&mca_bus->dev); mca_root_busses[bus] = mca_bus; diff --git a/trunk/drivers/md/bitmap.c b/trunk/drivers/md/bitmap.c index d47d38ac71b1..8e67634e79a0 100644 --- a/trunk/drivers/md/bitmap.c +++ b/trunk/drivers/md/bitmap.c @@ -1413,7 +1413,7 @@ int bitmap_create(mddev_t *mddev) int err; sector_t start; - BUILD_BUG_ON(sizeof(bitmap_super_t) != 256); + BUG_ON(sizeof(bitmap_super_t) != 256); if (!file && !mddev->bitmap_offset) /* bitmap disabled, nothing to do */ return 0; diff --git a/trunk/drivers/media/dvb/bt8xx/dvb-bt8xx.c b/trunk/drivers/media/dvb/bt8xx/dvb-bt8xx.c index 14e69a736eda..fb6c4cc8477d 100644 --- a/trunk/drivers/media/dvb/bt8xx/dvb-bt8xx.c +++ b/trunk/drivers/media/dvb/bt8xx/dvb-bt8xx.c @@ -665,10 +665,6 @@ static void frontend_init(struct dvb_bt8xx_card *card, u32 type) case BTTV_BOARD_TWINHAN_DST: /* DST is not a frontend driver !!! */ state = (struct dst_state *) kmalloc(sizeof (struct dst_state), GFP_KERNEL); - if (!state) { - printk("dvb_bt8xx: No memory\n"); - break; - } /* Setup the Card */ state->config = &dst_config; state->i2c = card->i2c_adapter; diff --git a/trunk/drivers/media/dvb/dvb-core/Kconfig b/trunk/drivers/media/dvb/dvb-core/Kconfig index 1990eda10c46..e46eae3b9be2 100644 --- a/trunk/drivers/media/dvb/dvb-core/Kconfig +++ b/trunk/drivers/media/dvb/dvb-core/Kconfig @@ -19,6 +19,6 @@ config DVB_CORE_ATTACH allow the card drivers to only load the frontend modules they require. This saves several KBytes of memory. - Note: You will need module-init-tools v3.2 or later for this feature. + Note: You will need moudule-init-tools v3.2 or later for this feature. If unsure say Y. diff --git a/trunk/drivers/media/dvb/dvb-usb/dibusb-common.c b/trunk/drivers/media/dvb/dvb-usb/dibusb-common.c index 5143e426d283..fd3a9902f98d 100644 --- a/trunk/drivers/media/dvb/dvb-usb/dibusb-common.c +++ b/trunk/drivers/media/dvb/dvb-usb/dibusb-common.c @@ -169,7 +169,7 @@ EXPORT_SYMBOL(dibusb_read_eeprom_byte); // Config Adjacent channels Perf -cal22 static struct dibx000_agc_config dib3000p_mt2060_agc_config = { .band_caps = BAND_VHF | BAND_UHF, - .setup = (1 << 8) | (5 << 5) | (1 << 4) | (1 << 3) | (0 << 2) | (2 << 0), + .setup = (0 << 15) | (0 << 14) | (1 << 13) | (1 << 12) | (29 << 0), .agc1_max = 48497, .agc1_min = 23593, @@ -196,14 +196,10 @@ static struct dib3000mc_config stk3000p_dib3000p_config = { .ln_adc_level = 0x1cc7, .output_mpeg2_in_188_bytes = 1, - - .agc_command1 = 1, - .agc_command2 = 1, }; static struct dibx000_agc_config dib3000p_panasonic_agc_config = { - .band_caps = BAND_VHF | BAND_UHF, - .setup = (1 << 8) | (5 << 5) | (1 << 4) | (1 << 3) | (0 << 2) | (2 << 0), + .setup = (0 << 15) | (0 << 14) | (1 << 13) | (1 << 12) | (29 << 0), .agc1_max = 56361, .agc1_min = 22282, @@ -230,9 +226,6 @@ static struct dib3000mc_config mod3000p_dib3000p_config = { .ln_adc_level = 0x1cc7, .output_mpeg2_in_188_bytes = 1, - - .agc_command1 = 1, - .agc_command2 = 1, }; int dibusb_dib3000mc_frontend_attach(struct dvb_usb_adapter *adap) diff --git a/trunk/drivers/media/dvb/dvb-usb/dibusb.h b/trunk/drivers/media/dvb/dvb-usb/dibusb.h index b60781032742..5153fb943da1 100644 --- a/trunk/drivers/media/dvb/dvb-usb/dibusb.h +++ b/trunk/drivers/media/dvb/dvb-usb/dibusb.h @@ -99,9 +99,7 @@ struct dibusb_state { struct dib_fe_xfer_ops ops; int mt2060_present; -}; -struct dibusb_device_state { /* for RC5 remote control */ int old_toggle; int last_repeat_count; diff --git a/trunk/drivers/media/dvb/dvb-usb/nova-t-usb2.c b/trunk/drivers/media/dvb/dvb-usb/nova-t-usb2.c index a58874c790b2..a9219bf69b89 100644 --- a/trunk/drivers/media/dvb/dvb-usb/nova-t-usb2.c +++ b/trunk/drivers/media/dvb/dvb-usb/nova-t-usb2.c @@ -75,7 +75,7 @@ static int nova_t_rc_query(struct dvb_usb_device *d, u32 *event, int *state) u8 key[5],cmd[2] = { DIBUSB_REQ_POLL_REMOTE, 0x35 }, data,toggle,custom; u16 raw; int i; - struct dibusb_device_state *st = d->priv; + struct dibusb_state *st = d->priv; dvb_usb_generic_rw(d,cmd,2,key,5,0); @@ -184,7 +184,6 @@ static struct dvb_usb_device_properties nova_t_properties = { .size_of_priv = sizeof(struct dibusb_state), } }, - .size_of_priv = sizeof(struct dibusb_device_state), .power_ctrl = dibusb2_0_power_ctrl, .read_mac_address = nova_t_read_mac_address, diff --git a/trunk/drivers/media/dvb/frontends/dib3000mc.c b/trunk/drivers/media/dvb/frontends/dib3000mc.c index 3561a777568c..ccc813b525d6 100644 --- a/trunk/drivers/media/dvb/frontends/dib3000mc.c +++ b/trunk/drivers/media/dvb/frontends/dib3000mc.c @@ -345,7 +345,7 @@ static int dib3000mc_init(struct dvb_frontend *demod) /* agc */ dib3000mc_write_word(state, 36, state->cfg->max_time); - dib3000mc_write_word(state, 37, (state->cfg->agc_command1 << 13) | (state->cfg->agc_command2 << 12) | (0x1d << 0)); + dib3000mc_write_word(state, 37, agc->setup); dib3000mc_write_word(state, 38, state->cfg->pwm3_value); dib3000mc_write_word(state, 39, state->cfg->ln_adc_level); diff --git a/trunk/drivers/media/dvb/frontends/dib3000mc.h b/trunk/drivers/media/dvb/frontends/dib3000mc.h index 0d6fdef77538..b198cd5b1843 100644 --- a/trunk/drivers/media/dvb/frontends/dib3000mc.h +++ b/trunk/drivers/media/dvb/frontends/dib3000mc.h @@ -28,9 +28,6 @@ struct dib3000mc_config { u16 max_time; u16 ln_adc_level; - u8 agc_command1 :1; - u8 agc_command2 :1; - u8 mobile_mode; u8 output_mpeg2_in_188_bytes; diff --git a/trunk/drivers/media/dvb/frontends/tda10086.h b/trunk/drivers/media/dvb/frontends/tda10086.h index 18457adee30b..e8061db11123 100644 --- a/trunk/drivers/media/dvb/frontends/tda10086.h +++ b/trunk/drivers/media/dvb/frontends/tda10086.h @@ -35,16 +35,7 @@ struct tda10086_config u8 invert; }; -#if defined(CONFIG_DVB_TDA10086) || defined(CONFIG_DVB_TDA10086_MODULE) extern struct dvb_frontend* tda10086_attach(const struct tda10086_config* config, struct i2c_adapter* i2c); -#else -static inline struct dvb_frontend* tda10086_attach(const struct tda10086_config* config, - struct i2c_adapter* i2c) -{ - printk(KERN_WARNING "%s: driver disabled by Kconfig\n", __FUNCTION__); - return NULL; -} -#endif // CONFIG_DVB_TDA10086 #endif // TDA10086_H diff --git a/trunk/drivers/media/dvb/frontends/tda826x.h b/trunk/drivers/media/dvb/frontends/tda826x.h index 83998c001196..3307607632b0 100644 --- a/trunk/drivers/media/dvb/frontends/tda826x.h +++ b/trunk/drivers/media/dvb/frontends/tda826x.h @@ -35,19 +35,6 @@ * @param has_loopthrough Set to 1 if the card has a loopthrough RF connector. * @return FE pointer on success, NULL on failure. */ -#if defined(CONFIG_DVB_TDA826X) || defined(CONFIG_DVB_TDA826X_MODULE) -extern struct dvb_frontend* tda826x_attach(struct dvb_frontend *fe, int addr, - struct i2c_adapter *i2c, - int has_loopthrough); -#else -static inline struct dvb_frontend* tda826x_attach(struct dvb_frontend *fe, - int addr, - struct i2c_adapter *i2c, - int has_loopthrough) -{ - printk(KERN_WARNING "%s: driver disabled by Kconfig\n", __FUNCTION__); - return NULL; -} -#endif // CONFIG_DVB_TDA826X - -#endif // __DVB_TDA826X_H__ +extern struct dvb_frontend *tda826x_attach(struct dvb_frontend *fe, int addr, struct i2c_adapter *i2c, int has_loopthrough); + +#endif diff --git a/trunk/drivers/media/video/Kconfig b/trunk/drivers/media/video/Kconfig index fbe5b6168cc2..afb734df6e05 100644 --- a/trunk/drivers/media/video/Kconfig +++ b/trunk/drivers/media/video/Kconfig @@ -677,8 +677,6 @@ config VIDEO_M32R_AR_M64278 menu "V4L USB devices" depends on USB && VIDEO_DEV -source "drivers/media/video/pvrusb2/Kconfig" - source "drivers/media/video/em28xx/Kconfig" source "drivers/media/video/usbvideo/Kconfig" diff --git a/trunk/drivers/media/video/cx25840/cx25840-vbi.c b/trunk/drivers/media/video/cx25840/cx25840-vbi.c index f85f2084324f..48014a254e15 100644 --- a/trunk/drivers/media/video/cx25840/cx25840-vbi.c +++ b/trunk/drivers/media/video/cx25840/cx25840-vbi.c @@ -235,7 +235,6 @@ int cx25840_vbi(struct i2c_client *client, unsigned int cmd, void *arg) 0, 0, V4L2_SLICED_VPS, 0, 0, /* 9 */ 0, 0, 0, 0 }; - int is_pal = !(cx25840_get_v4lstd(client) & V4L2_STD_525_60); int i; fmt = arg; @@ -247,25 +246,13 @@ int cx25840_vbi(struct i2c_client *client, unsigned int cmd, void *arg) if ((cx25840_read(client, 0x404) & 0x10) == 0) break; - if (is_pal) { - for (i = 7; i <= 23; i++) { - u8 v = cx25840_read(client, 0x424 + i - 7); - - svbi->service_lines[0][i] = lcr2vbi[v >> 4]; - svbi->service_lines[1][i] = lcr2vbi[v & 0xf]; - svbi->service_set |= - svbi->service_lines[0][i] | svbi->service_lines[1][i]; - } - } - else { - for (i = 10; i <= 21; i++) { - u8 v = cx25840_read(client, 0x424 + i - 10); + for (i = 7; i <= 23; i++) { + u8 v = cx25840_read(client, 0x424 + i - 7); - svbi->service_lines[0][i] = lcr2vbi[v >> 4]; - svbi->service_lines[1][i] = lcr2vbi[v & 0xf]; - svbi->service_set |= - svbi->service_lines[0][i] | svbi->service_lines[1][i]; - } + svbi->service_lines[0][i] = lcr2vbi[v >> 4]; + svbi->service_lines[1][i] = lcr2vbi[v & 0xf]; + svbi->service_set |= + svbi->service_lines[0][i] | svbi->service_lines[1][i]; } break; } diff --git a/trunk/drivers/media/video/cx88/cx88-cards.c b/trunk/drivers/media/video/cx88/cx88-cards.c index f764a57c56be..af71d4225c76 100644 --- a/trunk/drivers/media/video/cx88/cx88-cards.c +++ b/trunk/drivers/media/video/cx88/cx88-cards.c @@ -1230,7 +1230,6 @@ struct cx88_board cx88_boards[] = { .vmux = 2, .gpio0 = 0x84bf, }}, - .mpeg = CX88_MPEG_DVB, }, [CX88_BOARD_NORWOOD_MICRO] = { .name = "Norwood Micro TV Tuner", @@ -1591,18 +1590,6 @@ struct cx88_subid cx88_subids[] = { .subvendor = 0x0070, .subdevice = 0x9000, .card = CX88_BOARD_HAUPPAUGE_DVB_T1, - },{ - .subvendor = 0x0070, - .subdevice = 0x1400, - .card = CX88_BOARD_HAUPPAUGE_HVR3000, - },{ - .subvendor = 0x0070, - .subdevice = 0x1401, - .card = CX88_BOARD_HAUPPAUGE_HVR3000, - },{ - .subvendor = 0x0070, - .subdevice = 0x1402, - .card = CX88_BOARD_HAUPPAUGE_HVR3000, }, }; const unsigned int cx88_idcount = ARRAY_SIZE(cx88_subids); @@ -1646,15 +1633,7 @@ static void hauppauge_eeprom(struct cx88_core *core, u8 *eeprom_data) /* Make sure we support the board model */ switch (tv.model) { - case 14009: /* WinTV-HVR3000 (Retail, IR, b/panel video, 3.5mm audio in) */ - case 14019: /* WinTV-HVR3000 (Retail, IR Blaster, b/panel video, 3.5mm audio in) */ - case 14029: /* WinTV-HVR3000 (Retail, IR, b/panel video, 3.5mm audio in - 880 bridge) */ - case 14109: /* WinTV-HVR3000 (Retail, IR, b/panel video, 3.5mm audio in - low profile) */ - case 14129: /* WinTV-HVR3000 (Retail, IR, b/panel video, 3.5mm audio in - 880 bridge - LP) */ - case 14559: /* WinTV-HVR3000 (OEM, no IR, b/panel video, 3.5mm audio in) */ case 14569: /* WinTV-HVR3000 (OEM, no IR, no back panel video) */ - case 14659: /* WinTV-HVR3000 (OEM, no IR, b/panel video, RCA audio in - Low profile) */ - case 14669: /* WinTV-HVR3000 (OEM, no IR, no b/panel video - Low profile) */ case 28552: /* WinTV-PVR 'Roslyn' (No IR) */ case 34519: /* WinTV-PCI-FM */ case 90002: /* Nova-T-PCI (9002) */ diff --git a/trunk/drivers/media/video/cx88/cx88-dvb.c b/trunk/drivers/media/video/cx88/cx88-dvb.c index 0ef13e7efa2e..bd0c8797f26d 100644 --- a/trunk/drivers/media/video/cx88/cx88-dvb.c +++ b/trunk/drivers/media/video/cx88/cx88-dvb.c @@ -315,22 +315,15 @@ static struct cx22702_config hauppauge_novat_config = { .demod_address = 0x43, .output_mode = CX22702_SERIAL_OUTPUT, }; - static struct cx22702_config hauppauge_hvr1100_config = { .demod_address = 0x63, .output_mode = CX22702_SERIAL_OUTPUT, }; - static struct cx22702_config hauppauge_hvr1300_config = { .demod_address = 0x63, .output_mode = CX22702_SERIAL_OUTPUT, }; -static struct cx22702_config hauppauge_hvr3000_config = { - .demod_address = 0x63, - .output_mode = CX22702_SERIAL_OUTPUT, -}; - static int or51132_set_ts_param(struct dvb_frontend* fe, int is_punctured) { @@ -565,16 +558,6 @@ static int dvb_register(struct cx8802_dev *dev) &dvb_pll_fmd1216me); } break; - case CX88_BOARD_HAUPPAUGE_HVR3000: - dev->dvb.frontend = dvb_attach(cx22702_attach, - &hauppauge_hvr3000_config, - &dev->core->i2c_adap); - if (dev->dvb.frontend != NULL) { - dvb_attach(dvb_pll_attach, dev->dvb.frontend, 0x61, - &dev->core->i2c_adap, - &dvb_pll_fmd1216me); - } - break; case CX88_BOARD_DVICO_FUSIONHDTV_DVB_T_PLUS: dev->dvb.frontend = dvb_attach(mt352_attach, &dvico_fusionhdtv, diff --git a/trunk/drivers/media/video/cx88/cx88-input.c b/trunk/drivers/media/video/cx88/cx88-input.c index ee48995a4ab5..83ebf7a3c054 100644 --- a/trunk/drivers/media/video/cx88/cx88-input.c +++ b/trunk/drivers/media/video/cx88/cx88-input.c @@ -196,7 +196,6 @@ int cx88_ir_init(struct cx88_core *core, struct pci_dev *pci) case CX88_BOARD_HAUPPAUGE_NOVASPLUS_S1: case CX88_BOARD_HAUPPAUGE_HVR1100: case CX88_BOARD_HAUPPAUGE_HVR1300: - case CX88_BOARD_HAUPPAUGE_HVR3000: ir_codes = ir_codes_hauppauge_new; ir_type = IR_TYPE_RC5; ir->sampling = 1; @@ -420,7 +419,6 @@ void cx88_ir_irq(struct cx88_core *core) case CX88_BOARD_HAUPPAUGE_NOVASPLUS_S1: case CX88_BOARD_HAUPPAUGE_HVR1100: case CX88_BOARD_HAUPPAUGE_HVR1300: - case CX88_BOARD_HAUPPAUGE_HVR3000: ircode = ir_decode_biphase(ir->samples, ir->scount, 5, 7); ir_dprintk("biphase decoded: %x\n", ircode); if ((ircode & 0xfffff000) != 0x3000) diff --git a/trunk/drivers/media/video/et61x251/et61x251_core.c b/trunk/drivers/media/video/et61x251/et61x251_core.c index f786ab11d2cd..bc544cc7ccb8 100644 --- a/trunk/drivers/media/video/et61x251/et61x251_core.c +++ b/trunk/drivers/media/video/et61x251/et61x251_core.c @@ -973,32 +973,16 @@ static CLASS_DEVICE_ATTR(i2c_val, S_IRUGO | S_IWUSR, et61x251_show_i2c_val, et61x251_store_i2c_val); -static int et61x251_create_sysfs(struct et61x251_device* cam) +static void et61x251_create_sysfs(struct et61x251_device* cam) { struct video_device *v4ldev = cam->v4ldev; - int rc; - rc = video_device_create_file(v4ldev, &class_device_attr_reg); - if (rc) goto err; - rc = video_device_create_file(v4ldev, &class_device_attr_val); - if (rc) goto err_reg; + video_device_create_file(v4ldev, &class_device_attr_reg); + video_device_create_file(v4ldev, &class_device_attr_val); if (cam->sensor.sysfs_ops) { - rc = video_device_create_file(v4ldev, &class_device_attr_i2c_reg); - if (rc) goto err_val; - rc = video_device_create_file(v4ldev, &class_device_attr_i2c_val); - if (rc) goto err_i2c_reg; + video_device_create_file(v4ldev, &class_device_attr_i2c_reg); + video_device_create_file(v4ldev, &class_device_attr_i2c_val); } - - return 0; - -err_i2c_reg: - video_device_remove_file(v4ldev, &class_device_attr_i2c_reg); -err_val: - video_device_remove_file(v4ldev, &class_device_attr_val); -err_reg: - video_device_remove_file(v4ldev, &class_device_attr_reg); -err: - return rc; } #endif /* CONFIG_VIDEO_ADV_DEBUG */ @@ -2550,9 +2534,7 @@ et61x251_usb_probe(struct usb_interface* intf, const struct usb_device_id* id) dev_nr = (dev_nr < ET61X251_MAX_DEVICES-1) ? dev_nr+1 : 0; #ifdef CONFIG_VIDEO_ADV_DEBUG - err = et61x251_create_sysfs(cam); - if (err) - goto fail2; + et61x251_create_sysfs(cam); DBG(2, "Optional device control through 'sysfs' interface ready"); #endif @@ -2562,13 +2544,6 @@ et61x251_usb_probe(struct usb_interface* intf, const struct usb_device_id* id) return 0; -#ifdef CONFIG_VIDEO_ADV_DEBUG -fail2: - video_nr[dev_nr] = -1; - dev_nr = (dev_nr < ET61X251_MAX_DEVICES-1) ? dev_nr+1 : 0; - mutex_unlock(&cam->dev_mutex); - video_unregister_device(cam->v4ldev); -#endif fail: if (cam) { kfree(cam->control_buffer); diff --git a/trunk/drivers/media/video/ov511.c b/trunk/drivers/media/video/ov511.c index b4db2cbb5a84..ce4886f1528d 100644 --- a/trunk/drivers/media/video/ov511.c +++ b/trunk/drivers/media/video/ov511.c @@ -5648,49 +5648,17 @@ static ssize_t show_exposure(struct class_device *cd, char *buf) } static CLASS_DEVICE_ATTR(exposure, S_IRUGO, show_exposure, NULL); -static int ov_create_sysfs(struct video_device *vdev) +static void ov_create_sysfs(struct video_device *vdev) { - int rc; - - rc = video_device_create_file(vdev, &class_device_attr_custom_id); - if (rc) goto err; - rc = video_device_create_file(vdev, &class_device_attr_model); - if (rc) goto err_id; - rc = video_device_create_file(vdev, &class_device_attr_bridge); - if (rc) goto err_model; - rc = video_device_create_file(vdev, &class_device_attr_sensor); - if (rc) goto err_bridge; - rc = video_device_create_file(vdev, &class_device_attr_brightness); - if (rc) goto err_sensor; - rc = video_device_create_file(vdev, &class_device_attr_saturation); - if (rc) goto err_bright; - rc = video_device_create_file(vdev, &class_device_attr_contrast); - if (rc) goto err_sat; - rc = video_device_create_file(vdev, &class_device_attr_hue); - if (rc) goto err_contrast; - rc = video_device_create_file(vdev, &class_device_attr_exposure); - if (rc) goto err_hue; - - return 0; - -err_hue: - video_device_remove_file(vdev, &class_device_attr_hue); -err_contrast: - video_device_remove_file(vdev, &class_device_attr_contrast); -err_sat: - video_device_remove_file(vdev, &class_device_attr_saturation); -err_bright: - video_device_remove_file(vdev, &class_device_attr_brightness); -err_sensor: - video_device_remove_file(vdev, &class_device_attr_sensor); -err_bridge: - video_device_remove_file(vdev, &class_device_attr_bridge); -err_model: - video_device_remove_file(vdev, &class_device_attr_model); -err_id: - video_device_remove_file(vdev, &class_device_attr_custom_id); -err: - return rc; + video_device_create_file(vdev, &class_device_attr_custom_id); + video_device_create_file(vdev, &class_device_attr_model); + video_device_create_file(vdev, &class_device_attr_bridge); + video_device_create_file(vdev, &class_device_attr_sensor); + video_device_create_file(vdev, &class_device_attr_brightness); + video_device_create_file(vdev, &class_device_attr_saturation); + video_device_create_file(vdev, &class_device_attr_contrast); + video_device_create_file(vdev, &class_device_attr_hue); + video_device_create_file(vdev, &class_device_attr_exposure); } /**************************************************************************** @@ -5849,11 +5817,7 @@ ov51x_probe(struct usb_interface *intf, const struct usb_device_id *id) ov->vdev->minor); usb_set_intfdata(intf, ov); - if (ov_create_sysfs(ov->vdev)) { - err("ov_create_sysfs failed"); - goto error; - } - + ov_create_sysfs(ov->vdev); return 0; error: diff --git a/trunk/drivers/media/video/pwc/pwc-if.c b/trunk/drivers/media/video/pwc/pwc-if.c index 46c114830884..c77b85cf3d80 100644 --- a/trunk/drivers/media/video/pwc/pwc-if.c +++ b/trunk/drivers/media/video/pwc/pwc-if.c @@ -1024,25 +1024,12 @@ static ssize_t show_snapshot_button_status(struct class_device *class_dev, char static CLASS_DEVICE_ATTR(button, S_IRUGO | S_IWUSR, show_snapshot_button_status, NULL); -static int pwc_create_sysfs_files(struct video_device *vdev) +static void pwc_create_sysfs_files(struct video_device *vdev) { struct pwc_device *pdev = video_get_drvdata(vdev); - int rc; - - rc = video_device_create_file(vdev, &class_device_attr_button); - if (rc) - goto err; - if (pdev->features & FEATURE_MOTOR_PANTILT) { - rc = video_device_create_file(vdev,&class_device_attr_pan_tilt); - if (rc) goto err_button; - } - - return 0; - -err_button: - video_device_remove_file(vdev, &class_device_attr_button); -err: - return rc; + if (pdev->features & FEATURE_MOTOR_PANTILT) + video_device_create_file(vdev, &class_device_attr_pan_tilt); + video_device_create_file(vdev, &class_device_attr_button); } static void pwc_remove_sysfs_files(struct video_device *vdev) @@ -1421,7 +1408,7 @@ static int usb_pwc_probe(struct usb_interface *intf, const struct usb_device_id struct usb_device *udev = interface_to_usbdev(intf); struct pwc_device *pdev = NULL; int vendor_id, product_id, type_id; - int i, hint, rc; + int i, hint; int features = 0; int video_nr = -1; /* default: use next available device */ char serial_number[30], *name; @@ -1722,8 +1709,9 @@ static int usb_pwc_probe(struct usb_interface *intf, const struct usb_device_id i = video_register_device(pdev->vdev, VFL_TYPE_GRABBER, video_nr); if (i < 0) { PWC_ERROR("Failed to register as video device (%d).\n", i); - rc = i; - goto err; + video_device_release(pdev->vdev); /* Drip... drip... drip... */ + kfree(pdev); /* Oops, no memory leaks please */ + return -EIO; } else { PWC_INFO("Registered as /dev/video%d.\n", pdev->vdev->minor & 0x3F); @@ -1735,24 +1723,13 @@ static int usb_pwc_probe(struct usb_interface *intf, const struct usb_device_id PWC_DEBUG_PROBE("probe() function returning struct at 0x%p.\n", pdev); usb_set_intfdata (intf, pdev); - rc = pwc_create_sysfs_files(pdev->vdev); - if (rc) - goto err_unreg; + pwc_create_sysfs_files(pdev->vdev); /* Set the leds off */ pwc_set_leds(pdev, 0, 0); pwc_camera_power(pdev, 0); return 0; - -err_unreg: - if (hint < MAX_DEV_HINTS) - device_hint[hint].pdev = NULL; - video_unregister_device(pdev->vdev); -err: - video_device_release(pdev->vdev); /* Drip... drip... drip... */ - kfree(pdev); /* Oops, no memory leaks please */ - return rc; } /* The user janked out the cable... */ diff --git a/trunk/drivers/media/video/saa7115.c b/trunk/drivers/media/video/saa7115.c index c5719f7bd1ac..974179d4d389 100644 --- a/trunk/drivers/media/video/saa7115.c +++ b/trunk/drivers/media/video/saa7115.c @@ -960,8 +960,6 @@ static void saa711x_set_v4lstd(struct i2c_client *client, v4l2_std_id std) reg |= 0x10; } else if (std == V4L2_STD_NTSC_M_JP) { reg |= 0x40; - } else if (std == V4L2_STD_SECAM) { - reg |= 0x50; } saa711x_write(client, R_0E_CHROMA_CNTL_1, reg); } else { diff --git a/trunk/drivers/media/video/saa7134/saa7134-video.c b/trunk/drivers/media/video/saa7134/saa7134-video.c index 830617ea81cc..203302f21827 100644 --- a/trunk/drivers/media/video/saa7134/saa7134-video.c +++ b/trunk/drivers/media/video/saa7134/saa7134-video.c @@ -2248,11 +2248,7 @@ static int radio_do_ioctl(struct inode *inode, struct file *file, t->type = V4L2_TUNER_RADIO; saa7134_i2c_call_clients(dev, VIDIOC_G_TUNER, t); - if (dev->input->amux == TV) { - t->signal = 0xf800 - ((saa_readb(0x581) & 0x1f) << 11); - t->rxsubchans = (saa_readb(0x529) & 0x08) ? - V4L2_TUNER_SUB_STEREO : V4L2_TUNER_SUB_MONO; - } + return 0; } case VIDIOC_S_TUNER: diff --git a/trunk/drivers/media/video/sn9c102/sn9c102_core.c b/trunk/drivers/media/video/sn9c102/sn9c102_core.c index a4702d3c2aca..3e0ff8a78468 100644 --- a/trunk/drivers/media/video/sn9c102/sn9c102_core.c +++ b/trunk/drivers/media/video/sn9c102/sn9c102_core.c @@ -1240,53 +1240,23 @@ static CLASS_DEVICE_ATTR(frame_header, S_IRUGO, sn9c102_show_frame_header, NULL); -static int sn9c102_create_sysfs(struct sn9c102_device* cam) +static void sn9c102_create_sysfs(struct sn9c102_device* cam) { struct video_device *v4ldev = cam->v4ldev; - int rc; - rc = video_device_create_file(v4ldev, &class_device_attr_reg); - if (rc) goto err; - rc = video_device_create_file(v4ldev, &class_device_attr_val); - if (rc) goto err_reg; - rc = video_device_create_file(v4ldev, &class_device_attr_frame_header); - if (rc) goto err_val; - - if (cam->sensor.sysfs_ops) { - rc = video_device_create_file(v4ldev, &class_device_attr_i2c_reg); - if (rc) goto err_frhead; - rc = video_device_create_file(v4ldev, &class_device_attr_i2c_val); - if (rc) goto err_i2c_reg; + video_device_create_file(v4ldev, &class_device_attr_reg); + video_device_create_file(v4ldev, &class_device_attr_val); + video_device_create_file(v4ldev, &class_device_attr_frame_header); + if (cam->bridge == BRIDGE_SN9C101 || cam->bridge == BRIDGE_SN9C102) + video_device_create_file(v4ldev, &class_device_attr_green); + else if (cam->bridge == BRIDGE_SN9C103) { + video_device_create_file(v4ldev, &class_device_attr_blue); + video_device_create_file(v4ldev, &class_device_attr_red); } - - if (cam->bridge == BRIDGE_SN9C101 || cam->bridge == BRIDGE_SN9C102) { - rc = video_device_create_file(v4ldev, &class_device_attr_green); - if (rc) goto err_i2c_val; - } else if (cam->bridge == BRIDGE_SN9C103) { - rc = video_device_create_file(v4ldev, &class_device_attr_blue); - if (rc) goto err_i2c_val; - rc = video_device_create_file(v4ldev, &class_device_attr_red); - if (rc) goto err_blue; + if (cam->sensor.sysfs_ops) { + video_device_create_file(v4ldev, &class_device_attr_i2c_reg); + video_device_create_file(v4ldev, &class_device_attr_i2c_val); } - - return 0; - -err_blue: - video_device_remove_file(v4ldev, &class_device_attr_blue); -err_i2c_val: - if (cam->sensor.sysfs_ops) - video_device_remove_file(v4ldev, &class_device_attr_i2c_val); -err_i2c_reg: - if (cam->sensor.sysfs_ops) - video_device_remove_file(v4ldev, &class_device_attr_i2c_reg); -err_frhead: - video_device_remove_file(v4ldev, &class_device_attr_frame_header); -err_val: - video_device_remove_file(v4ldev, &class_device_attr_val); -err_reg: - video_device_remove_file(v4ldev, &class_device_attr_reg); -err: - return rc; } #endif /* CONFIG_VIDEO_ADV_DEBUG */ @@ -2839,7 +2809,10 @@ sn9c102_usb_probe(struct usb_interface* intf, const struct usb_device_id* id) DBG(1, "V4L2 device registration failed"); if (err == -ENFILE && video_nr[dev_nr] == -1) DBG(1, "Free /dev/videoX node not found"); - goto fail2; + video_nr[dev_nr] = -1; + dev_nr = (dev_nr < SN9C102_MAX_DEVICES-1) ? dev_nr+1 : 0; + mutex_unlock(&cam->dev_mutex); + goto fail; } DBG(2, "V4L2 device registered as /dev/video%d", cam->v4ldev->minor); @@ -2850,9 +2823,7 @@ sn9c102_usb_probe(struct usb_interface* intf, const struct usb_device_id* id) dev_nr = (dev_nr < SN9C102_MAX_DEVICES-1) ? dev_nr+1 : 0; #ifdef CONFIG_VIDEO_ADV_DEBUG - err = sn9c102_create_sysfs(cam); - if (err) - goto fail3; + sn9c102_create_sysfs(cam); DBG(2, "Optional device control through 'sysfs' interface ready"); #endif @@ -2862,14 +2833,6 @@ sn9c102_usb_probe(struct usb_interface* intf, const struct usb_device_id* id) return 0; -#ifdef CONFIG_VIDEO_ADV_DEBUG -fail3: - video_unregister_device(cam->v4ldev); -#endif -fail2: - video_nr[dev_nr] = -1; - dev_nr = (dev_nr < SN9C102_MAX_DEVICES-1) ? dev_nr+1 : 0; - mutex_unlock(&cam->dev_mutex); fail: if (cam) { kfree(cam->control_buffer); diff --git a/trunk/drivers/media/video/stv680.c b/trunk/drivers/media/video/stv680.c index 6d1ef1e2e8ef..87e11300181d 100644 --- a/trunk/drivers/media/video/stv680.c +++ b/trunk/drivers/media/video/stv680.c @@ -516,45 +516,16 @@ stv680_file(frames_read, framecount, "%d\n"); stv680_file(packets_dropped, dropped, "%d\n"); stv680_file(decoding_errors, error, "%d\n"); -static int stv680_create_sysfs_files(struct video_device *vdev) +static void stv680_create_sysfs_files(struct video_device *vdev) { - int rc; - - rc = video_device_create_file(vdev, &class_device_attr_model); - if (rc) goto err; - rc = video_device_create_file(vdev, &class_device_attr_in_use); - if (rc) goto err_model; - rc = video_device_create_file(vdev, &class_device_attr_streaming); - if (rc) goto err_inuse; - rc = video_device_create_file(vdev, &class_device_attr_palette); - if (rc) goto err_stream; - rc = video_device_create_file(vdev, &class_device_attr_frames_total); - if (rc) goto err_pal; - rc = video_device_create_file(vdev, &class_device_attr_frames_read); - if (rc) goto err_framtot; - rc = video_device_create_file(vdev, &class_device_attr_packets_dropped); - if (rc) goto err_framread; - rc = video_device_create_file(vdev, &class_device_attr_decoding_errors); - if (rc) goto err_dropped; - - return 0; - -err_dropped: - video_device_remove_file(vdev, &class_device_attr_packets_dropped); -err_framread: - video_device_remove_file(vdev, &class_device_attr_frames_read); -err_framtot: - video_device_remove_file(vdev, &class_device_attr_frames_total); -err_pal: - video_device_remove_file(vdev, &class_device_attr_palette); -err_stream: - video_device_remove_file(vdev, &class_device_attr_streaming); -err_inuse: - video_device_remove_file(vdev, &class_device_attr_in_use); -err_model: - video_device_remove_file(vdev, &class_device_attr_model); -err: - return rc; + video_device_create_file(vdev, &class_device_attr_model); + video_device_create_file(vdev, &class_device_attr_in_use); + video_device_create_file(vdev, &class_device_attr_streaming); + video_device_create_file(vdev, &class_device_attr_palette); + video_device_create_file(vdev, &class_device_attr_frames_total); + video_device_create_file(vdev, &class_device_attr_frames_read); + video_device_create_file(vdev, &class_device_attr_packets_dropped); + video_device_create_file(vdev, &class_device_attr_decoding_errors); } static void stv680_remove_sysfs_files(struct video_device *vdev) @@ -1447,13 +1418,9 @@ static int stv680_probe (struct usb_interface *intf, const struct usb_device_id PDEBUG (0, "STV(i): registered new video device: video%d", stv680->vdev->minor); usb_set_intfdata (intf, stv680); - retval = stv680_create_sysfs_files(stv680->vdev); - if (retval) - goto error_unreg; + stv680_create_sysfs_files(stv680->vdev); return 0; -error_unreg: - video_unregister_device(stv680->vdev); error_vdev: video_device_release(stv680->vdev); error: diff --git a/trunk/drivers/media/video/tuner-types.c b/trunk/drivers/media/video/tuner-types.c index 781682373b61..8fff642fad56 100644 --- a/trunk/drivers/media/video/tuner-types.c +++ b/trunk/drivers/media/video/tuner-types.c @@ -1046,6 +1046,7 @@ static struct tuner_params tuner_samsung_tcpn_2121p30a_params[] = { .type = TUNER_PARAM_TYPE_NTSC, .ranges = tuner_samsung_tcpn_2121p30a_ntsc_ranges, .count = ARRAY_SIZE(tuner_samsung_tcpn_2121p30a_ntsc_ranges), + .has_tda9887 = 1, }, }; diff --git a/trunk/drivers/media/video/videodev.c b/trunk/drivers/media/video/videodev.c index d424a4129d69..479a0675cf60 100644 --- a/trunk/drivers/media/video/videodev.c +++ b/trunk/drivers/media/video/videodev.c @@ -17,11 +17,10 @@ */ #define dbgarg(cmd, fmt, arg...) \ - if (vfd->debug & V4L2_DEBUG_IOCTL_ARG) { \ + if (vfd->debug & V4L2_DEBUG_IOCTL_ARG) \ printk (KERN_DEBUG "%s: ", vfd->name); \ v4l_printk_ioctl(cmd); \ - printk (KERN_DEBUG "%s: " fmt, vfd->name, ## arg); \ - } + printk (KERN_DEBUG "%s: " fmt, vfd->name, ## arg); #define dbgarg2(fmt, arg...) \ if (vfd->debug & V4L2_DEBUG_IOCTL_ARG) \ @@ -1288,7 +1287,6 @@ static int __video_do_ioctl(struct inode *inode, struct file *file, ret=vfd->vidioc_g_parm(file, fh, p); } else { struct v4l2_standard s; - int i; if (!vfd->tvnormsize) { printk (KERN_WARNING "%s: no TV norms defined!\n", @@ -1299,14 +1297,8 @@ static int __video_do_ioctl(struct inode *inode, struct file *file, if (p->type != V4L2_BUF_TYPE_VIDEO_CAPTURE) return -EINVAL; - for (i = 0; i < vfd->tvnormsize; i++) - if (vfd->tvnorms[i].id == vfd->current_norm) - break; - if (i >= vfd->tvnormsize) - return -EINVAL; - - v4l2_video_std_construct(&s, vfd->current_norm, - vfd->tvnorms[i].name); + v4l2_video_std_construct(&s, vfd->tvnorms[vfd->current_norm].id, + vfd->tvnorms[vfd->current_norm].name); memset(p,0,sizeof(*p)); diff --git a/trunk/drivers/media/video/vivi.c b/trunk/drivers/media/video/vivi.c index 3c8dc72dc8e9..e7c01d560b64 100644 --- a/trunk/drivers/media/video/vivi.c +++ b/trunk/drivers/media/video/vivi.c @@ -272,7 +272,7 @@ static void gen_line(struct sg_to_addr to_addr[],int inipos,int pages,int wmax, /* Get first addr pointed to pixel position */ oldpg=get_addr_pos(pos,pages,to_addr); - pg=pfn_to_page(sg_dma_address(to_addr[oldpg].sg) >> PAGE_SHIFT); + pg=pfn_to_page(to_addr[oldpg].sg->dma_address >> PAGE_SHIFT); basep = kmap_atomic(pg, KM_BOUNCE_READ)+to_addr[oldpg].sg->offset; /* We will just duplicate the second pixel at the packet */ @@ -287,7 +287,7 @@ static void gen_line(struct sg_to_addr to_addr[],int inipos,int pages,int wmax, for (color=0;color<4;color++) { pgpos=get_addr_pos(pos,pages,to_addr); if (pgpos!=oldpg) { - pg=pfn_to_page(sg_dma_address(to_addr[pgpos].sg) >> PAGE_SHIFT); + pg=pfn_to_page(to_addr[pgpos].sg->dma_address >> PAGE_SHIFT); kunmap_atomic(basep, KM_BOUNCE_READ); basep= kmap_atomic(pg, KM_BOUNCE_READ)+to_addr[pgpos].sg->offset; oldpg=pgpos; @@ -339,8 +339,8 @@ static void gen_line(struct sg_to_addr to_addr[],int inipos,int pages,int wmax, for (color=0;color<4;color++) { pgpos=get_addr_pos(pos,pages,to_addr); if (pgpos!=oldpg) { - pg=pfn_to_page(sg_dma_address( - to_addr[pgpos].sg) + pg=pfn_to_page(to_addr[pgpos]. + sg->dma_address >> PAGE_SHIFT); kunmap_atomic(basep, KM_BOUNCE_READ); @@ -386,7 +386,7 @@ static void vivi_fillbuff(struct vivi_dev *dev,struct vivi_buffer *buf) struct timeval ts; /* Test if DMA mapping is ready */ - if (!sg_dma_address(&vb->dma.sglist[0])) + if (!vb->dma.sglist[0].dma_address) return; prep_to_addr(to_addr,vb); @@ -783,7 +783,7 @@ static int vivi_map_sg(void *dev, struct scatterlist *sg, int nents, for (i = 0; i < nents; i++ ) { BUG_ON(!sg[i].page); - sg_dma_address(&sg[i]) = page_to_phys(sg[i].page) + sg[i].offset; + sg[i].dma_address = page_to_phys(sg[i].page) + sg[i].offset; } return nents; diff --git a/trunk/drivers/misc/Kconfig b/trunk/drivers/misc/Kconfig index fa7acc2c5c6d..3df0e7a07c46 100644 --- a/trunk/drivers/misc/Kconfig +++ b/trunk/drivers/misc/Kconfig @@ -57,23 +57,4 @@ config TIFM_7XX1 To compile this driver as a module, choose M here: the module will be called tifm_7xx1. -config MSI_LAPTOP - tristate "MSI Laptop Extras" - depends on X86 - depends on ACPI_EC - depends on BACKLIGHT_CLASS_DEVICE - ---help--- - This is a driver for laptops built by MSI (MICRO-STAR - INTERNATIONAL): - - MSI MegaBook S270 (MS-1013) - Cytron/TCM/Medion/Tchibo MD96100/SAM2000 - - It adds support for Bluetooth, WLAN and LCD brightness control. - - More information about this driver is available at - . - - If you have an MSI S270 laptop, say Y or M here. - endmenu diff --git a/trunk/drivers/misc/Makefile b/trunk/drivers/misc/Makefile index 9a91c1ee8497..d65ece76095a 100644 --- a/trunk/drivers/misc/Makefile +++ b/trunk/drivers/misc/Makefile @@ -5,7 +5,6 @@ obj- := misc.o # Dummy rule to force built-in.o to be made obj-$(CONFIG_IBM_ASM) += ibmasm/ obj-$(CONFIG_HDPU_FEATURES) += hdpuftrs/ -obj-$(CONFIG_MSI_LAPTOP) += msi-laptop.o obj-$(CONFIG_LKDTM) += lkdtm.o obj-$(CONFIG_TIFM_CORE) += tifm_core.o obj-$(CONFIG_TIFM_7XX1) += tifm_7xx1.o diff --git a/trunk/drivers/misc/msi-laptop.c b/trunk/drivers/misc/msi-laptop.c deleted file mode 100644 index fdb7153f4426..000000000000 --- a/trunk/drivers/misc/msi-laptop.c +++ /dev/null @@ -1,395 +0,0 @@ -/*-*-linux-c-*-*/ - -/* - Copyright (C) 2006 Lennart Poettering - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - */ - -/* - * msi-laptop.c - MSI S270 laptop support. This laptop is sold under - * various brands, including "Cytron/TCM/Medion/Tchibo MD96100". - * - * This driver exports a few files in /sys/devices/platform/msi-laptop-pf/: - * - * lcd_level - Screen brightness: contains a single integer in the - * range 0..8. (rw) - * - * auto_brightness - Enable automatic brightness control: contains - * either 0 or 1. If set to 1 the hardware adjusts the screen - * brightness automatically when the power cord is - * plugged/unplugged. (rw) - * - * wlan - WLAN subsystem enabled: contains either 0 or 1. (ro) - * - * bluetooth - Bluetooth subsystem enabled: contains either 0 or 1 - * Please note that this file is constantly 0 if no Bluetooth - * hardware is available. (ro) - * - * In addition to these platform device attributes the driver - * registers itself in the Linux backlight control subsystem and is - * available to userspace under /sys/class/backlight/msi-laptop-bl/. - * - * This driver might work on other laptops produced by MSI. If you - * want to try it you can pass force=1 as argument to the module which - * will force it to load even when the DMI data doesn't identify the - * laptop as MSI S270. YMMV. - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#define MSI_DRIVER_VERSION "0.5" - -#define MSI_LCD_LEVEL_MAX 9 - -#define MSI_EC_COMMAND_WIRELESS 0x10 -#define MSI_EC_COMMAND_LCD_LEVEL 0x11 - -static int force; -module_param(force, bool, 0); -MODULE_PARM_DESC(force, "Force driver load, ignore DMI data"); - -static int auto_brightness; -module_param(auto_brightness, int, 0); -MODULE_PARM_DESC(auto_brightness, "Enable automatic brightness control (0: disabled; 1: enabled; 2: don't touch)"); - -/* Hardware access */ - -static int set_lcd_level(int level) -{ - u8 buf[2]; - - if (level < 0 || level >= MSI_LCD_LEVEL_MAX) - return -EINVAL; - - buf[0] = 0x80; - buf[1] = (u8) (level*31); - - return ec_transaction(MSI_EC_COMMAND_LCD_LEVEL, buf, sizeof(buf), NULL, 0); -} - -static int get_lcd_level(void) -{ - u8 wdata = 0, rdata; - int result; - - result = ec_transaction(MSI_EC_COMMAND_LCD_LEVEL, &wdata, 1, &rdata, 1); - if (result < 0) - return result; - - return (int) rdata / 31; -} - -static int get_auto_brightness(void) -{ - u8 wdata = 4, rdata; - int result; - - result = ec_transaction(MSI_EC_COMMAND_LCD_LEVEL, &wdata, 1, &rdata, 1); - if (result < 0) - return result; - - return !!(rdata & 8); -} - -static int set_auto_brightness(int enable) -{ - u8 wdata[2], rdata; - int result; - - wdata[0] = 4; - - result = ec_transaction(MSI_EC_COMMAND_LCD_LEVEL, wdata, 1, &rdata, 1); - if (result < 0) - return result; - - wdata[0] = 0x84; - wdata[1] = (rdata & 0xF7) | (enable ? 8 : 0); - - return ec_transaction(MSI_EC_COMMAND_LCD_LEVEL, wdata, 2, NULL, 0); -} - -static int get_wireless_state(int *wlan, int *bluetooth) -{ - u8 wdata = 0, rdata; - int result; - - result = ec_transaction(MSI_EC_COMMAND_WIRELESS, &wdata, 1, &rdata, 1); - if (result < 0) - return -1; - - if (wlan) - *wlan = !!(rdata & 8); - - if (bluetooth) - *bluetooth = !!(rdata & 128); - - return 0; -} - -/* Backlight device stuff */ - -static int bl_get_brightness(struct backlight_device *b) -{ - return get_lcd_level(); -} - - -static int bl_update_status(struct backlight_device *b) -{ - return set_lcd_level(b->props->brightness); -} - -static struct backlight_properties msibl_props = { - .owner = THIS_MODULE, - .get_brightness = bl_get_brightness, - .update_status = bl_update_status, - .max_brightness = MSI_LCD_LEVEL_MAX-1, -}; - -static struct backlight_device *msibl_device; - -/* Platform device */ - -static ssize_t show_wlan(struct device *dev, - struct device_attribute *attr, char *buf) -{ - - int ret, enabled; - - ret = get_wireless_state(&enabled, NULL); - if (ret < 0) - return ret; - - return sprintf(buf, "%i\n", enabled); -} - -static ssize_t show_bluetooth(struct device *dev, - struct device_attribute *attr, char *buf) -{ - - int ret, enabled; - - ret = get_wireless_state(NULL, &enabled); - if (ret < 0) - return ret; - - return sprintf(buf, "%i\n", enabled); -} - -static ssize_t show_lcd_level(struct device *dev, - struct device_attribute *attr, char *buf) -{ - - int ret; - - ret = get_lcd_level(); - if (ret < 0) - return ret; - - return sprintf(buf, "%i\n", ret); -} - -static ssize_t store_lcd_level(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - - int level, ret; - - if (sscanf(buf, "%i", &level) != 1 || (level < 0 || level >= MSI_LCD_LEVEL_MAX)) - return -EINVAL; - - ret = set_lcd_level(level); - if (ret < 0) - return ret; - - return count; -} - -static ssize_t show_auto_brightness(struct device *dev, - struct device_attribute *attr, char *buf) -{ - - int ret; - - ret = get_auto_brightness(); - if (ret < 0) - return ret; - - return sprintf(buf, "%i\n", ret); -} - -static ssize_t store_auto_brightness(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - - int enable, ret; - - if (sscanf(buf, "%i", &enable) != 1 || (enable != (enable & 1))) - return -EINVAL; - - ret = set_auto_brightness(enable); - if (ret < 0) - return ret; - - return count; -} - -static DEVICE_ATTR(lcd_level, 0644, show_lcd_level, store_lcd_level); -static DEVICE_ATTR(auto_brightness, 0644, show_auto_brightness, store_auto_brightness); -static DEVICE_ATTR(bluetooth, 0444, show_bluetooth, NULL); -static DEVICE_ATTR(wlan, 0444, show_wlan, NULL); - -static struct attribute *msipf_attributes[] = { - &dev_attr_lcd_level.attr, - &dev_attr_auto_brightness.attr, - &dev_attr_bluetooth.attr, - &dev_attr_wlan.attr, - NULL -}; - -static struct attribute_group msipf_attribute_group = { - .attrs = msipf_attributes -}; - -static struct platform_driver msipf_driver = { - .driver = { - .name = "msi-laptop-pf", - .owner = THIS_MODULE, - } -}; - -static struct platform_device *msipf_device; - -/* Initialization */ - -static struct dmi_system_id __initdata msi_dmi_table[] = { - { - .ident = "MSI S270", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "MICRO-STAR INT'L CO.,LTD"), - DMI_MATCH(DMI_PRODUCT_NAME, "MS-1013"), - } - }, - { - .ident = "Medion MD96100", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "NOTEBOOK"), - DMI_MATCH(DMI_PRODUCT_NAME, "SAM2000"), - } - }, - { } -}; - - -static int __init msi_init(void) -{ - int ret; - - if (acpi_disabled) - return -ENODEV; - - if (!force && !dmi_check_system(msi_dmi_table)) - return -ENODEV; - - if (auto_brightness < 0 || auto_brightness > 2) - return -EINVAL; - - /* Register backlight stuff */ - - msibl_device = backlight_device_register("msi-laptop-bl", NULL, &msibl_props); - if (IS_ERR(msibl_device)) - return PTR_ERR(msibl_device); - - ret = platform_driver_register(&msipf_driver); - if (ret) - goto fail_backlight; - - /* Register platform stuff */ - - msipf_device = platform_device_alloc("msi-laptop-pf", -1); - if (!msipf_device) { - ret = -ENOMEM; - goto fail_platform_driver; - } - - ret = platform_device_add(msipf_device); - if (ret) - goto fail_platform_device1; - - ret = sysfs_create_group(&msipf_device->dev.kobj, &msipf_attribute_group); - if (ret) - goto fail_platform_device2; - - /* Disable automatic brightness control by default because - * this module was probably loaded to do brightness control in - * software. */ - - if (auto_brightness != 2) - set_auto_brightness(auto_brightness); - - printk(KERN_INFO "msi-laptop: driver "MSI_DRIVER_VERSION" successfully loaded.\n"); - - return 0; - -fail_platform_device2: - - platform_device_del(msipf_device); - -fail_platform_device1: - - platform_device_put(msipf_device); - -fail_platform_driver: - - platform_driver_unregister(&msipf_driver); - -fail_backlight: - - backlight_device_unregister(msibl_device); - - return ret; -} - -static void __exit msi_cleanup(void) -{ - - sysfs_remove_group(&msipf_device->dev.kobj, &msipf_attribute_group); - platform_device_unregister(msipf_device); - platform_driver_unregister(&msipf_driver); - backlight_device_unregister(msibl_device); - - /* Enable automatic brightness control again */ - if (auto_brightness != 2) - set_auto_brightness(1); - - printk(KERN_INFO "msi-laptop: driver unloaded.\n"); -} - -module_init(msi_init); -module_exit(msi_cleanup); - -MODULE_AUTHOR("Lennart Poettering"); -MODULE_DESCRIPTION("MSI Laptop Support"); -MODULE_VERSION(MSI_DRIVER_VERSION); -MODULE_LICENSE("GPL"); diff --git a/trunk/drivers/net/b44.c b/trunk/drivers/net/b44.c index 1ec217433b4c..b124eee4eb10 100644 --- a/trunk/drivers/net/b44.c +++ b/trunk/drivers/net/b44.c @@ -1706,15 +1706,14 @@ static void __b44_set_rx_mode(struct net_device *dev) __b44_set_mac_addr(bp); - if ((dev->flags & IFF_ALLMULTI) || - (dev->mc_count > B44_MCAST_TABLE_SIZE)) + if (dev->flags & IFF_ALLMULTI) val |= RXCONFIG_ALLMULTI; else i = __b44_load_mcast(bp, dev); - for (; i < 64; i++) + for (; i < 64; i++) { __b44_cam_write(bp, zero, i); - + } bw32(bp, B44_RXCONFIG, val); val = br32(bp, B44_CAM_CTRL); bw32(bp, B44_CAM_CTRL, val | CAM_CTRL_ENABLE); @@ -2056,7 +2055,7 @@ static int b44_read_eeprom(struct b44 *bp, u8 *data) u16 *ptr = (u16 *) data; for (i = 0; i < 128; i += 2) - ptr[i / 2] = cpu_to_le16(readw(bp->regs + 4096 + i)); + ptr[i / 2] = readw(bp->regs + 4096 + i); return 0; } diff --git a/trunk/drivers/net/bonding/bond_alb.c b/trunk/drivers/net/bonding/bond_alb.c index 32923162179e..e83bc825f6af 100644 --- a/trunk/drivers/net/bonding/bond_alb.c +++ b/trunk/drivers/net/bonding/bond_alb.c @@ -1433,7 +1433,7 @@ void bond_alb_monitor(struct bonding *bond) * write lock to protect from other code that also * sets the promiscuity. */ - write_lock_bh(&bond->curr_slave_lock); + write_lock(&bond->curr_slave_lock); if (bond_info->primary_is_promisc && (++bond_info->rlb_promisc_timeout_counter >= RLB_PROMISC_TIMEOUT)) { @@ -1448,7 +1448,7 @@ void bond_alb_monitor(struct bonding *bond) bond_info->primary_is_promisc = 0; } - write_unlock_bh(&bond->curr_slave_lock); + write_unlock(&bond->curr_slave_lock); if (bond_info->rlb_rebalance) { bond_info->rlb_rebalance = 0; diff --git a/trunk/drivers/net/ehea/ehea.h b/trunk/drivers/net/ehea/ehea.h index b40724fc6b74..23b451a8ae12 100644 --- a/trunk/drivers/net/ehea/ehea.h +++ b/trunk/drivers/net/ehea/ehea.h @@ -39,7 +39,7 @@ #include #define DRV_NAME "ehea" -#define DRV_VERSION "EHEA_0034" +#define DRV_VERSION "EHEA_0028" #define EHEA_MSG_DEFAULT (NETIF_MSG_LINK | NETIF_MSG_TIMER \ | NETIF_MSG_RX_ERR | NETIF_MSG_TX_ERR) @@ -50,7 +50,6 @@ #define EHEA_MAX_ENTRIES_SQ 32767 #define EHEA_MIN_ENTRIES_QP 127 -#define EHEA_SMALL_QUEUES #define EHEA_NUM_TX_QP 1 #ifdef EHEA_SMALL_QUEUES @@ -60,11 +59,11 @@ #define EHEA_DEF_ENTRIES_RQ2 1023 #define EHEA_DEF_ENTRIES_RQ3 1023 #else -#define EHEA_MAX_CQE_COUNT 4080 -#define EHEA_DEF_ENTRIES_SQ 4080 -#define EHEA_DEF_ENTRIES_RQ1 8160 -#define EHEA_DEF_ENTRIES_RQ2 2040 -#define EHEA_DEF_ENTRIES_RQ3 2040 +#define EHEA_MAX_CQE_COUNT 32000 +#define EHEA_DEF_ENTRIES_SQ 16000 +#define EHEA_DEF_ENTRIES_RQ1 32080 +#define EHEA_DEF_ENTRIES_RQ2 4020 +#define EHEA_DEF_ENTRIES_RQ3 4020 #endif #define EHEA_MAX_ENTRIES_EQ 20 diff --git a/trunk/drivers/net/ehea/ehea_main.c b/trunk/drivers/net/ehea/ehea_main.c index eb7d44de59ff..c6b31775e26b 100644 --- a/trunk/drivers/net/ehea/ehea_main.c +++ b/trunk/drivers/net/ehea/ehea_main.c @@ -766,7 +766,7 @@ static void ehea_parse_eqe(struct ehea_adapter *adapter, u64 eqe) if (EHEA_BMASK_GET(NEQE_PORT_UP, eqe)) { if (!netif_carrier_ok(port->netdev)) { ret = ehea_sense_port_attr( - port); + adapter->port[portnum]); if (ret) { ehea_error("failed resensing port " "attributes"); @@ -818,7 +818,7 @@ static void ehea_parse_eqe(struct ehea_adapter *adapter, u64 eqe) netif_stop_queue(port->netdev); break; default: - ehea_error("unknown event code %x, eqe=0x%lX", ec, eqe); + ehea_error("unknown event code %x", ec); break; } } @@ -1841,7 +1841,7 @@ static int ehea_start_xmit(struct sk_buff *skb, struct net_device *dev) if (netif_msg_tx_queued(port)) { ehea_info("post swqe on QP %d", pr->qp->init_attr.qp_nr); - ehea_dump(swqe, 512, "swqe"); + ehea_dump(swqe, sizeof(*swqe), "swqe"); } ehea_post_swqe(pr->qp, swqe); diff --git a/trunk/drivers/net/ehea/ehea_phyp.c b/trunk/drivers/net/ehea/ehea_phyp.c index 0b51a8cea077..4a85aca4c7e9 100644 --- a/trunk/drivers/net/ehea/ehea_phyp.c +++ b/trunk/drivers/net/ehea/ehea_phyp.c @@ -44,99 +44,71 @@ static inline u16 get_order_of_qentries(u16 queue_entries) #define H_ALL_RES_TYPE_MR 5 #define H_ALL_RES_TYPE_MW 6 -static long ehea_plpar_hcall_norets(unsigned long opcode, - unsigned long arg1, - unsigned long arg2, - unsigned long arg3, - unsigned long arg4, - unsigned long arg5, - unsigned long arg6, - unsigned long arg7) +static long ehea_hcall_9arg_9ret(unsigned long opcode, + unsigned long arg1, unsigned long arg2, + unsigned long arg3, unsigned long arg4, + unsigned long arg5, unsigned long arg6, + unsigned long arg7, unsigned long arg8, + unsigned long arg9, unsigned long *out1, + unsigned long *out2,unsigned long *out3, + unsigned long *out4,unsigned long *out5, + unsigned long *out6,unsigned long *out7, + unsigned long *out8,unsigned long *out9) { - long ret; + long hret; int i, sleep_msecs; for (i = 0; i < 5; i++) { - ret = plpar_hcall_norets(opcode, arg1, arg2, arg3, arg4, - arg5, arg6, arg7); - - if (H_IS_LONG_BUSY(ret)) { - sleep_msecs = get_longbusy_msecs(ret); + hret = plpar_hcall_9arg_9ret(opcode,arg1, arg2, arg3, arg4, + arg5, arg6, arg7, arg8, arg9, out1, + out2, out3, out4, out5, out6, out7, + out8, out9); + if (H_IS_LONG_BUSY(hret)) { + sleep_msecs = get_longbusy_msecs(hret); msleep_interruptible(sleep_msecs); continue; } - if (ret < H_SUCCESS) - ehea_error("opcode=%lx ret=%lx" - " arg1=%lx arg2=%lx arg3=%lx arg4=%lx" - " arg5=%lx arg6=%lx arg7=%lx ", - opcode, ret, - arg1, arg2, arg3, arg4, arg5, - arg6, arg7); - - return ret; + if (hret < H_SUCCESS) + ehea_error("op=%lx hret=%lx " + "i1=%lx i2=%lx i3=%lx i4=%lx i5=%lx i6=%lx " + "i7=%lx i8=%lx i9=%lx " + "o1=%lx o2=%lx o3=%lx o4=%lx o5=%lx o6=%lx " + "o7=%lx o8=%lx o9=%lx", + opcode, hret, arg1, arg2, arg3, arg4, arg5, + arg6, arg7, arg8, arg9, *out1, *out2, *out3, + *out4, *out5, *out6, *out7, *out8, *out9); + return hret; } - return H_BUSY; } -static long ehea_plpar_hcall9(unsigned long opcode, - unsigned long *outs, /* array of 9 outputs */ - unsigned long arg1, - unsigned long arg2, - unsigned long arg3, - unsigned long arg4, - unsigned long arg5, - unsigned long arg6, - unsigned long arg7, - unsigned long arg8, - unsigned long arg9) +u64 ehea_h_query_ehea_qp(const u64 adapter_handle, const u8 qp_category, + const u64 qp_handle, const u64 sel_mask, void *cb_addr) { - long ret; - int i, sleep_msecs; - - for (i = 0; i < 5; i++) { - ret = plpar_hcall9(opcode, outs, - arg1, arg2, arg3, arg4, arg5, - arg6, arg7, arg8, arg9); + u64 dummy; - if (H_IS_LONG_BUSY(ret)) { - sleep_msecs = get_longbusy_msecs(ret); - msleep_interruptible(sleep_msecs); - continue; - } - - if (ret < H_SUCCESS) - ehea_error("opcode=%lx ret=%lx" - " arg1=%lx arg2=%lx arg3=%lx arg4=%lx" - " arg5=%lx arg6=%lx arg7=%lx arg8=%lx" - " arg9=%lx" - " out1=%lx out2=%lx out3=%lx out4=%lx" - " out5=%lx out6=%lx out7=%lx out8=%lx" - " out9=%lx", - opcode, ret, - arg1, arg2, arg3, arg4, arg5, - arg6, arg7, arg8, arg9, - outs[0], outs[1], outs[2], outs[3], - outs[4], outs[5], outs[6], outs[7], - outs[8]); - - return ret; + if ((((u64)cb_addr) & (PAGE_SIZE - 1)) != 0) { + ehea_error("not on pageboundary"); + return H_PARAMETER; } - return H_BUSY; -} - -u64 ehea_h_query_ehea_qp(const u64 adapter_handle, const u8 qp_category, - const u64 qp_handle, const u64 sel_mask, void *cb_addr) -{ - return ehea_plpar_hcall_norets(H_QUERY_HEA_QP, - adapter_handle, /* R4 */ - qp_category, /* R5 */ - qp_handle, /* R6 */ - sel_mask, /* R7 */ - virt_to_abs(cb_addr), /* R8 */ - 0, 0); + return ehea_hcall_9arg_9ret(H_QUERY_HEA_QP, + adapter_handle, /* R4 */ + qp_category, /* R5 */ + qp_handle, /* R6 */ + sel_mask, /* R7 */ + virt_to_abs(cb_addr), /* R8 */ + 0, 0, 0, 0, /* R9-R12 */ + &dummy, /* R4 */ + &dummy, /* R5 */ + &dummy, /* R6 */ + &dummy, /* R7 */ + &dummy, /* R8 */ + &dummy, /* R9 */ + &dummy, /* R10 */ + &dummy, /* R11 */ + &dummy); /* R12 */ } /* input param R5 */ @@ -208,7 +180,6 @@ u64 ehea_h_alloc_resource_qp(const u64 adapter_handle, u64 *qp_handle, struct h_epas *h_epas) { u64 hret; - u64 outs[PLPAR_HCALL9_BUFSIZE]; u64 allocate_controls = EHEA_BMASK_SET(H_ALL_RES_QP_EQPO, init_attr->low_lat_rq1 ? 1 : 0) @@ -248,29 +219,45 @@ u64 ehea_h_alloc_resource_qp(const u64 adapter_handle, EHEA_BMASK_SET(H_ALL_RES_QP_TH_RQ2, init_attr->rq2_threshold) | EHEA_BMASK_SET(H_ALL_RES_QP_TH_RQ3, init_attr->rq3_threshold); - hret = ehea_plpar_hcall9(H_ALLOC_HEA_RESOURCE, - outs, - adapter_handle, /* R4 */ - allocate_controls, /* R5 */ - init_attr->send_cq_handle, /* R6 */ - init_attr->recv_cq_handle, /* R7 */ - init_attr->aff_eq_handle, /* R8 */ - r9_reg, /* R9 */ - max_r10_reg, /* R10 */ - r11_in, /* R11 */ - threshold); /* R12 */ - - *qp_handle = outs[0]; - init_attr->qp_nr = (u32)outs[1]; + u64 r5_out = 0; + u64 r6_out = 0; + u64 r7_out = 0; + u64 r8_out = 0; + u64 r9_out = 0; + u64 g_la_user_out = 0; + u64 r11_out = 0; + u64 r12_out = 0; + + hret = ehea_hcall_9arg_9ret(H_ALLOC_HEA_RESOURCE, + adapter_handle, /* R4 */ + allocate_controls, /* R5 */ + init_attr->send_cq_handle, /* R6 */ + init_attr->recv_cq_handle, /* R7 */ + init_attr->aff_eq_handle, /* R8 */ + r9_reg, /* R9 */ + max_r10_reg, /* R10 */ + r11_in, /* R11 */ + threshold, /* R12 */ + qp_handle, /* R4 */ + &r5_out, /* R5 */ + &r6_out, /* R6 */ + &r7_out, /* R7 */ + &r8_out, /* R8 */ + &r9_out, /* R9 */ + &g_la_user_out, /* R10 */ + &r11_out, /* R11 */ + &r12_out); /* R12 */ + + init_attr->qp_nr = (u32)r5_out; init_attr->act_nr_send_wqes = - (u16)EHEA_BMASK_GET(H_ALL_RES_QP_ACT_SWQE, outs[2]); + (u16)EHEA_BMASK_GET(H_ALL_RES_QP_ACT_SWQE, r6_out); init_attr->act_nr_rwqes_rq1 = - (u16)EHEA_BMASK_GET(H_ALL_RES_QP_ACT_R1WQE, outs[2]); + (u16)EHEA_BMASK_GET(H_ALL_RES_QP_ACT_R1WQE, r6_out); init_attr->act_nr_rwqes_rq2 = - (u16)EHEA_BMASK_GET(H_ALL_RES_QP_ACT_R2WQE, outs[2]); + (u16)EHEA_BMASK_GET(H_ALL_RES_QP_ACT_R2WQE, r6_out); init_attr->act_nr_rwqes_rq3 = - (u16)EHEA_BMASK_GET(H_ALL_RES_QP_ACT_R3WQE, outs[2]); + (u16)EHEA_BMASK_GET(H_ALL_RES_QP_ACT_R3WQE, r6_out); init_attr->act_wqe_size_enc_sq = init_attr->wqe_size_enc_sq; init_attr->act_wqe_size_enc_rq1 = init_attr->wqe_size_enc_rq1; @@ -278,25 +265,25 @@ u64 ehea_h_alloc_resource_qp(const u64 adapter_handle, init_attr->act_wqe_size_enc_rq3 = init_attr->wqe_size_enc_rq3; init_attr->nr_sq_pages = - (u32)EHEA_BMASK_GET(H_ALL_RES_QP_SIZE_SQ, outs[4]); + (u32)EHEA_BMASK_GET(H_ALL_RES_QP_SIZE_SQ, r8_out); init_attr->nr_rq1_pages = - (u32)EHEA_BMASK_GET(H_ALL_RES_QP_SIZE_RQ1, outs[4]); + (u32)EHEA_BMASK_GET(H_ALL_RES_QP_SIZE_RQ1, r8_out); init_attr->nr_rq2_pages = - (u32)EHEA_BMASK_GET(H_ALL_RES_QP_SIZE_RQ2, outs[5]); + (u32)EHEA_BMASK_GET(H_ALL_RES_QP_SIZE_RQ2, r9_out); init_attr->nr_rq3_pages = - (u32)EHEA_BMASK_GET(H_ALL_RES_QP_SIZE_RQ3, outs[5]); + (u32)EHEA_BMASK_GET(H_ALL_RES_QP_SIZE_RQ3, r9_out); init_attr->liobn_sq = - (u32)EHEA_BMASK_GET(H_ALL_RES_QP_LIOBN_SQ, outs[7]); + (u32)EHEA_BMASK_GET(H_ALL_RES_QP_LIOBN_SQ, r11_out); init_attr->liobn_rq1 = - (u32)EHEA_BMASK_GET(H_ALL_RES_QP_LIOBN_RQ1, outs[7]); + (u32)EHEA_BMASK_GET(H_ALL_RES_QP_LIOBN_RQ1, r11_out); init_attr->liobn_rq2 = - (u32)EHEA_BMASK_GET(H_ALL_RES_QP_LIOBN_RQ2, outs[8]); + (u32)EHEA_BMASK_GET(H_ALL_RES_QP_LIOBN_RQ2, r12_out); init_attr->liobn_rq3 = - (u32)EHEA_BMASK_GET(H_ALL_RES_QP_LIOBN_RQ3, outs[8]); + (u32)EHEA_BMASK_GET(H_ALL_RES_QP_LIOBN_RQ3, r12_out); if (!hret) - hcp_epas_ctor(h_epas, outs[6], outs[6]); + hcp_epas_ctor(h_epas, g_la_user_out, g_la_user_out); return hret; } @@ -305,24 +292,31 @@ u64 ehea_h_alloc_resource_cq(const u64 adapter_handle, struct ehea_cq_attr *cq_attr, u64 *cq_handle, struct h_epas *epas) { - u64 hret; - u64 outs[PLPAR_HCALL9_BUFSIZE]; - - hret = ehea_plpar_hcall9(H_ALLOC_HEA_RESOURCE, - outs, - adapter_handle, /* R4 */ - H_ALL_RES_TYPE_CQ, /* R5 */ - cq_attr->eq_handle, /* R6 */ - cq_attr->cq_token, /* R7 */ - cq_attr->max_nr_of_cqes, /* R8 */ - 0, 0, 0, 0); /* R9-R12 */ - - *cq_handle = outs[0]; - cq_attr->act_nr_of_cqes = outs[3]; - cq_attr->nr_pages = outs[4]; + u64 hret, dummy, act_nr_of_cqes_out, act_pages_out; + u64 g_la_privileged_out, g_la_user_out; + + hret = ehea_hcall_9arg_9ret(H_ALLOC_HEA_RESOURCE, + adapter_handle, /* R4 */ + H_ALL_RES_TYPE_CQ, /* R5 */ + cq_attr->eq_handle, /* R6 */ + cq_attr->cq_token, /* R7 */ + cq_attr->max_nr_of_cqes, /* R8 */ + 0, 0, 0, 0, /* R9-R12 */ + cq_handle, /* R4 */ + &dummy, /* R5 */ + &dummy, /* R6 */ + &act_nr_of_cqes_out, /* R7 */ + &act_pages_out, /* R8 */ + &g_la_privileged_out, /* R9 */ + &g_la_user_out, /* R10 */ + &dummy, /* R11 */ + &dummy); /* R12 */ + + cq_attr->act_nr_of_cqes = act_nr_of_cqes_out; + cq_attr->nr_pages = act_pages_out; if (!hret) - hcp_epas_ctor(epas, outs[5], outs[6]); + hcp_epas_ctor(epas, g_la_privileged_out, g_la_user_out); return hret; } @@ -367,8 +361,9 @@ u64 ehea_h_alloc_resource_cq(const u64 adapter_handle, u64 ehea_h_alloc_resource_eq(const u64 adapter_handle, struct ehea_eq_attr *eq_attr, u64 *eq_handle) { - u64 hret, allocate_controls; - u64 outs[PLPAR_HCALL9_BUFSIZE]; + u64 hret, dummy, eq_liobn, allocate_controls; + u64 ist1_out, ist2_out, ist3_out, ist4_out; + u64 act_nr_of_eqes_out, act_pages_out; /* resource type */ allocate_controls = @@ -377,20 +372,27 @@ u64 ehea_h_alloc_resource_eq(const u64 adapter_handle, | EHEA_BMASK_SET(H_ALL_RES_EQ_INH_EQE_GEN, !eq_attr->eqe_gen) | EHEA_BMASK_SET(H_ALL_RES_EQ_NON_NEQ_ISN, 1); - hret = ehea_plpar_hcall9(H_ALLOC_HEA_RESOURCE, - outs, - adapter_handle, /* R4 */ - allocate_controls, /* R5 */ - eq_attr->max_nr_of_eqes, /* R6 */ - 0, 0, 0, 0, 0, 0); /* R7-R10 */ - - *eq_handle = outs[0]; - eq_attr->act_nr_of_eqes = outs[3]; - eq_attr->nr_pages = outs[4]; - eq_attr->ist1 = outs[5]; - eq_attr->ist2 = outs[6]; - eq_attr->ist3 = outs[7]; - eq_attr->ist4 = outs[8]; + hret = ehea_hcall_9arg_9ret(H_ALLOC_HEA_RESOURCE, + adapter_handle, /* R4 */ + allocate_controls, /* R5 */ + eq_attr->max_nr_of_eqes, /* R6 */ + 0, 0, 0, 0, 0, 0, /* R7-R10 */ + eq_handle, /* R4 */ + &dummy, /* R5 */ + &eq_liobn, /* R6 */ + &act_nr_of_eqes_out, /* R7 */ + &act_pages_out, /* R8 */ + &ist1_out, /* R9 */ + &ist2_out, /* R10 */ + &ist3_out, /* R11 */ + &ist4_out); /* R12 */ + + eq_attr->act_nr_of_eqes = act_nr_of_eqes_out; + eq_attr->nr_pages = act_pages_out; + eq_attr->ist1 = ist1_out; + eq_attr->ist2 = ist2_out; + eq_attr->ist3 = ist3_out; + eq_attr->ist4 = ist4_out; return hret; } @@ -400,22 +402,31 @@ u64 ehea_h_modify_ehea_qp(const u64 adapter_handle, const u8 cat, void *cb_addr, u64 *inv_attr_id, u64 *proc_mask, u16 *out_swr, u16 *out_rwr) { - u64 hret; - u64 outs[PLPAR_HCALL9_BUFSIZE]; - - hret = ehea_plpar_hcall9(H_MODIFY_HEA_QP, - outs, - adapter_handle, /* R4 */ - (u64) cat, /* R5 */ - qp_handle, /* R6 */ - sel_mask, /* R7 */ - virt_to_abs(cb_addr), /* R8 */ - 0, 0, 0, 0); /* R9-R12 */ - - *inv_attr_id = outs[0]; - *out_swr = outs[3]; - *out_rwr = outs[4]; - *proc_mask = outs[5]; + u64 hret, dummy, act_out_swr, act_out_rwr; + + if ((((u64)cb_addr) & (PAGE_SIZE - 1)) != 0) { + ehea_error("not on page boundary"); + return H_PARAMETER; + } + + hret = ehea_hcall_9arg_9ret(H_MODIFY_HEA_QP, + adapter_handle, /* R4 */ + (u64) cat, /* R5 */ + qp_handle, /* R6 */ + sel_mask, /* R7 */ + virt_to_abs(cb_addr), /* R8 */ + 0, 0, 0, 0, /* R9-R12 */ + inv_attr_id, /* R4 */ + &dummy, /* R5 */ + &dummy, /* R6 */ + &act_out_swr, /* R7 */ + &act_out_rwr, /* R8 */ + proc_mask, /* R9 */ + &dummy, /* R10 */ + &dummy, /* R11 */ + &dummy); /* R12 */ + *out_swr = act_out_swr; + *out_rwr = act_out_rwr; return hret; } @@ -424,81 +435,122 @@ u64 ehea_h_register_rpage(const u64 adapter_handle, const u8 pagesize, const u8 queue_type, const u64 resource_handle, const u64 log_pageaddr, u64 count) { - u64 reg_control; + u64 dummy, reg_control; reg_control = EHEA_BMASK_SET(H_REG_RPAGE_PAGE_SIZE, pagesize) | EHEA_BMASK_SET(H_REG_RPAGE_QT, queue_type); - return ehea_plpar_hcall_norets(H_REGISTER_HEA_RPAGES, - adapter_handle, /* R4 */ - reg_control, /* R5 */ - resource_handle, /* R6 */ - log_pageaddr, /* R7 */ - count, /* R8 */ - 0, 0); /* R9-R10 */ + return ehea_hcall_9arg_9ret(H_REGISTER_HEA_RPAGES, + adapter_handle, /* R4 */ + reg_control, /* R5 */ + resource_handle, /* R6 */ + log_pageaddr, /* R7 */ + count, /* R8 */ + 0, 0, 0, 0, /* R9-R12 */ + &dummy, /* R4 */ + &dummy, /* R5 */ + &dummy, /* R6 */ + &dummy, /* R7 */ + &dummy, /* R8 */ + &dummy, /* R9 */ + &dummy, /* R10 */ + &dummy, /* R11 */ + &dummy); /* R12 */ } u64 ehea_h_register_smr(const u64 adapter_handle, const u64 orig_mr_handle, const u64 vaddr_in, const u32 access_ctrl, const u32 pd, struct ehea_mr *mr) { - u64 hret; - u64 outs[PLPAR_HCALL9_BUFSIZE]; - - hret = ehea_plpar_hcall9(H_REGISTER_SMR, - outs, - adapter_handle , /* R4 */ - orig_mr_handle, /* R5 */ - vaddr_in, /* R6 */ - (((u64)access_ctrl) << 32ULL), /* R7 */ - pd, /* R8 */ - 0, 0, 0, 0); /* R9-R12 */ - - mr->handle = outs[0]; - mr->lkey = (u32)outs[2]; + u64 hret, dummy, lkey_out; + + hret = ehea_hcall_9arg_9ret(H_REGISTER_SMR, + adapter_handle , /* R4 */ + orig_mr_handle, /* R5 */ + vaddr_in, /* R6 */ + (((u64)access_ctrl) << 32ULL), /* R7 */ + pd, /* R8 */ + 0, 0, 0, 0, /* R9-R12 */ + &mr->handle, /* R4 */ + &dummy, /* R5 */ + &lkey_out, /* R6 */ + &dummy, /* R7 */ + &dummy, /* R8 */ + &dummy, /* R9 */ + &dummy, /* R10 */ + &dummy, /* R11 */ + &dummy); /* R12 */ + mr->lkey = (u32)lkey_out; return hret; } u64 ehea_h_disable_and_get_hea(const u64 adapter_handle, const u64 qp_handle) { - u64 outs[PLPAR_HCALL9_BUFSIZE]; - - return ehea_plpar_hcall9(H_DISABLE_AND_GET_HEA, - outs, - adapter_handle, /* R4 */ - H_DISABLE_GET_EHEA_WQE_P, /* R5 */ - qp_handle, /* R6 */ - 0, 0, 0, 0, 0, 0); /* R7-R12 */ + u64 hret, dummy, ladr_next_sq_wqe_out; + u64 ladr_next_rq1_wqe_out, ladr_next_rq2_wqe_out, ladr_next_rq3_wqe_out; + + hret = ehea_hcall_9arg_9ret(H_DISABLE_AND_GET_HEA, + adapter_handle, /* R4 */ + H_DISABLE_GET_EHEA_WQE_P, /* R5 */ + qp_handle, /* R6 */ + 0, 0, 0, 0, 0, 0, /* R7-R12 */ + &ladr_next_sq_wqe_out, /* R4 */ + &ladr_next_rq1_wqe_out, /* R5 */ + &ladr_next_rq2_wqe_out, /* R6 */ + &ladr_next_rq3_wqe_out, /* R7 */ + &dummy, /* R8 */ + &dummy, /* R9 */ + &dummy, /* R10 */ + &dummy, /* R11 */ + &dummy); /* R12 */ + return hret; } u64 ehea_h_free_resource(const u64 adapter_handle, const u64 res_handle) { - return ehea_plpar_hcall_norets(H_FREE_RESOURCE, - adapter_handle, /* R4 */ - res_handle, /* R5 */ - 0, 0, 0, 0, 0); /* R6-R10 */ + u64 dummy; + + return ehea_hcall_9arg_9ret(H_FREE_RESOURCE, + adapter_handle, /* R4 */ + res_handle, /* R5 */ + 0, 0, 0, 0, 0, 0, 0, /* R6-R12 */ + &dummy, /* R4 */ + &dummy, /* R5 */ + &dummy, /* R6 */ + &dummy, /* R7 */ + &dummy, /* R8 */ + &dummy, /* R9 */ + &dummy, /* R10 */ + &dummy, /* R11 */ + &dummy); /* R12 */ } u64 ehea_h_alloc_resource_mr(const u64 adapter_handle, const u64 vaddr, const u64 length, const u32 access_ctrl, const u32 pd, u64 *mr_handle, u32 *lkey) { - u64 hret; - u64 outs[PLPAR_HCALL9_BUFSIZE]; - - hret = ehea_plpar_hcall9(H_ALLOC_HEA_RESOURCE, - outs, - adapter_handle, /* R4 */ - 5, /* R5 */ - vaddr, /* R6 */ - length, /* R7 */ - (((u64) access_ctrl) << 32ULL), /* R8 */ - pd, /* R9 */ - 0, 0, 0); /* R10-R12 */ - - *mr_handle = outs[0]; - *lkey = (u32)outs[2]; + u64 hret, dummy, lkey_out; + + hret = ehea_hcall_9arg_9ret(H_ALLOC_HEA_RESOURCE, + adapter_handle, /* R4 */ + 5, /* R5 */ + vaddr, /* R6 */ + length, /* R7 */ + (((u64) access_ctrl) << 32ULL),/* R8 */ + pd, /* R9 */ + 0, 0, 0, /* R10-R12 */ + mr_handle, /* R4 */ + &dummy, /* R5 */ + &lkey_out, /* R6 */ + &dummy, /* R7 */ + &dummy, /* R8 */ + &dummy, /* R9 */ + &dummy, /* R10 */ + &dummy, /* R11 */ + &dummy); /* R12 */ + *lkey = (u32) lkey_out; + return hret; } @@ -518,14 +570,23 @@ u64 ehea_h_register_rpage_mr(const u64 adapter_handle, const u64 mr_handle, u64 ehea_h_query_ehea(const u64 adapter_handle, void *cb_addr) { - u64 hret, cb_logaddr; + u64 hret, dummy, cb_logaddr; cb_logaddr = virt_to_abs(cb_addr); - hret = ehea_plpar_hcall_norets(H_QUERY_HEA, - adapter_handle, /* R4 */ - cb_logaddr, /* R5 */ - 0, 0, 0, 0, 0); /* R6-R10 */ + hret = ehea_hcall_9arg_9ret(H_QUERY_HEA, + adapter_handle, /* R4 */ + cb_logaddr, /* R5 */ + 0, 0, 0, 0, 0, 0, 0, /* R6-R12 */ + &dummy, /* R4 */ + &dummy, /* R5 */ + &dummy, /* R6 */ + &dummy, /* R7 */ + &dummy, /* R8 */ + &dummy, /* R9 */ + &dummy, /* R10 */ + &dummy, /* R11 */ + &dummy); /* R12 */ #ifdef DEBUG ehea_dmp(cb_addr, sizeof(struct hcp_query_ehea), "hcp_query_ehea"); #endif @@ -536,28 +597,36 @@ u64 ehea_h_query_ehea_port(const u64 adapter_handle, const u16 port_num, const u8 cb_cat, const u64 select_mask, void *cb_addr) { - u64 port_info; + u64 port_info, dummy; u64 cb_logaddr = virt_to_abs(cb_addr); u64 arr_index = 0; port_info = EHEA_BMASK_SET(H_MEHEAPORT_CAT, cb_cat) | EHEA_BMASK_SET(H_MEHEAPORT_PN, port_num); - return ehea_plpar_hcall_norets(H_QUERY_HEA_PORT, - adapter_handle, /* R4 */ - port_info, /* R5 */ - select_mask, /* R6 */ - arr_index, /* R7 */ - cb_logaddr, /* R8 */ - 0, 0); /* R9-R10 */ + return ehea_hcall_9arg_9ret(H_QUERY_HEA_PORT, + adapter_handle, /* R4 */ + port_info, /* R5 */ + select_mask, /* R6 */ + arr_index, /* R7 */ + cb_logaddr, /* R8 */ + 0, 0, 0, 0, /* R9-R12 */ + &dummy, /* R4 */ + &dummy, /* R5 */ + &dummy, /* R6 */ + &dummy, /* R7 */ + &dummy, /* R8 */ + &dummy, /* R9 */ + &dummy, /* R10 */ + &dummy, /* R11 */ + &dummy); /* R12 */ } u64 ehea_h_modify_ehea_port(const u64 adapter_handle, const u16 port_num, const u8 cb_cat, const u64 select_mask, void *cb_addr) { - u64 outs[PLPAR_HCALL9_BUFSIZE]; - u64 port_info; + u64 port_info, dummy, inv_attr_ident, proc_mask; u64 arr_index = 0; u64 cb_logaddr = virt_to_abs(cb_addr); @@ -566,21 +635,29 @@ u64 ehea_h_modify_ehea_port(const u64 adapter_handle, const u16 port_num, #ifdef DEBUG ehea_dump(cb_addr, sizeof(struct hcp_ehea_port_cb0), "Before HCALL"); #endif - return ehea_plpar_hcall9(H_MODIFY_HEA_PORT, - outs, - adapter_handle, /* R4 */ - port_info, /* R5 */ - select_mask, /* R6 */ - arr_index, /* R7 */ - cb_logaddr, /* R8 */ - 0, 0, 0, 0); /* R9-R12 */ + return ehea_hcall_9arg_9ret(H_MODIFY_HEA_PORT, + adapter_handle, /* R4 */ + port_info, /* R5 */ + select_mask, /* R6 */ + arr_index, /* R7 */ + cb_logaddr, /* R8 */ + 0, 0, 0, 0, /* R9-R12 */ + &inv_attr_ident, /* R4 */ + &proc_mask, /* R5 */ + &dummy, /* R6 */ + &dummy, /* R7 */ + &dummy, /* R8 */ + &dummy, /* R9 */ + &dummy, /* R10 */ + &dummy, /* R11 */ + &dummy); /* R12 */ } u64 ehea_h_reg_dereg_bcmc(const u64 adapter_handle, const u16 port_num, const u8 reg_type, const u64 mc_mac_addr, const u16 vlan_id, const u32 hcall_id) { - u64 r5_port_num, r6_reg_type, r7_mc_mac_addr, r8_vlan_id; + u64 r5_port_num, r6_reg_type, r7_mc_mac_addr, r8_vlan_id, dummy; u64 mac_addr = mc_mac_addr >> 16; r5_port_num = EHEA_BMASK_SET(H_REGBCMC_PN, port_num); @@ -588,21 +665,41 @@ u64 ehea_h_reg_dereg_bcmc(const u64 adapter_handle, const u16 port_num, r7_mc_mac_addr = EHEA_BMASK_SET(H_REGBCMC_MACADDR, mac_addr); r8_vlan_id = EHEA_BMASK_SET(H_REGBCMC_VLANID, vlan_id); - return ehea_plpar_hcall_norets(hcall_id, - adapter_handle, /* R4 */ - r5_port_num, /* R5 */ - r6_reg_type, /* R6 */ - r7_mc_mac_addr, /* R7 */ - r8_vlan_id, /* R8 */ - 0, 0); /* R9-R12 */ + return ehea_hcall_9arg_9ret(hcall_id, + adapter_handle, /* R4 */ + r5_port_num, /* R5 */ + r6_reg_type, /* R6 */ + r7_mc_mac_addr, /* R7 */ + r8_vlan_id, /* R8 */ + 0, 0, 0, 0, /* R9-R12 */ + &dummy, /* R4 */ + &dummy, /* R5 */ + &dummy, /* R6 */ + &dummy, /* R7 */ + &dummy, /* R8 */ + &dummy, /* R9 */ + &dummy, /* R10 */ + &dummy, /* R11 */ + &dummy); /* R12 */ } u64 ehea_h_reset_events(const u64 adapter_handle, const u64 neq_handle, const u64 event_mask) { - return ehea_plpar_hcall_norets(H_RESET_EVENTS, - adapter_handle, /* R4 */ - neq_handle, /* R5 */ - event_mask, /* R6 */ - 0, 0, 0, 0); /* R7-R12 */ + u64 dummy; + + return ehea_hcall_9arg_9ret(H_RESET_EVENTS, + adapter_handle, /* R4 */ + neq_handle, /* R5 */ + event_mask, /* R6 */ + 0, 0, 0, 0, 0, 0, /* R7-R12 */ + &dummy, /* R4 */ + &dummy, /* R5 */ + &dummy, /* R6 */ + &dummy, /* R7 */ + &dummy, /* R8 */ + &dummy, /* R9 */ + &dummy, /* R10 */ + &dummy, /* R11 */ + &dummy); /* R12 */ } diff --git a/trunk/drivers/net/eth16i.c b/trunk/drivers/net/eth16i.c index b7b8bc2a6307..8cc3c331aca8 100644 --- a/trunk/drivers/net/eth16i.c +++ b/trunk/drivers/net/eth16i.c @@ -162,9 +162,9 @@ static char *version = #include #include #include -#include #include +#include #include diff --git a/trunk/drivers/net/forcedeth.c b/trunk/drivers/net/forcedeth.c index c5ed635bce36..99b7a411db28 100644 --- a/trunk/drivers/net/forcedeth.c +++ b/trunk/drivers/net/forcedeth.c @@ -2497,7 +2497,6 @@ static irqreturn_t nv_nic_irq_tx(int foo, void *data) u8 __iomem *base = get_hwbase(dev); u32 events; int i; - unsigned long flags; dprintk(KERN_DEBUG "%s: nv_nic_irq_tx\n", dev->name); @@ -2509,16 +2508,16 @@ static irqreturn_t nv_nic_irq_tx(int foo, void *data) if (!(events & np->irqmask)) break; - spin_lock_irqsave(&np->lock, flags); + spin_lock_irq(&np->lock); nv_tx_done(dev); - spin_unlock_irqrestore(&np->lock, flags); + spin_unlock_irq(&np->lock); if (events & (NVREG_IRQ_TX_ERR)) { dprintk(KERN_DEBUG "%s: received irq with events 0x%x. Probably TX fail.\n", dev->name, events); } if (i > max_interrupt_work) { - spin_lock_irqsave(&np->lock, flags); + spin_lock_irq(&np->lock); /* disable interrupts on the nic */ writel(NVREG_IRQ_TX_ALL, base + NvRegIrqMask); pci_push(base); @@ -2528,7 +2527,7 @@ static irqreturn_t nv_nic_irq_tx(int foo, void *data) mod_timer(&np->nic_poll, jiffies + POLL_WAIT); } printk(KERN_DEBUG "%s: too many iterations (%d) in nv_nic_irq_tx.\n", dev->name, i); - spin_unlock_irqrestore(&np->lock, flags); + spin_unlock_irq(&np->lock); break; } @@ -2602,7 +2601,6 @@ static irqreturn_t nv_nic_irq_rx(int foo, void *data) u8 __iomem *base = get_hwbase(dev); u32 events; int i; - unsigned long flags; dprintk(KERN_DEBUG "%s: nv_nic_irq_rx\n", dev->name); @@ -2616,14 +2614,14 @@ static irqreturn_t nv_nic_irq_rx(int foo, void *data) nv_rx_process(dev, dev->weight); if (nv_alloc_rx(dev)) { - spin_lock_irqsave(&np->lock, flags); + spin_lock_irq(&np->lock); if (!np->in_shutdown) mod_timer(&np->oom_kick, jiffies + OOM_REFILL); - spin_unlock_irqrestore(&np->lock, flags); + spin_unlock_irq(&np->lock); } if (i > max_interrupt_work) { - spin_lock_irqsave(&np->lock, flags); + spin_lock_irq(&np->lock); /* disable interrupts on the nic */ writel(NVREG_IRQ_RX_ALL, base + NvRegIrqMask); pci_push(base); @@ -2633,7 +2631,7 @@ static irqreturn_t nv_nic_irq_rx(int foo, void *data) mod_timer(&np->nic_poll, jiffies + POLL_WAIT); } printk(KERN_DEBUG "%s: too many iterations (%d) in nv_nic_irq_rx.\n", dev->name, i); - spin_unlock_irqrestore(&np->lock, flags); + spin_unlock_irq(&np->lock); break; } } @@ -2650,7 +2648,6 @@ static irqreturn_t nv_nic_irq_other(int foo, void *data) u8 __iomem *base = get_hwbase(dev); u32 events; int i; - unsigned long flags; dprintk(KERN_DEBUG "%s: nv_nic_irq_other\n", dev->name); @@ -2663,14 +2660,14 @@ static irqreturn_t nv_nic_irq_other(int foo, void *data) break; if (events & NVREG_IRQ_LINK) { - spin_lock_irqsave(&np->lock, flags); + spin_lock_irq(&np->lock); nv_link_irq(dev); - spin_unlock_irqrestore(&np->lock, flags); + spin_unlock_irq(&np->lock); } if (np->need_linktimer && time_after(jiffies, np->link_timeout)) { - spin_lock_irqsave(&np->lock, flags); + spin_lock_irq(&np->lock); nv_linkchange(dev); - spin_unlock_irqrestore(&np->lock, flags); + spin_unlock_irq(&np->lock); np->link_timeout = jiffies + LINK_TIMEOUT; } if (events & (NVREG_IRQ_UNKNOWN)) { @@ -2678,7 +2675,7 @@ static irqreturn_t nv_nic_irq_other(int foo, void *data) dev->name, events); } if (i > max_interrupt_work) { - spin_lock_irqsave(&np->lock, flags); + spin_lock_irq(&np->lock); /* disable interrupts on the nic */ writel(NVREG_IRQ_OTHER, base + NvRegIrqMask); pci_push(base); @@ -2688,7 +2685,7 @@ static irqreturn_t nv_nic_irq_other(int foo, void *data) mod_timer(&np->nic_poll, jiffies + POLL_WAIT); } printk(KERN_DEBUG "%s: too many iterations (%d) in nv_nic_irq_other.\n", dev->name, i); - spin_unlock_irqrestore(&np->lock, flags); + spin_unlock_irq(&np->lock); break; } diff --git a/trunk/drivers/net/ibmveth.c b/trunk/drivers/net/ibmveth.c index 2802db23d3cb..4bac3cd8f235 100644 --- a/trunk/drivers/net/ibmveth.c +++ b/trunk/drivers/net/ibmveth.c @@ -213,7 +213,6 @@ static void ibmveth_replenish_buffer_pool(struct ibmveth_adapter *adapter, struc } free_index = pool->consumer_index++ % pool->size; - pool->consumer_index = free_index; index = pool->free_map[free_index]; ibmveth_assert(index != IBM_VETH_INVALID_MAP); @@ -239,10 +238,7 @@ static void ibmveth_replenish_buffer_pool(struct ibmveth_adapter *adapter, struc if(lpar_rc != H_SUCCESS) { pool->free_map[free_index] = index; pool->skbuff[index] = NULL; - if (pool->consumer_index == 0) - pool->consumer_index = pool->size - 1; - else - pool->consumer_index--; + pool->consumer_index--; dma_unmap_single(&adapter->vdev->dev, pool->dma_addr[index], pool->buff_size, DMA_FROM_DEVICE); @@ -330,7 +326,6 @@ static void ibmveth_remove_buffer_from_pool(struct ibmveth_adapter *adapter, u64 DMA_FROM_DEVICE); free_index = adapter->rx_buff_pool[pool].producer_index++ % adapter->rx_buff_pool[pool].size; - adapter->rx_buff_pool[pool].producer_index = free_index; adapter->rx_buff_pool[pool].free_map[free_index] = index; mb(); @@ -442,31 +437,6 @@ static void ibmveth_cleanup(struct ibmveth_adapter *adapter) &adapter->rx_buff_pool[i]); } -static int ibmveth_register_logical_lan(struct ibmveth_adapter *adapter, - union ibmveth_buf_desc rxq_desc, u64 mac_address) -{ - int rc, try_again = 1; - - /* After a kexec the adapter will still be open, so our attempt to - * open it will fail. So if we get a failure we free the adapter and - * try again, but only once. */ -retry: - rc = h_register_logical_lan(adapter->vdev->unit_address, - adapter->buffer_list_dma, rxq_desc.desc, - adapter->filter_list_dma, mac_address); - - if (rc != H_SUCCESS && try_again) { - do { - rc = h_free_logical_lan(adapter->vdev->unit_address); - } while (H_IS_LONG_BUSY(rc) || (rc == H_BUSY)); - - try_again = 0; - goto retry; - } - - return rc; -} - static int ibmveth_open(struct net_device *netdev) { struct ibmveth_adapter *adapter = netdev->priv; @@ -532,9 +502,12 @@ static int ibmveth_open(struct net_device *netdev) ibmveth_debug_printk("filter list @ 0x%p\n", adapter->filter_list_addr); ibmveth_debug_printk("receive q @ 0x%p\n", adapter->rx_queue.queue_addr); - h_vio_signal(adapter->vdev->unit_address, VIO_IRQ_DISABLE); - lpar_rc = ibmveth_register_logical_lan(adapter, rxq_desc, mac_address); + lpar_rc = h_register_logical_lan(adapter->vdev->unit_address, + adapter->buffer_list_dma, + rxq_desc.desc, + adapter->filter_list_dma, + mac_address); if(lpar_rc != H_SUCCESS) { ibmveth_error_printk("h_register_logical_lan failed with %ld\n", lpar_rc); @@ -932,14 +905,6 @@ static int ibmveth_change_mtu(struct net_device *dev, int new_mtu) return -EINVAL; } -#ifdef CONFIG_NET_POLL_CONTROLLER -static void ibmveth_poll_controller(struct net_device *dev) -{ - ibmveth_replenish_task(dev->priv); - ibmveth_interrupt(dev->irq, dev); -} -#endif - static int __devinit ibmveth_probe(struct vio_dev *dev, const struct vio_device_id *id) { int rc, i; @@ -1012,9 +977,6 @@ static int __devinit ibmveth_probe(struct vio_dev *dev, const struct vio_device_ netdev->ethtool_ops = &netdev_ethtool_ops; netdev->change_mtu = ibmveth_change_mtu; SET_NETDEV_DEV(netdev, &dev->dev); -#ifdef CONFIG_NET_POLL_CONTROLLER - netdev->poll_controller = ibmveth_poll_controller; -#endif netdev->features |= NETIF_F_LLTX; spin_lock_init(&adapter->stats_lock); @@ -1170,9 +1132,7 @@ static void ibmveth_proc_register_adapter(struct ibmveth_adapter *adapter) { struct proc_dir_entry *entry; if (ibmveth_proc_dir) { - char u_addr[10]; - sprintf(u_addr, "%x", adapter->vdev->unit_address); - entry = create_proc_entry(u_addr, S_IFREG, ibmveth_proc_dir); + entry = create_proc_entry(adapter->netdev->name, S_IFREG, ibmveth_proc_dir); if (!entry) { ibmveth_error_printk("Cannot create adapter proc entry"); } else { @@ -1187,9 +1147,7 @@ static void ibmveth_proc_register_adapter(struct ibmveth_adapter *adapter) static void ibmveth_proc_unregister_adapter(struct ibmveth_adapter *adapter) { if (ibmveth_proc_dir) { - char u_addr[10]; - sprintf(u_addr, "%x", adapter->vdev->unit_address); - remove_proc_entry(u_addr, ibmveth_proc_dir); + remove_proc_entry(adapter->netdev->name, ibmveth_proc_dir); } } diff --git a/trunk/drivers/net/mv643xx_eth.c b/trunk/drivers/net/mv643xx_eth.c index 9997081c6dae..2ffa3a59e704 100644 --- a/trunk/drivers/net/mv643xx_eth.c +++ b/trunk/drivers/net/mv643xx_eth.c @@ -2155,7 +2155,7 @@ static void eth_update_mib_counters(struct mv643xx_private *mp) for (offset = ETH_MIB_BAD_OCTETS_RECEIVED; offset <= ETH_MIB_FRAMES_1024_TO_MAX_OCTETS; offset += 4) - *(u32 *)((char *)p + offset) += read_mib(mp, offset); + *(u32 *)((char *)p + offset) = read_mib(mp, offset); p->good_octets_sent += read_mib(mp, ETH_MIB_GOOD_OCTETS_SENT_LOW); p->good_octets_sent += @@ -2164,7 +2164,7 @@ static void eth_update_mib_counters(struct mv643xx_private *mp) for (offset = ETH_MIB_GOOD_FRAMES_SENT; offset <= ETH_MIB_LATE_COLLISION; offset += 4) - *(u32 *)((char *)p + offset) += read_mib(mp, offset); + *(u32 *)((char *)p + offset) = read_mib(mp, offset); } /* diff --git a/trunk/drivers/net/skge.c b/trunk/drivers/net/skge.c index e7e414928f89..a4a58e4e93a1 100644 --- a/trunk/drivers/net/skge.c +++ b/trunk/drivers/net/skge.c @@ -43,7 +43,7 @@ #include "skge.h" #define DRV_NAME "skge" -#define DRV_VERSION "1.9" +#define DRV_VERSION "1.8" #define PFX DRV_NAME " " #define DEFAULT_TX_RING_SIZE 128 @@ -197,8 +197,8 @@ static u32 skge_supported_modes(const struct skge_hw *hw) else if (hw->chip_id == CHIP_ID_YUKON) supported &= ~SUPPORTED_1000baseT_Half; } else - supported = SUPPORTED_1000baseT_Full | SUPPORTED_1000baseT_Half - | SUPPORTED_FIBRE | SUPPORTED_Autoneg; + supported = SUPPORTED_1000baseT_Full | SUPPORTED_FIBRE + | SUPPORTED_Autoneg; return supported; } @@ -487,37 +487,31 @@ static void skge_get_pauseparam(struct net_device *dev, { struct skge_port *skge = netdev_priv(dev); - ecmd->rx_pause = (skge->flow_control == FLOW_MODE_SYMMETRIC) - || (skge->flow_control == FLOW_MODE_SYM_OR_REM); - ecmd->tx_pause = ecmd->rx_pause || (skge->flow_control == FLOW_MODE_LOC_SEND); + ecmd->tx_pause = (skge->flow_control == FLOW_MODE_LOC_SEND) + || (skge->flow_control == FLOW_MODE_SYMMETRIC); + ecmd->rx_pause = (skge->flow_control == FLOW_MODE_REM_SEND) + || (skge->flow_control == FLOW_MODE_SYMMETRIC); - ecmd->autoneg = ecmd->rx_pause || ecmd->tx_pause; + ecmd->autoneg = skge->autoneg; } static int skge_set_pauseparam(struct net_device *dev, struct ethtool_pauseparam *ecmd) { struct skge_port *skge = netdev_priv(dev); - struct ethtool_pauseparam old; - - skge_get_pauseparam(dev, &old); - if (ecmd->autoneg != old.autoneg) - skge->flow_control = ecmd->autoneg ? FLOW_MODE_NONE : FLOW_MODE_SYMMETRIC; - else { - if (ecmd->rx_pause && ecmd->tx_pause) - skge->flow_control = FLOW_MODE_SYMMETRIC; - else if (ecmd->rx_pause && !ecmd->tx_pause) - skge->flow_control = FLOW_MODE_SYM_OR_REM; - else if (!ecmd->rx_pause && ecmd->tx_pause) - skge->flow_control = FLOW_MODE_LOC_SEND; - else - skge->flow_control = FLOW_MODE_NONE; - } + skge->autoneg = ecmd->autoneg; + if (ecmd->rx_pause && ecmd->tx_pause) + skge->flow_control = FLOW_MODE_SYMMETRIC; + else if (ecmd->rx_pause && !ecmd->tx_pause) + skge->flow_control = FLOW_MODE_REM_SEND; + else if (!ecmd->rx_pause && ecmd->tx_pause) + skge->flow_control = FLOW_MODE_LOC_SEND; + else + skge->flow_control = FLOW_MODE_NONE; if (netif_running(dev)) skge_phy_reset(skge); - return 0; } @@ -860,23 +854,6 @@ static int skge_rx_fill(struct net_device *dev) return 0; } -static const char *skge_pause(enum pause_status status) -{ - switch(status) { - case FLOW_STAT_NONE: - return "none"; - case FLOW_STAT_REM_SEND: - return "rx only"; - case FLOW_STAT_LOC_SEND: - return "tx_only"; - case FLOW_STAT_SYMMETRIC: /* Both station may send PAUSE */ - return "both"; - default: - return "indeterminated"; - } -} - - static void skge_link_up(struct skge_port *skge) { skge_write8(skge->hw, SK_REG(skge->port, LNK_LED_REG), @@ -885,13 +862,16 @@ static void skge_link_up(struct skge_port *skge) netif_carrier_on(skge->netdev); netif_wake_queue(skge->netdev); - if (netif_msg_link(skge)) { + if (netif_msg_link(skge)) printk(KERN_INFO PFX "%s: Link is up at %d Mbps, %s duplex, flow control %s\n", skge->netdev->name, skge->speed, skge->duplex == DUPLEX_FULL ? "full" : "half", - skge_pause(skge->flow_status)); - } + (skge->flow_control == FLOW_MODE_NONE) ? "none" : + (skge->flow_control == FLOW_MODE_LOC_SEND) ? "tx only" : + (skge->flow_control == FLOW_MODE_REM_SEND) ? "rx only" : + (skge->flow_control == FLOW_MODE_SYMMETRIC) ? "tx and rx" : + "unknown"); } static void skge_link_down(struct skge_port *skge) @@ -904,29 +884,6 @@ static void skge_link_down(struct skge_port *skge) printk(KERN_INFO PFX "%s: Link is down.\n", skge->netdev->name); } - -static void xm_link_down(struct skge_hw *hw, int port) -{ - struct net_device *dev = hw->dev[port]; - struct skge_port *skge = netdev_priv(dev); - u16 cmd, msk; - - if (hw->phy_type == SK_PHY_XMAC) { - msk = xm_read16(hw, port, XM_IMSK); - msk |= XM_IS_INP_ASS | XM_IS_LIPA_RC | XM_IS_RX_PAGE | XM_IS_AND; - xm_write16(hw, port, XM_IMSK, msk); - } - - cmd = xm_read16(hw, port, XM_MMU_CMD); - cmd &= ~(XM_MMU_ENA_RX | XM_MMU_ENA_TX); - xm_write16(hw, port, XM_MMU_CMD, cmd); - /* dummy read to ensure writing */ - (void) xm_read16(hw, port, XM_MMU_CMD); - - if (netif_carrier_ok(dev)) - skge_link_down(skge); -} - static int __xm_phy_read(struct skge_hw *hw, int port, u16 reg, u16 *val) { int i; @@ -1035,15 +992,7 @@ static const u16 phy_pause_map[] = { [FLOW_MODE_NONE] = 0, [FLOW_MODE_LOC_SEND] = PHY_AN_PAUSE_ASYM, [FLOW_MODE_SYMMETRIC] = PHY_AN_PAUSE_CAP, - [FLOW_MODE_SYM_OR_REM] = PHY_AN_PAUSE_CAP | PHY_AN_PAUSE_ASYM, -}; - -/* special defines for FIBER (88E1011S only) */ -static const u16 fiber_pause_map[] = { - [FLOW_MODE_NONE] = PHY_X_P_NO_PAUSE, - [FLOW_MODE_LOC_SEND] = PHY_X_P_ASYM_MD, - [FLOW_MODE_SYMMETRIC] = PHY_X_P_SYM_MD, - [FLOW_MODE_SYM_OR_REM] = PHY_X_P_BOTH_MD, + [FLOW_MODE_REM_SEND] = PHY_AN_PAUSE_CAP | PHY_AN_PAUSE_ASYM, }; @@ -1059,7 +1008,14 @@ static void bcom_check_link(struct skge_hw *hw, int port) status = xm_phy_read(hw, port, PHY_BCOM_STAT); if ((status & PHY_ST_LSYNC) == 0) { - xm_link_down(hw, port); + u16 cmd = xm_read16(hw, port, XM_MMU_CMD); + cmd &= ~(XM_MMU_ENA_RX | XM_MMU_ENA_TX); + xm_write16(hw, port, XM_MMU_CMD, cmd); + /* dummy read to ensure writing */ + (void) xm_read16(hw, port, XM_MMU_CMD); + + if (netif_carrier_ok(dev)) + skge_link_down(skge); return; } @@ -1092,19 +1048,20 @@ static void bcom_check_link(struct skge_hw *hw, int port) return; } + /* We are using IEEE 802.3z/D5.0 Table 37-4 */ switch (aux & PHY_B_AS_PAUSE_MSK) { case PHY_B_AS_PAUSE_MSK: - skge->flow_status = FLOW_STAT_SYMMETRIC; + skge->flow_control = FLOW_MODE_SYMMETRIC; break; case PHY_B_AS_PRR: - skge->flow_status = FLOW_STAT_REM_SEND; + skge->flow_control = FLOW_MODE_REM_SEND; break; case PHY_B_AS_PRT: - skge->flow_status = FLOW_STAT_LOC_SEND; + skge->flow_control = FLOW_MODE_LOC_SEND; break; default: - skge->flow_status = FLOW_STAT_NONE; + skge->flow_control = FLOW_MODE_NONE; } skge->speed = SPEED_1000; } @@ -1234,7 +1191,17 @@ static void xm_phy_init(struct skge_port *skge) if (skge->advertising & ADVERTISED_1000baseT_Full) ctrl |= PHY_X_AN_FD; - ctrl |= fiber_pause_map[skge->flow_control]; + switch(skge->flow_control) { + case FLOW_MODE_NONE: + ctrl |= PHY_X_P_NO_PAUSE; + break; + case FLOW_MODE_LOC_SEND: + ctrl |= PHY_X_P_ASYM_MD; + break; + case FLOW_MODE_SYMMETRIC: + ctrl |= PHY_X_P_BOTH_MD; + break; + } xm_phy_write(hw, port, PHY_XMAC_AUNE_ADV, ctrl); @@ -1268,7 +1235,14 @@ static void xm_check_link(struct net_device *dev) status = xm_phy_read(hw, port, PHY_XMAC_STAT); if ((status & PHY_ST_LSYNC) == 0) { - xm_link_down(hw, port); + u16 cmd = xm_read16(hw, port, XM_MMU_CMD); + cmd &= ~(XM_MMU_ENA_RX | XM_MMU_ENA_TX); + xm_write16(hw, port, XM_MMU_CMD, cmd); + /* dummy read to ensure writing */ + (void) xm_read16(hw, port, XM_MMU_CMD); + + if (netif_carrier_ok(dev)) + skge_link_down(skge); return; } @@ -1302,20 +1276,15 @@ static void xm_check_link(struct net_device *dev) } /* We are using IEEE 802.3z/D5.0 Table 37-4 */ - if ((skge->flow_control == FLOW_MODE_SYMMETRIC || - skge->flow_control == FLOW_MODE_SYM_OR_REM) && - (lpa & PHY_X_P_SYM_MD)) - skge->flow_status = FLOW_STAT_SYMMETRIC; - else if (skge->flow_control == FLOW_MODE_SYM_OR_REM && - (lpa & PHY_X_RS_PAUSE) == PHY_X_P_ASYM_MD) - /* Enable PAUSE receive, disable PAUSE transmit */ - skge->flow_status = FLOW_STAT_REM_SEND; - else if (skge->flow_control == FLOW_MODE_LOC_SEND && - (lpa & PHY_X_RS_PAUSE) == PHY_X_P_BOTH_MD) - /* Disable PAUSE receive, enable PAUSE transmit */ - skge->flow_status = FLOW_STAT_LOC_SEND; + if (lpa & PHY_X_P_SYM_MD) + skge->flow_control = FLOW_MODE_SYMMETRIC; + else if ((lpa & PHY_X_RS_PAUSE) == PHY_X_P_ASYM_MD) + skge->flow_control = FLOW_MODE_REM_SEND; + else if ((lpa & PHY_X_RS_PAUSE) == PHY_X_P_BOTH_MD) + skge->flow_control = FLOW_MODE_LOC_SEND; else - skge->flow_status = FLOW_STAT_NONE; + skge->flow_control = FLOW_MODE_NONE; + skge->speed = SPEED_1000; } @@ -1599,10 +1568,6 @@ static void genesis_mac_intr(struct skge_hw *hw, int port) printk(KERN_DEBUG PFX "%s: mac interrupt status 0x%x\n", skge->netdev->name, status); - if (hw->phy_type == SK_PHY_XMAC && - (status & (XM_IS_INP_ASS | XM_IS_LIPA_RC))) - xm_link_down(hw, port); - if (status & XM_IS_TXF_UR) { xm_write32(hw, port, XM_MODE, XM_MD_FTF); ++skge->net_stats.tx_fifo_errors; @@ -1617,7 +1582,7 @@ static void genesis_link_up(struct skge_port *skge) { struct skge_hw *hw = skge->hw; int port = skge->port; - u16 cmd, msk; + u16 cmd; u32 mode; cmd = xm_read16(hw, port, XM_MMU_CMD); @@ -1626,8 +1591,8 @@ static void genesis_link_up(struct skge_port *skge) * enabling pause frame reception is required for 1000BT * because the XMAC is not reset if the link is going down */ - if (skge->flow_status == FLOW_STAT_NONE || - skge->flow_status == FLOW_STAT_LOC_SEND) + if (skge->flow_control == FLOW_MODE_NONE || + skge->flow_control == FLOW_MODE_LOC_SEND) /* Disable Pause Frame Reception */ cmd |= XM_MMU_IGN_PF; else @@ -1637,8 +1602,8 @@ static void genesis_link_up(struct skge_port *skge) xm_write16(hw, port, XM_MMU_CMD, cmd); mode = xm_read32(hw, port, XM_MODE); - if (skge->flow_status== FLOW_STAT_SYMMETRIC || - skge->flow_status == FLOW_STAT_LOC_SEND) { + if (skge->flow_control == FLOW_MODE_SYMMETRIC || + skge->flow_control == FLOW_MODE_LOC_SEND) { /* * Configure Pause Frame Generation * Use internal and external Pause Frame Generation. @@ -1666,11 +1631,7 @@ static void genesis_link_up(struct skge_port *skge) } xm_write32(hw, port, XM_MODE, mode); - msk = XM_DEF_MSK; - if (hw->phy_type != SK_PHY_XMAC) - msk |= XM_IS_INP_ASS; /* disable GP0 interrupt bit */ - - xm_write16(hw, port, XM_IMSK, msk); + xm_write16(hw, port, XM_IMSK, XM_DEF_MSK); xm_read16(hw, port, XM_ISRC); /* get MMU Command Reg. */ @@ -1818,17 +1779,11 @@ static void yukon_init(struct skge_hw *hw, int port) adv |= PHY_M_AN_10_FD; if (skge->advertising & ADVERTISED_10baseT_Half) adv |= PHY_M_AN_10_HD; + } else /* special defines for FIBER (88E1011S only) */ + adv |= PHY_M_AN_1000X_AHD | PHY_M_AN_1000X_AFD; - /* Set Flow-control capabilities */ - adv |= phy_pause_map[skge->flow_control]; - } else { - if (skge->advertising & ADVERTISED_1000baseT_Full) - adv |= PHY_M_AN_1000X_AFD; - if (skge->advertising & ADVERTISED_1000baseT_Half) - adv |= PHY_M_AN_1000X_AHD; - - adv |= fiber_pause_map[skge->flow_control]; - } + /* Set Flow-control capabilities */ + adv |= phy_pause_map[skge->flow_control]; /* Restart Auto-negotiation */ ctrl |= PHY_CT_ANE | PHY_CT_RE_CFG; @@ -1962,11 +1917,6 @@ static void yukon_mac_init(struct skge_hw *hw, int port) case FLOW_MODE_LOC_SEND: /* disable Rx flow-control */ reg |= GM_GPCR_FC_RX_DIS | GM_GPCR_AU_FCT_DIS; - break; - case FLOW_MODE_SYMMETRIC: - case FLOW_MODE_SYM_OR_REM: - /* enable Tx & Rx flow-control */ - break; } gma_write16(hw, port, GM_GP_CTRL, reg); @@ -2161,11 +2111,13 @@ static void yukon_link_down(struct skge_port *skge) ctrl &= ~(GM_GPCR_RX_ENA | GM_GPCR_TX_ENA); gma_write16(hw, port, GM_GP_CTRL, ctrl); - if (skge->flow_status == FLOW_STAT_REM_SEND) { - ctrl = gm_phy_read(hw, port, PHY_MARV_AUNE_ADV); - ctrl |= PHY_M_AN_ASP; + if (skge->flow_control == FLOW_MODE_REM_SEND) { /* restore Asymmetric Pause bit */ - gm_phy_write(hw, port, PHY_MARV_AUNE_ADV, ctrl); + gm_phy_write(hw, port, PHY_MARV_AUNE_ADV, + gm_phy_read(hw, port, + PHY_MARV_AUNE_ADV) + | PHY_M_AN_ASP); + } yukon_reset(hw, port); @@ -2212,19 +2164,19 @@ static void yukon_phy_intr(struct skge_port *skge) /* We are using IEEE 802.3z/D5.0 Table 37-4 */ switch (phystat & PHY_M_PS_PAUSE_MSK) { case PHY_M_PS_PAUSE_MSK: - skge->flow_status = FLOW_STAT_SYMMETRIC; + skge->flow_control = FLOW_MODE_SYMMETRIC; break; case PHY_M_PS_RX_P_EN: - skge->flow_status = FLOW_STAT_REM_SEND; + skge->flow_control = FLOW_MODE_REM_SEND; break; case PHY_M_PS_TX_P_EN: - skge->flow_status = FLOW_STAT_LOC_SEND; + skge->flow_control = FLOW_MODE_LOC_SEND; break; default: - skge->flow_status = FLOW_STAT_NONE; + skge->flow_control = FLOW_MODE_NONE; } - if (skge->flow_status == FLOW_STAT_NONE || + if (skge->flow_control == FLOW_MODE_NONE || (skge->speed < SPEED_1000 && skge->duplex == DUPLEX_HALF)) skge_write8(hw, SK_REG(port, GMAC_CTRL), GMC_PAUSE_OFF); else @@ -3447,7 +3399,7 @@ static struct net_device *skge_devinit(struct skge_hw *hw, int port, /* Auto speed and flow control */ skge->autoneg = AUTONEG_ENABLE; - skge->flow_control = FLOW_MODE_SYM_OR_REM; + skge->flow_control = FLOW_MODE_SYMMETRIC; skge->duplex = -1; skge->speed = -1; skge->advertising = skge_supported_modes(hw); diff --git a/trunk/drivers/net/skge.h b/trunk/drivers/net/skge.h index 537c0aaa1db8..d0b47d46cf9d 100644 --- a/trunk/drivers/net/skge.h +++ b/trunk/drivers/net/skge.h @@ -2195,8 +2195,7 @@ enum { XM_IS_RX_COMP = 1<<0, /* Bit 0: Frame Rx Complete */ }; -#define XM_DEF_MSK (~(XM_IS_INP_ASS | XM_IS_LIPA_RC | \ - XM_IS_RXF_OV | XM_IS_TXF_UR)) +#define XM_DEF_MSK (~(XM_IS_RXC_OV | XM_IS_TXC_OV | XM_IS_RXF_OV | XM_IS_TXF_UR)) /* XM_HW_CFG 16 bit r/w Hardware Config Register */ @@ -2427,24 +2426,13 @@ struct skge_hw { struct mutex phy_mutex; }; -enum pause_control { - FLOW_MODE_NONE = 1, /* No Flow-Control */ - FLOW_MODE_LOC_SEND = 2, /* Local station sends PAUSE */ +enum { + FLOW_MODE_NONE = 0, /* No Flow-Control */ + FLOW_MODE_LOC_SEND = 1, /* Local station sends PAUSE */ + FLOW_MODE_REM_SEND = 2, /* Symmetric or just remote */ FLOW_MODE_SYMMETRIC = 3, /* Both stations may send PAUSE */ - FLOW_MODE_SYM_OR_REM = 4, /* Both stations may send PAUSE or - * just the remote station may send PAUSE - */ -}; - -enum pause_status { - FLOW_STAT_INDETERMINATED=0, /* indeterminated */ - FLOW_STAT_NONE, /* No Flow Control */ - FLOW_STAT_REM_SEND, /* Remote Station sends PAUSE */ - FLOW_STAT_LOC_SEND, /* Local station sends PAUSE */ - FLOW_STAT_SYMMETRIC, /* Both station may send PAUSE */ }; - struct skge_port { u32 msg_enable; struct skge_hw *hw; @@ -2457,10 +2445,9 @@ struct skge_port { struct net_device_stats net_stats; struct work_struct link_thread; - enum pause_control flow_control; - enum pause_status flow_status; u8 rx_csum; u8 blink_on; + u8 flow_control; u8 wol; u8 autoneg; /* AUTONEG_ENABLE, AUTONEG_DISABLE */ u8 duplex; /* DUPLEX_HALF, DUPLEX_FULL */ diff --git a/trunk/drivers/net/sky2.c b/trunk/drivers/net/sky2.c index c10e7f5faa5f..459c845d6648 100644 --- a/trunk/drivers/net/sky2.c +++ b/trunk/drivers/net/sky2.c @@ -683,7 +683,7 @@ static void sky2_mac_init(struct sky2_hw *hw, unsigned port) sky2_write16(hw, SK_REG(port, TX_GMF_CTRL_T), GMF_OPER_ON); if (hw->chip_id == CHIP_ID_YUKON_EC_U) { - sky2_write8(hw, SK_REG(port, RX_GMF_LP_THR), 512/8); + sky2_write8(hw, SK_REG(port, RX_GMF_LP_THR), 768/8); sky2_write8(hw, SK_REG(port, RX_GMF_UP_THR), 1024/8); if (hw->dev[port]->mtu > ETH_DATA_LEN) { /* set Tx GMAC FIFO Almost Empty Threshold */ @@ -1907,7 +1907,7 @@ static struct sk_buff *receive_copy(struct sky2_port *sky2, pci_dma_sync_single_for_device(sky2->hw->pdev, re->data_addr, length, PCI_DMA_FROMDEVICE); re->skb->ip_summed = CHECKSUM_NONE; - skb_put(skb, length); + __skb_put(skb, length); } return skb; } @@ -1970,7 +1970,7 @@ static struct sk_buff *receive_new(struct sky2_port *sky2, if (skb_shinfo(skb)->nr_frags) skb_put_frags(skb, hdr_space, length); else - skb_put(skb, length); + skb_put(skb, hdr_space); return skb; } @@ -2220,7 +2220,8 @@ static void sky2_hw_intr(struct sky2_hw *hw) /* PCI-Express uncorrectable Error occurred */ u32 pex_err; - pex_err = sky2_pci_read32(hw, PEX_UNC_ERR_STAT); + pex_err = sky2_pci_read32(hw, + hw->err_cap + PCI_ERR_UNCOR_STATUS); if (net_ratelimit()) printk(KERN_ERR PFX "%s: pci express error (0x%x)\n", @@ -2228,15 +2229,20 @@ static void sky2_hw_intr(struct sky2_hw *hw) /* clear the interrupt */ sky2_write32(hw, B2_TST_CTRL1, TST_CFG_WRITE_ON); - sky2_pci_write32(hw, PEX_UNC_ERR_STAT, - 0xffffffffUL); + sky2_pci_write32(hw, + hw->err_cap + PCI_ERR_UNCOR_STATUS, + 0xffffffffUL); sky2_write32(hw, B2_TST_CTRL1, TST_CFG_WRITE_OFF); - if (pex_err & PEX_FATAL_ERRORS) { + + /* In case of fatal error mask off to keep from getting stuck */ + if (pex_err & (PCI_ERR_UNC_POISON_TLP | PCI_ERR_UNC_FCP + | PCI_ERR_UNC_DLP)) { u32 hwmsk = sky2_read32(hw, B0_HWE_IMSK); hwmsk &= ~Y2_IS_PCI_EXP; sky2_write32(hw, B0_HWE_IMSK, hwmsk); } + } if (status & Y2_HWE_L1_MASK) @@ -2417,6 +2423,7 @@ static int sky2_reset(struct sky2_hw *hw) u16 status; u8 t8; int i; + u32 msk; sky2_write8(hw, B0_CTST, CS_RST_CLR); @@ -2457,9 +2464,13 @@ static int sky2_reset(struct sky2_hw *hw) sky2_write8(hw, B0_CTST, CS_MRST_CLR); /* clear any PEX errors */ - if (pci_find_capability(hw->pdev, PCI_CAP_ID_EXP)) - sky2_pci_write32(hw, PEX_UNC_ERR_STAT, 0xffffffffUL); - + if (pci_find_capability(hw->pdev, PCI_CAP_ID_EXP)) { + hw->err_cap = pci_find_ext_capability(hw->pdev, PCI_EXT_CAP_ID_ERR); + if (hw->err_cap) + sky2_pci_write32(hw, + hw->err_cap + PCI_ERR_UNCOR_STATUS, + 0xffffffffUL); + } hw->pmd_type = sky2_read8(hw, B2_PMD_TYP); hw->ports = 1; @@ -2516,7 +2527,10 @@ static int sky2_reset(struct sky2_hw *hw) sky2_write8(hw, RAM_BUFFER(i, B3_RI_RTO_XS2), SK_RI_TO_53); } - sky2_write32(hw, B0_HWE_IMSK, Y2_HWE_ALL_MASK); + msk = Y2_HWE_ALL_MASK; + if (!hw->err_cap) + msk &= ~Y2_IS_PCI_EXP; + sky2_write32(hw, B0_HWE_IMSK, msk); for (i = 0; i < hw->ports; i++) sky2_gmac_reset(hw, i); diff --git a/trunk/drivers/net/sky2.h b/trunk/drivers/net/sky2.h index 43d2accf60e1..f66109a96d95 100644 --- a/trunk/drivers/net/sky2.h +++ b/trunk/drivers/net/sky2.h @@ -6,24 +6,15 @@ #define ETH_JUMBO_MTU 9000 /* Maximum MTU supported */ -/* PCI config registers */ +/* PCI device specific config registers */ enum { PCI_DEV_REG1 = 0x40, PCI_DEV_REG2 = 0x44, - PCI_DEV_STATUS = 0x7c, PCI_DEV_REG3 = 0x80, PCI_DEV_REG4 = 0x84, PCI_DEV_REG5 = 0x88, }; -enum { - PEX_DEV_CAP = 0xe4, - PEX_DEV_CTRL = 0xe8, - PEX_DEV_STA = 0xea, - PEX_LNK_STAT = 0xf2, - PEX_UNC_ERR_STAT= 0x104, -}; - /* Yukon-2 */ enum pci_dev_reg_1 { PCI_Y2_PIG_ENA = 1<<31, /* Enable Plug-in-Go (YUKON-2) */ @@ -72,39 +63,6 @@ enum pci_dev_reg_4 { PCI_STATUS_REC_MASTER_ABORT | \ PCI_STATUS_REC_TARGET_ABORT | \ PCI_STATUS_PARITY) - -enum pex_dev_ctrl { - PEX_DC_MAX_RRS_MSK = 7<<12, /* Bit 14..12: Max. Read Request Size */ - PEX_DC_EN_NO_SNOOP = 1<<11,/* Enable No Snoop */ - PEX_DC_EN_AUX_POW = 1<<10,/* Enable AUX Power */ - PEX_DC_EN_PHANTOM = 1<<9, /* Enable Phantom Functions */ - PEX_DC_EN_EXT_TAG = 1<<8, /* Enable Extended Tag Field */ - PEX_DC_MAX_PLS_MSK = 7<<5, /* Bit 7.. 5: Max. Payload Size Mask */ - PEX_DC_EN_REL_ORD = 1<<4, /* Enable Relaxed Ordering */ - PEX_DC_EN_UNS_RQ_RP = 1<<3, /* Enable Unsupported Request Reporting */ - PEX_DC_EN_FAT_ER_RP = 1<<2, /* Enable Fatal Error Reporting */ - PEX_DC_EN_NFA_ER_RP = 1<<1, /* Enable Non-Fatal Error Reporting */ - PEX_DC_EN_COR_ER_RP = 1<<0, /* Enable Correctable Error Reporting */ -}; -#define PEX_DC_MAX_RD_RQ_SIZE(x) (((x)<<12) & PEX_DC_MAX_RRS_MSK) - -/* PEX_UNC_ERR_STAT PEX Uncorrectable Errors Status Register (Yukon-2) */ -enum pex_err { - PEX_UNSUP_REQ = 1<<20, /* Unsupported Request Error */ - - PEX_MALFOR_TLP = 1<<18, /* Malformed TLP */ - - PEX_UNEXP_COMP = 1<<16, /* Unexpected Completion */ - - PEX_COMP_TO = 1<<14, /* Completion Timeout */ - PEX_FLOW_CTRL_P = 1<<13, /* Flow Control Protocol Error */ - PEX_POIS_TLP = 1<<12, /* Poisoned TLP */ - - PEX_DATA_LINK_P = 1<<4, /* Data Link Protocol Error */ - PEX_FATAL_ERRORS= (PEX_MALFOR_TLP | PEX_FLOW_CTRL_P | PEX_DATA_LINK_P), -}; - - enum csr_regs { B0_RAP = 0x0000, B0_CTST = 0x0004, @@ -1878,6 +1836,7 @@ struct sky2_hw { struct net_device *dev[2]; int pm_cap; + int err_cap; u8 chip_id; u8 chip_rev; u8 pmd_type; diff --git a/trunk/drivers/net/smc91x.h b/trunk/drivers/net/smc91x.h index 0c9f1e7dab2e..636dbfcdf8cb 100644 --- a/trunk/drivers/net/smc91x.h +++ b/trunk/drivers/net/smc91x.h @@ -398,24 +398,6 @@ static inline void LPD7_SMC_outsw (unsigned char* a, int r, #define SMC_IRQ_FLAGS (0) -#elif defined(CONFIG_ARCH_VERSATILE) - -#define SMC_CAN_USE_8BIT 1 -#define SMC_CAN_USE_16BIT 1 -#define SMC_CAN_USE_32BIT 1 -#define SMC_NOWAIT 1 - -#define SMC_inb(a, r) readb((a) + (r)) -#define SMC_inw(a, r) readw((a) + (r)) -#define SMC_inl(a, r) readl((a) + (r)) -#define SMC_outb(v, a, r) writeb(v, (a) + (r)) -#define SMC_outw(v, a, r) writew(v, (a) + (r)) -#define SMC_outl(v, a, r) writel(v, (a) + (r)) -#define SMC_insl(a, r, p, l) readsl((a) + (r), p, l) -#define SMC_outsl(a, r, p, l) writesl((a) + (r), p, l) - -#define SMC_IRQ_FLAGS (0) - #else #define SMC_CAN_USE_8BIT 1 diff --git a/trunk/drivers/net/spider_net.c b/trunk/drivers/net/spider_net.c index 418138dd6c68..46a009085f7c 100644 --- a/trunk/drivers/net/spider_net.c +++ b/trunk/drivers/net/spider_net.c @@ -55,13 +55,12 @@ MODULE_AUTHOR("Utz Bacher and Jens Osterkamp " \ ""); MODULE_DESCRIPTION("Spider Southbridge Gigabit Ethernet driver"); MODULE_LICENSE("GPL"); -MODULE_VERSION(VERSION); static int rx_descriptors = SPIDER_NET_RX_DESCRIPTORS_DEFAULT; static int tx_descriptors = SPIDER_NET_TX_DESCRIPTORS_DEFAULT; -module_param(rx_descriptors, int, 0444); -module_param(tx_descriptors, int, 0444); +module_param(rx_descriptors, int, 0644); +module_param(tx_descriptors, int, 0644); MODULE_PARM_DESC(rx_descriptors, "number of descriptors used " \ "in rx chains"); @@ -301,7 +300,7 @@ static int spider_net_init_chain(struct spider_net_card *card, struct spider_net_descr_chain *chain, struct spider_net_descr *start_descr, - int no) + int direction, int no) { int i; struct spider_net_descr *descr; @@ -316,7 +315,7 @@ spider_net_init_chain(struct spider_net_card *card, buf = pci_map_single(card->pdev, descr, SPIDER_NET_DESCR_SIZE, - PCI_DMA_BIDIRECTIONAL); + direction); if (pci_dma_mapping_error(buf)) goto iommu_error; @@ -330,6 +329,11 @@ spider_net_init_chain(struct spider_net_card *card, (descr-1)->next = start_descr; start_descr->prev = descr-1; + descr = start_descr; + if (direction == PCI_DMA_FROMDEVICE) + for (i=0; i < no; i++, descr++) + descr->next_descr_addr = descr->next->bus_addr; + spin_lock_init(&chain->lock); chain->head = start_descr; chain->tail = start_descr; @@ -342,7 +346,7 @@ spider_net_init_chain(struct spider_net_card *card, if (descr->bus_addr) pci_unmap_single(card->pdev, descr->bus_addr, SPIDER_NET_DESCR_SIZE, - PCI_DMA_BIDIRECTIONAL); + direction); return -ENOMEM; } @@ -358,15 +362,15 @@ spider_net_free_rx_chain_contents(struct spider_net_card *card) struct spider_net_descr *descr; descr = card->rx_chain.head; - do { + while (descr->next != card->rx_chain.head) { if (descr->skb) { dev_kfree_skb(descr->skb); pci_unmap_single(card->pdev, descr->buf_addr, SPIDER_NET_MAX_FRAME, - PCI_DMA_BIDIRECTIONAL); + PCI_DMA_FROMDEVICE); } descr = descr->next; - } while (descr != card->rx_chain.head); + } } /** @@ -641,41 +645,26 @@ static int spider_net_prepare_tx_descr(struct spider_net_card *card, struct sk_buff *skb) { - struct spider_net_descr *descr; + struct spider_net_descr *descr = card->tx_chain.head; dma_addr_t buf; - unsigned long flags; - int length; - length = skb->len; - if (length < ETH_ZLEN) { - if (skb_pad(skb, ETH_ZLEN-length)) - return 0; - length = ETH_ZLEN; - } - - buf = pci_map_single(card->pdev, skb->data, length, PCI_DMA_TODEVICE); + buf = pci_map_single(card->pdev, skb->data, skb->len, PCI_DMA_TODEVICE); if (pci_dma_mapping_error(buf)) { if (netif_msg_tx_err(card) && net_ratelimit()) pr_err("could not iommu-map packet (%p, %i). " - "Dropping packet\n", skb->data, length); + "Dropping packet\n", skb->data, skb->len); card->spider_stats.tx_iommu_map_error++; return -ENOMEM; } - spin_lock_irqsave(&card->tx_chain.lock, flags); - descr = card->tx_chain.head; - card->tx_chain.head = descr->next; - descr->buf_addr = buf; - descr->buf_size = length; + descr->buf_size = skb->len; descr->next_descr_addr = 0; descr->skb = skb; descr->data_status = 0; descr->dmac_cmd_status = SPIDER_NET_DESCR_CARDOWNED | SPIDER_NET_DMAC_NOCS; - spin_unlock_irqrestore(&card->tx_chain.lock, flags); - if (skb->protocol == htons(ETH_P_IP)) switch (skb->nh.iph->protocol) { case IPPROTO_TCP: @@ -686,51 +675,32 @@ spider_net_prepare_tx_descr(struct spider_net_card *card, break; } - /* Chain the bus address, so that the DMA engine finds this descr. */ descr->prev->next_descr_addr = descr->bus_addr; - card->netdev->trans_start = jiffies; /* set netdev watchdog timer */ return 0; } -static int -spider_net_set_low_watermark(struct spider_net_card *card) +/** + * spider_net_release_tx_descr - processes a used tx descriptor + * @card: card structure + * @descr: descriptor to release + * + * releases a used tx descriptor (unmapping, freeing of skb) + */ +static inline void +spider_net_release_tx_descr(struct spider_net_card *card) { - unsigned long flags; - int status; - int cnt=0; - int i; struct spider_net_descr *descr = card->tx_chain.tail; + struct sk_buff *skb; - /* Measure the length of the queue. Measurement does not - * need to be precise -- does not need a lock. */ - while (descr != card->tx_chain.head) { - status = descr->dmac_cmd_status & SPIDER_NET_DESCR_NOT_IN_USE; - if (status == SPIDER_NET_DESCR_NOT_IN_USE) - break; - descr = descr->next; - cnt++; - } - - /* If TX queue is short, don't even bother with interrupts */ - if (cnt < card->num_tx_desc/4) - return cnt; - - /* Set low-watermark 3/4th's of the way into the queue. */ - descr = card->tx_chain.tail; - cnt = (cnt*3)/4; - for (i=0;inext; + card->tx_chain.tail = card->tx_chain.tail->next; + descr->dmac_cmd_status |= SPIDER_NET_DESCR_NOT_IN_USE; - /* Set the new watermark, clear the old watermark */ - spin_lock_irqsave(&card->tx_chain.lock, flags); - descr->dmac_cmd_status |= SPIDER_NET_DESCR_TXDESFLG; - if (card->low_watermark && card->low_watermark != descr) - card->low_watermark->dmac_cmd_status = - card->low_watermark->dmac_cmd_status & ~SPIDER_NET_DESCR_TXDESFLG; - card->low_watermark = descr; - spin_unlock_irqrestore(&card->tx_chain.lock, flags); - return cnt; + /* unmap the skb */ + skb = descr->skb; + pci_unmap_single(card->pdev, descr->buf_addr, skb->len, + PCI_DMA_TODEVICE); + dev_kfree_skb_any(skb); } /** @@ -749,29 +719,21 @@ static int spider_net_release_tx_chain(struct spider_net_card *card, int brutal) { struct spider_net_descr_chain *chain = &card->tx_chain; - struct spider_net_descr *descr; - struct sk_buff *skb; - u32 buf_addr; - unsigned long flags; int status; - while (chain->tail != chain->head) { - spin_lock_irqsave(&chain->lock, flags); - descr = chain->tail; + spider_net_read_reg(card, SPIDER_NET_GDTDMACCNTR); - status = spider_net_get_descr_status(descr); + while (chain->tail != chain->head) { + status = spider_net_get_descr_status(chain->tail); switch (status) { case SPIDER_NET_DESCR_COMPLETE: card->netdev_stats.tx_packets++; - card->netdev_stats.tx_bytes += descr->skb->len; + card->netdev_stats.tx_bytes += chain->tail->skb->len; break; case SPIDER_NET_DESCR_CARDOWNED: - if (!brutal) { - spin_unlock_irqrestore(&chain->lock, flags); + if (!brutal) return 1; - } - /* fallthrough, if we release the descriptors * brutally (then we don't care about * SPIDER_NET_DESCR_CARDOWNED) */ @@ -788,25 +750,11 @@ spider_net_release_tx_chain(struct spider_net_card *card, int brutal) default: card->netdev_stats.tx_dropped++; - if (!brutal) { - spin_unlock_irqrestore(&chain->lock, flags); - return 1; - } - } - - chain->tail = descr->next; - descr->dmac_cmd_status |= SPIDER_NET_DESCR_NOT_IN_USE; - skb = descr->skb; - buf_addr = descr->buf_addr; - spin_unlock_irqrestore(&chain->lock, flags); - - /* unmap the skb */ - if (skb) { - int len = skb->len < ETH_ZLEN ? ETH_ZLEN : skb->len; - pci_unmap_single(card->pdev, buf_addr, len, PCI_DMA_TODEVICE); - dev_kfree_skb(skb); + return 1; } + spider_net_release_tx_descr(card); } + return 0; } @@ -815,12 +763,8 @@ spider_net_release_tx_chain(struct spider_net_card *card, int brutal) * @card: card structure * @descr: descriptor address to enable TX processing at * - * This routine will start the transmit DMA running if - * it is not already running. This routine ned only be - * called when queueing a new packet to an empty tx queue. - * Writes the current tx chain head as start address - * of the tx descriptor chain and enables the transmission - * DMA engine. + * spider_net_kick_tx_dma writes the current tx chain head as start address + * of the tx descriptor chain and enables the transmission DMA engine */ static inline void spider_net_kick_tx_dma(struct spider_net_card *card) @@ -860,43 +804,65 @@ spider_net_kick_tx_dma(struct spider_net_card *card) static int spider_net_xmit(struct sk_buff *skb, struct net_device *netdev) { - int cnt; struct spider_net_card *card = netdev_priv(netdev); struct spider_net_descr_chain *chain = &card->tx_chain; + struct spider_net_descr *descr = chain->head; + unsigned long flags; + int result; + + spin_lock_irqsave(&chain->lock, flags); spider_net_release_tx_chain(card, 0); - if ((chain->head->next == chain->tail->prev) || - (spider_net_prepare_tx_descr(card, skb) != 0)) { + if (chain->head->next == chain->tail->prev) { + card->netdev_stats.tx_dropped++; + result = NETDEV_TX_LOCKED; + goto out; + } + if (spider_net_get_descr_status(descr) != SPIDER_NET_DESCR_NOT_IN_USE) { card->netdev_stats.tx_dropped++; - netif_stop_queue(netdev); - return NETDEV_TX_BUSY; + result = NETDEV_TX_LOCKED; + goto out; } - cnt = spider_net_set_low_watermark(card); - if (cnt < 5) - spider_net_kick_tx_dma(card); - return NETDEV_TX_OK; + if (spider_net_prepare_tx_descr(card, skb) != 0) { + card->netdev_stats.tx_dropped++; + result = NETDEV_TX_BUSY; + goto out; + } + + result = NETDEV_TX_OK; + + spider_net_kick_tx_dma(card); + card->tx_chain.head = card->tx_chain.head->next; + +out: + spin_unlock_irqrestore(&chain->lock, flags); + netif_wake_queue(netdev); + return result; } /** * spider_net_cleanup_tx_ring - cleans up the TX ring * @card: card structure * - * spider_net_cleanup_tx_ring is called by either the tx_timer - * or from the NAPI polling routine. - * This routine releases resources associted with transmitted - * packets, including updating the queue tail pointer. + * spider_net_cleanup_tx_ring is called by the tx_timer (as we don't use + * interrupts to cleanup our TX ring) and returns sent packets to the stack + * by freeing them */ static void spider_net_cleanup_tx_ring(struct spider_net_card *card) { + unsigned long flags; + + spin_lock_irqsave(&card->tx_chain.lock, flags); + if ((spider_net_release_tx_chain(card, 0) != 0) && - (card->netdev->flags & IFF_UP)) { + (card->netdev->flags & IFF_UP)) spider_net_kick_tx_dma(card); - netif_wake_queue(card->netdev); - } + + spin_unlock_irqrestore(&card->tx_chain.lock, flags); } /** @@ -1087,7 +1053,6 @@ spider_net_poll(struct net_device *netdev, int *budget) int packets_to_do, packets_done = 0; int no_more_packets = 0; - spider_net_cleanup_tx_ring(card); packets_to_do = min(*budget, netdev->quota); while (packets_to_do) { @@ -1278,15 +1243,12 @@ spider_net_handle_error_irq(struct spider_net_card *card, u32 status_reg) case SPIDER_NET_PHYINT: case SPIDER_NET_GMAC2INT: case SPIDER_NET_GMAC1INT: + case SPIDER_NET_GIPSINT: case SPIDER_NET_GFIFOINT: case SPIDER_NET_DMACINT: case SPIDER_NET_GSYSINT: break; */ - case SPIDER_NET_GIPSINT: - show_error = 0; - break; - case SPIDER_NET_GPWOPCMPINT: /* PHY write operation completed */ show_error = 0; @@ -1345,10 +1307,9 @@ spider_net_handle_error_irq(struct spider_net_card *card, u32 status_reg) case SPIDER_NET_GDTDCEINT: /* chain end. If a descriptor should be sent, kick off * tx dma - if (card->tx_chain.tail != card->tx_chain.head) + if (card->tx_chain.tail == card->tx_chain.head) spider_net_kick_tx_dma(card); - */ - show_error = 0; + show_error = 0; */ break; /* case SPIDER_NET_G1TMCNTINT: not used. print a message */ @@ -1393,7 +1354,7 @@ spider_net_handle_error_irq(struct spider_net_card *card, u32 status_reg) if (netif_msg_intr(card)) pr_err("got descriptor chain end interrupt, " "restarting DMAC %c.\n", - 'D'-(i-SPIDER_NET_GDDDCEINT)/3); + 'D'+i-SPIDER_NET_GDDDCEINT); spider_net_refill_rx_chain(card); spider_net_enable_rxdmac(card); show_error = 0; @@ -1462,9 +1423,8 @@ spider_net_handle_error_irq(struct spider_net_card *card, u32 status_reg) } if ((show_error) && (netif_msg_intr(card))) - pr_err("Got error interrupt on %s, GHIINT0STS = 0x%08x, " + pr_err("Got error interrupt, GHIINT0STS = 0x%08x, " "GHIINT1STS = 0x%08x, GHIINT2STS = 0x%08x\n", - card->netdev->name, status_reg, error_reg1, error_reg2); /* clear interrupt sources */ @@ -1500,8 +1460,6 @@ spider_net_interrupt(int irq, void *ptr) spider_net_rx_irq_off(card); netif_rx_schedule(netdev); } - if (status_reg & SPIDER_NET_TXINT) - netif_rx_schedule(netdev); if (status_reg & SPIDER_NET_ERRINT ) spider_net_handle_error_irq(card, status_reg); @@ -1641,7 +1599,7 @@ spider_net_enable_card(struct spider_net_card *card) SPIDER_NET_INT2_MASK_VALUE); spider_net_write_reg(card, SPIDER_NET_GDTDMACCNTR, - SPIDER_NET_GDTBSTA | SPIDER_NET_GDTDCEIDIS); + SPIDER_NET_GDTDCEIDIS); } /** @@ -1657,26 +1615,17 @@ int spider_net_open(struct net_device *netdev) { struct spider_net_card *card = netdev_priv(netdev); - struct spider_net_descr *descr; - int i, result; + int result; result = -ENOMEM; if (spider_net_init_chain(card, &card->tx_chain, card->descr, - card->num_tx_desc)) + PCI_DMA_TODEVICE, card->tx_desc)) goto alloc_tx_failed; - - card->low_watermark = NULL; - - /* rx_chain is after tx_chain, so offset is descr + tx_count */ if (spider_net_init_chain(card, &card->rx_chain, - card->descr + card->num_tx_desc, - card->num_rx_desc)) + card->descr + card->rx_desc, + PCI_DMA_FROMDEVICE, card->rx_desc)) goto alloc_rx_failed; - descr = card->rx_chain.head; - for (i=0; i < card->num_rx_desc; i++, descr++) - descr->next_descr_addr = descr->next->bus_addr; - /* allocate rx skbs */ if (spider_net_alloc_rx_skbs(card)) goto alloc_skbs_failed; @@ -1929,7 +1878,10 @@ spider_net_stop(struct net_device *netdev) spider_net_disable_rxdmac(card); /* release chains */ - spider_net_release_tx_chain(card, 1); + if (spin_trylock(&card->tx_chain.lock)) { + spider_net_release_tx_chain(card, 1); + spin_unlock(&card->tx_chain.lock); + } spider_net_free_chain(card, &card->tx_chain); spider_net_free_chain(card, &card->rx_chain); @@ -2060,8 +2012,8 @@ spider_net_setup_netdev(struct spider_net_card *card) card->options.rx_csum = SPIDER_NET_RX_CSUM_DEFAULT; - card->num_tx_desc = tx_descriptors; - card->num_rx_desc = rx_descriptors; + card->tx_desc = tx_descriptors; + card->rx_desc = rx_descriptors; spider_net_setup_netdev_ops(netdev); @@ -2300,8 +2252,6 @@ static struct pci_driver spider_net_driver = { */ static int __init spider_net_init(void) { - printk(KERN_INFO "Spidernet version %s.\n", VERSION); - if (rx_descriptors < SPIDER_NET_RX_DESCRIPTORS_MIN) { rx_descriptors = SPIDER_NET_RX_DESCRIPTORS_MIN; pr_info("adjusting rx descriptors to %i.\n", rx_descriptors); diff --git a/trunk/drivers/net/spider_net.h b/trunk/drivers/net/spider_net.h index b3b46119b424..a59deda2f95e 100644 --- a/trunk/drivers/net/spider_net.h +++ b/trunk/drivers/net/spider_net.h @@ -24,8 +24,6 @@ #ifndef _SPIDER_NET_H #define _SPIDER_NET_H -#define VERSION "1.1 A" - #include "sungem_phy.h" extern int spider_net_stop(struct net_device *netdev); @@ -49,7 +47,7 @@ extern char spider_net_driver_name[]; #define SPIDER_NET_TX_DESCRIPTORS_MIN 16 #define SPIDER_NET_TX_DESCRIPTORS_MAX 512 -#define SPIDER_NET_TX_TIMER (HZ/5) +#define SPIDER_NET_TX_TIMER 20 #define SPIDER_NET_RX_CSUM_DEFAULT 1 @@ -191,9 +189,7 @@ extern char spider_net_driver_name[]; #define SPIDER_NET_MACMODE_VALUE 0x00000001 #define SPIDER_NET_BURSTLMT_VALUE 0x00000200 /* about 16 us */ -/* DMAC control register GDMACCNTR - * - * 1(0) enable r/tx dma +/* 1(0) enable r/tx dma * 0000000 fixed to 0 * * 000000 fixed to 0 @@ -202,7 +198,6 @@ extern char spider_net_driver_name[]; * * 000000 fixed to 0 * 00 burst alignment: 128 bytes - * 11 burst alignment: 1024 bytes * * 00000 fixed to 0 * 0 descr writeback size 32 bytes @@ -213,13 +208,10 @@ extern char spider_net_driver_name[]; #define SPIDER_NET_DMA_RX_VALUE 0x80000000 #define SPIDER_NET_DMA_RX_FEND_VALUE 0x00030003 /* to set TX_DMA_EN */ -#define SPIDER_NET_TX_DMA_EN 0x80000000 -#define SPIDER_NET_GDTBSTA 0x00000300 -#define SPIDER_NET_GDTDCEIDIS 0x00000002 -#define SPIDER_NET_DMA_TX_VALUE SPIDER_NET_TX_DMA_EN | \ - SPIDER_NET_GDTBSTA | \ - SPIDER_NET_GDTDCEIDIS - +#define SPIDER_NET_TX_DMA_EN 0x80000000 +#define SPIDER_NET_GDTDCEIDIS 0x00000002 +#define SPIDER_NET_DMA_TX_VALUE SPIDER_NET_TX_DMA_EN | \ + SPIDER_NET_GDTDCEIDIS #define SPIDER_NET_DMA_TX_FEND_VALUE 0x00030003 /* SPIDER_NET_UA_DESCR_VALUE is OR'ed with the unicast address */ @@ -328,10 +320,13 @@ enum spider_net_int2_status { SPIDER_NET_GRISPDNGINT }; -#define SPIDER_NET_TXINT ( (1 << SPIDER_NET_GDTFDCINT) ) +#define SPIDER_NET_TXINT ( (1 << SPIDER_NET_GTTEDINT) | \ + (1 << SPIDER_NET_GDTDCEINT) | \ + (1 << SPIDER_NET_GDTFDCINT) ) -/* We rely on flagged descriptor interrupts */ -#define SPIDER_NET_RXINT ( (1 << SPIDER_NET_GDAFDCINT) ) +/* we rely on flagged descriptor interrupts*/ +#define SPIDER_NET_RXINT ( (1 << SPIDER_NET_GDAFDCINT) | \ + (1 << SPIDER_NET_GRMFLLINT) ) #define SPIDER_NET_ERRINT ( 0xffffffff & \ (~SPIDER_NET_TXINT) & \ @@ -354,7 +349,6 @@ enum spider_net_int2_status { #define SPIDER_NET_DESCR_FORCE_END 0x50000000 /* used in rx and tx */ #define SPIDER_NET_DESCR_CARDOWNED 0xA0000000 /* used in rx and tx */ #define SPIDER_NET_DESCR_NOT_IN_USE 0xF0000000 -#define SPIDER_NET_DESCR_TXDESFLG 0x00800000 struct spider_net_descr { /* as defined by the hardware */ @@ -439,7 +433,6 @@ struct spider_net_card { struct spider_net_descr_chain tx_chain; struct spider_net_descr_chain rx_chain; - struct spider_net_descr *low_watermark; struct net_device_stats netdev_stats; @@ -455,8 +448,8 @@ struct spider_net_card { /* for ethtool */ int msg_enable; - int num_rx_desc; - int num_tx_desc; + int rx_desc; + int tx_desc; struct spider_net_extra_stats spider_stats; struct spider_net_descr descr[0]; diff --git a/trunk/drivers/net/spider_net_ethtool.c b/trunk/drivers/net/spider_net_ethtool.c index 91b995102915..589e43658dee 100644 --- a/trunk/drivers/net/spider_net_ethtool.c +++ b/trunk/drivers/net/spider_net_ethtool.c @@ -76,7 +76,7 @@ spider_net_ethtool_get_drvinfo(struct net_device *netdev, /* clear and fill out info */ memset(drvinfo, 0, sizeof(struct ethtool_drvinfo)); strncpy(drvinfo->driver, spider_net_driver_name, 32); - strncpy(drvinfo->version, VERSION, 32); + strncpy(drvinfo->version, "0.1", 32); strcpy(drvinfo->fw_version, "no information"); strncpy(drvinfo->bus_info, pci_name(card->pdev), 32); } @@ -158,9 +158,9 @@ spider_net_ethtool_get_ringparam(struct net_device *netdev, struct spider_net_card *card = netdev->priv; ering->tx_max_pending = SPIDER_NET_TX_DESCRIPTORS_MAX; - ering->tx_pending = card->num_tx_desc; + ering->tx_pending = card->tx_desc; ering->rx_max_pending = SPIDER_NET_RX_DESCRIPTORS_MAX; - ering->rx_pending = card->num_rx_desc; + ering->rx_pending = card->rx_desc; } static int spider_net_get_stats_count(struct net_device *netdev) diff --git a/trunk/drivers/net/sun3_82586.c b/trunk/drivers/net/sun3_82586.c index a3220a96524f..d1d1885b0295 100644 --- a/trunk/drivers/net/sun3_82586.c +++ b/trunk/drivers/net/sun3_82586.c @@ -330,7 +330,7 @@ struct net_device * __init sun3_82586_probe(int unit) out1: free_netdev(dev); out: - iounmap((void __iomem *)ioaddr); + iounmap((void *)ioaddr); return ERR_PTR(err); } diff --git a/trunk/drivers/net/sun3lance.c b/trunk/drivers/net/sun3lance.c index b865db363ba0..91c76544e4dd 100644 --- a/trunk/drivers/net/sun3lance.c +++ b/trunk/drivers/net/sun3lance.c @@ -286,7 +286,7 @@ struct net_device * __init sun3lance_probe(int unit) out1: #ifdef CONFIG_SUN3 - iounmap((void __iomem *)dev->base_addr); + iounmap((void *)dev->base_addr); #endif out: free_netdev(dev); @@ -326,7 +326,7 @@ static int __init lance_probe( struct net_device *dev) ioaddr_probe[1] = tmp2; #ifdef CONFIG_SUN3 - iounmap((void __iomem *)ioaddr); + iounmap((void *)ioaddr); #endif return 0; } @@ -956,7 +956,7 @@ void cleanup_module(void) { unregister_netdev(sun3lance_dev); #ifdef CONFIG_SUN3 - iounmap((void __iomem *)sun3lance_dev->base_addr); + iounmap((void *)sun3lance_dev->base_addr); #endif free_netdev(sun3lance_dev); } diff --git a/trunk/drivers/net/tulip/de2104x.c b/trunk/drivers/net/tulip/de2104x.c index f6b3a94e97bf..2cfd9634895a 100644 --- a/trunk/drivers/net/tulip/de2104x.c +++ b/trunk/drivers/net/tulip/de2104x.c @@ -1730,7 +1730,7 @@ static void __init de21040_get_media_info(struct de_private *de) } /* Note: this routine returns extra data bits for size detection. */ -static unsigned __devinit tulip_read_eeprom(void __iomem *regs, int location, int addr_len) +static unsigned __init tulip_read_eeprom(void __iomem *regs, int location, int addr_len) { int i; unsigned retval = 0; @@ -1926,7 +1926,7 @@ static void __init de21041_get_srom_info (struct de_private *de) goto fill_defaults; } -static int __devinit de_init_one (struct pci_dev *pdev, +static int __init de_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) { struct net_device *dev; @@ -2082,7 +2082,7 @@ static int __devinit de_init_one (struct pci_dev *pdev, return rc; } -static void __devexit de_remove_one (struct pci_dev *pdev) +static void __exit de_remove_one (struct pci_dev *pdev) { struct net_device *dev = pci_get_drvdata(pdev); struct de_private *de = dev->priv; @@ -2164,7 +2164,7 @@ static struct pci_driver de_driver = { .name = DRV_NAME, .id_table = de_pci_tbl, .probe = de_init_one, - .remove = __devexit_p(de_remove_one), + .remove = __exit_p(de_remove_one), #ifdef CONFIG_PM .suspend = de_suspend, .resume = de_resume, diff --git a/trunk/drivers/pci/Kconfig b/trunk/drivers/pci/Kconfig index ecc50db8585a..30294127a0aa 100644 --- a/trunk/drivers/pci/Kconfig +++ b/trunk/drivers/pci/Kconfig @@ -55,7 +55,7 @@ config PCI_DEBUG config HT_IRQ bool "Interrupts on hypertransport devices" default y - depends on PCI && X86_LOCAL_APIC && X86_IO_APIC + depends on X86_LOCAL_APIC && X86_IO_APIC help This allows native hypertransport devices to use interrupts. diff --git a/trunk/drivers/rtc/rtc-sh.c b/trunk/drivers/rtc/rtc-sh.c index 143302a8e79c..8b6efcc05058 100644 --- a/trunk/drivers/rtc/rtc-sh.c +++ b/trunk/drivers/rtc/rtc-sh.c @@ -160,7 +160,7 @@ static int sh_rtc_open(struct device *dev) tmp |= RCR1_CIE; writeb(tmp, rtc->regbase + RCR1); - ret = request_irq(rtc->periodic_irq, sh_rtc_periodic, IRQF_DISABLED, + ret = request_irq(rtc->periodic_irq, sh_rtc_periodic, SA_INTERRUPT, "sh-rtc period", dev); if (unlikely(ret)) { dev_err(dev, "request period IRQ failed with %d, IRQ %d\n", @@ -168,7 +168,7 @@ static int sh_rtc_open(struct device *dev) return ret; } - ret = request_irq(rtc->carry_irq, sh_rtc_interrupt, IRQF_DISABLED, + ret = request_irq(rtc->carry_irq, sh_rtc_interrupt, SA_INTERRUPT, "sh-rtc carry", dev); if (unlikely(ret)) { dev_err(dev, "request carry IRQ failed with %d, IRQ %d\n", @@ -177,7 +177,7 @@ static int sh_rtc_open(struct device *dev) goto err_bad_carry; } - ret = request_irq(rtc->alarm_irq, sh_rtc_interrupt, IRQF_DISABLED, + ret = request_irq(rtc->alarm_irq, sh_rtc_interrupt, SA_INTERRUPT, "sh-rtc alarm", dev); if (unlikely(ret)) { dev_err(dev, "request alarm IRQ failed with %d, IRQ %d\n", diff --git a/trunk/drivers/s390/char/monwriter.c b/trunk/drivers/s390/char/monwriter.c index abd02ed501cb..4362ff260244 100644 --- a/trunk/drivers/s390/char/monwriter.c +++ b/trunk/drivers/s390/char/monwriter.c @@ -110,7 +110,7 @@ static int monwrite_new_hdr(struct mon_private *monpriv) monbuf = kzalloc(sizeof(struct mon_buf), GFP_KERNEL); if (!monbuf) return -ENOMEM; - monbuf->data = kzalloc(monhdr->datalen, + monbuf->data = kzalloc(monbuf->hdr.datalen, GFP_KERNEL | GFP_DMA); if (!monbuf->data) { kfree(monbuf); diff --git a/trunk/drivers/s390/cio/chsc.c b/trunk/drivers/s390/cio/chsc.c index 2d78f0f4a40f..07c7f19339d2 100644 --- a/trunk/drivers/s390/cio/chsc.c +++ b/trunk/drivers/s390/cio/chsc.c @@ -370,7 +370,7 @@ __s390_process_res_acc(struct subchannel_id schid, void *data) struct res_acc_data *res_data; struct subchannel *sch; - res_data = data; + res_data = (struct res_acc_data *)data; sch = get_subchannel_by_schid(schid); if (!sch) /* Check if a subchannel is newly available. */ @@ -444,7 +444,7 @@ __get_chpid_from_lir(void *data) u32 isinfo[28]; } *lir; - lir = data; + lir = (struct lir*) data; if (!(lir->iq&0x80)) /* NULL link incident record */ return -EINVAL; @@ -628,7 +628,7 @@ __chp_add(struct subchannel_id schid, void *data) struct channel_path *chp; struct subchannel *sch; - chp = data; + chp = (struct channel_path *)data; sch = get_subchannel_by_schid(schid); if (!sch) /* Check if the subchannel is now available. */ @@ -707,7 +707,8 @@ chp_process_crw(int chpid, int on) return chp_add(chpid); } -static inline int check_for_io_on_path(struct subchannel *sch, int index) +static inline int +__check_for_io_and_kill(struct subchannel *sch, int index) { int cc; @@ -717,8 +718,10 @@ static inline int check_for_io_on_path(struct subchannel *sch, int index) cc = stsch(sch->schid, &sch->schib); if (cc) return 0; - if (sch->schib.scsw.actl && sch->schib.pmcw.lpum == (0x80 >> index)) + if (sch->schib.scsw.actl && sch->schib.pmcw.lpum == (0x80 >> index)) { + device_set_waiting(sch); return 1; + } return 0; } @@ -747,10 +750,12 @@ __s390_subchannel_vary_chpid(struct subchannel *sch, __u8 chpid, int on) } else { sch->opm &= ~(0x80 >> chp); sch->lpm &= ~(0x80 >> chp); - if (check_for_io_on_path(sch, chp)) - /* Path verification is done after killing. */ - device_kill_io(sch); - else if (!sch->lpm) { + /* + * Give running I/O a grace period in which it + * can successfully terminate, even using the + * just varied off path. Then kill it. + */ + if (!__check_for_io_and_kill(sch, chp) && !sch->lpm) { if (css_enqueue_subchannel_slow(sch->schid)) { css_clear_subchannel_slow_list(); need_rescan = 1; diff --git a/trunk/drivers/s390/cio/cio.c b/trunk/drivers/s390/cio/cio.c index 8936e460a807..f18b1623cad7 100644 --- a/trunk/drivers/s390/cio/cio.c +++ b/trunk/drivers/s390/cio/cio.c @@ -609,8 +609,8 @@ do_IRQ (struct pt_regs *regs) struct irb *irb; struct pt_regs *old_regs; + irq_enter (); old_regs = set_irq_regs(regs); - irq_enter(); asm volatile ("mc 0,0"); if (S390_lowcore.int_clock >= S390_lowcore.jiffy_timer) /** @@ -655,8 +655,8 @@ do_IRQ (struct pt_regs *regs) * out of the sie which costs more cycles than it saves. */ } while (!MACHINE_IS_VM && tpi (NULL) != 0); - irq_exit(); set_irq_regs(old_regs); + irq_exit (); } #ifdef CONFIG_CCW_CONSOLE diff --git a/trunk/drivers/s390/cio/css.c b/trunk/drivers/s390/cio/css.c index a2dee5bf5a17..7086a74e9871 100644 --- a/trunk/drivers/s390/cio/css.c +++ b/trunk/drivers/s390/cio/css.c @@ -177,7 +177,7 @@ get_subchannel_by_schid(struct subchannel_id schid) struct device *dev; dev = bus_find_device(&css_bus_type, NULL, - &schid, check_subchannel); + (void *)&schid, check_subchannel); return dev ? to_subchannel(dev) : NULL; } diff --git a/trunk/drivers/s390/cio/css.h b/trunk/drivers/s390/cio/css.h index 4c2ff8336288..8aabb4adeb5f 100644 --- a/trunk/drivers/s390/cio/css.h +++ b/trunk/drivers/s390/cio/css.h @@ -76,8 +76,9 @@ struct ccw_device_private { int state; /* device state */ atomic_t onoff; unsigned long registered; - struct ccw_dev_id dev_id; /* device id */ - struct subchannel_id schid; /* subchannel number */ + __u16 devno; /* device number */ + __u16 sch_no; /* subchannel number */ + __u8 ssid; /* subchannel set id */ __u8 imask; /* lpm mask for SNID/SID/SPGID */ int iretry; /* retry counter SNID/SID/SPGID */ struct { @@ -170,7 +171,7 @@ void device_trigger_reprobe(struct subchannel *); /* Helper functions for vary on/off. */ int device_is_online(struct subchannel *); -void device_kill_io(struct subchannel *); +void device_set_waiting(struct subchannel *); /* Machine check helper function. */ void device_kill_pending_timer(struct subchannel *); diff --git a/trunk/drivers/s390/cio/device.c b/trunk/drivers/s390/cio/device.c index 94bdd4d8a4c9..688945662c15 100644 --- a/trunk/drivers/s390/cio/device.c +++ b/trunk/drivers/s390/cio/device.c @@ -552,19 +552,21 @@ ccw_device_register(struct ccw_device *cdev) } struct match_data { - struct ccw_dev_id dev_id; + unsigned int devno; + unsigned int ssid; struct ccw_device * sibling; }; static int match_devno(struct device * dev, void * data) { - struct match_data * d = data; + struct match_data * d = (struct match_data *)data; struct ccw_device * cdev; cdev = to_ccwdev(dev); if ((cdev->private->state == DEV_STATE_DISCONNECTED) && - ccw_dev_id_is_equal(&cdev->private->dev_id, &d->dev_id) && + (cdev->private->devno == d->devno) && + (cdev->private->ssid == d->ssid) && (cdev != d->sibling)) { cdev->private->state = DEV_STATE_NOT_OPER; return 1; @@ -572,13 +574,15 @@ match_devno(struct device * dev, void * data) return 0; } -static struct ccw_device * get_disc_ccwdev_by_dev_id(struct ccw_dev_id *dev_id, - struct ccw_device *sibling) +static struct ccw_device * +get_disc_ccwdev_by_devno(unsigned int devno, unsigned int ssid, + struct ccw_device *sibling) { struct device *dev; struct match_data data; - data.dev_id = *dev_id; + data.devno = devno; + data.ssid = ssid; data.sibling = sibling; dev = bus_find_device(&ccw_bus_type, NULL, &data, match_devno); @@ -591,7 +595,7 @@ ccw_device_add_changed(void *data) struct ccw_device *cdev; - cdev = data; + cdev = (struct ccw_device *)data; if (device_add(&cdev->dev)) { put_device(&cdev->dev); return; @@ -612,9 +616,9 @@ ccw_device_do_unreg_rereg(void *data) struct subchannel *sch; int need_rename; - cdev = data; + cdev = (struct ccw_device *)data; sch = to_subchannel(cdev->dev.parent); - if (cdev->private->dev_id.devno != sch->schib.pmcw.dev) { + if (cdev->private->devno != sch->schib.pmcw.dev) { /* * The device number has changed. This is usually only when * a device has been detached under VM and then re-appeared @@ -629,12 +633,10 @@ ccw_device_do_unreg_rereg(void *data) * get possibly sick... */ struct ccw_device *other_cdev; - struct ccw_dev_id dev_id; need_rename = 1; - dev_id.devno = sch->schib.pmcw.dev; - dev_id.ssid = sch->schid.ssid; - other_cdev = get_disc_ccwdev_by_dev_id(&dev_id, cdev); + other_cdev = get_disc_ccwdev_by_devno(sch->schib.pmcw.dev, + sch->schid.ssid, cdev); if (other_cdev) { struct subchannel *other_sch; @@ -650,7 +652,7 @@ ccw_device_do_unreg_rereg(void *data) } /* Update ssd info here. */ css_get_ssd_info(sch); - cdev->private->dev_id.devno = sch->schib.pmcw.dev; + cdev->private->devno = sch->schib.pmcw.dev; } else need_rename = 0; device_remove_files(&cdev->dev); @@ -660,7 +662,7 @@ ccw_device_do_unreg_rereg(void *data) snprintf (cdev->dev.bus_id, BUS_ID_SIZE, "0.%x.%04x", sch->schid.ssid, sch->schib.pmcw.dev); PREPARE_WORK(&cdev->private->kick_work, - ccw_device_add_changed, cdev); + ccw_device_add_changed, (void *)cdev); queue_work(ccw_device_work, &cdev->private->kick_work); } @@ -685,7 +687,7 @@ io_subchannel_register(void *data) int ret; unsigned long flags; - cdev = data; + cdev = (struct ccw_device *) data; sch = to_subchannel(cdev->dev.parent); if (klist_node_attached(&cdev->dev.knode_parent)) { @@ -757,7 +759,7 @@ io_subchannel_recog_done(struct ccw_device *cdev) break; sch = to_subchannel(cdev->dev.parent); PREPARE_WORK(&cdev->private->kick_work, - ccw_device_call_sch_unregister, cdev); + ccw_device_call_sch_unregister, (void *) cdev); queue_work(slow_path_wq, &cdev->private->kick_work); if (atomic_dec_and_test(&ccw_device_init_count)) wake_up(&ccw_device_init_wq); @@ -772,7 +774,7 @@ io_subchannel_recog_done(struct ccw_device *cdev) if (!get_device(&cdev->dev)) break; PREPARE_WORK(&cdev->private->kick_work, - io_subchannel_register, cdev); + io_subchannel_register, (void *) cdev); queue_work(slow_path_wq, &cdev->private->kick_work); break; } @@ -790,9 +792,9 @@ io_subchannel_recog(struct ccw_device *cdev, struct subchannel *sch) /* Init private data. */ priv = cdev->private; - priv->dev_id.devno = sch->schib.pmcw.dev; - priv->dev_id.ssid = sch->schid.ssid; - priv->schid = sch->schid; + priv->devno = sch->schib.pmcw.dev; + priv->ssid = sch->schid.ssid; + priv->sch_no = sch->schid.sch_no; priv->state = DEV_STATE_NOT_OPER; INIT_LIST_HEAD(&priv->cmb_list); init_waitqueue_head(&priv->wait_q); @@ -910,7 +912,7 @@ io_subchannel_remove (struct subchannel *sch) */ if (get_device(&cdev->dev)) { PREPARE_WORK(&cdev->private->kick_work, - ccw_device_unregister, cdev); + ccw_device_unregister, (void *) cdev); queue_work(ccw_device_work, &cdev->private->kick_work); } return 0; @@ -1053,7 +1055,7 @@ __ccwdev_check_busid(struct device *dev, void *id) { char *bus_id; - bus_id = id; + bus_id = (char *)id; return (strncmp(bus_id, dev->bus_id, BUS_ID_SIZE) == 0); } diff --git a/trunk/drivers/s390/cio/device.h b/trunk/drivers/s390/cio/device.h index c6140cc97a80..00be9a5b4acd 100644 --- a/trunk/drivers/s390/cio/device.h +++ b/trunk/drivers/s390/cio/device.h @@ -21,6 +21,7 @@ enum dev_state { /* states to wait for i/o completion before doing something */ DEV_STATE_CLEAR_VERIFY, DEV_STATE_TIMEOUT_KILL, + DEV_STATE_WAIT4IO, DEV_STATE_QUIESCE, /* special states for devices gone not operational */ DEV_STATE_DISCONNECTED, diff --git a/trunk/drivers/s390/cio/device_fsm.c b/trunk/drivers/s390/cio/device_fsm.c index fcaf28d7b4eb..b67620208f36 100644 --- a/trunk/drivers/s390/cio/device_fsm.c +++ b/trunk/drivers/s390/cio/device_fsm.c @@ -59,6 +59,18 @@ device_set_disconnected(struct subchannel *sch) cdev->private->state = DEV_STATE_DISCONNECTED; } +void +device_set_waiting(struct subchannel *sch) +{ + struct ccw_device *cdev; + + if (!sch->dev.driver_data) + return; + cdev = sch->dev.driver_data; + ccw_device_set_timeout(cdev, 10*HZ); + cdev->private->state = DEV_STATE_WAIT4IO; +} + /* * Timeout function. It just triggers a DEV_EVENT_TIMEOUT. */ @@ -171,9 +183,9 @@ ccw_device_handle_oper(struct ccw_device *cdev) cdev->id.cu_model != cdev->private->senseid.cu_model || cdev->id.dev_type != cdev->private->senseid.dev_type || cdev->id.dev_model != cdev->private->senseid.dev_model || - cdev->private->dev_id.devno != sch->schib.pmcw.dev) { + cdev->private->devno != sch->schib.pmcw.dev) { PREPARE_WORK(&cdev->private->kick_work, - ccw_device_do_unreg_rereg, cdev); + ccw_device_do_unreg_rereg, (void *)cdev); queue_work(ccw_device_work, &cdev->private->kick_work); return 0; } @@ -243,7 +255,7 @@ ccw_device_recog_done(struct ccw_device *cdev, int state) case DEV_STATE_NOT_OPER: CIO_DEBUG(KERN_WARNING, 2, "SenseID : unknown device %04x on subchannel " - "0.%x.%04x\n", cdev->private->dev_id.devno, + "0.%x.%04x\n", cdev->private->devno, sch->schid.ssid, sch->schid.sch_no); break; case DEV_STATE_OFFLINE: @@ -270,15 +282,14 @@ ccw_device_recog_done(struct ccw_device *cdev, int state) CIO_DEBUG(KERN_INFO, 2, "SenseID : device 0.%x.%04x reports: " "CU Type/Mod = %04X/%02X, Dev Type/Mod = " "%04X/%02X\n", - cdev->private->dev_id.ssid, - cdev->private->dev_id.devno, + cdev->private->ssid, cdev->private->devno, cdev->id.cu_type, cdev->id.cu_model, cdev->id.dev_type, cdev->id.dev_model); break; case DEV_STATE_BOXED: CIO_DEBUG(KERN_WARNING, 2, "SenseID : boxed device %04x on subchannel " - "0.%x.%04x\n", cdev->private->dev_id.devno, + "0.%x.%04x\n", cdev->private->devno, sch->schid.ssid, sch->schid.sch_no); break; } @@ -314,13 +325,13 @@ ccw_device_oper_notify(void *data) struct subchannel *sch; int ret; - cdev = data; + cdev = (struct ccw_device *)data; sch = to_subchannel(cdev->dev.parent); ret = (sch->driver && sch->driver->notify) ? sch->driver->notify(&sch->dev, CIO_OPER) : 0; if (!ret) /* Driver doesn't want device back. */ - ccw_device_do_unreg_rereg(cdev); + ccw_device_do_unreg_rereg((void *)cdev); else { /* Reenable channel measurements, if needed. */ cmf_reenable(cdev); @@ -352,12 +363,12 @@ ccw_device_done(struct ccw_device *cdev, int state) if (state == DEV_STATE_BOXED) CIO_DEBUG(KERN_WARNING, 2, "Boxed device %04x on subchannel %04x\n", - cdev->private->dev_id.devno, sch->schid.sch_no); + cdev->private->devno, sch->schid.sch_no); if (cdev->private->flags.donotify) { cdev->private->flags.donotify = 0; PREPARE_WORK(&cdev->private->kick_work, ccw_device_oper_notify, - cdev); + (void *)cdev); queue_work(ccw_device_notify_work, &cdev->private->kick_work); } wake_up(&cdev->private->wait_q); @@ -401,8 +412,7 @@ static void __ccw_device_get_common_pgid(struct ccw_device *cdev) /* PGID mismatch, can't pathgroup. */ CIO_MSG_EVENT(0, "SNID - pgid mismatch for device " "0.%x.%04x, can't pathgroup\n", - cdev->private->dev_id.ssid, - cdev->private->dev_id.devno); + cdev->private->ssid, cdev->private->devno); cdev->private->options.pgroup = 0; return; } @@ -513,7 +523,7 @@ ccw_device_nopath_notify(void *data) struct subchannel *sch; int ret; - cdev = data; + cdev = (struct ccw_device *)data; sch = to_subchannel(cdev->dev.parent); /* Extra sanity. */ if (sch->lpm) @@ -527,7 +537,7 @@ ccw_device_nopath_notify(void *data) if (get_device(&cdev->dev)) { PREPARE_WORK(&cdev->private->kick_work, ccw_device_call_sch_unregister, - cdev); + (void *)cdev); queue_work(ccw_device_work, &cdev->private->kick_work); } else @@ -582,7 +592,7 @@ ccw_device_verify_done(struct ccw_device *cdev, int err) break; default: PREPARE_WORK(&cdev->private->kick_work, - ccw_device_nopath_notify, cdev); + ccw_device_nopath_notify, (void *)cdev); queue_work(ccw_device_notify_work, &cdev->private->kick_work); ccw_device_done(cdev, DEV_STATE_NOT_OPER); break; @@ -713,7 +723,7 @@ ccw_device_offline_notoper(struct ccw_device *cdev, enum dev_event dev_event) sch = to_subchannel(cdev->dev.parent); if (get_device(&cdev->dev)) { PREPARE_WORK(&cdev->private->kick_work, - ccw_device_call_sch_unregister, cdev); + ccw_device_call_sch_unregister, (void *)cdev); queue_work(ccw_device_work, &cdev->private->kick_work); } wake_up(&cdev->private->wait_q); @@ -744,7 +754,7 @@ ccw_device_online_notoper(struct ccw_device *cdev, enum dev_event dev_event) } if (get_device(&cdev->dev)) { PREPARE_WORK(&cdev->private->kick_work, - ccw_device_call_sch_unregister, cdev); + ccw_device_call_sch_unregister, (void *)cdev); queue_work(ccw_device_work, &cdev->private->kick_work); } wake_up(&cdev->private->wait_q); @@ -849,7 +859,7 @@ ccw_device_online_timeout(struct ccw_device *cdev, enum dev_event dev_event) sch = to_subchannel(cdev->dev.parent); if (!sch->lpm) { PREPARE_WORK(&cdev->private->kick_work, - ccw_device_nopath_notify, cdev); + ccw_device_nopath_notify, (void *)cdev); queue_work(ccw_device_notify_work, &cdev->private->kick_work); } else @@ -875,8 +885,7 @@ ccw_device_w4sense(struct ccw_device *cdev, enum dev_event dev_event) /* Basic sense hasn't started. Try again. */ ccw_device_do_sense(cdev, irb); else { - printk(KERN_INFO "Huh? %s(%s): unsolicited " - "interrupt...\n", + printk("Huh? %s(%s): unsolicited interrupt...\n", __FUNCTION__, cdev->dev.bus_id); if (cdev->handler) cdev->handler (cdev, 0, irb); @@ -935,10 +944,10 @@ ccw_device_killing_irq(struct ccw_device *cdev, enum dev_event dev_event) cdev->private->state = DEV_STATE_ONLINE; if (cdev->handler) cdev->handler(cdev, cdev->private->intparm, - ERR_PTR(-EIO)); + ERR_PTR(-ETIMEDOUT)); if (!sch->lpm) { PREPARE_WORK(&cdev->private->kick_work, - ccw_device_nopath_notify, cdev); + ccw_device_nopath_notify, (void *)cdev); queue_work(ccw_device_notify_work, &cdev->private->kick_work); } else if (cdev->private->flags.doverify) /* Start delayed path verification. */ @@ -961,7 +970,7 @@ ccw_device_killing_timeout(struct ccw_device *cdev, enum dev_event dev_event) sch = to_subchannel(cdev->dev.parent); if (!sch->lpm) { PREPARE_WORK(&cdev->private->kick_work, - ccw_device_nopath_notify, cdev); + ccw_device_nopath_notify, (void *)cdev); queue_work(ccw_device_notify_work, &cdev->private->kick_work); } else @@ -972,15 +981,51 @@ ccw_device_killing_timeout(struct ccw_device *cdev, enum dev_event dev_event) cdev->private->state = DEV_STATE_ONLINE; if (cdev->handler) cdev->handler(cdev, cdev->private->intparm, - ERR_PTR(-EIO)); + ERR_PTR(-ETIMEDOUT)); +} + +static void +ccw_device_wait4io_irq(struct ccw_device *cdev, enum dev_event dev_event) +{ + struct irb *irb; + struct subchannel *sch; + + irb = (struct irb *) __LC_IRB; + /* + * Accumulate status and find out if a basic sense is needed. + * This is fine since we have already adapted the lpm. + */ + ccw_device_accumulate_irb(cdev, irb); + if (cdev->private->flags.dosense) { + if (ccw_device_do_sense(cdev, irb) == 0) { + cdev->private->state = DEV_STATE_W4SENSE; + } + return; + } + + /* Iff device is idle, reset timeout. */ + sch = to_subchannel(cdev->dev.parent); + if (!stsch(sch->schid, &sch->schib)) + if (sch->schib.scsw.actl == 0) + ccw_device_set_timeout(cdev, 0); + /* Call the handler. */ + ccw_device_call_handler(cdev); + if (!sch->lpm) { + PREPARE_WORK(&cdev->private->kick_work, + ccw_device_nopath_notify, (void *)cdev); + queue_work(ccw_device_notify_work, &cdev->private->kick_work); + } else if (cdev->private->flags.doverify) + ccw_device_online_verify(cdev, 0); } -void device_kill_io(struct subchannel *sch) +static void +ccw_device_wait4io_timeout(struct ccw_device *cdev, enum dev_event dev_event) { int ret; - struct ccw_device *cdev; + struct subchannel *sch; - cdev = sch->dev.driver_data; + sch = to_subchannel(cdev->dev.parent); + ccw_device_set_timeout(cdev, 0); ret = ccw_device_cancel_halt_clear(cdev); if (ret == -EBUSY) { ccw_device_set_timeout(cdev, 3*HZ); @@ -990,7 +1035,7 @@ void device_kill_io(struct subchannel *sch) if (ret == -ENODEV) { if (!sch->lpm) { PREPARE_WORK(&cdev->private->kick_work, - ccw_device_nopath_notify, cdev); + ccw_device_nopath_notify, (void *)cdev); queue_work(ccw_device_notify_work, &cdev->private->kick_work); } else @@ -999,12 +1044,12 @@ void device_kill_io(struct subchannel *sch) } if (cdev->handler) cdev->handler(cdev, cdev->private->intparm, - ERR_PTR(-EIO)); + ERR_PTR(-ETIMEDOUT)); if (!sch->lpm) { PREPARE_WORK(&cdev->private->kick_work, - ccw_device_nopath_notify, cdev); + ccw_device_nopath_notify, (void *)cdev); queue_work(ccw_device_notify_work, &cdev->private->kick_work); - } else + } else if (cdev->private->flags.doverify) /* Start delayed path verification. */ ccw_device_online_verify(cdev, 0); } @@ -1241,6 +1286,12 @@ fsm_func_t *dev_jumptable[NR_DEV_STATES][NR_DEV_EVENTS] = { [DEV_EVENT_TIMEOUT] = ccw_device_killing_timeout, [DEV_EVENT_VERIFY] = ccw_device_nop, //FIXME }, + [DEV_STATE_WAIT4IO] = { + [DEV_EVENT_NOTOPER] = ccw_device_online_notoper, + [DEV_EVENT_INTERRUPT] = ccw_device_wait4io_irq, + [DEV_EVENT_TIMEOUT] = ccw_device_wait4io_timeout, + [DEV_EVENT_VERIFY] = ccw_device_delay_verify, + }, [DEV_STATE_QUIESCE] = { [DEV_EVENT_NOTOPER] = ccw_device_quiesce_done, [DEV_EVENT_INTERRUPT] = ccw_device_quiesce_done, diff --git a/trunk/drivers/s390/cio/device_id.c b/trunk/drivers/s390/cio/device_id.c index a74785b9e4eb..1398367b5f68 100644 --- a/trunk/drivers/s390/cio/device_id.c +++ b/trunk/drivers/s390/cio/device_id.c @@ -251,7 +251,7 @@ ccw_device_check_sense_id(struct ccw_device *cdev) */ CIO_MSG_EVENT(2, "SenseID : device %04x on Subchannel " "0.%x.%04x reports cmd reject\n", - cdev->private->dev_id.devno, sch->schid.ssid, + cdev->private->devno, sch->schid.ssid, sch->schid.sch_no); return -EOPNOTSUPP; } @@ -259,8 +259,7 @@ ccw_device_check_sense_id(struct ccw_device *cdev) CIO_MSG_EVENT(2, "SenseID : UC on dev 0.%x.%04x, " "lpum %02X, cnt %02d, sns :" " %02X%02X%02X%02X %02X%02X%02X%02X ...\n", - cdev->private->dev_id.ssid, - cdev->private->dev_id.devno, + cdev->private->ssid, cdev->private->devno, irb->esw.esw0.sublog.lpum, irb->esw.esw0.erw.scnt, irb->ecw[0], irb->ecw[1], @@ -275,15 +274,14 @@ ccw_device_check_sense_id(struct ccw_device *cdev) CIO_MSG_EVENT(2, "SenseID : path %02X for device %04x " "on subchannel 0.%x.%04x is " "'not operational'\n", sch->orb.lpm, - cdev->private->dev_id.devno, - sch->schid.ssid, sch->schid.sch_no); + cdev->private->devno, sch->schid.ssid, + sch->schid.sch_no); return -EACCES; } /* Hmm, whatever happened, try again. */ CIO_MSG_EVENT(2, "SenseID : start_IO() for device %04x on " "subchannel 0.%x.%04x returns status %02X%02X\n", - cdev->private->dev_id.devno, sch->schid.ssid, - sch->schid.sch_no, + cdev->private->devno, sch->schid.ssid, sch->schid.sch_no, irb->scsw.dstat, irb->scsw.cstat); return -EAGAIN; } @@ -332,7 +330,7 @@ ccw_device_sense_id_irq(struct ccw_device *cdev, enum dev_event dev_event) /* fall through. */ default: /* Sense ID failed. Try asking VM. */ if (MACHINE_IS_VM) { - VM_virtual_device_info (cdev->private->dev_id.devno, + VM_virtual_device_info (cdev->private->devno, &cdev->private->senseid); if (cdev->private->senseid.cu_type != 0xFFFF) { /* Got the device information from VM. */ diff --git a/trunk/drivers/s390/cio/device_ops.c b/trunk/drivers/s390/cio/device_ops.c index b39c1fa48acd..84b9b18eabc2 100644 --- a/trunk/drivers/s390/cio/device_ops.c +++ b/trunk/drivers/s390/cio/device_ops.c @@ -50,6 +50,7 @@ ccw_device_clear(struct ccw_device *cdev, unsigned long intparm) if (cdev->private->state == DEV_STATE_NOT_OPER) return -ENODEV; if (cdev->private->state != DEV_STATE_ONLINE && + cdev->private->state != DEV_STATE_WAIT4IO && cdev->private->state != DEV_STATE_W4SENSE) return -EINVAL; sch = to_subchannel(cdev->dev.parent); @@ -154,6 +155,7 @@ ccw_device_halt(struct ccw_device *cdev, unsigned long intparm) if (cdev->private->state == DEV_STATE_NOT_OPER) return -ENODEV; if (cdev->private->state != DEV_STATE_ONLINE && + cdev->private->state != DEV_STATE_WAIT4IO && cdev->private->state != DEV_STATE_W4SENSE) return -EINVAL; sch = to_subchannel(cdev->dev.parent); @@ -590,13 +592,13 @@ ccw_device_get_chp_desc(struct ccw_device *cdev, int chp_no) int _ccw_device_get_subchannel_number(struct ccw_device *cdev) { - return cdev->private->schid.sch_no; + return cdev->private->sch_no; } int _ccw_device_get_device_number(struct ccw_device *cdev) { - return cdev->private->dev_id.devno; + return cdev->private->devno; } diff --git a/trunk/drivers/s390/cio/device_pgid.c b/trunk/drivers/s390/cio/device_pgid.c index 2975ce888c19..84917b39de45 100644 --- a/trunk/drivers/s390/cio/device_pgid.c +++ b/trunk/drivers/s390/cio/device_pgid.c @@ -79,8 +79,7 @@ __ccw_device_sense_pgid_start(struct ccw_device *cdev) CIO_MSG_EVENT(2, "SNID - Device %04x on Subchannel " "0.%x.%04x, lpm %02X, became 'not " "operational'\n", - cdev->private->dev_id.devno, - sch->schid.ssid, + cdev->private->devno, sch->schid.ssid, sch->schid.sch_no, cdev->private->imask); } @@ -136,8 +135,7 @@ __ccw_device_check_sense_pgid(struct ccw_device *cdev) CIO_MSG_EVENT(2, "SNID - device 0.%x.%04x, unit check, " "lpum %02X, cnt %02d, sns : " "%02X%02X%02X%02X %02X%02X%02X%02X ...\n", - cdev->private->dev_id.ssid, - cdev->private->dev_id.devno, + cdev->private->ssid, cdev->private->devno, irb->esw.esw0.sublog.lpum, irb->esw.esw0.erw.scnt, irb->ecw[0], irb->ecw[1], @@ -149,7 +147,7 @@ __ccw_device_check_sense_pgid(struct ccw_device *cdev) if (irb->scsw.cc == 3) { CIO_MSG_EVENT(2, "SNID - Device %04x on Subchannel 0.%x.%04x," " lpm %02X, became 'not operational'\n", - cdev->private->dev_id.devno, sch->schid.ssid, + cdev->private->devno, sch->schid.ssid, sch->schid.sch_no, sch->orb.lpm); return -EACCES; } @@ -157,7 +155,7 @@ __ccw_device_check_sense_pgid(struct ccw_device *cdev) if (cdev->private->pgid[i].inf.ps.state2 == SNID_STATE2_RESVD_ELSE) { CIO_MSG_EVENT(2, "SNID - Device %04x on Subchannel 0.%x.%04x " "is reserved by someone else\n", - cdev->private->dev_id.devno, sch->schid.ssid, + cdev->private->devno, sch->schid.ssid, sch->schid.sch_no); return -EUSERS; } @@ -263,7 +261,7 @@ __ccw_device_do_pgid(struct ccw_device *cdev, __u8 func) /* PGID command failed on this path. */ CIO_MSG_EVENT(2, "SPID - Device %04x on Subchannel " "0.%x.%04x, lpm %02X, became 'not operational'\n", - cdev->private->dev_id.devno, sch->schid.ssid, + cdev->private->devno, sch->schid.ssid, sch->schid.sch_no, cdev->private->imask); return ret; } @@ -303,7 +301,7 @@ static int __ccw_device_do_nop(struct ccw_device *cdev) /* nop command failed on this path. */ CIO_MSG_EVENT(2, "NOP - Device %04x on Subchannel " "0.%x.%04x, lpm %02X, became 'not operational'\n", - cdev->private->dev_id.devno, sch->schid.ssid, + cdev->private->devno, sch->schid.ssid, sch->schid.sch_no, cdev->private->imask); return ret; } @@ -330,9 +328,8 @@ __ccw_device_check_pgid(struct ccw_device *cdev) CIO_MSG_EVENT(2, "SPID - device 0.%x.%04x, unit check, " "cnt %02d, " "sns : %02X%02X%02X%02X %02X%02X%02X%02X ...\n", - cdev->private->dev_id.ssid, - cdev->private->dev_id.devno, - irb->esw.esw0.erw.scnt, + cdev->private->ssid, + cdev->private->devno, irb->esw.esw0.erw.scnt, irb->ecw[0], irb->ecw[1], irb->ecw[2], irb->ecw[3], irb->ecw[4], irb->ecw[5], @@ -342,7 +339,7 @@ __ccw_device_check_pgid(struct ccw_device *cdev) if (irb->scsw.cc == 3) { CIO_MSG_EVENT(2, "SPID - Device %04x on Subchannel 0.%x.%04x," " lpm %02X, became 'not operational'\n", - cdev->private->dev_id.devno, sch->schid.ssid, + cdev->private->devno, sch->schid.ssid, sch->schid.sch_no, cdev->private->imask); return -EACCES; } @@ -365,7 +362,7 @@ static int __ccw_device_check_nop(struct ccw_device *cdev) if (irb->scsw.cc == 3) { CIO_MSG_EVENT(2, "NOP - Device %04x on Subchannel 0.%x.%04x," " lpm %02X, became 'not operational'\n", - cdev->private->dev_id.devno, sch->schid.ssid, + cdev->private->devno, sch->schid.ssid, sch->schid.sch_no, cdev->private->imask); return -EACCES; } diff --git a/trunk/drivers/s390/cio/device_status.c b/trunk/drivers/s390/cio/device_status.c index 3f7cbce4cd87..caf148d5caad 100644 --- a/trunk/drivers/s390/cio/device_status.c +++ b/trunk/drivers/s390/cio/device_status.c @@ -32,18 +32,19 @@ ccw_device_msg_control_check(struct ccw_device *cdev, struct irb *irb) SCHN_STAT_CHN_CTRL_CHK | SCHN_STAT_INTF_CTRL_CHK))) return; + CIO_MSG_EVENT(0, "Channel-Check or Interface-Control-Check " "received" " ... device %04x on subchannel 0.%x.%04x, dev_stat " ": %02X sch_stat : %02X\n", - cdev->private->dev_id.devno, cdev->private->schid.ssid, - cdev->private->schid.sch_no, + cdev->private->devno, cdev->private->ssid, + cdev->private->sch_no, irb->scsw.dstat, irb->scsw.cstat); if (irb->scsw.cc != 3) { char dbf_text[15]; - sprintf(dbf_text, "chk%x", cdev->private->schid.sch_no); + sprintf(dbf_text, "chk%x", cdev->private->sch_no); CIO_TRACE_EVENT(0, dbf_text); CIO_HEX_EVENT(0, irb, sizeof (struct irb)); } diff --git a/trunk/drivers/s390/cio/qdio.c b/trunk/drivers/s390/cio/qdio.c index 0648ce5bb684..cde822d8b5c8 100644 --- a/trunk/drivers/s390/cio/qdio.c +++ b/trunk/drivers/s390/cio/qdio.c @@ -1741,7 +1741,7 @@ qdio_fill_qs(struct qdio_irq *irq_ptr, struct ccw_device *cdev, void *ptr; int available; - sprintf(dbf_text,"qfqs%4x",cdev->private->schid.sch_no); + sprintf(dbf_text,"qfqs%4x",cdev->private->sch_no); QDIO_DBF_TEXT0(0,setup,dbf_text); for (i=0;iinput_qs[i]; @@ -2924,7 +2924,7 @@ qdio_establish_handle_irq(struct ccw_device *cdev, int cstat, int dstat) irq_ptr = cdev->private->qdio_data; - sprintf(dbf_text,"qehi%4x",cdev->private->schid.sch_no); + sprintf(dbf_text,"qehi%4x",cdev->private->sch_no); QDIO_DBF_TEXT0(0,setup,dbf_text); QDIO_DBF_TEXT0(0,trace,dbf_text); @@ -2943,7 +2943,7 @@ qdio_initialize(struct qdio_initialize *init_data) int rc; char dbf_text[15]; - sprintf(dbf_text,"qini%4x",init_data->cdev->private->schid.sch_no); + sprintf(dbf_text,"qini%4x",init_data->cdev->private->sch_no); QDIO_DBF_TEXT0(0,setup,dbf_text); QDIO_DBF_TEXT0(0,trace,dbf_text); @@ -2964,7 +2964,7 @@ qdio_allocate(struct qdio_initialize *init_data) struct qdio_irq *irq_ptr; char dbf_text[15]; - sprintf(dbf_text,"qalc%4x",init_data->cdev->private->schid.sch_no); + sprintf(dbf_text,"qalc%4x",init_data->cdev->private->sch_no); QDIO_DBF_TEXT0(0,setup,dbf_text); QDIO_DBF_TEXT0(0,trace,dbf_text); if ( (init_data->no_input_qs>QDIO_MAX_QUEUES_PER_IRQ) || @@ -3187,7 +3187,7 @@ qdio_establish(struct qdio_initialize *init_data) tiqdio_set_delay_target(irq_ptr,TIQDIO_DELAY_TARGET); } - sprintf(dbf_text,"qest%4x",cdev->private->schid.sch_no); + sprintf(dbf_text,"qest%4x",cdev->private->sch_no); QDIO_DBF_TEXT0(0,setup,dbf_text); QDIO_DBF_TEXT0(0,trace,dbf_text); diff --git a/trunk/drivers/sbus/char/bbc_envctrl.c b/trunk/drivers/sbus/char/bbc_envctrl.c index 0d3660c28f7d..d27e4f6d7045 100644 --- a/trunk/drivers/sbus/char/bbc_envctrl.c +++ b/trunk/drivers/sbus/char/bbc_envctrl.c @@ -4,8 +4,10 @@ * Copyright (C) 2001 David S. Miller (davem@redhat.com) */ +#include #include -#include +#include +#include #include #include #include diff --git a/trunk/drivers/sbus/char/envctrl.c b/trunk/drivers/sbus/char/envctrl.c index 6b6a855f3795..728a133d0fc5 100644 --- a/trunk/drivers/sbus/char/envctrl.c +++ b/trunk/drivers/sbus/char/envctrl.c @@ -20,12 +20,16 @@ */ #include -#include +#include #include +#include #include #include +#include #include -#include +#include +#include +#include #include #include diff --git a/trunk/drivers/scsi/aha152x.c b/trunk/drivers/scsi/aha152x.c index 306f46b85a55..a0d1cee0be77 100644 --- a/trunk/drivers/scsi/aha152x.c +++ b/trunk/drivers/scsi/aha152x.c @@ -238,7 +238,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/trunk/drivers/scsi/dtc.c b/trunk/drivers/scsi/dtc.c index 54756722dd5f..0d5713dfa204 100644 --- a/trunk/drivers/scsi/dtc.c +++ b/trunk/drivers/scsi/dtc.c @@ -82,7 +82,7 @@ #include #include #include -#include +#include #include "scsi.h" #include #include "dtc.h" diff --git a/trunk/drivers/scsi/fdomain.c b/trunk/drivers/scsi/fdomain.c index 72794a7b6dcc..41b05fc45380 100644 --- a/trunk/drivers/scsi/fdomain.c +++ b/trunk/drivers/scsi/fdomain.c @@ -278,9 +278,9 @@ #include #include #include -#include #include +#include #include #include diff --git a/trunk/drivers/scsi/seagate.c b/trunk/drivers/scsi/seagate.c index 5ffec2721b28..8ff1f2866f7b 100644 --- a/trunk/drivers/scsi/seagate.c +++ b/trunk/drivers/scsi/seagate.c @@ -97,8 +97,8 @@ #include #include #include -#include +#include #include #include diff --git a/trunk/drivers/scsi/t128.c b/trunk/drivers/scsi/t128.c index 0b7a70f61e0d..2df6747cb76f 100644 --- a/trunk/drivers/scsi/t128.c +++ b/trunk/drivers/scsi/t128.c @@ -109,7 +109,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/trunk/drivers/scsi/wd7000.c b/trunk/drivers/scsi/wd7000.c index 30be76514c43..331e1cf159b0 100644 --- a/trunk/drivers/scsi/wd7000.c +++ b/trunk/drivers/scsi/wd7000.c @@ -178,10 +178,10 @@ #include #include #include -#include #include #include +#include #include #include diff --git a/trunk/drivers/serial/sh-sci.c b/trunk/drivers/serial/sh-sci.c index cfcc3caf49d8..266aa325569e 100644 --- a/trunk/drivers/serial/sh-sci.c +++ b/trunk/drivers/serial/sh-sci.c @@ -808,7 +808,7 @@ static int sci_request_irq(struct sci_port *port) } if (request_irq(port->irqs[0], sci_mpxed_interrupt, - IRQF_DISABLED, "sci", port)) { + SA_INTERRUPT, "sci", port)) { printk(KERN_ERR "sci: Cannot allocate irq.\n"); return -ENODEV; } @@ -817,7 +817,7 @@ static int sci_request_irq(struct sci_port *port) if (!port->irqs[i]) continue; if (request_irq(port->irqs[i], handlers[i], - IRQF_DISABLED, desc[i], port)) { + SA_INTERRUPT, desc[i], port)) { printk(KERN_ERR "sci: Cannot allocate irq.\n"); return -ENODEV; } diff --git a/trunk/drivers/serial/sunzilog.c b/trunk/drivers/serial/sunzilog.c index b2cc703b2b9e..73dd2eedaaad 100644 --- a/trunk/drivers/serial/sunzilog.c +++ b/trunk/drivers/serial/sunzilog.c @@ -1182,7 +1182,7 @@ static int __init sunzilog_console_setup(struct console *con, char *options) return 0; } -static struct console sunzilog_console_ops = { +static struct console sunzilog_console = { .name = "ttyS", .write = sunzilog_console_write, .device = uart_console_device, @@ -1208,10 +1208,10 @@ static inline struct console *SUNZILOG_CONSOLE(void) if (i == NUM_CHANNELS) return NULL; - sunzilog_console_ops.index = i; + sunzilog_console.index = i; sunzilog_port_table[i].flags |= SUNZILOG_FLAG_IS_CONS; - return &sunzilog_console_ops; + return &sunzilog_console; } #else diff --git a/trunk/drivers/video/Kconfig b/trunk/drivers/video/Kconfig index 7a43020fa583..daaa486159cf 100644 --- a/trunk/drivers/video/Kconfig +++ b/trunk/drivers/video/Kconfig @@ -701,6 +701,7 @@ config FB_NVIDIA depends on FB && PCI select I2C_ALGOBIT if FB_NVIDIA_I2C select I2C if FB_NVIDIA_I2C + select FB_DDC if FB_NVIDIA_I2C select FB_MODE_HELPERS select FB_CFB_FILLRECT select FB_CFB_COPYAREA diff --git a/trunk/drivers/video/nvidia/nv_i2c.c b/trunk/drivers/video/nvidia/nv_i2c.c index 19eef3a09023..e48de3c9fd13 100644 --- a/trunk/drivers/video/nvidia/nv_i2c.c +++ b/trunk/drivers/video/nvidia/nv_i2c.c @@ -160,51 +160,12 @@ void nvidia_delete_i2c_busses(struct nvidia_par *par) } -static u8 *nvidia_do_probe_i2c_edid(struct nvidia_i2c_chan *chan) -{ - u8 start = 0x0; - struct i2c_msg msgs[] = { - { - .addr = 0x50, - .len = 1, - .buf = &start, - }, { - .addr = 0x50, - .flags = I2C_M_RD, - .len = EDID_LENGTH, - }, - }; - u8 *buf; - - if (!chan->par) - return NULL; - - buf = kmalloc(EDID_LENGTH, GFP_KERNEL); - if (!buf) { - dev_warn(&chan->par->pci_dev->dev, "Out of memory!\n"); - return NULL; - } - msgs[1].buf = buf; - - if (i2c_transfer(&chan->adapter, msgs, 2) == 2) - return buf; - dev_dbg(&chan->par->pci_dev->dev, "Unable to read EDID block.\n"); - kfree(buf); - return NULL; -} - int nvidia_probe_i2c_connector(struct fb_info *info, int conn, u8 **out_edid) { struct nvidia_par *par = info->par; - u8 *edid = NULL; - int i; - - for (i = 0; i < 3; i++) { - /* Do the real work */ - edid = nvidia_do_probe_i2c_edid(&par->chan[conn - 1]); - if (edid) - break; - } + u8 *edid; + + edid = fb_ddc_read(&par->chan[conn - 1].adapter); if (!edid && conn == 1) { /* try to get from firmware */ diff --git a/trunk/fs/Kconfig b/trunk/fs/Kconfig index 6a3df055280a..599de54451af 100644 --- a/trunk/fs/Kconfig +++ b/trunk/fs/Kconfig @@ -140,73 +140,6 @@ config EXT3_FS_SECURITY If you are not using a security module that requires using extended attributes for file security labels, say N. -config EXT4DEV_FS - tristate "Ext4dev/ext4 extended fs support development (EXPERIMENTAL)" - depends on EXPERIMENTAL - select JBD2 - help - Ext4dev is a predecessor filesystem of the next generation - extended fs ext4, based on ext3 filesystem code. It will be - renamed ext4 fs later, once ext4dev is mature and stabilized. - - Unlike the change from ext2 filesystem to ext3 filesystem, - the on-disk format of ext4dev is not the same as ext3 any more: - it is based on extent maps and it supports 48-bit physical block - numbers. These combined on-disk format changes will allow - ext4dev/ext4 to handle more than 16 TB filesystem volumes -- - a hard limit that ext3 cannot overcome without changing the - on-disk format. - - Other than extent maps and 48-bit block numbers, ext4dev also is - likely to have other new features such as persistent preallocation, - high resolution time stamps, and larger file support etc. These - features will be added to ext4dev gradually. - - To compile this file system support as a module, choose M here. The - module will be called ext4dev. Be aware, however, that the filesystem - of your root partition (the one containing the directory /) cannot - be compiled as a module, and so this could be dangerous. - - If unsure, say N. - -config EXT4DEV_FS_XATTR - bool "Ext4dev extended attributes" - depends on EXT4DEV_FS - default y - help - Extended attributes are name:value pairs associated with inodes by - the kernel or by users (see the attr(5) manual page, or visit - for details). - - If unsure, say N. - - You need this for POSIX ACL support on ext4dev/ext4. - -config EXT4DEV_FS_POSIX_ACL - bool "Ext4dev POSIX Access Control Lists" - depends on EXT4DEV_FS_XATTR - select FS_POSIX_ACL - help - POSIX Access Control Lists (ACLs) support permissions for users and - groups beyond the owner/group/world scheme. - - To learn more about Access Control Lists, visit the POSIX ACLs for - Linux website . - - If you don't know what Access Control Lists are, say N - -config EXT4DEV_FS_SECURITY - bool "Ext4dev Security Labels" - depends on EXT4DEV_FS_XATTR - help - Security labels support alternative access control models - implemented by security modules like SELinux. This option - enables an extended attribute handler for file security - labels in the ext4dev/ext4 filesystem. - - If you are not using a security module that requires using - extended attributes for file security labels, say N. - config JBD tristate help @@ -239,44 +172,12 @@ config JBD_DEBUG generated. To turn debugging off again, do "echo 0 > /proc/sys/fs/jbd-debug". -config JBD2 - tristate - help - This is a generic journaling layer for block devices that support - both 32-bit and 64-bit block numbers. It is currently used by - the ext4dev/ext4 filesystem, but it could also be used to add - journal support to other file systems or block devices such - as RAID or LVM. - - If you are using ext4dev/ext4, you need to say Y here. If you are not - using ext4dev/ext4 then you will probably want to say N. - - To compile this device as a module, choose M here. The module will be - called jbd2. If you are compiling ext4dev/ext4 into the kernel, - you cannot compile this code as a module. - -config JBD2_DEBUG - bool "JBD2 (ext4dev/ext4) debugging support" - depends on JBD2 - help - If you are using the ext4dev/ext4 journaled file system (or - potentially any other filesystem/device using JBD2), this option - allows you to enable debugging output while the system is running, - in order to help track down any problems you are having. - By default, the debugging output will be turned off. - - If you select Y here, then you will be able to turn on debugging - with "echo N > /proc/sys/fs/jbd2-debug", where N is a number between - 1 and 5. The higher the number, the more debugging output is - generated. To turn debugging off again, do - "echo 0 > /proc/sys/fs/jbd2-debug". - config FS_MBCACHE -# Meta block cache for Extended Attributes (ext2/ext3/ext4) +# Meta block cache for Extended Attributes (ext2/ext3) tristate - depends on EXT2_FS_XATTR || EXT3_FS_XATTR || EXT4DEV_FS_XATTR - default y if EXT2_FS=y || EXT3_FS=y || EXT4DEV_FS=y - default m if EXT2_FS=m || EXT3_FS=m || EXT4DEV_FS=m + depends on EXT2_FS_XATTR || EXT3_FS_XATTR + default y if EXT2_FS=y || EXT3_FS=y + default m if EXT2_FS=m || EXT3_FS=m config REISERFS_FS tristate "Reiserfs support" @@ -1986,7 +1887,7 @@ config CIFS_EXPERIMENTAL config CIFS_UPCALL bool "Kerberos/SPNEGO advanced session setup (EXPERIMENTAL)" depends on CIFS_EXPERIMENTAL - depends on CONNECTOR + select CONNECTOR help Enables an upcall mechanism for CIFS which will be used to contact userspace helper utilities to provide SPNEGO packaged Kerberos diff --git a/trunk/fs/Makefile b/trunk/fs/Makefile index 9a5ce9323bfd..df614eacee86 100644 --- a/trunk/fs/Makefile +++ b/trunk/fs/Makefile @@ -62,9 +62,7 @@ obj-$(CONFIG_DLM) += dlm/ # Do not add any filesystems before this line obj-$(CONFIG_REISERFS_FS) += reiserfs/ obj-$(CONFIG_EXT3_FS) += ext3/ # Before ext2 so root fs can be ext3 -obj-$(CONFIG_EXT4DEV_FS) += ext4/ # Before ext2 so root fs can be ext4dev obj-$(CONFIG_JBD) += jbd/ -obj-$(CONFIG_JBD2) += jbd2/ obj-$(CONFIG_EXT2_FS) += ext2/ obj-$(CONFIG_CRAMFS) += cramfs/ obj-$(CONFIG_RAMFS) += ramfs/ diff --git a/trunk/fs/afs/dir.c b/trunk/fs/afs/dir.c index a6ec75c56fcf..cf8a2cb28505 100644 --- a/trunk/fs/afs/dir.c +++ b/trunk/fs/afs/dir.c @@ -211,8 +211,8 @@ static int afs_dir_open(struct inode *inode, struct file *file) { _enter("{%lu}", inode->i_ino); - BUILD_BUG_ON(sizeof(union afs_dir_block) != 2048); - BUILD_BUG_ON(sizeof(union afs_dirent) != 32); + BUG_ON(sizeof(union afs_dir_block) != 2048); + BUG_ON(sizeof(union afs_dirent) != 32); if (AFS_FS_I(inode)->flags & AFS_VNODE_DELETED) return -ENOENT; @@ -446,8 +446,8 @@ static struct dentry *afs_dir_lookup(struct inode *dir, struct dentry *dentry, _enter("{%lu},%p{%s}", dir->i_ino, dentry, dentry->d_name.name); /* insanity checks first */ - BUILD_BUG_ON(sizeof(union afs_dir_block) != 2048); - BUILD_BUG_ON(sizeof(union afs_dirent) != 32); + BUG_ON(sizeof(union afs_dir_block) != 2048); + BUG_ON(sizeof(union afs_dirent) != 32); if (dentry->d_name.len > 255) { _leave(" = -ENAMETOOLONG"); diff --git a/trunk/fs/autofs4/autofs_i.h b/trunk/fs/autofs4/autofs_i.h index b13f32c8aeee..480ab178cba5 100644 --- a/trunk/fs/autofs4/autofs_i.h +++ b/trunk/fs/autofs4/autofs_i.h @@ -94,6 +94,7 @@ struct autofs_wait_queue { struct autofs_sb_info { u32 magic; + struct dentry *root; int pipefd; struct file *pipe; pid_t oz_pgrp; @@ -228,4 +229,4 @@ static inline int __simple_empty(struct dentry *dentry) } void autofs4_dentry_release(struct dentry *); -extern void autofs4_kill_sb(struct super_block *); + diff --git a/trunk/fs/autofs4/init.c b/trunk/fs/autofs4/init.c index 723a1c5e361b..5d9193332bef 100644 --- a/trunk/fs/autofs4/init.c +++ b/trunk/fs/autofs4/init.c @@ -24,7 +24,7 @@ static struct file_system_type autofs_fs_type = { .owner = THIS_MODULE, .name = "autofs", .get_sb = autofs_get_sb, - .kill_sb = autofs4_kill_sb, + .kill_sb = kill_anon_super, }; static int __init init_autofs4_fs(void) diff --git a/trunk/fs/autofs4/inode.c b/trunk/fs/autofs4/inode.c index 51fd8595bf85..800ce876caec 100644 --- a/trunk/fs/autofs4/inode.c +++ b/trunk/fs/autofs4/inode.c @@ -96,7 +96,7 @@ void autofs4_free_ino(struct autofs_info *ino) */ static void autofs4_force_release(struct autofs_sb_info *sbi) { - struct dentry *this_parent = sbi->sb->s_root; + struct dentry *this_parent = sbi->root; struct list_head *next; spin_lock(&dcache_lock); @@ -127,7 +127,7 @@ static void autofs4_force_release(struct autofs_sb_info *sbi) spin_lock(&dcache_lock); } - if (this_parent != sbi->sb->s_root) { + if (this_parent != sbi->root) { struct dentry *dentry = this_parent; next = this_parent->d_u.d_child.next; @@ -140,9 +140,15 @@ static void autofs4_force_release(struct autofs_sb_info *sbi) goto resume; } spin_unlock(&dcache_lock); + + dput(sbi->root); + sbi->root = NULL; + shrink_dcache_sb(sbi->sb); + + return; } -void autofs4_kill_sb(struct super_block *sb) +static void autofs4_put_super(struct super_block *sb) { struct autofs_sb_info *sbi = autofs4_sbi(sb); @@ -157,7 +163,6 @@ void autofs4_kill_sb(struct super_block *sb) kfree(sbi); DPRINTK("shutting down"); - kill_anon_super(sb); } static int autofs4_show_options(struct seq_file *m, struct vfsmount *mnt) @@ -184,6 +189,7 @@ static int autofs4_show_options(struct seq_file *m, struct vfsmount *mnt) } static struct super_operations autofs4_sops = { + .put_super = autofs4_put_super, .statfs = simple_statfs, .show_options = autofs4_show_options, }; @@ -309,6 +315,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent) s->s_fs_info = sbi; sbi->magic = AUTOFS_SBI_MAGIC; + sbi->root = NULL; sbi->pipefd = -1; sbi->catatonic = 0; sbi->exp_timeout = 0; @@ -389,6 +396,13 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent) sbi->pipe = pipe; sbi->pipefd = pipefd; + /* + * Take a reference to the root dentry so we get a chance to + * clean up the dentry tree on umount. + * See autofs4_force_release. + */ + sbi->root = dget(root); + /* * Success! Install the root dentry now to indicate completion. */ diff --git a/trunk/fs/autofs4/waitq.c b/trunk/fs/autofs4/waitq.c index c0a6c8d445c7..ce103e7b0bc3 100644 --- a/trunk/fs/autofs4/waitq.c +++ b/trunk/fs/autofs4/waitq.c @@ -45,6 +45,7 @@ void autofs4_catatonic_mode(struct autofs_sb_info *sbi) fput(sbi->pipe); /* Close the pipe */ sbi->pipe = NULL; } + shrink_dcache_sb(sbi->sb); } static int autofs4_write(struct file *file, const void *addr, int bytes) diff --git a/trunk/fs/binfmt_elf.c b/trunk/fs/binfmt_elf.c index 79b05a1a4365..06435f3665f4 100644 --- a/trunk/fs/binfmt_elf.c +++ b/trunk/fs/binfmt_elf.c @@ -1152,7 +1152,7 @@ static int dump_write(struct file *file, const void *addr, int nr) static int dump_seek(struct file *file, loff_t off) { if (file->f_op->llseek && file->f_op->llseek != no_llseek) { - if (file->f_op->llseek(file, off, SEEK_CUR) < 0) + if (file->f_op->llseek(file, off, 1) != off) return 0; } else { char *buf = (char *)get_zeroed_page(GFP_KERNEL); @@ -1220,7 +1220,7 @@ static int notesize(struct memelfnote *en) static int alignfile(struct file *file, loff_t *foffset) { - static const char buf[4] = { 0, }; + char buf[4] = { 0, }; DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset); return 1; } @@ -1569,8 +1569,7 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file) DUMP_WRITE(elf, sizeof(*elf)); offset += sizeof(*elf); /* Elf header */ - offset += (segs + 1) * sizeof(struct elf_phdr); /* Program headers */ - foffset = offset; + offset += (segs+1) * sizeof(struct elf_phdr); /* Program headers */ /* Write notes phdr entry */ { @@ -1587,6 +1586,8 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file) DUMP_WRITE(&phdr, sizeof(phdr)); } + foffset = offset; + dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE); /* Write program headers for segments dump */ @@ -1611,6 +1612,7 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file) phdr.p_align = ELF_EXEC_PAGESIZE; DUMP_WRITE(&phdr, sizeof(phdr)); + foffset += sizeof(phdr); } #ifdef ELF_CORE_WRITE_EXTRA_PHDRS diff --git a/trunk/fs/bio.c b/trunk/fs/bio.c index f95c8749499f..8f93e939f213 100644 --- a/trunk/fs/bio.c +++ b/trunk/fs/bio.c @@ -79,6 +79,7 @@ static struct bio_set *fs_bio_set; static inline struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx, struct bio_set *bs) { struct bio_vec *bvl; + struct biovec_slab *bp; /* * see comment near bvec_array define! @@ -97,12 +98,10 @@ static inline struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned lon * idx now points to the pool we want to allocate from */ + bp = bvec_slabs + *idx; bvl = mempool_alloc(bs->bvec_pools[*idx], gfp_mask); - if (bvl) { - struct biovec_slab *bp = bvec_slabs + *idx; - + if (bvl) memset(bvl, 0, bp->nr_vecs * sizeof(struct bio_vec)); - } return bvl; } @@ -167,7 +166,7 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) bio_init(bio); if (likely(nr_iovecs)) { - unsigned long idx = 0; /* shut up gcc */ + unsigned long idx; bvl = bvec_alloc_bs(gfp_mask, nr_iovecs, &idx, bs); if (unlikely(!bvl)) { diff --git a/trunk/fs/buffer.c b/trunk/fs/buffer.c index f65ef8821c73..eeb8ac1aa856 100644 --- a/trunk/fs/buffer.c +++ b/trunk/fs/buffer.c @@ -1042,21 +1042,8 @@ grow_buffers(struct block_device *bdev, sector_t block, int size) } while ((size << sizebits) < PAGE_SIZE); index = block >> sizebits; - - /* - * Check for a block which wants to lie outside our maximum possible - * pagecache index. (this comparison is done using sector_t types). - */ - if (unlikely(index != block >> sizebits)) { - char b[BDEVNAME_SIZE]; - - printk(KERN_ERR "%s: requested out-of-range block %llu for " - "device %s\n", - __FUNCTION__, (unsigned long long)block, - bdevname(bdev, b)); - return -EIO; - } block = index << sizebits; + /* Create a page with the proper size buffers.. */ page = grow_dev_page(bdev, block, index, size); if (!page) @@ -1083,16 +1070,12 @@ __getblk_slow(struct block_device *bdev, sector_t block, int size) for (;;) { struct buffer_head * bh; - int ret; bh = __find_get_block(bdev, block, size); if (bh) return bh; - ret = grow_buffers(bdev, block, size); - if (ret < 0) - return NULL; - if (ret == 0) + if (!grow_buffers(bdev, block, size)) free_more_memory(); } } @@ -1854,7 +1837,6 @@ static int __block_prepare_write(struct inode *inode, struct page *page, clear_buffer_new(bh); kaddr = kmap_atomic(page, KM_USER0); memset(kaddr+block_start, 0, bh->b_size); - flush_dcache_page(page); kunmap_atomic(kaddr, KM_USER0); set_buffer_uptodate(bh); mark_buffer_dirty(bh); @@ -2361,7 +2343,6 @@ int nobh_prepare_write(struct page *page, unsigned from, unsigned to, */ kaddr = kmap_atomic(page, KM_USER0); memset(kaddr, 0, PAGE_CACHE_SIZE); - flush_dcache_page(page); kunmap_atomic(kaddr, KM_USER0); SetPageUptodate(page); set_page_dirty(page); diff --git a/trunk/fs/cifs/cifsacl.h b/trunk/fs/cifs/cifsacl.h index 5eff35d6e564..d0776ac2b804 100644 --- a/trunk/fs/cifs/cifsacl.h +++ b/trunk/fs/cifs/cifsacl.h @@ -31,8 +31,8 @@ struct cifs_sid { } __attribute__((packed)); /* everyone */ -/* extern const struct cifs_sid sid_everyone;*/ +extern const struct cifs_sid sid_everyone; /* group users */ -/* extern const struct cifs_sid sid_user;*/ +extern const struct cifs_sid sid_user; #endif /* _CIFSACL_H */ diff --git a/trunk/fs/cifs/cifsencrypt.h b/trunk/fs/cifs/cifsencrypt.h index 152fa2dcfc6c..03e359b32861 100644 --- a/trunk/fs/cifs/cifsencrypt.h +++ b/trunk/fs/cifs/cifsencrypt.h @@ -27,6 +27,8 @@ extern void mdfour(unsigned char *out, unsigned char *in, int n); /* smbdes.c */ extern void E_P16(unsigned char *p14, unsigned char *p16); extern void E_P24(unsigned char *p21, unsigned char *c8, unsigned char *p24); +extern void D_P16(unsigned char *p14, unsigned char *in, unsigned char *out); +extern void E_old_pw_hash(unsigned char *, unsigned char *, unsigned char *); diff --git a/trunk/fs/cifs/cifsfs.c b/trunk/fs/cifs/cifsfs.c index 84976cdbe713..c00c654f2e11 100644 --- a/trunk/fs/cifs/cifsfs.c +++ b/trunk/fs/cifs/cifsfs.c @@ -63,7 +63,6 @@ extern struct task_struct * oplockThread; /* remove sparse warning */ struct task_struct * oplockThread = NULL; extern struct task_struct * dnotifyThread; /* remove sparse warning */ struct task_struct * dnotifyThread = NULL; -static struct super_operations cifs_super_ops; unsigned int CIFSMaxBufSize = CIFS_MAX_MSGSIZE; module_param(CIFSMaxBufSize, int, 0); MODULE_PARM_DESC(CIFSMaxBufSize,"Network buffer size (not including header). Default: 16384 Range: 8192 to 130048"); @@ -199,12 +198,10 @@ cifs_statfs(struct dentry *dentry, struct kstatfs *buf) /* Only need to call the old QFSInfo if failed on newer one */ if(rc) - if(pTcon->ses->capabilities & CAP_NT_SMBS) - rc = CIFSSMBQFSInfo(xid, pTcon, buf); /* not supported by OS2 */ + rc = CIFSSMBQFSInfo(xid, pTcon, buf); - /* Some old Windows servers also do not support level 103, retry with - older level one if old server failed the previous call or we - bypassed it because we detected that this was an older LANMAN sess */ + /* Old Windows servers do not support level 103, retry with level + one if old server failed the previous call */ if(rc) rc = SMBOldQFSInfo(xid, pTcon, buf); /* @@ -438,21 +435,13 @@ static void cifs_umount_begin(struct vfsmount * vfsmnt, int flags) return; } -#ifdef CONFIG_CIFS_STATS2 -static int cifs_show_stats(struct seq_file *s, struct vfsmount *mnt) -{ - /* BB FIXME */ - return 0; -} -#endif - static int cifs_remount(struct super_block *sb, int *flags, char *data) { *flags |= MS_NODIRATIME; return 0; } -static struct super_operations cifs_super_ops = { +struct super_operations cifs_super_ops = { .read_inode = cifs_read_inode, .put_super = cifs_put_super, .statfs = cifs_statfs, @@ -465,9 +454,6 @@ static struct super_operations cifs_super_ops = { .show_options = cifs_show_options, .umount_begin = cifs_umount_begin, .remount_fs = cifs_remount, -#ifdef CONFIG_CIFS_STATS2 - .show_stats = cifs_show_stats, -#endif }; static int @@ -509,7 +495,7 @@ static ssize_t cifs_file_aio_write(struct kiocb *iocb, const struct iovec *iov, static loff_t cifs_llseek(struct file *file, loff_t offset, int origin) { /* origin == SEEK_END => we must revalidate the cached file length */ - if (origin == SEEK_END) { + if (origin == 2) { int retval = cifs_revalidate(file->f_dentry); if (retval < 0) return (loff_t)retval; @@ -917,7 +903,7 @@ init_cifs(void) #ifdef CONFIG_PROC_FS cifs_proc_init(); #endif -/* INIT_LIST_HEAD(&GlobalServerList);*/ /* BB not implemented yet */ + INIT_LIST_HEAD(&GlobalServerList); /* BB not implemented yet */ INIT_LIST_HEAD(&GlobalSMBSessionList); INIT_LIST_HEAD(&GlobalTreeConnectionList); INIT_LIST_HEAD(&GlobalOplock_Q); @@ -945,7 +931,6 @@ init_cifs(void) GlobalCurrentXid = 0; GlobalTotalActiveXid = 0; GlobalMaxActiveXid = 0; - memset(Local_System_Name, 0, 15); rwlock_init(&GlobalSMBSeslock); spin_lock_init(&GlobalMid_Lock); diff --git a/trunk/fs/cifs/cifsfs.h b/trunk/fs/cifs/cifsfs.h index a243f779b363..bea875d9a46a 100644 --- a/trunk/fs/cifs/cifsfs.h +++ b/trunk/fs/cifs/cifsfs.h @@ -36,7 +36,7 @@ extern const struct address_space_operations cifs_addr_ops; extern const struct address_space_operations cifs_addr_ops_smallbuf; /* Functions related to super block operations */ -/* extern struct super_operations cifs_super_ops;*/ +extern struct super_operations cifs_super_ops; extern void cifs_read_inode(struct inode *); extern void cifs_delete_inode(struct inode *); /* extern void cifs_write_inode(struct inode *); *//* BB not needed yet */ diff --git a/trunk/fs/cifs/cifsglob.h b/trunk/fs/cifs/cifsglob.h index 74d3ccbb103b..b24006c47df1 100644 --- a/trunk/fs/cifs/cifsglob.h +++ b/trunk/fs/cifs/cifsglob.h @@ -153,7 +153,7 @@ struct TCP_Server_Info { char sessid[4]; /* unique token id for this session */ /* (returned on Negotiate */ int capabilities; /* allow selective disabling of caps by smb sess */ - int timeAdj; /* Adjust for difference in server time zone in sec */ + __u16 timeZone; __u16 CurrentMid; /* multiplex id - rotating counter */ char cryptKey[CIFS_CRYPTO_KEY_SIZE]; /* 16th byte of RFC1001 workstation name is always null */ @@ -203,14 +203,9 @@ struct cifsSesInfo { char * domainName; char * password; }; -/* no more than one of the following three session flags may be set */ +/* session flags */ #define CIFS_SES_NT4 1 -#define CIFS_SES_OS2 2 -#define CIFS_SES_W9X 4 -/* following flag is set for old servers such as OS2 (and Win95?) - which do not negotiate NTLM or POSIX dialects, but instead - negotiate one of the older LANMAN dialects */ -#define CIFS_SES_LANMAN 8 + /* * there is one of these for each connection to a resource on a particular * session @@ -517,8 +512,7 @@ require use of the stronger protocol */ * This list helps improve performance and eliminate the messages indicating * that we had a communications error talking to the server in this list. */ -/* Feature not supported */ -/* GLOBAL_EXTERN struct servers_not_supported *NotSuppList; */ +GLOBAL_EXTERN struct servers_not_supported *NotSuppList; /*@z4a */ /* * The following is a hash table of all the users we know about. @@ -574,6 +568,7 @@ GLOBAL_EXTERN unsigned int lookupCacheEnabled; GLOBAL_EXTERN unsigned int extended_security; /* if on, session setup sent with more secure ntlmssp2 challenge/resp */ GLOBAL_EXTERN unsigned int sign_CIFS_PDUs; /* enable smb packet signing */ +GLOBAL_EXTERN unsigned int secFlags; GLOBAL_EXTERN unsigned int linuxExtEnabled;/*enable Linux/Unix CIFS extensions*/ GLOBAL_EXTERN unsigned int CIFSMaxBufSize; /* max size not including hdr */ GLOBAL_EXTERN unsigned int cifs_min_rcv; /* min size of big ntwrk buf pool */ diff --git a/trunk/fs/cifs/cifspdu.h b/trunk/fs/cifs/cifspdu.h index 6df9dadba647..81df2bf8e75a 100644 --- a/trunk/fs/cifs/cifspdu.h +++ b/trunk/fs/cifs/cifspdu.h @@ -26,8 +26,7 @@ #ifdef CONFIG_CIFS_WEAK_PW_HASH #define LANMAN_PROT 0 -#define LANMAN2_PROT 1 -#define CIFS_PROT 2 +#define CIFS_PROT 1 #else #define CIFS_PROT 0 #endif @@ -409,8 +408,6 @@ typedef struct negotiate_req { /* Dialect index is 13 for LANMAN */ -#define MIN_TZ_ADJ (15 * 60) /* minimum grid for timezones in seconds */ - typedef struct lanman_neg_rsp { struct smb_hdr hdr; /* wct = 13 */ __le16 DialectIndex; @@ -420,10 +417,7 @@ typedef struct lanman_neg_rsp { __le16 MaxNumberVcs; __le16 RawMode; __le32 SessionKey; - struct { - __le16 Time; - __le16 Date; - } __attribute__((packed)) SrvTime; + __le32 ServerTime; __le16 ServerTimeZone; __le16 EncryptionKeyLength; __le16 Reserved; @@ -680,7 +674,7 @@ typedef union smb_com_tree_disconnect { /* as an altetnative can use flag on typedef struct smb_com_close_req { struct smb_hdr hdr; /* wct = 3 */ __u16 FileID; - __u32 LastWriteTime; /* should be zero or -1 */ + __u32 LastWriteTime; /* should be zero */ __u16 ByteCount; /* 0 */ } __attribute__((packed)) CLOSE_REQ; diff --git a/trunk/fs/cifs/cifsproto.h b/trunk/fs/cifs/cifsproto.h index f1f8225102f0..b35c55c3c8bb 100644 --- a/trunk/fs/cifs/cifsproto.h +++ b/trunk/fs/cifs/cifsproto.h @@ -50,12 +50,12 @@ extern int SendReceive(const unsigned int /* xid */ , struct cifsSesInfo *, extern int SendReceive2(const unsigned int /* xid */ , struct cifsSesInfo *, struct kvec *, int /* nvec to send */, int * /* type of buf returned */ , const int long_op); -extern int SendReceiveBlockingLock(const unsigned int /* xid */ , - struct cifsTconInfo *, +extern int SendReceiveBlockingLock(const unsigned int /* xid */ , struct cifsTconInfo *, struct smb_hdr * /* input */ , struct smb_hdr * /* out */ , int * /* bytes returned */); -extern int checkSMB(struct smb_hdr *smb, __u16 mid, unsigned int length); +extern int checkSMBhdr(struct smb_hdr *smb, __u16 mid); +extern int checkSMB(struct smb_hdr *smb, __u16 mid, int length); extern int is_valid_oplock_break(struct smb_hdr *smb, struct TCP_Server_Info *); extern int is_size_safe_to_change(struct cifsInodeInfo *); extern struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *); @@ -80,9 +80,6 @@ extern struct oplock_q_entry * AllocOplockQEntry(struct inode *, u16, extern void DeleteOplockQEntry(struct oplock_q_entry *); extern struct timespec cifs_NTtimeToUnix(u64 /* utc nanoseconds since 1601 */ ); extern u64 cifs_UnixTimeToNT(struct timespec); -extern __le64 cnvrtDosCifsTm(__u16 date, __u16 time); -extern struct timespec cnvrtDosUnixTm(__u16 date, __u16 time); - extern int cifs_get_inode_info(struct inode **pinode, const unsigned char *search_path, FILE_ALL_INFO * pfile_info, @@ -119,7 +116,6 @@ extern int CIFSFindClose(const int, struct cifsTconInfo *tcon, extern int CIFSSMBQPathInfo(const int xid, struct cifsTconInfo *tcon, const unsigned char *searchName, FILE_ALL_INFO * findData, - int legacy /* whether to use old info level */, const struct nls_table *nls_codepage, int remap); extern int SMBQueryInformation(const int xid, struct cifsTconInfo *tcon, const unsigned char *searchName, @@ -283,6 +279,8 @@ extern void sesInfoFree(struct cifsSesInfo *); extern struct cifsTconInfo *tconInfoAlloc(void); extern void tconInfoFree(struct cifsTconInfo *); +extern int cifs_reconnect(struct TCP_Server_Info *server); + extern int cifs_sign_smb(struct smb_hdr *, struct TCP_Server_Info *,__u32 *); extern int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *, __u32 *); diff --git a/trunk/fs/cifs/cifssmb.c b/trunk/fs/cifs/cifssmb.c index 098790eb2aa1..075d8fb3d376 100644 --- a/trunk/fs/cifs/cifssmb.c +++ b/trunk/fs/cifs/cifssmb.c @@ -46,7 +46,6 @@ static struct { } protocols[] = { #ifdef CONFIG_CIFS_WEAK_PW_HASH {LANMAN_PROT, "\2LM1.2X002"}, - {LANMAN2_PROT, "\2LANMAN2.1"}, #endif /* weak password hashing for legacy clients */ {CIFS_PROT, "\2NT LM 0.12"}, {POSIX_PROT, "\2POSIX 2"}, @@ -59,7 +58,6 @@ static struct { } protocols[] = { #ifdef CONFIG_CIFS_WEAK_PW_HASH {LANMAN_PROT, "\2LM1.2X002"}, - {LANMAN2_PROT, "\2LANMAN2.1"}, #endif /* weak password hashing for legacy clients */ {CIFS_PROT, "\2NT LM 0.12"}, {BAD_PROT, "\2"} @@ -69,13 +67,13 @@ static struct { /* define the number of elements in the cifs dialect array */ #ifdef CONFIG_CIFS_POSIX #ifdef CONFIG_CIFS_WEAK_PW_HASH -#define CIFS_NUM_PROT 4 +#define CIFS_NUM_PROT 3 #else #define CIFS_NUM_PROT 2 #endif /* CIFS_WEAK_PW_HASH */ #else /* not posix */ #ifdef CONFIG_CIFS_WEAK_PW_HASH -#define CIFS_NUM_PROT 3 +#define CIFS_NUM_PROT 2 #else #define CIFS_NUM_PROT 1 #endif /* CONFIG_CIFS_WEAK_PW_HASH */ @@ -399,7 +397,6 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses) struct TCP_Server_Info * server; u16 count; unsigned int secFlags; - u16 dialect; if(ses->server) server = ses->server; @@ -439,10 +436,9 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses) if (rc != 0) goto neg_err_exit; - dialect = le16_to_cpu(pSMBr->DialectIndex); - cFYI(1,("Dialect: %d", dialect)); + cFYI(1,("Dialect: %d", pSMBr->DialectIndex)); /* Check wct = 1 error case */ - if((pSMBr->hdr.WordCount < 13) || (dialect == BAD_PROT)) { + if((pSMBr->hdr.WordCount < 13) || (pSMBr->DialectIndex == BAD_PROT)) { /* core returns wct = 1, but we do not ask for core - otherwise small wct just comes when dialect index is -1 indicating we could not negotiate a common dialect */ @@ -450,9 +446,7 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses) goto neg_err_exit; #ifdef CONFIG_CIFS_WEAK_PW_HASH } else if((pSMBr->hdr.WordCount == 13) - && ((dialect == LANMAN_PROT) - || (dialect == LANMAN2_PROT))) { - __s16 tmp; + && (pSMBr->DialectIndex == LANMAN_PROT)) { struct lanman_neg_rsp * rsp = (struct lanman_neg_rsp *)pSMBr; if((secFlags & CIFSSEC_MAY_LANMAN) || @@ -478,44 +472,12 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses) server->maxRw = 0;/* we do not need to use raw anyway */ server->capabilities = CAP_MPX_MODE; } - tmp = (__s16)le16_to_cpu(rsp->ServerTimeZone); - if (tmp == -1) { - /* OS/2 often does not set timezone therefore - * we must use server time to calc time zone. - * Could deviate slightly from the right zone. - * Smallest defined timezone difference is 15 minutes - * (i.e. Nepal). Rounding up/down is done to match - * this requirement. - */ - int val, seconds, remain, result; - struct timespec ts, utc; - utc = CURRENT_TIME; - ts = cnvrtDosUnixTm(le16_to_cpu(rsp->SrvTime.Date), - le16_to_cpu(rsp->SrvTime.Time)); - cFYI(1,("SrvTime: %d sec since 1970 (utc: %d) diff: %d", - (int)ts.tv_sec, (int)utc.tv_sec, - (int)(utc.tv_sec - ts.tv_sec))); - val = (int)(utc.tv_sec - ts.tv_sec); - seconds = val < 0 ? -val : val; - result = (seconds / MIN_TZ_ADJ) * MIN_TZ_ADJ; - remain = seconds % MIN_TZ_ADJ; - if(remain >= (MIN_TZ_ADJ / 2)) - result += MIN_TZ_ADJ; - if(val < 0) - result = - result; - server->timeAdj = result; - } else { - server->timeAdj = (int)tmp; - server->timeAdj *= 60; /* also in seconds */ - } - cFYI(1,("server->timeAdj: %d seconds", server->timeAdj)); - + server->timeZone = le16_to_cpu(rsp->ServerTimeZone); /* BB get server time for time conversions and add code to use it and timezone since this is not UTC */ - if (rsp->EncryptionKeyLength == - cpu_to_le16(CIFS_CRYPTO_KEY_SIZE)) { + if (rsp->EncryptionKeyLength == cpu_to_le16(CIFS_CRYPTO_KEY_SIZE)) { memcpy(server->cryptKey, rsp->EncryptionKey, CIFS_CRYPTO_KEY_SIZE); } else if (server->secMode & SECMODE_PW_ENCRYPT) { @@ -569,8 +531,7 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses) cFYI(0, ("Max buf = %d", ses->server->maxBuf)); GETU32(ses->server->sessid) = le32_to_cpu(pSMBr->SessionKey); server->capabilities = le32_to_cpu(pSMBr->Capabilities); - server->timeAdj = (int)(__s16)le16_to_cpu(pSMBr->ServerTimeZone); - server->timeAdj *= 60; + server->timeZone = le16_to_cpu(pSMBr->ServerTimeZone); if (pSMBr->EncryptionKeyLength == CIFS_CRYPTO_KEY_SIZE) { memcpy(server->cryptKey, pSMBr->u.EncryptionKey, CIFS_CRYPTO_KEY_SIZE); @@ -1656,7 +1617,7 @@ CIFSSMBClose(const int xid, struct cifsTconInfo *tcon, int smb_file_id) pSMBr = (CLOSE_RSP *)pSMB; /* BB removeme BB */ pSMB->FileID = (__u16) smb_file_id; - pSMB->LastWriteTime = 0xFFFFFFFF; + pSMB->LastWriteTime = 0; pSMB->ByteCount = 0; rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, (struct smb_hdr *) pSMBr, &bytes_returned, 0); @@ -2812,11 +2773,9 @@ CIFSGetExtAttr(const int xid, struct cifsTconInfo *tcon, /* security id for everyone */ -const static struct cifs_sid sid_everyone = - {1, 1, {0, 0, 0, 0, 0, 0}, {0, 0, 0, 0}}; +const struct cifs_sid sid_everyone = {1, 1, {0, 0, 0, 0, 0, 0}, {0, 0, 0, 0}}; /* group users */ -const static struct cifs_sid sid_user = - {1, 2 , {0, 0, 0, 0, 0, 5}, {32, 545, 0, 0}}; +const struct cifs_sid sid_user = {1, 2 , {0, 0, 0, 0, 0, 5}, {32, 545, 0, 0}}; /* Convert CIFS ACL to POSIX form */ static int parse_sec_desc(struct cifs_sid * psec_desc, int acl_len) @@ -2897,6 +2856,7 @@ CIFSSMBGetCIFSACL(const int xid, struct cifsTconInfo *tcon, __u16 fid, return rc; } + /* Legacy Query Path Information call for lookup to old servers such as Win9x/WinME */ int SMBQueryInformation(const int xid, struct cifsTconInfo *tcon, @@ -2938,16 +2898,7 @@ int SMBQueryInformation(const int xid, struct cifsTconInfo *tcon, if (rc) { cFYI(1, ("Send error in QueryInfo = %d", rc)); } else if (pFinfo) { /* decode response */ - struct timespec ts; - __u32 time = le32_to_cpu(pSMBr->last_write_time); - /* BB FIXME - add time zone adjustment BB */ memset(pFinfo, 0, sizeof(FILE_ALL_INFO)); - ts.tv_nsec = 0; - ts.tv_sec = time; - /* decode time fields */ - pFinfo->ChangeTime = cpu_to_le64(cifs_UnixTimeToNT(ts)); - pFinfo->LastWriteTime = pFinfo->ChangeTime; - pFinfo->LastAccessTime = 0; pFinfo->AllocationSize = cpu_to_le64(le32_to_cpu(pSMBr->size)); pFinfo->EndOfFile = pFinfo->AllocationSize; @@ -2971,7 +2922,6 @@ int CIFSSMBQPathInfo(const int xid, struct cifsTconInfo *tcon, const unsigned char *searchName, FILE_ALL_INFO * pFindData, - int legacy /* old style infolevel */, const struct nls_table *nls_codepage, int remap) { /* level 263 SMB_QUERY_FILE_ALL_INFO */ @@ -3020,10 +2970,7 @@ CIFSSMBQPathInfo(const int xid, struct cifsTconInfo *tcon, byte_count = params + 1 /* pad */ ; pSMB->TotalParameterCount = cpu_to_le16(params); pSMB->ParameterCount = pSMB->TotalParameterCount; - if(legacy) - pSMB->InformationLevel = cpu_to_le16(SMB_INFO_STANDARD); - else - pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FILE_ALL_INFO); + pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FILE_ALL_INFO); pSMB->Reserved4 = 0; pSMB->hdr.smb_buf_length += byte_count; pSMB->ByteCount = cpu_to_le16(byte_count); @@ -3035,24 +2982,13 @@ CIFSSMBQPathInfo(const int xid, struct cifsTconInfo *tcon, } else { /* decode response */ rc = validate_t2((struct smb_t2_rsp *)pSMBr); - if (rc) /* BB add auto retry on EOPNOTSUPP? */ - rc = -EIO; - else if (!legacy && (pSMBr->ByteCount < 40)) + if (rc || (pSMBr->ByteCount < 40)) rc = -EIO; /* bad smb */ - else if(legacy && (pSMBr->ByteCount < 24)) - rc = -EIO; /* 24 or 26 expected but we do not read last field */ else if (pFindData){ - int size; __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset); - if(legacy) /* we do not read the last field, EAsize, fortunately - since it varies by subdialect and on Set vs. Get, is - two bytes or 4 bytes depending but we don't care here */ - size = sizeof(FILE_INFO_STANDARD); - else - size = sizeof(FILE_ALL_INFO); memcpy((char *) pFindData, (char *) &pSMBr->hdr.Protocol + - data_offset, size); + data_offset, sizeof (FILE_ALL_INFO)); } else rc = -ENOMEM; } @@ -3677,14 +3613,6 @@ CIFSGetDFSRefer(const int xid, struct cifsSesInfo *ses, strncpy(pSMB->RequestFileName, searchName, name_len); } - if(ses->server) { - if(ses->server->secMode & - (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) - pSMB->hdr.Flags2 |= SMBFLG2_SECURITY_SIGNATURE; - } - - pSMB->hdr.Uid = ses->Suid; - params = 2 /* level */ + name_len /*includes null */ ; pSMB->TotalDataCount = 0; pSMB->DataCount = 0; diff --git a/trunk/fs/cifs/connect.c b/trunk/fs/cifs/connect.c index 4093d5332930..c78762051da4 100644 --- a/trunk/fs/cifs/connect.c +++ b/trunk/fs/cifs/connect.c @@ -109,7 +109,7 @@ static int ipv6_connect(struct sockaddr_in6 *psin_server, * wake up waiters on reconnection? - (not needed currently) */ -static int +int cifs_reconnect(struct TCP_Server_Info *server) { int rc = 0; @@ -771,18 +771,13 @@ cifs_parse_mount_options(char *options, const char *devname,struct smb_vol *vol) separator[0] = ','; separator[1] = 0; - if (Local_System_Name[0] != 0) - memcpy(vol->source_rfc1001_name, Local_System_Name,15); - else { - char *nodename = utsname()->nodename; - int n = strnlen(nodename,15); - memset(vol->source_rfc1001_name,0x20,15); - for(i=0 ; i < n ; i++) { - /* does not have to be perfect mapping since field is - informational, only used for servers that do not support - port 445 and it can be overridden at mount time */ - vol->source_rfc1001_name[i] = toupper(nodename[i]); - } + memset(vol->source_rfc1001_name,0x20,15); + for(i=0;i < strnlen(utsname()->nodename,15);i++) { + /* does not have to be a perfect mapping since the field is + informational, only used for servers that do not support + port 445 and it can be overridden at mount time */ + vol->source_rfc1001_name[i] = + toupper(utsname()->nodename[i]); } vol->source_rfc1001_name[15] = 0; /* null target name indicates to use *SMBSERVR default called name @@ -3220,9 +3215,7 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses, } /* else do not bother copying these informational fields */ } - if((smb_buffer_response->WordCount == 3) || - (smb_buffer_response->WordCount == 7)) - /* field is in same location */ + if(smb_buffer_response->WordCount == 3) tcon->Flags = le16_to_cpu(pSMBr->OptionalSupport); else tcon->Flags = 0; @@ -3319,21 +3312,19 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo, first_time = 1; } if (!rc) { - pSesInfo->flags = 0; pSesInfo->capabilities = pSesInfo->server->capabilities; if(linuxExtEnabled == 0) pSesInfo->capabilities &= (~CAP_UNIX); /* pSesInfo->sequence_number = 0;*/ - cFYI(1,("Security Mode: 0x%x Capabilities: 0x%x TimeAdjust: %d", + cFYI(1,("Security Mode: 0x%x Capabilities: 0x%x Time Zone: %d", pSesInfo->server->secMode, pSesInfo->server->capabilities, - pSesInfo->server->timeAdj)); + pSesInfo->server->timeZone)); if(experimEnabled < 2) rc = CIFS_SessSetup(xid, pSesInfo, first_time, nls_info); else if (extended_security - && (pSesInfo->capabilities - & CAP_EXTENDED_SECURITY) + && (pSesInfo->capabilities & CAP_EXTENDED_SECURITY) && (pSesInfo->server->secType == NTLMSSP)) { rc = -EOPNOTSUPP; } else if (extended_security @@ -3347,7 +3338,7 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo, if (!rc) { if(ntlmv2_flag) { char * v2_response; - cFYI(1,("more secure NTLM ver2 hash")); + cFYI(1,("Can use more secure NTLM version 2 password hash")); if(CalcNTLMv2_partial_mac_key(pSesInfo, nls_info)) { rc = -ENOMEM; diff --git a/trunk/fs/cifs/inode.c b/trunk/fs/cifs/inode.c index 35d54bb0869a..6b90ef98e4cf 100644 --- a/trunk/fs/cifs/inode.c +++ b/trunk/fs/cifs/inode.c @@ -337,7 +337,6 @@ int cifs_get_inode_info(struct inode **pinode, pfindData = (FILE_ALL_INFO *)buf; /* could do find first instead but this returns more info */ rc = CIFSSMBQPathInfo(xid, pTcon, search_path, pfindData, - 0 /* not legacy */, cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); /* BB optimize code so we do not make the above call @@ -385,10 +384,8 @@ int cifs_get_inode_info(struct inode **pinode, /* get new inode */ if (*pinode == NULL) { *pinode = new_inode(sb); - if (*pinode == NULL) { - kfree(buf); + if (*pinode == NULL) return -ENOMEM; - } /* Is an i_ino of zero legal? Can we use that to check if the server supports returning inode numbers? Are there other sanity checks we can use to ensure that @@ -434,11 +431,8 @@ int cifs_get_inode_info(struct inode **pinode, (pTcon->ses->server->maxBuf - MAX_CIFS_HDR_SIZE) & 0xFFFFFE00;*/ /* Linux can not store file creation time so ignore it */ - if(pfindData->LastAccessTime) - inode->i_atime = cifs_NTtimeToUnix - (le64_to_cpu(pfindData->LastAccessTime)); - else /* do not need to use current_fs_time - time not stored */ - inode->i_atime = CURRENT_TIME; + inode->i_atime = + cifs_NTtimeToUnix(le64_to_cpu(pfindData->LastAccessTime)); inode->i_mtime = cifs_NTtimeToUnix(le64_to_cpu(pfindData->LastWriteTime)); inode->i_ctime = diff --git a/trunk/fs/cifs/link.c b/trunk/fs/cifs/link.c index 0bee8b7e521a..a57f5d6e6213 100644 --- a/trunk/fs/cifs/link.c +++ b/trunk/fs/cifs/link.c @@ -254,11 +254,7 @@ cifs_readlink(struct dentry *direntry, char __user *pBuffer, int buflen) tmpbuffer, len - 1, cifs_sb->local_nls); - else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) { - cERROR(1,("SFU style symlinks not implemented yet")); - /* add open and read as in fs/cifs/inode.c */ - - } else { + else { rc = CIFSSMBOpen(xid, pTcon, full_path, FILE_OPEN, GENERIC_READ, OPEN_REPARSE_POINT,&fid, &oplock, NULL, cifs_sb->local_nls, diff --git a/trunk/fs/cifs/md5.c b/trunk/fs/cifs/md5.c index ccebf9b7eb86..7aa23490541f 100644 --- a/trunk/fs/cifs/md5.c +++ b/trunk/fs/cifs/md5.c @@ -252,11 +252,10 @@ MD5Transform(__u32 buf[4], __u32 const in[16]) buf[3] += d; } -#if 0 /* currently unused */ /*********************************************************************** the rfc 2104 version of hmac_md5 initialisation. ***********************************************************************/ -static void +void hmac_md5_init_rfc2104(unsigned char *key, int key_len, struct HMACMD5Context *ctx) { @@ -290,7 +289,6 @@ hmac_md5_init_rfc2104(unsigned char *key, int key_len, MD5Init(&ctx->ctx); MD5Update(&ctx->ctx, ctx->k_ipad, 64); } -#endif /*********************************************************************** the microsoft version of hmac_md5 initialisation. @@ -352,8 +350,7 @@ hmac_md5_final(unsigned char *digest, struct HMACMD5Context *ctx) single function to calculate an HMAC MD5 digest from data. use the microsoft hmacmd5 init method because the key is 16 bytes. ************************************************************/ -#if 0 /* currently unused */ -static void +void hmac_md5(unsigned char key[16], unsigned char *data, int data_len, unsigned char *digest) { @@ -364,4 +361,3 @@ hmac_md5(unsigned char key[16], unsigned char *data, int data_len, } hmac_md5_final(digest, &ctx); } -#endif diff --git a/trunk/fs/cifs/md5.h b/trunk/fs/cifs/md5.h index f7d4f4197bac..00e1c5394fe1 100644 --- a/trunk/fs/cifs/md5.h +++ b/trunk/fs/cifs/md5.h @@ -27,12 +27,12 @@ void MD5Final(unsigned char digest[16], struct MD5Context *context); /* The following definitions come from lib/hmacmd5.c */ -/* void hmac_md5_init_rfc2104(unsigned char *key, int key_len, - struct HMACMD5Context *ctx);*/ +void hmac_md5_init_rfc2104(unsigned char *key, int key_len, + struct HMACMD5Context *ctx); void hmac_md5_init_limK_to_64(const unsigned char *key, int key_len, struct HMACMD5Context *ctx); void hmac_md5_update(const unsigned char *text, int text_len, struct HMACMD5Context *ctx); void hmac_md5_final(unsigned char *digest, struct HMACMD5Context *ctx); -/* void hmac_md5(unsigned char key[16], unsigned char *data, int data_len, - unsigned char *digest);*/ +void hmac_md5(unsigned char key[16], unsigned char *data, int data_len, + unsigned char *digest); diff --git a/trunk/fs/cifs/misc.c b/trunk/fs/cifs/misc.c index bbc9cd34b6ea..22c937e5884f 100644 --- a/trunk/fs/cifs/misc.c +++ b/trunk/fs/cifs/misc.c @@ -389,7 +389,7 @@ header_assemble(struct smb_hdr *buffer, char smb_command /* command */ , return; } -static int +int checkSMBhdr(struct smb_hdr *smb, __u16 mid) { /* Make sure that this really is an SMB, that it is a response, @@ -418,42 +418,26 @@ checkSMBhdr(struct smb_hdr *smb, __u16 mid) } int -checkSMB(struct smb_hdr *smb, __u16 mid, unsigned int length) +checkSMB(struct smb_hdr *smb, __u16 mid, int length) { __u32 len = smb->smb_buf_length; __u32 clc_len; /* calculated length */ cFYI(0, ("checkSMB Length: 0x%x, smb_buf_length: 0x%x", length, len)); - - if (length < 2 + sizeof (struct smb_hdr)) { - if ((length >= sizeof (struct smb_hdr) - 1) + if (((unsigned int)length < 2 + sizeof (struct smb_hdr)) || + (len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4)) { + if ((unsigned int)length < 2 + sizeof (struct smb_hdr)) { + if (((unsigned int)length >= + sizeof (struct smb_hdr) - 1) && (smb->Status.CifsError != 0)) { - smb->WordCount = 0; - /* some error cases do not return wct and bcc */ - return 0; - } else if ((length == sizeof(struct smb_hdr) + 1) && - (smb->WordCount == 0)) { - char * tmp = (char *)smb; - /* Need to work around a bug in two servers here */ - /* First, check if the part of bcc they sent was zero */ - if (tmp[sizeof(struct smb_hdr)] == 0) { - /* some servers return only half of bcc - * on simple responses (wct, bcc both zero) - * in particular have seen this on - * ulogoffX and FindClose. This leaves - * one byte of bcc potentially unitialized - */ - /* zero rest of bcc */ - tmp[sizeof(struct smb_hdr)+1] = 0; + smb->WordCount = 0; + /* some error cases do not return wct and bcc */ return 0; + } else { + cERROR(1, ("Length less than smb header size")); } - cERROR(1,("rcvd invalid byte count (bcc)")); - } else { - cERROR(1, ("Length less than smb header size")); } - return 1; - } - if (len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) { - cERROR(1, ("smb length greater than MaxBufSize, mid=%d", + if (len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) + cERROR(1, ("smb length greater than MaxBufSize, mid=%d", smb->Mid)); return 1; } @@ -462,7 +446,7 @@ checkSMB(struct smb_hdr *smb, __u16 mid, unsigned int length) return 1; clc_len = smbCalcSize_LE(smb); - if(4 + len != length) { + if(4 + len != (unsigned int)length) { cERROR(1, ("Length read does not match RFC1001 length %d",len)); return 1; } diff --git a/trunk/fs/cifs/netmisc.c b/trunk/fs/cifs/netmisc.c index 992e80edc720..ce87550e918f 100644 --- a/trunk/fs/cifs/netmisc.c +++ b/trunk/fs/cifs/netmisc.c @@ -909,61 +909,3 @@ cifs_UnixTimeToNT(struct timespec t) /* Convert to 100ns intervals and then add the NTFS time offset. */ return (u64) t.tv_sec * 10000000 + t.tv_nsec/100 + NTFS_TIME_OFFSET; } - -static int total_days_of_prev_months[] = -{0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334}; - - -__le64 cnvrtDosCifsTm(__u16 date, __u16 time) -{ - return cpu_to_le64(cifs_UnixTimeToNT(cnvrtDosUnixTm(date, time))); -} - -struct timespec cnvrtDosUnixTm(__u16 date, __u16 time) -{ - struct timespec ts; - int sec, min, days, month, year; - SMB_TIME * st = (SMB_TIME *)&time; - SMB_DATE * sd = (SMB_DATE *)&date; - - cFYI(1,("date %d time %d",date, time)); - - sec = 2 * st->TwoSeconds; - min = st->Minutes; - if((sec > 59) || (min > 59)) - cERROR(1,("illegal time min %d sec %d", min, sec)); - sec += (min * 60); - sec += 60 * 60 * st->Hours; - if(st->Hours > 24) - cERROR(1,("illegal hours %d",st->Hours)); - days = sd->Day; - month = sd->Month; - if((days > 31) || (month > 12)) - cERROR(1,("illegal date, month %d day: %d", month, days)); - month -= 1; - days += total_days_of_prev_months[month]; - days += 3652; /* account for difference in days between 1980 and 1970 */ - year = sd->Year; - days += year * 365; - days += (year/4); /* leap year */ - /* generalized leap year calculation is more complex, ie no leap year - for years/100 except for years/400, but since the maximum number for DOS - year is 2**7, the last year is 1980+127, which means we need only - consider 2 special case years, ie the years 2000 and 2100, and only - adjust for the lack of leap year for the year 2100, as 2000 was a - leap year (divisable by 400) */ - if(year >= 120) /* the year 2100 */ - days = days - 1; /* do not count leap year for the year 2100 */ - - /* adjust for leap year where we are still before leap day */ - if(year != 120) - days -= ((year & 0x03) == 0) && (month < 2 ? 1 : 0); - sec += 24 * 60 * 60 * days; - - ts.tv_sec = sec; - - /* cFYI(1,("sec after cnvrt dos to unix time %d",sec)); */ - - ts.tv_nsec = 0; - return ts; -} diff --git a/trunk/fs/cifs/readdir.c b/trunk/fs/cifs/readdir.c index b5b0a2a41bef..b27b34537bf2 100644 --- a/trunk/fs/cifs/readdir.c +++ b/trunk/fs/cifs/readdir.c @@ -106,17 +106,6 @@ static int construct_dentry(struct qstr *qstring, struct file *file, return rc; } -static void AdjustForTZ(struct cifsTconInfo * tcon, struct inode * inode) -{ - if((tcon) && (tcon->ses) && (tcon->ses->server)) { - inode->i_ctime.tv_sec += tcon->ses->server->timeAdj; - inode->i_mtime.tv_sec += tcon->ses->server->timeAdj; - inode->i_atime.tv_sec += tcon->ses->server->timeAdj; - } - return; -} - - static void fill_in_inode(struct inode *tmp_inode, int new_buf_type, char * buf, int *pobject_type, int isNewInode) { @@ -146,23 +135,16 @@ static void fill_in_inode(struct inode *tmp_inode, int new_buf_type, tmp_inode->i_ctime = cifs_NTtimeToUnix(le64_to_cpu(pfindData->ChangeTime)); } else { /* legacy, OS2 and DOS style */ -/* struct timespec ts;*/ FIND_FILE_STANDARD_INFO * pfindData = (FIND_FILE_STANDARD_INFO *)buf; - tmp_inode->i_mtime = cnvrtDosUnixTm( - le16_to_cpu(pfindData->LastWriteDate), - le16_to_cpu(pfindData->LastWriteTime)); - tmp_inode->i_atime = cnvrtDosUnixTm( - le16_to_cpu(pfindData->LastAccessDate), - le16_to_cpu(pfindData->LastAccessTime)); - tmp_inode->i_ctime = cnvrtDosUnixTm( - le16_to_cpu(pfindData->LastWriteDate), - le16_to_cpu(pfindData->LastWriteTime)); - AdjustForTZ(cifs_sb->tcon, tmp_inode); attr = le16_to_cpu(pfindData->Attributes); allocation_size = le32_to_cpu(pfindData->AllocationSize); end_of_file = le32_to_cpu(pfindData->DataSize); + tmp_inode->i_atime = CURRENT_TIME; + /* tmp_inode->i_mtime = BB FIXME - add dos time handling + tmp_inode->i_ctime = 0; BB FIXME */ + } /* Linux can not store file creation time unfortunately so ignore it */ @@ -956,7 +938,6 @@ static int cifs_save_resume_key(const char *current_entry, filename = &pFindData->FileName[0]; /* one byte length, no name conversion */ len = (unsigned int)pFindData->FileNameLength; - cifsFile->srch_inf.resume_key = pFindData->ResumeKey; } else { cFYI(1,("Unknown findfirst level %d",level)); return -EINVAL; diff --git a/trunk/fs/cifs/sess.c b/trunk/fs/cifs/sess.c index a8a083543ba0..22b4c35dcfe3 100644 --- a/trunk/fs/cifs/sess.c +++ b/trunk/fs/cifs/sess.c @@ -268,10 +268,6 @@ static int decode_ascii_ssetup(char ** pbcc_area, int bleft, struct cifsSesInfo ses->serverOS = kzalloc(len + 1, GFP_KERNEL); if(ses->serverOS) strncpy(ses->serverOS, bcc_ptr, len); - if(strncmp(ses->serverOS, "OS/2",4) == 0) { - cFYI(1,("OS/2 server")); - ses->flags |= CIFS_SES_OS2; - } bcc_ptr += len + 1; bleft -= len + 1; @@ -294,11 +290,16 @@ static int decode_ascii_ssetup(char ** pbcc_area, int bleft, struct cifsSesInfo if(len > bleft) return rc; - /* No domain field in LANMAN case. Domain is - returned by old servers in the SMB negprot response */ - /* BB For newer servers which do not support Unicode, - but thus do return domain here we could add parsing - for it later, but it is not very important */ + if(ses->serverDomain) + kfree(ses->serverDomain); + + ses->serverDomain = kzalloc(len + 1, GFP_KERNEL); + if(ses->serverOS) + strncpy(ses->serverOS, bcc_ptr, len); + + bcc_ptr += len + 1; + bleft -= len + 1; + cFYI(1,("ascii: bytes left %d",bleft)); return rc; @@ -365,8 +366,6 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time, str_area = kmalloc(2000, GFP_KERNEL); bcc_ptr = str_area; - ses->flags &= ~CIFS_SES_LANMAN; - if(type == LANMAN) { #ifdef CONFIG_CIFS_WEAK_PW_HASH char lnm_session_key[CIFS_SESS_KEY_SIZE]; @@ -378,7 +377,7 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time, /* and copy into bcc */ calc_lanman_hash(ses, lnm_session_key); - ses->flags |= CIFS_SES_LANMAN; + /* #ifdef CONFIG_CIFS_DEBUG2 cifs_dump_mem("cryptkey: ",ses->server->cryptKey, CIFS_SESS_KEY_SIZE); diff --git a/trunk/fs/cifs/smbdes.c b/trunk/fs/cifs/smbdes.c index 7a1b2b961ec8..efaa044523a7 100644 --- a/trunk/fs/cifs/smbdes.c +++ b/trunk/fs/cifs/smbdes.c @@ -364,20 +364,20 @@ E_P24(unsigned char *p21, unsigned char *c8, unsigned char *p24) smbhash(p24 + 16, c8, p21 + 14, 1); } -#if 0 /* currently unsued */ -static void +void D_P16(unsigned char *p14, unsigned char *in, unsigned char *out) { smbhash(out, in, p14, 0); smbhash(out + 8, in + 8, p14 + 7, 0); } -static void +void E_old_pw_hash(unsigned char *p14, unsigned char *in, unsigned char *out) { smbhash(out, in, p14, 1); smbhash(out + 8, in + 8, p14 + 7, 1); } +#if 0 /* these routines are currently unneeded, but may be needed later */ void diff --git a/trunk/fs/cifs/smbencrypt.c b/trunk/fs/cifs/smbencrypt.c index 4b25ba92180d..f518c5e45035 100644 --- a/trunk/fs/cifs/smbencrypt.c +++ b/trunk/fs/cifs/smbencrypt.c @@ -51,8 +51,11 @@ void SMBencrypt(unsigned char *passwd, unsigned char *c8, unsigned char *p24); void E_md4hash(const unsigned char *passwd, unsigned char *p16); +void nt_lm_owf_gen(char *pwd, unsigned char nt_p16[16], unsigned char p16[16]); static void SMBOWFencrypt(unsigned char passwd[16], unsigned char *c8, unsigned char p24[24]); +void NTLMSSPOWFencrypt(unsigned char passwd[8], + unsigned char *ntlmchalresp, unsigned char p24[24]); void SMBNTencrypt(unsigned char *passwd, unsigned char *c8, unsigned char *p24); /* @@ -141,9 +144,8 @@ E_md4hash(const unsigned char *passwd, unsigned char *p16) memset(wpwd,0,129 * 2); } -#if 0 /* currently unused */ /* Does both the NT and LM owfs of a user's password */ -static void +void nt_lm_owf_gen(char *pwd, unsigned char nt_p16[16], unsigned char p16[16]) { char passwd[514]; @@ -169,7 +171,6 @@ nt_lm_owf_gen(char *pwd, unsigned char nt_p16[16], unsigned char p16[16]) /* clear out local copy of user's password (just being paranoid). */ memset(passwd, '\0', sizeof (passwd)); } -#endif /* Does the NTLMv2 owfs of a user's password */ #if 0 /* function not needed yet - but will be soon */ @@ -222,8 +223,7 @@ SMBOWFencrypt(unsigned char passwd[16], unsigned char *c8, } /* Does the des encryption from the FIRST 8 BYTES of the NT or LM MD4 hash. */ -#if 0 /* currently unused */ -static void +void NTLMSSPOWFencrypt(unsigned char passwd[8], unsigned char *ntlmchalresp, unsigned char p24[24]) { @@ -235,7 +235,6 @@ NTLMSSPOWFencrypt(unsigned char passwd[8], E_P24(p21, ntlmchalresp, p24); } -#endif /* Does the NT MD4 hash then des encryption. */ diff --git a/trunk/fs/compat_ioctl.c b/trunk/fs/compat_ioctl.c index a91f2628c981..27ca1aa30562 100644 --- a/trunk/fs/compat_ioctl.c +++ b/trunk/fs/compat_ioctl.c @@ -2438,17 +2438,13 @@ HANDLE_IOCTL(0x1260, broken_blkgetsize) HANDLE_IOCTL(BLKFRAGET, w_long) HANDLE_IOCTL(BLKSECTGET, w_long) HANDLE_IOCTL(BLKPG, blkpg_ioctl_trans) -HANDLE_IOCTL(HDIO_GET_UNMASKINTR, hdio_ioctl_trans) -HANDLE_IOCTL(HDIO_GET_MULTCOUNT, hdio_ioctl_trans) HANDLE_IOCTL(HDIO_GET_KEEPSETTINGS, hdio_ioctl_trans) +HANDLE_IOCTL(HDIO_GET_UNMASKINTR, hdio_ioctl_trans) +HANDLE_IOCTL(HDIO_GET_DMA, hdio_ioctl_trans) HANDLE_IOCTL(HDIO_GET_32BIT, hdio_ioctl_trans) +HANDLE_IOCTL(HDIO_GET_MULTCOUNT, hdio_ioctl_trans) HANDLE_IOCTL(HDIO_GET_NOWERR, hdio_ioctl_trans) -HANDLE_IOCTL(HDIO_GET_DMA, hdio_ioctl_trans) HANDLE_IOCTL(HDIO_GET_NICE, hdio_ioctl_trans) -HANDLE_IOCTL(HDIO_GET_WCACHE, hdio_ioctl_trans) -HANDLE_IOCTL(HDIO_GET_ACOUSTIC, hdio_ioctl_trans) -HANDLE_IOCTL(HDIO_GET_ADDRESS, hdio_ioctl_trans) -HANDLE_IOCTL(HDIO_GET_BUSSTATE, hdio_ioctl_trans) HANDLE_IOCTL(FDSETPRM32, fd_ioctl_trans) HANDLE_IOCTL(FDDEFPRM32, fd_ioctl_trans) HANDLE_IOCTL(FDGETPRM32, fd_ioctl_trans) diff --git a/trunk/fs/dcache.c b/trunk/fs/dcache.c index 2bac4ba1d1d3..2355bddad8de 100644 --- a/trunk/fs/dcache.c +++ b/trunk/fs/dcache.c @@ -548,136 +548,6 @@ void shrink_dcache_sb(struct super_block * sb) spin_unlock(&dcache_lock); } -/* - * destroy a single subtree of dentries for unmount - * - see the comments on shrink_dcache_for_umount() for a description of the - * locking - */ -static void shrink_dcache_for_umount_subtree(struct dentry *dentry) -{ - struct dentry *parent; - - BUG_ON(!IS_ROOT(dentry)); - - /* detach this root from the system */ - spin_lock(&dcache_lock); - if (!list_empty(&dentry->d_lru)) { - dentry_stat.nr_unused--; - list_del_init(&dentry->d_lru); - } - __d_drop(dentry); - spin_unlock(&dcache_lock); - - for (;;) { - /* descend to the first leaf in the current subtree */ - while (!list_empty(&dentry->d_subdirs)) { - struct dentry *loop; - - /* this is a branch with children - detach all of them - * from the system in one go */ - spin_lock(&dcache_lock); - list_for_each_entry(loop, &dentry->d_subdirs, - d_u.d_child) { - if (!list_empty(&loop->d_lru)) { - dentry_stat.nr_unused--; - list_del_init(&loop->d_lru); - } - - __d_drop(loop); - cond_resched_lock(&dcache_lock); - } - spin_unlock(&dcache_lock); - - /* move to the first child */ - dentry = list_entry(dentry->d_subdirs.next, - struct dentry, d_u.d_child); - } - - /* consume the dentries from this leaf up through its parents - * until we find one with children or run out altogether */ - do { - struct inode *inode; - - if (atomic_read(&dentry->d_count) != 0) { - printk(KERN_ERR - "BUG: Dentry %p{i=%lx,n=%s}" - " still in use (%d)" - " [unmount of %s %s]\n", - dentry, - dentry->d_inode ? - dentry->d_inode->i_ino : 0UL, - dentry->d_name.name, - atomic_read(&dentry->d_count), - dentry->d_sb->s_type->name, - dentry->d_sb->s_id); - BUG(); - } - - parent = dentry->d_parent; - if (parent == dentry) - parent = NULL; - else - atomic_dec(&parent->d_count); - - list_del(&dentry->d_u.d_child); - dentry_stat.nr_dentry--; /* For d_free, below */ - - inode = dentry->d_inode; - if (inode) { - dentry->d_inode = NULL; - list_del_init(&dentry->d_alias); - if (dentry->d_op && dentry->d_op->d_iput) - dentry->d_op->d_iput(dentry, inode); - else - iput(inode); - } - - d_free(dentry); - - /* finished when we fall off the top of the tree, - * otherwise we ascend to the parent and move to the - * next sibling if there is one */ - if (!parent) - return; - - dentry = parent; - - } while (list_empty(&dentry->d_subdirs)); - - dentry = list_entry(dentry->d_subdirs.next, - struct dentry, d_u.d_child); - } -} - -/* - * destroy the dentries attached to a superblock on unmounting - * - we don't need to use dentry->d_lock, and only need dcache_lock when - * removing the dentry from the system lists and hashes because: - * - the superblock is detached from all mountings and open files, so the - * dentry trees will not be rearranged by the VFS - * - s_umount is write-locked, so the memory pressure shrinker will ignore - * any dentries belonging to this superblock that it comes across - * - the filesystem itself is no longer permitted to rearrange the dentries - * in this superblock - */ -void shrink_dcache_for_umount(struct super_block *sb) -{ - struct dentry *dentry; - - if (down_read_trylock(&sb->s_umount)) - BUG(); - - dentry = sb->s_root; - sb->s_root = NULL; - atomic_dec(&dentry->d_count); - shrink_dcache_for_umount_subtree(dentry); - - while (!hlist_empty(&sb->s_anon)) { - dentry = hlist_entry(sb->s_anon.first, struct dentry, d_hash); - shrink_dcache_for_umount_subtree(dentry); - } -} - /* * Search for at least 1 mount point in the dentry's subdirs. * We descend to the next level whenever the d_subdirs diff --git a/trunk/fs/dlm/Kconfig b/trunk/fs/dlm/Kconfig index 81b2c6465eeb..490f85b3fa59 100644 --- a/trunk/fs/dlm/Kconfig +++ b/trunk/fs/dlm/Kconfig @@ -1,9 +1,10 @@ menu "Distributed Lock Manager" - depends on INET && IP_SCTP && EXPERIMENTAL + depends on INET && EXPERIMENTAL config DLM tristate "Distributed Lock Manager (DLM)" depends on IPV6 || IPV6=n + depends on IP_SCTP select CONFIGFS_FS help A general purpose distributed lock manager for kernel or userspace diff --git a/trunk/fs/dlm/lowcomms.c b/trunk/fs/dlm/lowcomms.c index 867f93d0417e..7bcea7c5addb 100644 --- a/trunk/fs/dlm/lowcomms.c +++ b/trunk/fs/dlm/lowcomms.c @@ -548,7 +548,7 @@ static int receive_from_sock(void) } len = iov[0].iov_len + iov[1].iov_len; - r = ret = kernel_recvmsg(sctp_con.sock, &msg, iov, msg.msg_iovlen, len, + r = ret = kernel_recvmsg(sctp_con.sock, &msg, iov, 1, len, MSG_NOSIGNAL | MSG_DONTWAIT); if (ret <= 0) goto out_close; diff --git a/trunk/fs/eventpoll.c b/trunk/fs/eventpoll.c index ae228ec54e94..557d5b614fae 100644 --- a/trunk/fs/eventpoll.c +++ b/trunk/fs/eventpoll.c @@ -105,8 +105,6 @@ /* Maximum msec timeout value storeable in a long int */ #define EP_MAX_MSTIMEO min(1000ULL * MAX_SCHEDULE_TIMEOUT / HZ, (LONG_MAX - 999ULL) / HZ) -#define EP_MAX_EVENTS (INT_MAX / sizeof(struct epoll_event)) - struct epoll_filefd { struct file *file; @@ -499,7 +497,7 @@ void eventpoll_release_file(struct file *file) */ asmlinkage long sys_epoll_create(int size) { - int error, fd = -1; + int error, fd; struct eventpoll *ep; struct inode *inode; struct file *file; @@ -642,6 +640,7 @@ sys_epoll_ctl(int epfd, int op, int fd, struct epoll_event __user *event) return error; } +#define MAX_EVENTS (INT_MAX / sizeof(struct epoll_event)) /* * Implement the event wait interface for the eventpoll file. It is the kernel @@ -658,7 +657,7 @@ asmlinkage long sys_epoll_wait(int epfd, struct epoll_event __user *events, current, epfd, events, maxevents, timeout)); /* The maximum number of event must be greater than zero */ - if (maxevents <= 0 || maxevents > EP_MAX_EVENTS) + if (maxevents <= 0 || maxevents > MAX_EVENTS) return -EINVAL; /* Verify that the area passed by the user is writeable */ @@ -700,55 +699,6 @@ asmlinkage long sys_epoll_wait(int epfd, struct epoll_event __user *events, } -#ifdef TIF_RESTORE_SIGMASK - -/* - * Implement the event wait interface for the eventpoll file. It is the kernel - * part of the user space epoll_pwait(2). - */ -asmlinkage long sys_epoll_pwait(int epfd, struct epoll_event __user *events, - int maxevents, int timeout, const sigset_t __user *sigmask, - size_t sigsetsize) -{ - int error; - sigset_t ksigmask, sigsaved; - - /* - * If the caller wants a certain signal mask to be set during the wait, - * we apply it here. - */ - if (sigmask) { - if (sigsetsize != sizeof(sigset_t)) - return -EINVAL; - if (copy_from_user(&ksigmask, sigmask, sizeof(ksigmask))) - return -EFAULT; - sigdelsetmask(&ksigmask, sigmask(SIGKILL) | sigmask(SIGSTOP)); - sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); - } - - error = sys_epoll_wait(epfd, events, maxevents, timeout); - - /* - * If we changed the signal mask, we need to restore the original one. - * In case we've got a signal while waiting, we do not restore the - * signal mask yet, and we allow do_signal() to deliver the signal on - * the way back to userspace, before the signal mask is restored. - */ - if (sigmask) { - if (error == -EINTR) { - memcpy(¤t->saved_sigmask, &sigsaved, - sizeof(sigsaved)); - set_thread_flag(TIF_RESTORE_SIGMASK); - } else - sigprocmask(SIG_SETMASK, &sigsaved, NULL); - } - - return error; -} - -#endif /* #ifdef TIF_RESTORE_SIGMASK */ - - /* * Creates the file descriptor to be used by the epoll interface. */ diff --git a/trunk/fs/ext2/super.c b/trunk/fs/ext2/super.c index d8b9abd95d07..513cd421ac0b 100644 --- a/trunk/fs/ext2/super.c +++ b/trunk/fs/ext2/super.c @@ -364,6 +364,7 @@ static int parse_options (char * options, { char * p; substring_t args[MAX_OPT_ARGS]; + unsigned long kind = EXT2_MOUNT_ERRORS_CONT; int option; if (!options) @@ -403,19 +404,13 @@ static int parse_options (char * options, /* *sb_block = match_int(&args[0]); */ break; case Opt_err_panic: - clear_opt (sbi->s_mount_opt, ERRORS_CONT); - clear_opt (sbi->s_mount_opt, ERRORS_RO); - set_opt (sbi->s_mount_opt, ERRORS_PANIC); + kind = EXT2_MOUNT_ERRORS_PANIC; break; case Opt_err_ro: - clear_opt (sbi->s_mount_opt, ERRORS_CONT); - clear_opt (sbi->s_mount_opt, ERRORS_PANIC); - set_opt (sbi->s_mount_opt, ERRORS_RO); + kind = EXT2_MOUNT_ERRORS_RO; break; case Opt_err_cont: - clear_opt (sbi->s_mount_opt, ERRORS_RO); - clear_opt (sbi->s_mount_opt, ERRORS_PANIC); - set_opt (sbi->s_mount_opt, ERRORS_CONT); + kind = EXT2_MOUNT_ERRORS_CONT; break; case Opt_nouid32: set_opt (sbi->s_mount_opt, NO_UID32); @@ -494,6 +489,7 @@ static int parse_options (char * options, return 0; } } + sbi->s_mount_opt |= kind; return 1; } @@ -719,8 +715,6 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) set_opt(sbi->s_mount_opt, ERRORS_PANIC); else if (le16_to_cpu(sbi->s_es->s_errors) == EXT2_ERRORS_RO) set_opt(sbi->s_mount_opt, ERRORS_RO); - else - set_opt(sbi->s_mount_opt, ERRORS_CONT); sbi->s_resuid = le16_to_cpu(es->s_def_resuid); sbi->s_resgid = le16_to_cpu(es->s_def_resgid); diff --git a/trunk/fs/ext3/super.c b/trunk/fs/ext3/super.c index afc2d4f42d77..8bfd56ef18ca 100644 --- a/trunk/fs/ext3/super.c +++ b/trunk/fs/ext3/super.c @@ -1470,8 +1470,6 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) set_opt(sbi->s_mount_opt, ERRORS_PANIC); else if (le16_to_cpu(sbi->s_es->s_errors) == EXT3_ERRORS_RO) set_opt(sbi->s_mount_opt, ERRORS_RO); - else - set_opt(sbi->s_mount_opt, ERRORS_CONT); sbi->s_resuid = le16_to_cpu(es->s_def_resuid); sbi->s_resgid = le16_to_cpu(es->s_def_resgid); diff --git a/trunk/fs/ext4/Makefile b/trunk/fs/ext4/Makefile deleted file mode 100644 index a6acb96ebeb9..000000000000 --- a/trunk/fs/ext4/Makefile +++ /dev/null @@ -1,12 +0,0 @@ -# -# Makefile for the linux ext4-filesystem routines. -# - -obj-$(CONFIG_EXT4DEV_FS) += ext4dev.o - -ext4dev-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ - ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o - -ext4dev-$(CONFIG_EXT4DEV_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o -ext4dev-$(CONFIG_EXT4DEV_FS_POSIX_ACL) += acl.o -ext4dev-$(CONFIG_EXT4DEV_FS_SECURITY) += xattr_security.o diff --git a/trunk/fs/ext4/acl.c b/trunk/fs/ext4/acl.c deleted file mode 100644 index 9e882546d91a..000000000000 --- a/trunk/fs/ext4/acl.c +++ /dev/null @@ -1,551 +0,0 @@ -/* - * linux/fs/ext4/acl.c - * - * Copyright (C) 2001-2003 Andreas Gruenbacher, - */ - -#include -#include -#include -#include -#include -#include -#include -#include "xattr.h" -#include "acl.h" - -/* - * Convert from filesystem to in-memory representation. - */ -static struct posix_acl * -ext4_acl_from_disk(const void *value, size_t size) -{ - const char *end = (char *)value + size; - int n, count; - struct posix_acl *acl; - - if (!value) - return NULL; - if (size < sizeof(ext4_acl_header)) - return ERR_PTR(-EINVAL); - if (((ext4_acl_header *)value)->a_version != - cpu_to_le32(EXT4_ACL_VERSION)) - return ERR_PTR(-EINVAL); - value = (char *)value + sizeof(ext4_acl_header); - count = ext4_acl_count(size); - if (count < 0) - return ERR_PTR(-EINVAL); - if (count == 0) - return NULL; - acl = posix_acl_alloc(count, GFP_KERNEL); - if (!acl) - return ERR_PTR(-ENOMEM); - for (n=0; n < count; n++) { - ext4_acl_entry *entry = - (ext4_acl_entry *)value; - if ((char *)value + sizeof(ext4_acl_entry_short) > end) - goto fail; - acl->a_entries[n].e_tag = le16_to_cpu(entry->e_tag); - acl->a_entries[n].e_perm = le16_to_cpu(entry->e_perm); - switch(acl->a_entries[n].e_tag) { - case ACL_USER_OBJ: - case ACL_GROUP_OBJ: - case ACL_MASK: - case ACL_OTHER: - value = (char *)value + - sizeof(ext4_acl_entry_short); - acl->a_entries[n].e_id = ACL_UNDEFINED_ID; - break; - - case ACL_USER: - case ACL_GROUP: - value = (char *)value + sizeof(ext4_acl_entry); - if ((char *)value > end) - goto fail; - acl->a_entries[n].e_id = - le32_to_cpu(entry->e_id); - break; - - default: - goto fail; - } - } - if (value != end) - goto fail; - return acl; - -fail: - posix_acl_release(acl); - return ERR_PTR(-EINVAL); -} - -/* - * Convert from in-memory to filesystem representation. - */ -static void * -ext4_acl_to_disk(const struct posix_acl *acl, size_t *size) -{ - ext4_acl_header *ext_acl; - char *e; - size_t n; - - *size = ext4_acl_size(acl->a_count); - ext_acl = kmalloc(sizeof(ext4_acl_header) + acl->a_count * - sizeof(ext4_acl_entry), GFP_KERNEL); - if (!ext_acl) - return ERR_PTR(-ENOMEM); - ext_acl->a_version = cpu_to_le32(EXT4_ACL_VERSION); - e = (char *)ext_acl + sizeof(ext4_acl_header); - for (n=0; n < acl->a_count; n++) { - ext4_acl_entry *entry = (ext4_acl_entry *)e; - entry->e_tag = cpu_to_le16(acl->a_entries[n].e_tag); - entry->e_perm = cpu_to_le16(acl->a_entries[n].e_perm); - switch(acl->a_entries[n].e_tag) { - case ACL_USER: - case ACL_GROUP: - entry->e_id = - cpu_to_le32(acl->a_entries[n].e_id); - e += sizeof(ext4_acl_entry); - break; - - case ACL_USER_OBJ: - case ACL_GROUP_OBJ: - case ACL_MASK: - case ACL_OTHER: - e += sizeof(ext4_acl_entry_short); - break; - - default: - goto fail; - } - } - return (char *)ext_acl; - -fail: - kfree(ext_acl); - return ERR_PTR(-EINVAL); -} - -static inline struct posix_acl * -ext4_iget_acl(struct inode *inode, struct posix_acl **i_acl) -{ - struct posix_acl *acl = EXT4_ACL_NOT_CACHED; - - spin_lock(&inode->i_lock); - if (*i_acl != EXT4_ACL_NOT_CACHED) - acl = posix_acl_dup(*i_acl); - spin_unlock(&inode->i_lock); - - return acl; -} - -static inline void -ext4_iset_acl(struct inode *inode, struct posix_acl **i_acl, - struct posix_acl *acl) -{ - spin_lock(&inode->i_lock); - if (*i_acl != EXT4_ACL_NOT_CACHED) - posix_acl_release(*i_acl); - *i_acl = posix_acl_dup(acl); - spin_unlock(&inode->i_lock); -} - -/* - * Inode operation get_posix_acl(). - * - * inode->i_mutex: don't care - */ -static struct posix_acl * -ext4_get_acl(struct inode *inode, int type) -{ - struct ext4_inode_info *ei = EXT4_I(inode); - int name_index; - char *value = NULL; - struct posix_acl *acl; - int retval; - - if (!test_opt(inode->i_sb, POSIX_ACL)) - return NULL; - - switch(type) { - case ACL_TYPE_ACCESS: - acl = ext4_iget_acl(inode, &ei->i_acl); - if (acl != EXT4_ACL_NOT_CACHED) - return acl; - name_index = EXT4_XATTR_INDEX_POSIX_ACL_ACCESS; - break; - - case ACL_TYPE_DEFAULT: - acl = ext4_iget_acl(inode, &ei->i_default_acl); - if (acl != EXT4_ACL_NOT_CACHED) - return acl; - name_index = EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT; - break; - - default: - return ERR_PTR(-EINVAL); - } - retval = ext4_xattr_get(inode, name_index, "", NULL, 0); - if (retval > 0) { - value = kmalloc(retval, GFP_KERNEL); - if (!value) - return ERR_PTR(-ENOMEM); - retval = ext4_xattr_get(inode, name_index, "", value, retval); - } - if (retval > 0) - acl = ext4_acl_from_disk(value, retval); - else if (retval == -ENODATA || retval == -ENOSYS) - acl = NULL; - else - acl = ERR_PTR(retval); - kfree(value); - - if (!IS_ERR(acl)) { - switch(type) { - case ACL_TYPE_ACCESS: - ext4_iset_acl(inode, &ei->i_acl, acl); - break; - - case ACL_TYPE_DEFAULT: - ext4_iset_acl(inode, &ei->i_default_acl, acl); - break; - } - } - return acl; -} - -/* - * Set the access or default ACL of an inode. - * - * inode->i_mutex: down unless called from ext4_new_inode - */ -static int -ext4_set_acl(handle_t *handle, struct inode *inode, int type, - struct posix_acl *acl) -{ - struct ext4_inode_info *ei = EXT4_I(inode); - int name_index; - void *value = NULL; - size_t size = 0; - int error; - - if (S_ISLNK(inode->i_mode)) - return -EOPNOTSUPP; - - switch(type) { - case ACL_TYPE_ACCESS: - name_index = EXT4_XATTR_INDEX_POSIX_ACL_ACCESS; - if (acl) { - mode_t mode = inode->i_mode; - error = posix_acl_equiv_mode(acl, &mode); - if (error < 0) - return error; - else { - inode->i_mode = mode; - ext4_mark_inode_dirty(handle, inode); - if (error == 0) - acl = NULL; - } - } - break; - - case ACL_TYPE_DEFAULT: - name_index = EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT; - if (!S_ISDIR(inode->i_mode)) - return acl ? -EACCES : 0; - break; - - default: - return -EINVAL; - } - if (acl) { - value = ext4_acl_to_disk(acl, &size); - if (IS_ERR(value)) - return (int)PTR_ERR(value); - } - - error = ext4_xattr_set_handle(handle, inode, name_index, "", - value, size, 0); - - kfree(value); - if (!error) { - switch(type) { - case ACL_TYPE_ACCESS: - ext4_iset_acl(inode, &ei->i_acl, acl); - break; - - case ACL_TYPE_DEFAULT: - ext4_iset_acl(inode, &ei->i_default_acl, acl); - break; - } - } - return error; -} - -static int -ext4_check_acl(struct inode *inode, int mask) -{ - struct posix_acl *acl = ext4_get_acl(inode, ACL_TYPE_ACCESS); - - if (IS_ERR(acl)) - return PTR_ERR(acl); - if (acl) { - int error = posix_acl_permission(inode, acl, mask); - posix_acl_release(acl); - return error; - } - - return -EAGAIN; -} - -int -ext4_permission(struct inode *inode, int mask, struct nameidata *nd) -{ - return generic_permission(inode, mask, ext4_check_acl); -} - -/* - * Initialize the ACLs of a new inode. Called from ext4_new_inode. - * - * dir->i_mutex: down - * inode->i_mutex: up (access to inode is still exclusive) - */ -int -ext4_init_acl(handle_t *handle, struct inode *inode, struct inode *dir) -{ - struct posix_acl *acl = NULL; - int error = 0; - - if (!S_ISLNK(inode->i_mode)) { - if (test_opt(dir->i_sb, POSIX_ACL)) { - acl = ext4_get_acl(dir, ACL_TYPE_DEFAULT); - if (IS_ERR(acl)) - return PTR_ERR(acl); - } - if (!acl) - inode->i_mode &= ~current->fs->umask; - } - if (test_opt(inode->i_sb, POSIX_ACL) && acl) { - struct posix_acl *clone; - mode_t mode; - - if (S_ISDIR(inode->i_mode)) { - error = ext4_set_acl(handle, inode, - ACL_TYPE_DEFAULT, acl); - if (error) - goto cleanup; - } - clone = posix_acl_clone(acl, GFP_KERNEL); - error = -ENOMEM; - if (!clone) - goto cleanup; - - mode = inode->i_mode; - error = posix_acl_create_masq(clone, &mode); - if (error >= 0) { - inode->i_mode = mode; - if (error > 0) { - /* This is an extended ACL */ - error = ext4_set_acl(handle, inode, - ACL_TYPE_ACCESS, clone); - } - } - posix_acl_release(clone); - } -cleanup: - posix_acl_release(acl); - return error; -} - -/* - * Does chmod for an inode that may have an Access Control List. The - * inode->i_mode field must be updated to the desired value by the caller - * before calling this function. - * Returns 0 on success, or a negative error number. - * - * We change the ACL rather than storing some ACL entries in the file - * mode permission bits (which would be more efficient), because that - * would break once additional permissions (like ACL_APPEND, ACL_DELETE - * for directories) are added. There are no more bits available in the - * file mode. - * - * inode->i_mutex: down - */ -int -ext4_acl_chmod(struct inode *inode) -{ - struct posix_acl *acl, *clone; - int error; - - if (S_ISLNK(inode->i_mode)) - return -EOPNOTSUPP; - if (!test_opt(inode->i_sb, POSIX_ACL)) - return 0; - acl = ext4_get_acl(inode, ACL_TYPE_ACCESS); - if (IS_ERR(acl) || !acl) - return PTR_ERR(acl); - clone = posix_acl_clone(acl, GFP_KERNEL); - posix_acl_release(acl); - if (!clone) - return -ENOMEM; - error = posix_acl_chmod_masq(clone, inode->i_mode); - if (!error) { - handle_t *handle; - int retries = 0; - - retry: - handle = ext4_journal_start(inode, - EXT4_DATA_TRANS_BLOCKS(inode->i_sb)); - if (IS_ERR(handle)) { - error = PTR_ERR(handle); - ext4_std_error(inode->i_sb, error); - goto out; - } - error = ext4_set_acl(handle, inode, ACL_TYPE_ACCESS, clone); - ext4_journal_stop(handle); - if (error == -ENOSPC && - ext4_should_retry_alloc(inode->i_sb, &retries)) - goto retry; - } -out: - posix_acl_release(clone); - return error; -} - -/* - * Extended attribute handlers - */ -static size_t -ext4_xattr_list_acl_access(struct inode *inode, char *list, size_t list_len, - const char *name, size_t name_len) -{ - const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS); - - if (!test_opt(inode->i_sb, POSIX_ACL)) - return 0; - if (list && size <= list_len) - memcpy(list, POSIX_ACL_XATTR_ACCESS, size); - return size; -} - -static size_t -ext4_xattr_list_acl_default(struct inode *inode, char *list, size_t list_len, - const char *name, size_t name_len) -{ - const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT); - - if (!test_opt(inode->i_sb, POSIX_ACL)) - return 0; - if (list && size <= list_len) - memcpy(list, POSIX_ACL_XATTR_DEFAULT, size); - return size; -} - -static int -ext4_xattr_get_acl(struct inode *inode, int type, void *buffer, size_t size) -{ - struct posix_acl *acl; - int error; - - if (!test_opt(inode->i_sb, POSIX_ACL)) - return -EOPNOTSUPP; - - acl = ext4_get_acl(inode, type); - if (IS_ERR(acl)) - return PTR_ERR(acl); - if (acl == NULL) - return -ENODATA; - error = posix_acl_to_xattr(acl, buffer, size); - posix_acl_release(acl); - - return error; -} - -static int -ext4_xattr_get_acl_access(struct inode *inode, const char *name, - void *buffer, size_t size) -{ - if (strcmp(name, "") != 0) - return -EINVAL; - return ext4_xattr_get_acl(inode, ACL_TYPE_ACCESS, buffer, size); -} - -static int -ext4_xattr_get_acl_default(struct inode *inode, const char *name, - void *buffer, size_t size) -{ - if (strcmp(name, "") != 0) - return -EINVAL; - return ext4_xattr_get_acl(inode, ACL_TYPE_DEFAULT, buffer, size); -} - -static int -ext4_xattr_set_acl(struct inode *inode, int type, const void *value, - size_t size) -{ - handle_t *handle; - struct posix_acl *acl; - int error, retries = 0; - - if (!test_opt(inode->i_sb, POSIX_ACL)) - return -EOPNOTSUPP; - if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER)) - return -EPERM; - - if (value) { - acl = posix_acl_from_xattr(value, size); - if (IS_ERR(acl)) - return PTR_ERR(acl); - else if (acl) { - error = posix_acl_valid(acl); - if (error) - goto release_and_out; - } - } else - acl = NULL; - -retry: - handle = ext4_journal_start(inode, EXT4_DATA_TRANS_BLOCKS(inode->i_sb)); - if (IS_ERR(handle)) - return PTR_ERR(handle); - error = ext4_set_acl(handle, inode, type, acl); - ext4_journal_stop(handle); - if (error == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) - goto retry; - -release_and_out: - posix_acl_release(acl); - return error; -} - -static int -ext4_xattr_set_acl_access(struct inode *inode, const char *name, - const void *value, size_t size, int flags) -{ - if (strcmp(name, "") != 0) - return -EINVAL; - return ext4_xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size); -} - -static int -ext4_xattr_set_acl_default(struct inode *inode, const char *name, - const void *value, size_t size, int flags) -{ - if (strcmp(name, "") != 0) - return -EINVAL; - return ext4_xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size); -} - -struct xattr_handler ext4_xattr_acl_access_handler = { - .prefix = POSIX_ACL_XATTR_ACCESS, - .list = ext4_xattr_list_acl_access, - .get = ext4_xattr_get_acl_access, - .set = ext4_xattr_set_acl_access, -}; - -struct xattr_handler ext4_xattr_acl_default_handler = { - .prefix = POSIX_ACL_XATTR_DEFAULT, - .list = ext4_xattr_list_acl_default, - .get = ext4_xattr_get_acl_default, - .set = ext4_xattr_set_acl_default, -}; diff --git a/trunk/fs/ext4/acl.h b/trunk/fs/ext4/acl.h deleted file mode 100644 index 26a5c1abf147..000000000000 --- a/trunk/fs/ext4/acl.h +++ /dev/null @@ -1,81 +0,0 @@ -/* - File: fs/ext4/acl.h - - (C) 2001 Andreas Gruenbacher, -*/ - -#include - -#define EXT4_ACL_VERSION 0x0001 - -typedef struct { - __le16 e_tag; - __le16 e_perm; - __le32 e_id; -} ext4_acl_entry; - -typedef struct { - __le16 e_tag; - __le16 e_perm; -} ext4_acl_entry_short; - -typedef struct { - __le32 a_version; -} ext4_acl_header; - -static inline size_t ext4_acl_size(int count) -{ - if (count <= 4) { - return sizeof(ext4_acl_header) + - count * sizeof(ext4_acl_entry_short); - } else { - return sizeof(ext4_acl_header) + - 4 * sizeof(ext4_acl_entry_short) + - (count - 4) * sizeof(ext4_acl_entry); - } -} - -static inline int ext4_acl_count(size_t size) -{ - ssize_t s; - size -= sizeof(ext4_acl_header); - s = size - 4 * sizeof(ext4_acl_entry_short); - if (s < 0) { - if (size % sizeof(ext4_acl_entry_short)) - return -1; - return size / sizeof(ext4_acl_entry_short); - } else { - if (s % sizeof(ext4_acl_entry)) - return -1; - return s / sizeof(ext4_acl_entry) + 4; - } -} - -#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL - -/* Value for inode->u.ext4_i.i_acl and inode->u.ext4_i.i_default_acl - if the ACL has not been cached */ -#define EXT4_ACL_NOT_CACHED ((void *)-1) - -/* acl.c */ -extern int ext4_permission (struct inode *, int, struct nameidata *); -extern int ext4_acl_chmod (struct inode *); -extern int ext4_init_acl (handle_t *, struct inode *, struct inode *); - -#else /* CONFIG_EXT4DEV_FS_POSIX_ACL */ -#include -#define ext4_permission NULL - -static inline int -ext4_acl_chmod(struct inode *inode) -{ - return 0; -} - -static inline int -ext4_init_acl(handle_t *handle, struct inode *inode, struct inode *dir) -{ - return 0; -} -#endif /* CONFIG_EXT4DEV_FS_POSIX_ACL */ - diff --git a/trunk/fs/ext4/balloc.c b/trunk/fs/ext4/balloc.c deleted file mode 100644 index 5d45582f9517..000000000000 --- a/trunk/fs/ext4/balloc.c +++ /dev/null @@ -1,1833 +0,0 @@ -/* - * linux/fs/ext4/balloc.c - * - * Copyright (C) 1992, 1993, 1994, 1995 - * Remy Card (card@masi.ibp.fr) - * Laboratoire MASI - Institut Blaise Pascal - * Universite Pierre et Marie Curie (Paris VI) - * - * Enhanced block allocation by Stephen Tweedie (sct@redhat.com), 1993 - * Big-endian to little-endian byte-swapping/bitmaps by - * David S. Miller (davem@caip.rutgers.edu), 1995 - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -/* - * balloc.c contains the blocks allocation and deallocation routines - */ - -/* - * Calculate the block group number and offset, given a block number - */ -void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr, - unsigned long *blockgrpp, ext4_grpblk_t *offsetp) -{ - struct ext4_super_block *es = EXT4_SB(sb)->s_es; - ext4_grpblk_t offset; - - blocknr = blocknr - le32_to_cpu(es->s_first_data_block); - offset = do_div(blocknr, EXT4_BLOCKS_PER_GROUP(sb)); - if (offsetp) - *offsetp = offset; - if (blockgrpp) - *blockgrpp = blocknr; - -} - -/* - * The free blocks are managed by bitmaps. A file system contains several - * blocks groups. Each group contains 1 bitmap block for blocks, 1 bitmap - * block for inodes, N blocks for the inode table and data blocks. - * - * The file system contains group descriptors which are located after the - * super block. Each descriptor contains the number of the bitmap block and - * the free blocks count in the block. The descriptors are loaded in memory - * when a file system is mounted (see ext4_read_super). - */ - - -#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) - -/** - * ext4_get_group_desc() -- load group descriptor from disk - * @sb: super block - * @block_group: given block group - * @bh: pointer to the buffer head to store the block - * group descriptor - */ -struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb, - unsigned int block_group, - struct buffer_head ** bh) -{ - unsigned long group_desc; - unsigned long offset; - struct ext4_group_desc * desc; - struct ext4_sb_info *sbi = EXT4_SB(sb); - - if (block_group >= sbi->s_groups_count) { - ext4_error (sb, "ext4_get_group_desc", - "block_group >= groups_count - " - "block_group = %d, groups_count = %lu", - block_group, sbi->s_groups_count); - - return NULL; - } - smp_rmb(); - - group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb); - offset = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1); - if (!sbi->s_group_desc[group_desc]) { - ext4_error (sb, "ext4_get_group_desc", - "Group descriptor not loaded - " - "block_group = %d, group_desc = %lu, desc = %lu", - block_group, group_desc, offset); - return NULL; - } - - desc = (struct ext4_group_desc *)( - (__u8 *)sbi->s_group_desc[group_desc]->b_data + - offset * EXT4_DESC_SIZE(sb)); - if (bh) - *bh = sbi->s_group_desc[group_desc]; - return desc; -} - -/** - * read_block_bitmap() - * @sb: super block - * @block_group: given block group - * - * Read the bitmap for a given block_group, reading into the specified - * slot in the superblock's bitmap cache. - * - * Return buffer_head on success or NULL in case of failure. - */ -static struct buffer_head * -read_block_bitmap(struct super_block *sb, unsigned int block_group) -{ - struct ext4_group_desc * desc; - struct buffer_head * bh = NULL; - - desc = ext4_get_group_desc (sb, block_group, NULL); - if (!desc) - goto error_out; - bh = sb_bread(sb, ext4_block_bitmap(sb, desc)); - if (!bh) - ext4_error (sb, "read_block_bitmap", - "Cannot read block bitmap - " - "block_group = %d, block_bitmap = %llu", - block_group, - ext4_block_bitmap(sb, desc)); -error_out: - return bh; -} -/* - * The reservation window structure operations - * -------------------------------------------- - * Operations include: - * dump, find, add, remove, is_empty, find_next_reservable_window, etc. - * - * We use a red-black tree to represent per-filesystem reservation - * windows. - * - */ - -/** - * __rsv_window_dump() -- Dump the filesystem block allocation reservation map - * @rb_root: root of per-filesystem reservation rb tree - * @verbose: verbose mode - * @fn: function which wishes to dump the reservation map - * - * If verbose is turned on, it will print the whole block reservation - * windows(start, end). Otherwise, it will only print out the "bad" windows, - * those windows that overlap with their immediate neighbors. - */ -#if 1 -static void __rsv_window_dump(struct rb_root *root, int verbose, - const char *fn) -{ - struct rb_node *n; - struct ext4_reserve_window_node *rsv, *prev; - int bad; - -restart: - n = rb_first(root); - bad = 0; - prev = NULL; - - printk("Block Allocation Reservation Windows Map (%s):\n", fn); - while (n) { - rsv = list_entry(n, struct ext4_reserve_window_node, rsv_node); - if (verbose) - printk("reservation window 0x%p " - "start: %llu, end: %llu\n", - rsv, rsv->rsv_start, rsv->rsv_end); - if (rsv->rsv_start && rsv->rsv_start >= rsv->rsv_end) { - printk("Bad reservation %p (start >= end)\n", - rsv); - bad = 1; - } - if (prev && prev->rsv_end >= rsv->rsv_start) { - printk("Bad reservation %p (prev->end >= start)\n", - rsv); - bad = 1; - } - if (bad) { - if (!verbose) { - printk("Restarting reservation walk in verbose mode\n"); - verbose = 1; - goto restart; - } - } - n = rb_next(n); - prev = rsv; - } - printk("Window map complete.\n"); - if (bad) - BUG(); -} -#define rsv_window_dump(root, verbose) \ - __rsv_window_dump((root), (verbose), __FUNCTION__) -#else -#define rsv_window_dump(root, verbose) do {} while (0) -#endif - -/** - * goal_in_my_reservation() - * @rsv: inode's reservation window - * @grp_goal: given goal block relative to the allocation block group - * @group: the current allocation block group - * @sb: filesystem super block - * - * Test if the given goal block (group relative) is within the file's - * own block reservation window range. - * - * If the reservation window is outside the goal allocation group, return 0; - * grp_goal (given goal block) could be -1, which means no specific - * goal block. In this case, always return 1. - * If the goal block is within the reservation window, return 1; - * otherwise, return 0; - */ -static int -goal_in_my_reservation(struct ext4_reserve_window *rsv, ext4_grpblk_t grp_goal, - unsigned int group, struct super_block * sb) -{ - ext4_fsblk_t group_first_block, group_last_block; - - group_first_block = ext4_group_first_block_no(sb, group); - group_last_block = group_first_block + (EXT4_BLOCKS_PER_GROUP(sb) - 1); - - if ((rsv->_rsv_start > group_last_block) || - (rsv->_rsv_end < group_first_block)) - return 0; - if ((grp_goal >= 0) && ((grp_goal + group_first_block < rsv->_rsv_start) - || (grp_goal + group_first_block > rsv->_rsv_end))) - return 0; - return 1; -} - -/** - * search_reserve_window() - * @rb_root: root of reservation tree - * @goal: target allocation block - * - * Find the reserved window which includes the goal, or the previous one - * if the goal is not in any window. - * Returns NULL if there are no windows or if all windows start after the goal. - */ -static struct ext4_reserve_window_node * -search_reserve_window(struct rb_root *root, ext4_fsblk_t goal) -{ - struct rb_node *n = root->rb_node; - struct ext4_reserve_window_node *rsv; - - if (!n) - return NULL; - - do { - rsv = rb_entry(n, struct ext4_reserve_window_node, rsv_node); - - if (goal < rsv->rsv_start) - n = n->rb_left; - else if (goal > rsv->rsv_end) - n = n->rb_right; - else - return rsv; - } while (n); - /* - * We've fallen off the end of the tree: the goal wasn't inside - * any particular node. OK, the previous node must be to one - * side of the interval containing the goal. If it's the RHS, - * we need to back up one. - */ - if (rsv->rsv_start > goal) { - n = rb_prev(&rsv->rsv_node); - rsv = rb_entry(n, struct ext4_reserve_window_node, rsv_node); - } - return rsv; -} - -/** - * ext4_rsv_window_add() -- Insert a window to the block reservation rb tree. - * @sb: super block - * @rsv: reservation window to add - * - * Must be called with rsv_lock hold. - */ -void ext4_rsv_window_add(struct super_block *sb, - struct ext4_reserve_window_node *rsv) -{ - struct rb_root *root = &EXT4_SB(sb)->s_rsv_window_root; - struct rb_node *node = &rsv->rsv_node; - ext4_fsblk_t start = rsv->rsv_start; - - struct rb_node ** p = &root->rb_node; - struct rb_node * parent = NULL; - struct ext4_reserve_window_node *this; - - while (*p) - { - parent = *p; - this = rb_entry(parent, struct ext4_reserve_window_node, rsv_node); - - if (start < this->rsv_start) - p = &(*p)->rb_left; - else if (start > this->rsv_end) - p = &(*p)->rb_right; - else { - rsv_window_dump(root, 1); - BUG(); - } - } - - rb_link_node(node, parent, p); - rb_insert_color(node, root); -} - -/** - * ext4_rsv_window_remove() -- unlink a window from the reservation rb tree - * @sb: super block - * @rsv: reservation window to remove - * - * Mark the block reservation window as not allocated, and unlink it - * from the filesystem reservation window rb tree. Must be called with - * rsv_lock hold. - */ -static void rsv_window_remove(struct super_block *sb, - struct ext4_reserve_window_node *rsv) -{ - rsv->rsv_start = EXT4_RESERVE_WINDOW_NOT_ALLOCATED; - rsv->rsv_end = EXT4_RESERVE_WINDOW_NOT_ALLOCATED; - rsv->rsv_alloc_hit = 0; - rb_erase(&rsv->rsv_node, &EXT4_SB(sb)->s_rsv_window_root); -} - -/* - * rsv_is_empty() -- Check if the reservation window is allocated. - * @rsv: given reservation window to check - * - * returns 1 if the end block is EXT4_RESERVE_WINDOW_NOT_ALLOCATED. - */ -static inline int rsv_is_empty(struct ext4_reserve_window *rsv) -{ - /* a valid reservation end block could not be 0 */ - return rsv->_rsv_end == EXT4_RESERVE_WINDOW_NOT_ALLOCATED; -} - -/** - * ext4_init_block_alloc_info() - * @inode: file inode structure - * - * Allocate and initialize the reservation window structure, and - * link the window to the ext4 inode structure at last - * - * The reservation window structure is only dynamically allocated - * and linked to ext4 inode the first time the open file - * needs a new block. So, before every ext4_new_block(s) call, for - * regular files, we should check whether the reservation window - * structure exists or not. In the latter case, this function is called. - * Fail to do so will result in block reservation being turned off for that - * open file. - * - * This function is called from ext4_get_blocks_handle(), also called - * when setting the reservation window size through ioctl before the file - * is open for write (needs block allocation). - * - * Needs truncate_mutex protection prior to call this function. - */ -void ext4_init_block_alloc_info(struct inode *inode) -{ - struct ext4_inode_info *ei = EXT4_I(inode); - struct ext4_block_alloc_info *block_i = ei->i_block_alloc_info; - struct super_block *sb = inode->i_sb; - - block_i = kmalloc(sizeof(*block_i), GFP_NOFS); - if (block_i) { - struct ext4_reserve_window_node *rsv = &block_i->rsv_window_node; - - rsv->rsv_start = EXT4_RESERVE_WINDOW_NOT_ALLOCATED; - rsv->rsv_end = EXT4_RESERVE_WINDOW_NOT_ALLOCATED; - - /* - * if filesystem is mounted with NORESERVATION, the goal - * reservation window size is set to zero to indicate - * block reservation is off - */ - if (!test_opt(sb, RESERVATION)) - rsv->rsv_goal_size = 0; - else - rsv->rsv_goal_size = EXT4_DEFAULT_RESERVE_BLOCKS; - rsv->rsv_alloc_hit = 0; - block_i->last_alloc_logical_block = 0; - block_i->last_alloc_physical_block = 0; - } - ei->i_block_alloc_info = block_i; -} - -/** - * ext4_discard_reservation() - * @inode: inode - * - * Discard(free) block reservation window on last file close, or truncate - * or at last iput(). - * - * It is being called in three cases: - * ext4_release_file(): last writer close the file - * ext4_clear_inode(): last iput(), when nobody link to this file. - * ext4_truncate(): when the block indirect map is about to change. - * - */ -void ext4_discard_reservation(struct inode *inode) -{ - struct ext4_inode_info *ei = EXT4_I(inode); - struct ext4_block_alloc_info *block_i = ei->i_block_alloc_info; - struct ext4_reserve_window_node *rsv; - spinlock_t *rsv_lock = &EXT4_SB(inode->i_sb)->s_rsv_window_lock; - - if (!block_i) - return; - - rsv = &block_i->rsv_window_node; - if (!rsv_is_empty(&rsv->rsv_window)) { - spin_lock(rsv_lock); - if (!rsv_is_empty(&rsv->rsv_window)) - rsv_window_remove(inode->i_sb, rsv); - spin_unlock(rsv_lock); - } -} - -/** - * ext4_free_blocks_sb() -- Free given blocks and update quota - * @handle: handle to this transaction - * @sb: super block - * @block: start physcial block to free - * @count: number of blocks to free - * @pdquot_freed_blocks: pointer to quota - */ -void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb, - ext4_fsblk_t block, unsigned long count, - unsigned long *pdquot_freed_blocks) -{ - struct buffer_head *bitmap_bh = NULL; - struct buffer_head *gd_bh; - unsigned long block_group; - ext4_grpblk_t bit; - unsigned long i; - unsigned long overflow; - struct ext4_group_desc * desc; - struct ext4_super_block * es; - struct ext4_sb_info *sbi; - int err = 0, ret; - ext4_grpblk_t group_freed; - - *pdquot_freed_blocks = 0; - sbi = EXT4_SB(sb); - es = sbi->s_es; - if (block < le32_to_cpu(es->s_first_data_block) || - block + count < block || - block + count > ext4_blocks_count(es)) { - ext4_error (sb, "ext4_free_blocks", - "Freeing blocks not in datazone - " - "block = %llu, count = %lu", block, count); - goto error_return; - } - - ext4_debug ("freeing block(s) %llu-%llu\n", block, block + count - 1); - -do_more: - overflow = 0; - ext4_get_group_no_and_offset(sb, block, &block_group, &bit); - /* - * Check to see if we are freeing blocks across a group - * boundary. - */ - if (bit + count > EXT4_BLOCKS_PER_GROUP(sb)) { - overflow = bit + count - EXT4_BLOCKS_PER_GROUP(sb); - count -= overflow; - } - brelse(bitmap_bh); - bitmap_bh = read_block_bitmap(sb, block_group); - if (!bitmap_bh) - goto error_return; - desc = ext4_get_group_desc (sb, block_group, &gd_bh); - if (!desc) - goto error_return; - - if (in_range(ext4_block_bitmap(sb, desc), block, count) || - in_range(ext4_inode_bitmap(sb, desc), block, count) || - in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) || - in_range(block + count - 1, ext4_inode_table(sb, desc), - sbi->s_itb_per_group)) - ext4_error (sb, "ext4_free_blocks", - "Freeing blocks in system zones - " - "Block = %llu, count = %lu", - block, count); - - /* - * We are about to start releasing blocks in the bitmap, - * so we need undo access. - */ - /* @@@ check errors */ - BUFFER_TRACE(bitmap_bh, "getting undo access"); - err = ext4_journal_get_undo_access(handle, bitmap_bh); - if (err) - goto error_return; - - /* - * We are about to modify some metadata. Call the journal APIs - * to unshare ->b_data if a currently-committing transaction is - * using it - */ - BUFFER_TRACE(gd_bh, "get_write_access"); - err = ext4_journal_get_write_access(handle, gd_bh); - if (err) - goto error_return; - - jbd_lock_bh_state(bitmap_bh); - - for (i = 0, group_freed = 0; i < count; i++) { - /* - * An HJ special. This is expensive... - */ -#ifdef CONFIG_JBD_DEBUG - jbd_unlock_bh_state(bitmap_bh); - { - struct buffer_head *debug_bh; - debug_bh = sb_find_get_block(sb, block + i); - if (debug_bh) { - BUFFER_TRACE(debug_bh, "Deleted!"); - if (!bh2jh(bitmap_bh)->b_committed_data) - BUFFER_TRACE(debug_bh, - "No commited data in bitmap"); - BUFFER_TRACE2(debug_bh, bitmap_bh, "bitmap"); - __brelse(debug_bh); - } - } - jbd_lock_bh_state(bitmap_bh); -#endif - if (need_resched()) { - jbd_unlock_bh_state(bitmap_bh); - cond_resched(); - jbd_lock_bh_state(bitmap_bh); - } - /* @@@ This prevents newly-allocated data from being - * freed and then reallocated within the same - * transaction. - * - * Ideally we would want to allow that to happen, but to - * do so requires making jbd2_journal_forget() capable of - * revoking the queued write of a data block, which - * implies blocking on the journal lock. *forget() - * cannot block due to truncate races. - * - * Eventually we can fix this by making jbd2_journal_forget() - * return a status indicating whether or not it was able - * to revoke the buffer. On successful revoke, it is - * safe not to set the allocation bit in the committed - * bitmap, because we know that there is no outstanding - * activity on the buffer any more and so it is safe to - * reallocate it. - */ - BUFFER_TRACE(bitmap_bh, "set in b_committed_data"); - J_ASSERT_BH(bitmap_bh, - bh2jh(bitmap_bh)->b_committed_data != NULL); - ext4_set_bit_atomic(sb_bgl_lock(sbi, block_group), bit + i, - bh2jh(bitmap_bh)->b_committed_data); - - /* - * We clear the bit in the bitmap after setting the committed - * data bit, because this is the reverse order to that which - * the allocator uses. - */ - BUFFER_TRACE(bitmap_bh, "clear bit"); - if (!ext4_clear_bit_atomic(sb_bgl_lock(sbi, block_group), - bit + i, bitmap_bh->b_data)) { - jbd_unlock_bh_state(bitmap_bh); - ext4_error(sb, __FUNCTION__, - "bit already cleared for block %llu", - (ext4_fsblk_t)(block + i)); - jbd_lock_bh_state(bitmap_bh); - BUFFER_TRACE(bitmap_bh, "bit already cleared"); - } else { - group_freed++; - } - } - jbd_unlock_bh_state(bitmap_bh); - - spin_lock(sb_bgl_lock(sbi, block_group)); - desc->bg_free_blocks_count = - cpu_to_le16(le16_to_cpu(desc->bg_free_blocks_count) + - group_freed); - spin_unlock(sb_bgl_lock(sbi, block_group)); - percpu_counter_mod(&sbi->s_freeblocks_counter, count); - - /* We dirtied the bitmap block */ - BUFFER_TRACE(bitmap_bh, "dirtied bitmap block"); - err = ext4_journal_dirty_metadata(handle, bitmap_bh); - - /* And the group descriptor block */ - BUFFER_TRACE(gd_bh, "dirtied group descriptor block"); - ret = ext4_journal_dirty_metadata(handle, gd_bh); - if (!err) err = ret; - *pdquot_freed_blocks += group_freed; - - if (overflow && !err) { - block += count; - count = overflow; - goto do_more; - } - sb->s_dirt = 1; -error_return: - brelse(bitmap_bh); - ext4_std_error(sb, err); - return; -} - -/** - * ext4_free_blocks() -- Free given blocks and update quota - * @handle: handle for this transaction - * @inode: inode - * @block: start physical block to free - * @count: number of blocks to count - */ -void ext4_free_blocks(handle_t *handle, struct inode *inode, - ext4_fsblk_t block, unsigned long count) -{ - struct super_block * sb; - unsigned long dquot_freed_blocks; - - sb = inode->i_sb; - if (!sb) { - printk ("ext4_free_blocks: nonexistent device"); - return; - } - ext4_free_blocks_sb(handle, sb, block, count, &dquot_freed_blocks); - if (dquot_freed_blocks) - DQUOT_FREE_BLOCK(inode, dquot_freed_blocks); - return; -} - -/** - * ext4_test_allocatable() - * @nr: given allocation block group - * @bh: bufferhead contains the bitmap of the given block group - * - * For ext4 allocations, we must not reuse any blocks which are - * allocated in the bitmap buffer's "last committed data" copy. This - * prevents deletes from freeing up the page for reuse until we have - * committed the delete transaction. - * - * If we didn't do this, then deleting something and reallocating it as - * data would allow the old block to be overwritten before the - * transaction committed (because we force data to disk before commit). - * This would lead to corruption if we crashed between overwriting the - * data and committing the delete. - * - * @@@ We may want to make this allocation behaviour conditional on - * data-writes at some point, and disable it for metadata allocations or - * sync-data inodes. - */ -static int ext4_test_allocatable(ext4_grpblk_t nr, struct buffer_head *bh) -{ - int ret; - struct journal_head *jh = bh2jh(bh); - - if (ext4_test_bit(nr, bh->b_data)) - return 0; - - jbd_lock_bh_state(bh); - if (!jh->b_committed_data) - ret = 1; - else - ret = !ext4_test_bit(nr, jh->b_committed_data); - jbd_unlock_bh_state(bh); - return ret; -} - -/** - * bitmap_search_next_usable_block() - * @start: the starting block (group relative) of the search - * @bh: bufferhead contains the block group bitmap - * @maxblocks: the ending block (group relative) of the reservation - * - * The bitmap search --- search forward alternately through the actual - * bitmap on disk and the last-committed copy in journal, until we find a - * bit free in both bitmaps. - */ -static ext4_grpblk_t -bitmap_search_next_usable_block(ext4_grpblk_t start, struct buffer_head *bh, - ext4_grpblk_t maxblocks) -{ - ext4_grpblk_t next; - struct journal_head *jh = bh2jh(bh); - - while (start < maxblocks) { - next = ext4_find_next_zero_bit(bh->b_data, maxblocks, start); - if (next >= maxblocks) - return -1; - if (ext4_test_allocatable(next, bh)) - return next; - jbd_lock_bh_state(bh); - if (jh->b_committed_data) - start = ext4_find_next_zero_bit(jh->b_committed_data, - maxblocks, next); - jbd_unlock_bh_state(bh); - } - return -1; -} - -/** - * find_next_usable_block() - * @start: the starting block (group relative) to find next - * allocatable block in bitmap. - * @bh: bufferhead contains the block group bitmap - * @maxblocks: the ending block (group relative) for the search - * - * Find an allocatable block in a bitmap. We honor both the bitmap and - * its last-committed copy (if that exists), and perform the "most - * appropriate allocation" algorithm of looking for a free block near - * the initial goal; then for a free byte somewhere in the bitmap; then - * for any free bit in the bitmap. - */ -static ext4_grpblk_t -find_next_usable_block(ext4_grpblk_t start, struct buffer_head *bh, - ext4_grpblk_t maxblocks) -{ - ext4_grpblk_t here, next; - char *p, *r; - - if (start > 0) { - /* - * The goal was occupied; search forward for a free - * block within the next XX blocks. - * - * end_goal is more or less random, but it has to be - * less than EXT4_BLOCKS_PER_GROUP. Aligning up to the - * next 64-bit boundary is simple.. - */ - ext4_grpblk_t end_goal = (start + 63) & ~63; - if (end_goal > maxblocks) - end_goal = maxblocks; - here = ext4_find_next_zero_bit(bh->b_data, end_goal, start); - if (here < end_goal && ext4_test_allocatable(here, bh)) - return here; - ext4_debug("Bit not found near goal\n"); - } - - here = start; - if (here < 0) - here = 0; - - p = ((char *)bh->b_data) + (here >> 3); - r = memscan(p, 0, (maxblocks - here + 7) >> 3); - next = (r - ((char *)bh->b_data)) << 3; - - if (next < maxblocks && next >= start && ext4_test_allocatable(next, bh)) - return next; - - /* - * The bitmap search --- search forward alternately through the actual - * bitmap and the last-committed copy until we find a bit free in - * both - */ - here = bitmap_search_next_usable_block(here, bh, maxblocks); - return here; -} - -/** - * claim_block() - * @block: the free block (group relative) to allocate - * @bh: the bufferhead containts the block group bitmap - * - * We think we can allocate this block in this bitmap. Try to set the bit. - * If that succeeds then check that nobody has allocated and then freed the - * block since we saw that is was not marked in b_committed_data. If it _was_ - * allocated and freed then clear the bit in the bitmap again and return - * zero (failure). - */ -static inline int -claim_block(spinlock_t *lock, ext4_grpblk_t block, struct buffer_head *bh) -{ - struct journal_head *jh = bh2jh(bh); - int ret; - - if (ext4_set_bit_atomic(lock, block, bh->b_data)) - return 0; - jbd_lock_bh_state(bh); - if (jh->b_committed_data && ext4_test_bit(block,jh->b_committed_data)) { - ext4_clear_bit_atomic(lock, block, bh->b_data); - ret = 0; - } else { - ret = 1; - } - jbd_unlock_bh_state(bh); - return ret; -} - -/** - * ext4_try_to_allocate() - * @sb: superblock - * @handle: handle to this transaction - * @group: given allocation block group - * @bitmap_bh: bufferhead holds the block bitmap - * @grp_goal: given target block within the group - * @count: target number of blocks to allocate - * @my_rsv: reservation window - * - * Attempt to allocate blocks within a give range. Set the range of allocation - * first, then find the first free bit(s) from the bitmap (within the range), - * and at last, allocate the blocks by claiming the found free bit as allocated. - * - * To set the range of this allocation: - * if there is a reservation window, only try to allocate block(s) from the - * file's own reservation window; - * Otherwise, the allocation range starts from the give goal block, ends at - * the block group's last block. - * - * If we failed to allocate the desired block then we may end up crossing to a - * new bitmap. In that case we must release write access to the old one via - * ext4_journal_release_buffer(), else we'll run out of credits. - */ -static ext4_grpblk_t -ext4_try_to_allocate(struct super_block *sb, handle_t *handle, int group, - struct buffer_head *bitmap_bh, ext4_grpblk_t grp_goal, - unsigned long *count, struct ext4_reserve_window *my_rsv) -{ - ext4_fsblk_t group_first_block; - ext4_grpblk_t start, end; - unsigned long num = 0; - - /* we do allocation within the reservation window if we have a window */ - if (my_rsv) { - group_first_block = ext4_group_first_block_no(sb, group); - if (my_rsv->_rsv_start >= group_first_block) - start = my_rsv->_rsv_start - group_first_block; - else - /* reservation window cross group boundary */ - start = 0; - end = my_rsv->_rsv_end - group_first_block + 1; - if (end > EXT4_BLOCKS_PER_GROUP(sb)) - /* reservation window crosses group boundary */ - end = EXT4_BLOCKS_PER_GROUP(sb); - if ((start <= grp_goal) && (grp_goal < end)) - start = grp_goal; - else - grp_goal = -1; - } else { - if (grp_goal > 0) - start = grp_goal; - else - start = 0; - end = EXT4_BLOCKS_PER_GROUP(sb); - } - - BUG_ON(start > EXT4_BLOCKS_PER_GROUP(sb)); - -repeat: - if (grp_goal < 0 || !ext4_test_allocatable(grp_goal, bitmap_bh)) { - grp_goal = find_next_usable_block(start, bitmap_bh, end); - if (grp_goal < 0) - goto fail_access; - if (!my_rsv) { - int i; - - for (i = 0; i < 7 && grp_goal > start && - ext4_test_allocatable(grp_goal - 1, - bitmap_bh); - i++, grp_goal--) - ; - } - } - start = grp_goal; - - if (!claim_block(sb_bgl_lock(EXT4_SB(sb), group), - grp_goal, bitmap_bh)) { - /* - * The block was allocated by another thread, or it was - * allocated and then freed by another thread - */ - start++; - grp_goal++; - if (start >= end) - goto fail_access; - goto repeat; - } - num++; - grp_goal++; - while (num < *count && grp_goal < end - && ext4_test_allocatable(grp_goal, bitmap_bh) - && claim_block(sb_bgl_lock(EXT4_SB(sb), group), - grp_goal, bitmap_bh)) { - num++; - grp_goal++; - } - *count = num; - return grp_goal - num; -fail_access: - *count = num; - return -1; -} - -/** - * find_next_reservable_window(): - * find a reservable space within the given range. - * It does not allocate the reservation window for now: - * alloc_new_reservation() will do the work later. - * - * @search_head: the head of the searching list; - * This is not necessarily the list head of the whole filesystem - * - * We have both head and start_block to assist the search - * for the reservable space. The list starts from head, - * but we will shift to the place where start_block is, - * then start from there, when looking for a reservable space. - * - * @size: the target new reservation window size - * - * @group_first_block: the first block we consider to start - * the real search from - * - * @last_block: - * the maximum block number that our goal reservable space - * could start from. This is normally the last block in this - * group. The search will end when we found the start of next - * possible reservable space is out of this boundary. - * This could handle the cross boundary reservation window - * request. - * - * basically we search from the given range, rather than the whole - * reservation double linked list, (start_block, last_block) - * to find a free region that is of my size and has not - * been reserved. - * - */ -static int find_next_reservable_window( - struct ext4_reserve_window_node *search_head, - struct ext4_reserve_window_node *my_rsv, - struct super_block * sb, - ext4_fsblk_t start_block, - ext4_fsblk_t last_block) -{ - struct rb_node *next; - struct ext4_reserve_window_node *rsv, *prev; - ext4_fsblk_t cur; - int size = my_rsv->rsv_goal_size; - - /* TODO: make the start of the reservation window byte-aligned */ - /* cur = *start_block & ~7;*/ - cur = start_block; - rsv = search_head; - if (!rsv) - return -1; - - while (1) { - if (cur <= rsv->rsv_end) - cur = rsv->rsv_end + 1; - - /* TODO? - * in the case we could not find a reservable space - * that is what is expected, during the re-search, we could - * remember what's the largest reservable space we could have - * and return that one. - * - * For now it will fail if we could not find the reservable - * space with expected-size (or more)... - */ - if (cur > last_block) - return -1; /* fail */ - - prev = rsv; - next = rb_next(&rsv->rsv_node); - rsv = list_entry(next,struct ext4_reserve_window_node,rsv_node); - - /* - * Reached the last reservation, we can just append to the - * previous one. - */ - if (!next) - break; - - if (cur + size <= rsv->rsv_start) { - /* - * Found a reserveable space big enough. We could - * have a reservation across the group boundary here - */ - break; - } - } - /* - * we come here either : - * when we reach the end of the whole list, - * and there is empty reservable space after last entry in the list. - * append it to the end of the list. - * - * or we found one reservable space in the middle of the list, - * return the reservation window that we could append to. - * succeed. - */ - - if ((prev != my_rsv) && (!rsv_is_empty(&my_rsv->rsv_window))) - rsv_window_remove(sb, my_rsv); - - /* - * Let's book the whole avaliable window for now. We will check the - * disk bitmap later and then, if there are free blocks then we adjust - * the window size if it's larger than requested. - * Otherwise, we will remove this node from the tree next time - * call find_next_reservable_window. - */ - my_rsv->rsv_start = cur; - my_rsv->rsv_end = cur + size - 1; - my_rsv->rsv_alloc_hit = 0; - - if (prev != my_rsv) - ext4_rsv_window_add(sb, my_rsv); - - return 0; -} - -/** - * alloc_new_reservation()--allocate a new reservation window - * - * To make a new reservation, we search part of the filesystem - * reservation list (the list that inside the group). We try to - * allocate a new reservation window near the allocation goal, - * or the beginning of the group, if there is no goal. - * - * We first find a reservable space after the goal, then from - * there, we check the bitmap for the first free block after - * it. If there is no free block until the end of group, then the - * whole group is full, we failed. Otherwise, check if the free - * block is inside the expected reservable space, if so, we - * succeed. - * If the first free block is outside the reservable space, then - * start from the first free block, we search for next available - * space, and go on. - * - * on succeed, a new reservation will be found and inserted into the list - * It contains at least one free block, and it does not overlap with other - * reservation windows. - * - * failed: we failed to find a reservation window in this group - * - * @rsv: the reservation - * - * @grp_goal: The goal (group-relative). It is where the search for a - * free reservable space should start from. - * if we have a grp_goal(grp_goal >0 ), then start from there, - * no grp_goal(grp_goal = -1), we start from the first block - * of the group. - * - * @sb: the super block - * @group: the group we are trying to allocate in - * @bitmap_bh: the block group block bitmap - * - */ -static int alloc_new_reservation(struct ext4_reserve_window_node *my_rsv, - ext4_grpblk_t grp_goal, struct super_block *sb, - unsigned int group, struct buffer_head *bitmap_bh) -{ - struct ext4_reserve_window_node *search_head; - ext4_fsblk_t group_first_block, group_end_block, start_block; - ext4_grpblk_t first_free_block; - struct rb_root *fs_rsv_root = &EXT4_SB(sb)->s_rsv_window_root; - unsigned long size; - int ret; - spinlock_t *rsv_lock = &EXT4_SB(sb)->s_rsv_window_lock; - - group_first_block = ext4_group_first_block_no(sb, group); - group_end_block = group_first_block + (EXT4_BLOCKS_PER_GROUP(sb) - 1); - - if (grp_goal < 0) - start_block = group_first_block; - else - start_block = grp_goal + group_first_block; - - size = my_rsv->rsv_goal_size; - - if (!rsv_is_empty(&my_rsv->rsv_window)) { - /* - * if the old reservation is cross group boundary - * and if the goal is inside the old reservation window, - * we will come here when we just failed to allocate from - * the first part of the window. We still have another part - * that belongs to the next group. In this case, there is no - * point to discard our window and try to allocate a new one - * in this group(which will fail). we should - * keep the reservation window, just simply move on. - * - * Maybe we could shift the start block of the reservation - * window to the first block of next group. - */ - - if ((my_rsv->rsv_start <= group_end_block) && - (my_rsv->rsv_end > group_end_block) && - (start_block >= my_rsv->rsv_start)) - return -1; - - if ((my_rsv->rsv_alloc_hit > - (my_rsv->rsv_end - my_rsv->rsv_start + 1) / 2)) { - /* - * if the previously allocation hit ratio is - * greater than 1/2, then we double the size of - * the reservation window the next time, - * otherwise we keep the same size window - */ - size = size * 2; - if (size > EXT4_MAX_RESERVE_BLOCKS) - size = EXT4_MAX_RESERVE_BLOCKS; - my_rsv->rsv_goal_size= size; - } - } - - spin_lock(rsv_lock); - /* - * shift the search start to the window near the goal block - */ - search_head = search_reserve_window(fs_rsv_root, start_block); - - /* - * find_next_reservable_window() simply finds a reservable window - * inside the given range(start_block, group_end_block). - * - * To make sure the reservation window has a free bit inside it, we - * need to check the bitmap after we found a reservable window. - */ -retry: - ret = find_next_reservable_window(search_head, my_rsv, sb, - start_block, group_end_block); - - if (ret == -1) { - if (!rsv_is_empty(&my_rsv->rsv_window)) - rsv_window_remove(sb, my_rsv); - spin_unlock(rsv_lock); - return -1; - } - - /* - * On success, find_next_reservable_window() returns the - * reservation window where there is a reservable space after it. - * Before we reserve this reservable space, we need - * to make sure there is at least a free block inside this region. - * - * searching the first free bit on the block bitmap and copy of - * last committed bitmap alternatively, until we found a allocatable - * block. Search start from the start block of the reservable space - * we just found. - */ - spin_unlock(rsv_lock); - first_free_block = bitmap_search_next_usable_block( - my_rsv->rsv_start - group_first_block, - bitmap_bh, group_end_block - group_first_block + 1); - - if (first_free_block < 0) { - /* - * no free block left on the bitmap, no point - * to reserve the space. return failed. - */ - spin_lock(rsv_lock); - if (!rsv_is_empty(&my_rsv->rsv_window)) - rsv_window_remove(sb, my_rsv); - spin_unlock(rsv_lock); - return -1; /* failed */ - } - - start_block = first_free_block + group_first_block; - /* - * check if the first free block is within the - * free space we just reserved - */ - if (start_block >= my_rsv->rsv_start && start_block < my_rsv->rsv_end) - return 0; /* success */ - /* - * if the first free bit we found is out of the reservable space - * continue search for next reservable space, - * start from where the free block is, - * we also shift the list head to where we stopped last time - */ - search_head = my_rsv; - spin_lock(rsv_lock); - goto retry; -} - -/** - * try_to_extend_reservation() - * @my_rsv: given reservation window - * @sb: super block - * @size: the delta to extend - * - * Attempt to expand the reservation window large enough to have - * required number of free blocks - * - * Since ext4_try_to_allocate() will always allocate blocks within - * the reservation window range, if the window size is too small, - * multiple blocks allocation has to stop at the end of the reservation - * window. To make this more efficient, given the total number of - * blocks needed and the current size of the window, we try to - * expand the reservation window size if necessary on a best-effort - * basis before ext4_new_blocks() tries to allocate blocks, - */ -static void try_to_extend_reservation(struct ext4_reserve_window_node *my_rsv, - struct super_block *sb, int size) -{ - struct ext4_reserve_window_node *next_rsv; - struct rb_node *next; - spinlock_t *rsv_lock = &EXT4_SB(sb)->s_rsv_window_lock; - - if (!spin_trylock(rsv_lock)) - return; - - next = rb_next(&my_rsv->rsv_node); - - if (!next) - my_rsv->rsv_end += size; - else { - next_rsv = list_entry(next, struct ext4_reserve_window_node, rsv_node); - - if ((next_rsv->rsv_start - my_rsv->rsv_end - 1) >= size) - my_rsv->rsv_end += size; - else - my_rsv->rsv_end = next_rsv->rsv_start - 1; - } - spin_unlock(rsv_lock); -} - -/** - * ext4_try_to_allocate_with_rsv() - * @sb: superblock - * @handle: handle to this transaction - * @group: given allocation block group - * @bitmap_bh: bufferhead holds the block bitmap - * @grp_goal: given target block within the group - * @count: target number of blocks to allocate - * @my_rsv: reservation window - * @errp: pointer to store the error code - * - * This is the main function used to allocate a new block and its reservation - * window. - * - * Each time when a new block allocation is need, first try to allocate from - * its own reservation. If it does not have a reservation window, instead of - * looking for a free bit on bitmap first, then look up the reservation list to - * see if it is inside somebody else's reservation window, we try to allocate a - * reservation window for it starting from the goal first. Then do the block - * allocation within the reservation window. - * - * This will avoid keeping on searching the reservation list again and - * again when somebody is looking for a free block (without - * reservation), and there are lots of free blocks, but they are all - * being reserved. - * - * We use a red-black tree for the per-filesystem reservation list. - * - */ -static ext4_grpblk_t -ext4_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle, - unsigned int group, struct buffer_head *bitmap_bh, - ext4_grpblk_t grp_goal, - struct ext4_reserve_window_node * my_rsv, - unsigned long *count, int *errp) -{ - ext4_fsblk_t group_first_block, group_last_block; - ext4_grpblk_t ret = 0; - int fatal; - unsigned long num = *count; - - *errp = 0; - - /* - * Make sure we use undo access for the bitmap, because it is critical - * that we do the frozen_data COW on bitmap buffers in all cases even - * if the buffer is in BJ_Forget state in the committing transaction. - */ - BUFFER_TRACE(bitmap_bh, "get undo access for new block"); - fatal = ext4_journal_get_undo_access(handle, bitmap_bh); - if (fatal) { - *errp = fatal; - return -1; - } - - /* - * we don't deal with reservation when - * filesystem is mounted without reservation - * or the file is not a regular file - * or last attempt to allocate a block with reservation turned on failed - */ - if (my_rsv == NULL ) { - ret = ext4_try_to_allocate(sb, handle, group, bitmap_bh, - grp_goal, count, NULL); - goto out; - } - /* - * grp_goal is a group relative block number (if there is a goal) - * 0 < grp_goal < EXT4_BLOCKS_PER_GROUP(sb) - * first block is a filesystem wide block number - * first block is the block number of the first block in this group - */ - group_first_block = ext4_group_first_block_no(sb, group); - group_last_block = group_first_block + (EXT4_BLOCKS_PER_GROUP(sb) - 1); - - /* - * Basically we will allocate a new block from inode's reservation - * window. - * - * We need to allocate a new reservation window, if: - * a) inode does not have a reservation window; or - * b) last attempt to allocate a block from existing reservation - * failed; or - * c) we come here with a goal and with a reservation window - * - * We do not need to allocate a new reservation window if we come here - * at the beginning with a goal and the goal is inside the window, or - * we don't have a goal but already have a reservation window. - * then we could go to allocate from the reservation window directly. - */ - while (1) { - if (rsv_is_empty(&my_rsv->rsv_window) || (ret < 0) || - !goal_in_my_reservation(&my_rsv->rsv_window, - grp_goal, group, sb)) { - if (my_rsv->rsv_goal_size < *count) - my_rsv->rsv_goal_size = *count; - ret = alloc_new_reservation(my_rsv, grp_goal, sb, - group, bitmap_bh); - if (ret < 0) - break; /* failed */ - - if (!goal_in_my_reservation(&my_rsv->rsv_window, - grp_goal, group, sb)) - grp_goal = -1; - } else if (grp_goal > 0 && - (my_rsv->rsv_end-grp_goal+1) < *count) - try_to_extend_reservation(my_rsv, sb, - *count-my_rsv->rsv_end + grp_goal - 1); - - if ((my_rsv->rsv_start > group_last_block) || - (my_rsv->rsv_end < group_first_block)) { - rsv_window_dump(&EXT4_SB(sb)->s_rsv_window_root, 1); - BUG(); - } - ret = ext4_try_to_allocate(sb, handle, group, bitmap_bh, - grp_goal, &num, &my_rsv->rsv_window); - if (ret >= 0) { - my_rsv->rsv_alloc_hit += num; - *count = num; - break; /* succeed */ - } - num = *count; - } -out: - if (ret >= 0) { - BUFFER_TRACE(bitmap_bh, "journal_dirty_metadata for " - "bitmap block"); - fatal = ext4_journal_dirty_metadata(handle, bitmap_bh); - if (fatal) { - *errp = fatal; - return -1; - } - return ret; - } - - BUFFER_TRACE(bitmap_bh, "journal_release_buffer"); - ext4_journal_release_buffer(handle, bitmap_bh); - return ret; -} - -/** - * ext4_has_free_blocks() - * @sbi: in-core super block structure. - * - * Check if filesystem has at least 1 free block available for allocation. - */ -static int ext4_has_free_blocks(struct ext4_sb_info *sbi) -{ - ext4_fsblk_t free_blocks, root_blocks; - - free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter); - root_blocks = ext4_r_blocks_count(sbi->s_es); - if (free_blocks < root_blocks + 1 && !capable(CAP_SYS_RESOURCE) && - sbi->s_resuid != current->fsuid && - (sbi->s_resgid == 0 || !in_group_p (sbi->s_resgid))) { - return 0; - } - return 1; -} - -/** - * ext4_should_retry_alloc() - * @sb: super block - * @retries number of attemps has been made - * - * ext4_should_retry_alloc() is called when ENOSPC is returned, and if - * it is profitable to retry the operation, this function will wait - * for the current or commiting transaction to complete, and then - * return TRUE. - * - * if the total number of retries exceed three times, return FALSE. - */ -int ext4_should_retry_alloc(struct super_block *sb, int *retries) -{ - if (!ext4_has_free_blocks(EXT4_SB(sb)) || (*retries)++ > 3) - return 0; - - jbd_debug(1, "%s: retrying operation after ENOSPC\n", sb->s_id); - - return jbd2_journal_force_commit_nested(EXT4_SB(sb)->s_journal); -} - -/** - * ext4_new_blocks() -- core block(s) allocation function - * @handle: handle to this transaction - * @inode: file inode - * @goal: given target block(filesystem wide) - * @count: target number of blocks to allocate - * @errp: error code - * - * ext4_new_blocks uses a goal block to assist allocation. It tries to - * allocate block(s) from the block group contains the goal block first. If that - * fails, it will try to allocate block(s) from other block groups without - * any specific goal block. - * - */ -ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode, - ext4_fsblk_t goal, unsigned long *count, int *errp) -{ - struct buffer_head *bitmap_bh = NULL; - struct buffer_head *gdp_bh; - unsigned long group_no; - int goal_group; - ext4_grpblk_t grp_target_blk; /* blockgroup relative goal block */ - ext4_grpblk_t grp_alloc_blk; /* blockgroup-relative allocated block*/ - ext4_fsblk_t ret_block; /* filesyetem-wide allocated block */ - int bgi; /* blockgroup iteration index */ - int fatal = 0, err; - int performed_allocation = 0; - ext4_grpblk_t free_blocks; /* number of free blocks in a group */ - struct super_block *sb; - struct ext4_group_desc *gdp; - struct ext4_super_block *es; - struct ext4_sb_info *sbi; - struct ext4_reserve_window_node *my_rsv = NULL; - struct ext4_block_alloc_info *block_i; - unsigned short windowsz = 0; -#ifdef EXT4FS_DEBUG - static int goal_hits, goal_attempts; -#endif - unsigned long ngroups; - unsigned long num = *count; - - *errp = -ENOSPC; - sb = inode->i_sb; - if (!sb) { - printk("ext4_new_block: nonexistent device"); - return 0; - } - - /* - * Check quota for allocation of this block. - */ - if (DQUOT_ALLOC_BLOCK(inode, num)) { - *errp = -EDQUOT; - return 0; - } - - sbi = EXT4_SB(sb); - es = EXT4_SB(sb)->s_es; - ext4_debug("goal=%lu.\n", goal); - /* - * Allocate a block from reservation only when - * filesystem is mounted with reservation(default,-o reservation), and - * it's a regular file, and - * the desired window size is greater than 0 (One could use ioctl - * command EXT4_IOC_SETRSVSZ to set the window size to 0 to turn off - * reservation on that particular file) - */ - block_i = EXT4_I(inode)->i_block_alloc_info; - if (block_i && ((windowsz = block_i->rsv_window_node.rsv_goal_size) > 0)) - my_rsv = &block_i->rsv_window_node; - - if (!ext4_has_free_blocks(sbi)) { - *errp = -ENOSPC; - goto out; - } - - /* - * First, test whether the goal block is free. - */ - if (goal < le32_to_cpu(es->s_first_data_block) || - goal >= ext4_blocks_count(es)) - goal = le32_to_cpu(es->s_first_data_block); - ext4_get_group_no_and_offset(sb, goal, &group_no, &grp_target_blk); - goal_group = group_no; -retry_alloc: - gdp = ext4_get_group_desc(sb, group_no, &gdp_bh); - if (!gdp) - goto io_error; - - free_blocks = le16_to_cpu(gdp->bg_free_blocks_count); - /* - * if there is not enough free blocks to make a new resevation - * turn off reservation for this allocation - */ - if (my_rsv && (free_blocks < windowsz) - && (rsv_is_empty(&my_rsv->rsv_window))) - my_rsv = NULL; - - if (free_blocks > 0) { - bitmap_bh = read_block_bitmap(sb, group_no); - if (!bitmap_bh) - goto io_error; - grp_alloc_blk = ext4_try_to_allocate_with_rsv(sb, handle, - group_no, bitmap_bh, grp_target_blk, - my_rsv, &num, &fatal); - if (fatal) - goto out; - if (grp_alloc_blk >= 0) - goto allocated; - } - - ngroups = EXT4_SB(sb)->s_groups_count; - smp_rmb(); - - /* - * Now search the rest of the groups. We assume that - * i and gdp correctly point to the last group visited. - */ - for (bgi = 0; bgi < ngroups; bgi++) { - group_no++; - if (group_no >= ngroups) - group_no = 0; - gdp = ext4_get_group_desc(sb, group_no, &gdp_bh); - if (!gdp) { - *errp = -EIO; - goto out; - } - free_blocks = le16_to_cpu(gdp->bg_free_blocks_count); - /* - * skip this group if the number of - * free blocks is less than half of the reservation - * window size. - */ - if (free_blocks <= (windowsz/2)) - continue; - - brelse(bitmap_bh); - bitmap_bh = read_block_bitmap(sb, group_no); - if (!bitmap_bh) - goto io_error; - /* - * try to allocate block(s) from this group, without a goal(-1). - */ - grp_alloc_blk = ext4_try_to_allocate_with_rsv(sb, handle, - group_no, bitmap_bh, -1, my_rsv, - &num, &fatal); - if (fatal) - goto out; - if (grp_alloc_blk >= 0) - goto allocated; - } - /* - * We may end up a bogus ealier ENOSPC error due to - * filesystem is "full" of reservations, but - * there maybe indeed free blocks avaliable on disk - * In this case, we just forget about the reservations - * just do block allocation as without reservations. - */ - if (my_rsv) { - my_rsv = NULL; - group_no = goal_group; - goto retry_alloc; - } - /* No space left on the device */ - *errp = -ENOSPC; - goto out; - -allocated: - - ext4_debug("using block group %d(%d)\n", - group_no, gdp->bg_free_blocks_count); - - BUFFER_TRACE(gdp_bh, "get_write_access"); - fatal = ext4_journal_get_write_access(handle, gdp_bh); - if (fatal) - goto out; - - ret_block = grp_alloc_blk + ext4_group_first_block_no(sb, group_no); - - if (in_range(ext4_block_bitmap(sb, gdp), ret_block, num) || - in_range(ext4_block_bitmap(sb, gdp), ret_block, num) || - in_range(ret_block, ext4_inode_table(sb, gdp), - EXT4_SB(sb)->s_itb_per_group) || - in_range(ret_block + num - 1, ext4_inode_table(sb, gdp), - EXT4_SB(sb)->s_itb_per_group)) - ext4_error(sb, "ext4_new_block", - "Allocating block in system zone - " - "blocks from %llu, length %lu", - ret_block, num); - - performed_allocation = 1; - -#ifdef CONFIG_JBD_DEBUG - { - struct buffer_head *debug_bh; - - /* Record bitmap buffer state in the newly allocated block */ - debug_bh = sb_find_get_block(sb, ret_block); - if (debug_bh) { - BUFFER_TRACE(debug_bh, "state when allocated"); - BUFFER_TRACE2(debug_bh, bitmap_bh, "bitmap state"); - brelse(debug_bh); - } - } - jbd_lock_bh_state(bitmap_bh); - spin_lock(sb_bgl_lock(sbi, group_no)); - if (buffer_jbd(bitmap_bh) && bh2jh(bitmap_bh)->b_committed_data) { - int i; - - for (i = 0; i < num; i++) { - if (ext4_test_bit(grp_alloc_blk+i, - bh2jh(bitmap_bh)->b_committed_data)) { - printk("%s: block was unexpectedly set in " - "b_committed_data\n", __FUNCTION__); - } - } - } - ext4_debug("found bit %d\n", grp_alloc_blk); - spin_unlock(sb_bgl_lock(sbi, group_no)); - jbd_unlock_bh_state(bitmap_bh); -#endif - - if (ret_block + num - 1 >= ext4_blocks_count(es)) { - ext4_error(sb, "ext4_new_block", - "block(%llu) >= blocks count(%llu) - " - "block_group = %lu, es == %p ", ret_block, - ext4_blocks_count(es), group_no, es); - goto out; - } - - /* - * It is up to the caller to add the new buffer to a journal - * list of some description. We don't know in advance whether - * the caller wants to use it as metadata or data. - */ - ext4_debug("allocating block %lu. Goal hits %d of %d.\n", - ret_block, goal_hits, goal_attempts); - - spin_lock(sb_bgl_lock(sbi, group_no)); - gdp->bg_free_blocks_count = - cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count)-num); - spin_unlock(sb_bgl_lock(sbi, group_no)); - percpu_counter_mod(&sbi->s_freeblocks_counter, -num); - - BUFFER_TRACE(gdp_bh, "journal_dirty_metadata for group descriptor"); - err = ext4_journal_dirty_metadata(handle, gdp_bh); - if (!fatal) - fatal = err; - - sb->s_dirt = 1; - if (fatal) - goto out; - - *errp = 0; - brelse(bitmap_bh); - DQUOT_FREE_BLOCK(inode, *count-num); - *count = num; - return ret_block; - -io_error: - *errp = -EIO; -out: - if (fatal) { - *errp = fatal; - ext4_std_error(sb, fatal); - } - /* - * Undo the block allocation - */ - if (!performed_allocation) - DQUOT_FREE_BLOCK(inode, *count); - brelse(bitmap_bh); - return 0; -} - -ext4_fsblk_t ext4_new_block(handle_t *handle, struct inode *inode, - ext4_fsblk_t goal, int *errp) -{ - unsigned long count = 1; - - return ext4_new_blocks(handle, inode, goal, &count, errp); -} - -/** - * ext4_count_free_blocks() -- count filesystem free blocks - * @sb: superblock - * - * Adds up the number of free blocks from each block group. - */ -ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb) -{ - ext4_fsblk_t desc_count; - struct ext4_group_desc *gdp; - int i; - unsigned long ngroups = EXT4_SB(sb)->s_groups_count; -#ifdef EXT4FS_DEBUG - struct ext4_super_block *es; - ext4_fsblk_t bitmap_count; - unsigned long x; - struct buffer_head *bitmap_bh = NULL; - - es = EXT4_SB(sb)->s_es; - desc_count = 0; - bitmap_count = 0; - gdp = NULL; - - smp_rmb(); - for (i = 0; i < ngroups; i++) { - gdp = ext4_get_group_desc(sb, i, NULL); - if (!gdp) - continue; - desc_count += le16_to_cpu(gdp->bg_free_blocks_count); - brelse(bitmap_bh); - bitmap_bh = read_block_bitmap(sb, i); - if (bitmap_bh == NULL) - continue; - - x = ext4_count_free(bitmap_bh, sb->s_blocksize); - printk("group %d: stored = %d, counted = %lu\n", - i, le16_to_cpu(gdp->bg_free_blocks_count), x); - bitmap_count += x; - } - brelse(bitmap_bh); - printk("ext4_count_free_blocks: stored = %llu" - ", computed = %llu, %llu\n", - EXT4_FREE_BLOCKS_COUNT(es), - desc_count, bitmap_count); - return bitmap_count; -#else - desc_count = 0; - smp_rmb(); - for (i = 0; i < ngroups; i++) { - gdp = ext4_get_group_desc(sb, i, NULL); - if (!gdp) - continue; - desc_count += le16_to_cpu(gdp->bg_free_blocks_count); - } - - return desc_count; -#endif -} - -static inline int -block_in_use(ext4_fsblk_t block, struct super_block *sb, unsigned char *map) -{ - ext4_grpblk_t offset; - - ext4_get_group_no_and_offset(sb, block, NULL, &offset); - return ext4_test_bit (offset, map); -} - -static inline int test_root(int a, int b) -{ - int num = b; - - while (a > num) - num *= b; - return num == a; -} - -static int ext4_group_sparse(int group) -{ - if (group <= 1) - return 1; - if (!(group & 1)) - return 0; - return (test_root(group, 7) || test_root(group, 5) || - test_root(group, 3)); -} - -/** - * ext4_bg_has_super - number of blocks used by the superblock in group - * @sb: superblock for filesystem - * @group: group number to check - * - * Return the number of blocks used by the superblock (primary or backup) - * in this group. Currently this will be only 0 or 1. - */ -int ext4_bg_has_super(struct super_block *sb, int group) -{ - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER) && - !ext4_group_sparse(group)) - return 0; - return 1; -} - -static unsigned long ext4_bg_num_gdb_meta(struct super_block *sb, int group) -{ - unsigned long metagroup = group / EXT4_DESC_PER_BLOCK(sb); - unsigned long first = metagroup * EXT4_DESC_PER_BLOCK(sb); - unsigned long last = first + EXT4_DESC_PER_BLOCK(sb) - 1; - - if (group == first || group == first + 1 || group == last) - return 1; - return 0; -} - -static unsigned long ext4_bg_num_gdb_nometa(struct super_block *sb, int group) -{ - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER) && - !ext4_group_sparse(group)) - return 0; - return EXT4_SB(sb)->s_gdb_count; -} - -/** - * ext4_bg_num_gdb - number of blocks used by the group table in group - * @sb: superblock for filesystem - * @group: group number to check - * - * Return the number of blocks used by the group descriptor table - * (primary or backup) in this group. In the future there may be a - * different number of descriptor blocks in each group. - */ -unsigned long ext4_bg_num_gdb(struct super_block *sb, int group) -{ - unsigned long first_meta_bg = - le32_to_cpu(EXT4_SB(sb)->s_es->s_first_meta_bg); - unsigned long metagroup = group / EXT4_DESC_PER_BLOCK(sb); - - if (!EXT4_HAS_INCOMPAT_FEATURE(sb,EXT4_FEATURE_INCOMPAT_META_BG) || - metagroup < first_meta_bg) - return ext4_bg_num_gdb_nometa(sb,group); - - return ext4_bg_num_gdb_meta(sb,group); - -} diff --git a/trunk/fs/ext4/bitmap.c b/trunk/fs/ext4/bitmap.c deleted file mode 100644 index 11e93c169bcf..000000000000 --- a/trunk/fs/ext4/bitmap.c +++ /dev/null @@ -1,32 +0,0 @@ -/* - * linux/fs/ext4/bitmap.c - * - * Copyright (C) 1992, 1993, 1994, 1995 - * Remy Card (card@masi.ibp.fr) - * Laboratoire MASI - Institut Blaise Pascal - * Universite Pierre et Marie Curie (Paris VI) - */ - -#include -#include -#include - -#ifdef EXT4FS_DEBUG - -static int nibblemap[] = {4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0}; - -unsigned long ext4_count_free (struct buffer_head * map, unsigned int numchars) -{ - unsigned int i; - unsigned long sum = 0; - - if (!map) - return (0); - for (i = 0; i < numchars; i++) - sum += nibblemap[map->b_data[i] & 0xf] + - nibblemap[(map->b_data[i] >> 4) & 0xf]; - return (sum); -} - -#endif /* EXT4FS_DEBUG */ - diff --git a/trunk/fs/ext4/dir.c b/trunk/fs/ext4/dir.c deleted file mode 100644 index f8595787a70e..000000000000 --- a/trunk/fs/ext4/dir.c +++ /dev/null @@ -1,518 +0,0 @@ -/* - * linux/fs/ext4/dir.c - * - * Copyright (C) 1992, 1993, 1994, 1995 - * Remy Card (card@masi.ibp.fr) - * Laboratoire MASI - Institut Blaise Pascal - * Universite Pierre et Marie Curie (Paris VI) - * - * from - * - * linux/fs/minix/dir.c - * - * Copyright (C) 1991, 1992 Linus Torvalds - * - * ext4 directory handling functions - * - * Big-endian to little-endian byte-swapping/bitmaps by - * David S. Miller (davem@caip.rutgers.edu), 1995 - * - * Hash Tree Directory indexing (c) 2001 Daniel Phillips - * - */ - -#include -#include -#include -#include -#include -#include -#include - -static unsigned char ext4_filetype_table[] = { - DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK -}; - -static int ext4_readdir(struct file *, void *, filldir_t); -static int ext4_dx_readdir(struct file * filp, - void * dirent, filldir_t filldir); -static int ext4_release_dir (struct inode * inode, - struct file * filp); - -const struct file_operations ext4_dir_operations = { - .llseek = generic_file_llseek, - .read = generic_read_dir, - .readdir = ext4_readdir, /* we take BKL. needed?*/ - .ioctl = ext4_ioctl, /* BKL held */ -#ifdef CONFIG_COMPAT - .compat_ioctl = ext4_compat_ioctl, -#endif - .fsync = ext4_sync_file, /* BKL held */ -#ifdef CONFIG_EXT4_INDEX - .release = ext4_release_dir, -#endif -}; - - -static unsigned char get_dtype(struct super_block *sb, int filetype) -{ - if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FILETYPE) || - (filetype >= EXT4_FT_MAX)) - return DT_UNKNOWN; - - return (ext4_filetype_table[filetype]); -} - - -int ext4_check_dir_entry (const char * function, struct inode * dir, - struct ext4_dir_entry_2 * de, - struct buffer_head * bh, - unsigned long offset) -{ - const char * error_msg = NULL; - const int rlen = le16_to_cpu(de->rec_len); - - if (rlen < EXT4_DIR_REC_LEN(1)) - error_msg = "rec_len is smaller than minimal"; - else if (rlen % 4 != 0) - error_msg = "rec_len % 4 != 0"; - else if (rlen < EXT4_DIR_REC_LEN(de->name_len)) - error_msg = "rec_len is too small for name_len"; - else if (((char *) de - bh->b_data) + rlen > dir->i_sb->s_blocksize) - error_msg = "directory entry across blocks"; - else if (le32_to_cpu(de->inode) > - le32_to_cpu(EXT4_SB(dir->i_sb)->s_es->s_inodes_count)) - error_msg = "inode out of bounds"; - - if (error_msg != NULL) - ext4_error (dir->i_sb, function, - "bad entry in directory #%lu: %s - " - "offset=%lu, inode=%lu, rec_len=%d, name_len=%d", - dir->i_ino, error_msg, offset, - (unsigned long) le32_to_cpu(de->inode), - rlen, de->name_len); - return error_msg == NULL ? 1 : 0; -} - -static int ext4_readdir(struct file * filp, - void * dirent, filldir_t filldir) -{ - int error = 0; - unsigned long offset; - int i, stored; - struct ext4_dir_entry_2 *de; - struct super_block *sb; - int err; - struct inode *inode = filp->f_dentry->d_inode; - int ret = 0; - - sb = inode->i_sb; - -#ifdef CONFIG_EXT4_INDEX - if (EXT4_HAS_COMPAT_FEATURE(inode->i_sb, - EXT4_FEATURE_COMPAT_DIR_INDEX) && - ((EXT4_I(inode)->i_flags & EXT4_INDEX_FL) || - ((inode->i_size >> sb->s_blocksize_bits) == 1))) { - err = ext4_dx_readdir(filp, dirent, filldir); - if (err != ERR_BAD_DX_DIR) { - ret = err; - goto out; - } - /* - * We don't set the inode dirty flag since it's not - * critical that it get flushed back to the disk. - */ - EXT4_I(filp->f_dentry->d_inode)->i_flags &= ~EXT4_INDEX_FL; - } -#endif - stored = 0; - offset = filp->f_pos & (sb->s_blocksize - 1); - - while (!error && !stored && filp->f_pos < inode->i_size) { - unsigned long blk = filp->f_pos >> EXT4_BLOCK_SIZE_BITS(sb); - struct buffer_head map_bh; - struct buffer_head *bh = NULL; - - map_bh.b_state = 0; - err = ext4_get_blocks_wrap(NULL, inode, blk, 1, &map_bh, 0, 0); - if (err > 0) { - page_cache_readahead(sb->s_bdev->bd_inode->i_mapping, - &filp->f_ra, - filp, - map_bh.b_blocknr >> - (PAGE_CACHE_SHIFT - inode->i_blkbits), - 1); - bh = ext4_bread(NULL, inode, blk, 0, &err); - } - - /* - * We ignore I/O errors on directories so users have a chance - * of recovering data when there's a bad sector - */ - if (!bh) { - ext4_error (sb, "ext4_readdir", - "directory #%lu contains a hole at offset %lu", - inode->i_ino, (unsigned long)filp->f_pos); - filp->f_pos += sb->s_blocksize - offset; - continue; - } - -revalidate: - /* If the dir block has changed since the last call to - * readdir(2), then we might be pointing to an invalid - * dirent right now. Scan from the start of the block - * to make sure. */ - if (filp->f_version != inode->i_version) { - for (i = 0; i < sb->s_blocksize && i < offset; ) { - de = (struct ext4_dir_entry_2 *) - (bh->b_data + i); - /* It's too expensive to do a full - * dirent test each time round this - * loop, but we do have to test at - * least that it is non-zero. A - * failure will be detected in the - * dirent test below. */ - if (le16_to_cpu(de->rec_len) < - EXT4_DIR_REC_LEN(1)) - break; - i += le16_to_cpu(de->rec_len); - } - offset = i; - filp->f_pos = (filp->f_pos & ~(sb->s_blocksize - 1)) - | offset; - filp->f_version = inode->i_version; - } - - while (!error && filp->f_pos < inode->i_size - && offset < sb->s_blocksize) { - de = (struct ext4_dir_entry_2 *) (bh->b_data + offset); - if (!ext4_check_dir_entry ("ext4_readdir", inode, de, - bh, offset)) { - /* - * On error, skip the f_pos to the next block - */ - filp->f_pos = (filp->f_pos | - (sb->s_blocksize - 1)) + 1; - brelse (bh); - ret = stored; - goto out; - } - offset += le16_to_cpu(de->rec_len); - if (le32_to_cpu(de->inode)) { - /* We might block in the next section - * if the data destination is - * currently swapped out. So, use a - * version stamp to detect whether or - * not the directory has been modified - * during the copy operation. - */ - unsigned long version = filp->f_version; - - error = filldir(dirent, de->name, - de->name_len, - filp->f_pos, - le32_to_cpu(de->inode), - get_dtype(sb, de->file_type)); - if (error) - break; - if (version != filp->f_version) - goto revalidate; - stored ++; - } - filp->f_pos += le16_to_cpu(de->rec_len); - } - offset = 0; - brelse (bh); - } -out: - return ret; -} - -#ifdef CONFIG_EXT4_INDEX -/* - * These functions convert from the major/minor hash to an f_pos - * value. - * - * Currently we only use major hash numer. This is unfortunate, but - * on 32-bit machines, the same VFS interface is used for lseek and - * llseek, so if we use the 64 bit offset, then the 32-bit versions of - * lseek/telldir/seekdir will blow out spectacularly, and from within - * the ext2 low-level routine, we don't know if we're being called by - * a 64-bit version of the system call or the 32-bit version of the - * system call. Worse yet, NFSv2 only allows for a 32-bit readdir - * cookie. Sigh. - */ -#define hash2pos(major, minor) (major >> 1) -#define pos2maj_hash(pos) ((pos << 1) & 0xffffffff) -#define pos2min_hash(pos) (0) - -/* - * This structure holds the nodes of the red-black tree used to store - * the directory entry in hash order. - */ -struct fname { - __u32 hash; - __u32 minor_hash; - struct rb_node rb_hash; - struct fname *next; - __u32 inode; - __u8 name_len; - __u8 file_type; - char name[0]; -}; - -/* - * This functoin implements a non-recursive way of freeing all of the - * nodes in the red-black tree. - */ -static void free_rb_tree_fname(struct rb_root *root) -{ - struct rb_node *n = root->rb_node; - struct rb_node *parent; - struct fname *fname; - - while (n) { - /* Do the node's children first */ - if ((n)->rb_left) { - n = n->rb_left; - continue; - } - if (n->rb_right) { - n = n->rb_right; - continue; - } - /* - * The node has no children; free it, and then zero - * out parent's link to it. Finally go to the - * beginning of the loop and try to free the parent - * node. - */ - parent = rb_parent(n); - fname = rb_entry(n, struct fname, rb_hash); - while (fname) { - struct fname * old = fname; - fname = fname->next; - kfree (old); - } - if (!parent) - root->rb_node = NULL; - else if (parent->rb_left == n) - parent->rb_left = NULL; - else if (parent->rb_right == n) - parent->rb_right = NULL; - n = parent; - } - root->rb_node = NULL; -} - - -static struct dir_private_info *create_dir_info(loff_t pos) -{ - struct dir_private_info *p; - - p = kmalloc(sizeof(struct dir_private_info), GFP_KERNEL); - if (!p) - return NULL; - p->root.rb_node = NULL; - p->curr_node = NULL; - p->extra_fname = NULL; - p->last_pos = 0; - p->curr_hash = pos2maj_hash(pos); - p->curr_minor_hash = pos2min_hash(pos); - p->next_hash = 0; - return p; -} - -void ext4_htree_free_dir_info(struct dir_private_info *p) -{ - free_rb_tree_fname(&p->root); - kfree(p); -} - -/* - * Given a directory entry, enter it into the fname rb tree. - */ -int ext4_htree_store_dirent(struct file *dir_file, __u32 hash, - __u32 minor_hash, - struct ext4_dir_entry_2 *dirent) -{ - struct rb_node **p, *parent = NULL; - struct fname * fname, *new_fn; - struct dir_private_info *info; - int len; - - info = (struct dir_private_info *) dir_file->private_data; - p = &info->root.rb_node; - - /* Create and allocate the fname structure */ - len = sizeof(struct fname) + dirent->name_len + 1; - new_fn = kzalloc(len, GFP_KERNEL); - if (!new_fn) - return -ENOMEM; - new_fn->hash = hash; - new_fn->minor_hash = minor_hash; - new_fn->inode = le32_to_cpu(dirent->inode); - new_fn->name_len = dirent->name_len; - new_fn->file_type = dirent->file_type; - memcpy(new_fn->name, dirent->name, dirent->name_len); - new_fn->name[dirent->name_len] = 0; - - while (*p) { - parent = *p; - fname = rb_entry(parent, struct fname, rb_hash); - - /* - * If the hash and minor hash match up, then we put - * them on a linked list. This rarely happens... - */ - if ((new_fn->hash == fname->hash) && - (new_fn->minor_hash == fname->minor_hash)) { - new_fn->next = fname->next; - fname->next = new_fn; - return 0; - } - - if (new_fn->hash < fname->hash) - p = &(*p)->rb_left; - else if (new_fn->hash > fname->hash) - p = &(*p)->rb_right; - else if (new_fn->minor_hash < fname->minor_hash) - p = &(*p)->rb_left; - else /* if (new_fn->minor_hash > fname->minor_hash) */ - p = &(*p)->rb_right; - } - - rb_link_node(&new_fn->rb_hash, parent, p); - rb_insert_color(&new_fn->rb_hash, &info->root); - return 0; -} - - - -/* - * This is a helper function for ext4_dx_readdir. It calls filldir - * for all entres on the fname linked list. (Normally there is only - * one entry on the linked list, unless there are 62 bit hash collisions.) - */ -static int call_filldir(struct file * filp, void * dirent, - filldir_t filldir, struct fname *fname) -{ - struct dir_private_info *info = filp->private_data; - loff_t curr_pos; - struct inode *inode = filp->f_dentry->d_inode; - struct super_block * sb; - int error; - - sb = inode->i_sb; - - if (!fname) { - printk("call_filldir: called with null fname?!?\n"); - return 0; - } - curr_pos = hash2pos(fname->hash, fname->minor_hash); - while (fname) { - error = filldir(dirent, fname->name, - fname->name_len, curr_pos, - fname->inode, - get_dtype(sb, fname->file_type)); - if (error) { - filp->f_pos = curr_pos; - info->extra_fname = fname->next; - return error; - } - fname = fname->next; - } - return 0; -} - -static int ext4_dx_readdir(struct file * filp, - void * dirent, filldir_t filldir) -{ - struct dir_private_info *info = filp->private_data; - struct inode *inode = filp->f_dentry->d_inode; - struct fname *fname; - int ret; - - if (!info) { - info = create_dir_info(filp->f_pos); - if (!info) - return -ENOMEM; - filp->private_data = info; - } - - if (filp->f_pos == EXT4_HTREE_EOF) - return 0; /* EOF */ - - /* Some one has messed with f_pos; reset the world */ - if (info->last_pos != filp->f_pos) { - free_rb_tree_fname(&info->root); - info->curr_node = NULL; - info->extra_fname = NULL; - info->curr_hash = pos2maj_hash(filp->f_pos); - info->curr_minor_hash = pos2min_hash(filp->f_pos); - } - - /* - * If there are any leftover names on the hash collision - * chain, return them first. - */ - if (info->extra_fname && - call_filldir(filp, dirent, filldir, info->extra_fname)) - goto finished; - - if (!info->curr_node) - info->curr_node = rb_first(&info->root); - - while (1) { - /* - * Fill the rbtree if we have no more entries, - * or the inode has changed since we last read in the - * cached entries. - */ - if ((!info->curr_node) || - (filp->f_version != inode->i_version)) { - info->curr_node = NULL; - free_rb_tree_fname(&info->root); - filp->f_version = inode->i_version; - ret = ext4_htree_fill_tree(filp, info->curr_hash, - info->curr_minor_hash, - &info->next_hash); - if (ret < 0) - return ret; - if (ret == 0) { - filp->f_pos = EXT4_HTREE_EOF; - break; - } - info->curr_node = rb_first(&info->root); - } - - fname = rb_entry(info->curr_node, struct fname, rb_hash); - info->curr_hash = fname->hash; - info->curr_minor_hash = fname->minor_hash; - if (call_filldir(filp, dirent, filldir, fname)) - break; - - info->curr_node = rb_next(info->curr_node); - if (!info->curr_node) { - if (info->next_hash == ~0) { - filp->f_pos = EXT4_HTREE_EOF; - break; - } - info->curr_hash = info->next_hash; - info->curr_minor_hash = 0; - } - } -finished: - info->last_pos = filp->f_pos; - return 0; -} - -static int ext4_release_dir (struct inode * inode, struct file * filp) -{ - if (filp->private_data) - ext4_htree_free_dir_info(filp->private_data); - - return 0; -} - -#endif diff --git a/trunk/fs/ext4/extents.c b/trunk/fs/ext4/extents.c deleted file mode 100644 index 2608dce18f3e..000000000000 --- a/trunk/fs/ext4/extents.c +++ /dev/null @@ -1,2152 +0,0 @@ -/* - * Copyright (c) 2003-2006, Cluster File Systems, Inc, info@clusterfs.com - * Written by Alex Tomas - * - * Architecture independence: - * Copyright (c) 2005, Bull S.A. - * Written by Pierre Peiffer - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public Licens - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- - */ - -/* - * Extents support for EXT4 - * - * TODO: - * - ext4*_error() should be used in some situations - * - analyze all BUG()/BUG_ON(), use -EIO where appropriate - * - smart tree reduction - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -/* - * ext_pblock: - * combine low and high parts of physical block number into ext4_fsblk_t - */ -static inline ext4_fsblk_t ext_pblock(struct ext4_extent *ex) -{ - ext4_fsblk_t block; - - block = le32_to_cpu(ex->ee_start); - block |= ((ext4_fsblk_t) le16_to_cpu(ex->ee_start_hi) << 31) << 1; - return block; -} - -/* - * idx_pblock: - * combine low and high parts of a leaf physical block number into ext4_fsblk_t - */ -static inline ext4_fsblk_t idx_pblock(struct ext4_extent_idx *ix) -{ - ext4_fsblk_t block; - - block = le32_to_cpu(ix->ei_leaf); - block |= ((ext4_fsblk_t) le16_to_cpu(ix->ei_leaf_hi) << 31) << 1; - return block; -} - -/* - * ext4_ext_store_pblock: - * stores a large physical block number into an extent struct, - * breaking it into parts - */ -static inline void ext4_ext_store_pblock(struct ext4_extent *ex, ext4_fsblk_t pb) -{ - ex->ee_start = cpu_to_le32((unsigned long) (pb & 0xffffffff)); - ex->ee_start_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff); -} - -/* - * ext4_idx_store_pblock: - * stores a large physical block number into an index struct, - * breaking it into parts - */ -static inline void ext4_idx_store_pblock(struct ext4_extent_idx *ix, ext4_fsblk_t pb) -{ - ix->ei_leaf = cpu_to_le32((unsigned long) (pb & 0xffffffff)); - ix->ei_leaf_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff); -} - -static int ext4_ext_check_header(const char *function, struct inode *inode, - struct ext4_extent_header *eh) -{ - const char *error_msg = NULL; - - if (unlikely(eh->eh_magic != EXT4_EXT_MAGIC)) { - error_msg = "invalid magic"; - goto corrupted; - } - if (unlikely(eh->eh_max == 0)) { - error_msg = "invalid eh_max"; - goto corrupted; - } - if (unlikely(le16_to_cpu(eh->eh_entries) > le16_to_cpu(eh->eh_max))) { - error_msg = "invalid eh_entries"; - goto corrupted; - } - return 0; - -corrupted: - ext4_error(inode->i_sb, function, - "bad header in inode #%lu: %s - magic %x, " - "entries %u, max %u, depth %u", - inode->i_ino, error_msg, le16_to_cpu(eh->eh_magic), - le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max), - le16_to_cpu(eh->eh_depth)); - - return -EIO; -} - -static handle_t *ext4_ext_journal_restart(handle_t *handle, int needed) -{ - int err; - - if (handle->h_buffer_credits > needed) - return handle; - if (!ext4_journal_extend(handle, needed)) - return handle; - err = ext4_journal_restart(handle, needed); - - return handle; -} - -/* - * could return: - * - EROFS - * - ENOMEM - */ -static int ext4_ext_get_access(handle_t *handle, struct inode *inode, - struct ext4_ext_path *path) -{ - if (path->p_bh) { - /* path points to block */ - return ext4_journal_get_write_access(handle, path->p_bh); - } - /* path points to leaf/index in inode body */ - /* we use in-core data, no need to protect them */ - return 0; -} - -/* - * could return: - * - EROFS - * - ENOMEM - * - EIO - */ -static int ext4_ext_dirty(handle_t *handle, struct inode *inode, - struct ext4_ext_path *path) -{ - int err; - if (path->p_bh) { - /* path points to block */ - err = ext4_journal_dirty_metadata(handle, path->p_bh); - } else { - /* path points to leaf/index in inode body */ - err = ext4_mark_inode_dirty(handle, inode); - } - return err; -} - -static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode, - struct ext4_ext_path *path, - ext4_fsblk_t block) -{ - struct ext4_inode_info *ei = EXT4_I(inode); - ext4_fsblk_t bg_start; - ext4_grpblk_t colour; - int depth; - - if (path) { - struct ext4_extent *ex; - depth = path->p_depth; - - /* try to predict block placement */ - if ((ex = path[depth].p_ext)) - return ext_pblock(ex)+(block-le32_to_cpu(ex->ee_block)); - - /* it looks like index is empty; - * try to find starting block from index itself */ - if (path[depth].p_bh) - return path[depth].p_bh->b_blocknr; - } - - /* OK. use inode's group */ - bg_start = (ei->i_block_group * EXT4_BLOCKS_PER_GROUP(inode->i_sb)) + - le32_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_first_data_block); - colour = (current->pid % 16) * - (EXT4_BLOCKS_PER_GROUP(inode->i_sb) / 16); - return bg_start + colour + block; -} - -static ext4_fsblk_t -ext4_ext_new_block(handle_t *handle, struct inode *inode, - struct ext4_ext_path *path, - struct ext4_extent *ex, int *err) -{ - ext4_fsblk_t goal, newblock; - - goal = ext4_ext_find_goal(inode, path, le32_to_cpu(ex->ee_block)); - newblock = ext4_new_block(handle, inode, goal, err); - return newblock; -} - -static inline int ext4_ext_space_block(struct inode *inode) -{ - int size; - - size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header)) - / sizeof(struct ext4_extent); -#ifdef AGRESSIVE_TEST - if (size > 6) - size = 6; -#endif - return size; -} - -static inline int ext4_ext_space_block_idx(struct inode *inode) -{ - int size; - - size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header)) - / sizeof(struct ext4_extent_idx); -#ifdef AGRESSIVE_TEST - if (size > 5) - size = 5; -#endif - return size; -} - -static inline int ext4_ext_space_root(struct inode *inode) -{ - int size; - - size = sizeof(EXT4_I(inode)->i_data); - size -= sizeof(struct ext4_extent_header); - size /= sizeof(struct ext4_extent); -#ifdef AGRESSIVE_TEST - if (size > 3) - size = 3; -#endif - return size; -} - -static inline int ext4_ext_space_root_idx(struct inode *inode) -{ - int size; - - size = sizeof(EXT4_I(inode)->i_data); - size -= sizeof(struct ext4_extent_header); - size /= sizeof(struct ext4_extent_idx); -#ifdef AGRESSIVE_TEST - if (size > 4) - size = 4; -#endif - return size; -} - -#ifdef EXT_DEBUG -static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path) -{ - int k, l = path->p_depth; - - ext_debug("path:"); - for (k = 0; k <= l; k++, path++) { - if (path->p_idx) { - ext_debug(" %d->%llu", le32_to_cpu(path->p_idx->ei_block), - idx_pblock(path->p_idx)); - } else if (path->p_ext) { - ext_debug(" %d:%d:%llu ", - le32_to_cpu(path->p_ext->ee_block), - le16_to_cpu(path->p_ext->ee_len), - ext_pblock(path->p_ext)); - } else - ext_debug(" []"); - } - ext_debug("\n"); -} - -static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path) -{ - int depth = ext_depth(inode); - struct ext4_extent_header *eh; - struct ext4_extent *ex; - int i; - - if (!path) - return; - - eh = path[depth].p_hdr; - ex = EXT_FIRST_EXTENT(eh); - - for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ex++) { - ext_debug("%d:%d:%llu ", le32_to_cpu(ex->ee_block), - le16_to_cpu(ex->ee_len), ext_pblock(ex)); - } - ext_debug("\n"); -} -#else -#define ext4_ext_show_path(inode,path) -#define ext4_ext_show_leaf(inode,path) -#endif - -static void ext4_ext_drop_refs(struct ext4_ext_path *path) -{ - int depth = path->p_depth; - int i; - - for (i = 0; i <= depth; i++, path++) - if (path->p_bh) { - brelse(path->p_bh); - path->p_bh = NULL; - } -} - -/* - * ext4_ext_binsearch_idx: - * binary search for the closest index of the given block - */ -static void -ext4_ext_binsearch_idx(struct inode *inode, struct ext4_ext_path *path, int block) -{ - struct ext4_extent_header *eh = path->p_hdr; - struct ext4_extent_idx *r, *l, *m; - - BUG_ON(eh->eh_magic != EXT4_EXT_MAGIC); - BUG_ON(le16_to_cpu(eh->eh_entries) > le16_to_cpu(eh->eh_max)); - BUG_ON(le16_to_cpu(eh->eh_entries) <= 0); - - ext_debug("binsearch for %d(idx): ", block); - - l = EXT_FIRST_INDEX(eh) + 1; - r = EXT_FIRST_INDEX(eh) + le16_to_cpu(eh->eh_entries) - 1; - while (l <= r) { - m = l + (r - l) / 2; - if (block < le32_to_cpu(m->ei_block)) - r = m - 1; - else - l = m + 1; - ext_debug("%p(%u):%p(%u):%p(%u) ", l, l->ei_block, - m, m->ei_block, r, r->ei_block); - } - - path->p_idx = l - 1; - ext_debug(" -> %d->%lld ", le32_to_cpu(path->p_idx->ei_block), - idx_block(path->p_idx)); - -#ifdef CHECK_BINSEARCH - { - struct ext4_extent_idx *chix, *ix; - int k; - - chix = ix = EXT_FIRST_INDEX(eh); - for (k = 0; k < le16_to_cpu(eh->eh_entries); k++, ix++) { - if (k != 0 && - le32_to_cpu(ix->ei_block) <= le32_to_cpu(ix[-1].ei_block)) { - printk("k=%d, ix=0x%p, first=0x%p\n", k, - ix, EXT_FIRST_INDEX(eh)); - printk("%u <= %u\n", - le32_to_cpu(ix->ei_block), - le32_to_cpu(ix[-1].ei_block)); - } - BUG_ON(k && le32_to_cpu(ix->ei_block) - <= le32_to_cpu(ix[-1].ei_block)); - if (block < le32_to_cpu(ix->ei_block)) - break; - chix = ix; - } - BUG_ON(chix != path->p_idx); - } -#endif - -} - -/* - * ext4_ext_binsearch: - * binary search for closest extent of the given block - */ -static void -ext4_ext_binsearch(struct inode *inode, struct ext4_ext_path *path, int block) -{ - struct ext4_extent_header *eh = path->p_hdr; - struct ext4_extent *r, *l, *m; - - BUG_ON(eh->eh_magic != EXT4_EXT_MAGIC); - BUG_ON(le16_to_cpu(eh->eh_entries) > le16_to_cpu(eh->eh_max)); - - if (eh->eh_entries == 0) { - /* - * this leaf is empty: - * we get such a leaf in split/add case - */ - return; - } - - ext_debug("binsearch for %d: ", block); - - l = EXT_FIRST_EXTENT(eh) + 1; - r = EXT_FIRST_EXTENT(eh) + le16_to_cpu(eh->eh_entries) - 1; - - while (l <= r) { - m = l + (r - l) / 2; - if (block < le32_to_cpu(m->ee_block)) - r = m - 1; - else - l = m + 1; - ext_debug("%p(%u):%p(%u):%p(%u) ", l, l->ee_block, - m, m->ee_block, r, r->ee_block); - } - - path->p_ext = l - 1; - ext_debug(" -> %d:%llu:%d ", - le32_to_cpu(path->p_ext->ee_block), - ext_pblock(path->p_ext), - le16_to_cpu(path->p_ext->ee_len)); - -#ifdef CHECK_BINSEARCH - { - struct ext4_extent *chex, *ex; - int k; - - chex = ex = EXT_FIRST_EXTENT(eh); - for (k = 0; k < le16_to_cpu(eh->eh_entries); k++, ex++) { - BUG_ON(k && le32_to_cpu(ex->ee_block) - <= le32_to_cpu(ex[-1].ee_block)); - if (block < le32_to_cpu(ex->ee_block)) - break; - chex = ex; - } - BUG_ON(chex != path->p_ext); - } -#endif - -} - -int ext4_ext_tree_init(handle_t *handle, struct inode *inode) -{ - struct ext4_extent_header *eh; - - eh = ext_inode_hdr(inode); - eh->eh_depth = 0; - eh->eh_entries = 0; - eh->eh_magic = EXT4_EXT_MAGIC; - eh->eh_max = cpu_to_le16(ext4_ext_space_root(inode)); - ext4_mark_inode_dirty(handle, inode); - ext4_ext_invalidate_cache(inode); - return 0; -} - -struct ext4_ext_path * -ext4_ext_find_extent(struct inode *inode, int block, struct ext4_ext_path *path) -{ - struct ext4_extent_header *eh; - struct buffer_head *bh; - short int depth, i, ppos = 0, alloc = 0; - - eh = ext_inode_hdr(inode); - BUG_ON(eh == NULL); - if (ext4_ext_check_header(__FUNCTION__, inode, eh)) - return ERR_PTR(-EIO); - - i = depth = ext_depth(inode); - - /* account possible depth increase */ - if (!path) { - path = kmalloc(sizeof(struct ext4_ext_path) * (depth + 2), - GFP_NOFS); - if (!path) - return ERR_PTR(-ENOMEM); - alloc = 1; - } - memset(path, 0, sizeof(struct ext4_ext_path) * (depth + 1)); - path[0].p_hdr = eh; - - /* walk through the tree */ - while (i) { - ext_debug("depth %d: num %d, max %d\n", - ppos, le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max)); - ext4_ext_binsearch_idx(inode, path + ppos, block); - path[ppos].p_block = idx_pblock(path[ppos].p_idx); - path[ppos].p_depth = i; - path[ppos].p_ext = NULL; - - bh = sb_bread(inode->i_sb, path[ppos].p_block); - if (!bh) - goto err; - - eh = ext_block_hdr(bh); - ppos++; - BUG_ON(ppos > depth); - path[ppos].p_bh = bh; - path[ppos].p_hdr = eh; - i--; - - if (ext4_ext_check_header(__FUNCTION__, inode, eh)) - goto err; - } - - path[ppos].p_depth = i; - path[ppos].p_hdr = eh; - path[ppos].p_ext = NULL; - path[ppos].p_idx = NULL; - - if (ext4_ext_check_header(__FUNCTION__, inode, eh)) - goto err; - - /* find extent */ - ext4_ext_binsearch(inode, path + ppos, block); - - ext4_ext_show_path(inode, path); - - return path; - -err: - ext4_ext_drop_refs(path); - if (alloc) - kfree(path); - return ERR_PTR(-EIO); -} - -/* - * ext4_ext_insert_index: - * insert new index [@logical;@ptr] into the block at @curp; - * check where to insert: before @curp or after @curp - */ -static int ext4_ext_insert_index(handle_t *handle, struct inode *inode, - struct ext4_ext_path *curp, - int logical, ext4_fsblk_t ptr) -{ - struct ext4_extent_idx *ix; - int len, err; - - if ((err = ext4_ext_get_access(handle, inode, curp))) - return err; - - BUG_ON(logical == le32_to_cpu(curp->p_idx->ei_block)); - len = EXT_MAX_INDEX(curp->p_hdr) - curp->p_idx; - if (logical > le32_to_cpu(curp->p_idx->ei_block)) { - /* insert after */ - if (curp->p_idx != EXT_LAST_INDEX(curp->p_hdr)) { - len = (len - 1) * sizeof(struct ext4_extent_idx); - len = len < 0 ? 0 : len; - ext_debug("insert new index %d after: %d. " - "move %d from 0x%p to 0x%p\n", - logical, ptr, len, - (curp->p_idx + 1), (curp->p_idx + 2)); - memmove(curp->p_idx + 2, curp->p_idx + 1, len); - } - ix = curp->p_idx + 1; - } else { - /* insert before */ - len = len * sizeof(struct ext4_extent_idx); - len = len < 0 ? 0 : len; - ext_debug("insert new index %d before: %d. " - "move %d from 0x%p to 0x%p\n", - logical, ptr, len, - curp->p_idx, (curp->p_idx + 1)); - memmove(curp->p_idx + 1, curp->p_idx, len); - ix = curp->p_idx; - } - - ix->ei_block = cpu_to_le32(logical); - ext4_idx_store_pblock(ix, ptr); - curp->p_hdr->eh_entries = cpu_to_le16(le16_to_cpu(curp->p_hdr->eh_entries)+1); - - BUG_ON(le16_to_cpu(curp->p_hdr->eh_entries) - > le16_to_cpu(curp->p_hdr->eh_max)); - BUG_ON(ix > EXT_LAST_INDEX(curp->p_hdr)); - - err = ext4_ext_dirty(handle, inode, curp); - ext4_std_error(inode->i_sb, err); - - return err; -} - -/* - * ext4_ext_split: - * inserts new subtree into the path, using free index entry - * at depth @at: - * - allocates all needed blocks (new leaf and all intermediate index blocks) - * - makes decision where to split - * - moves remaining extents and index entries (right to the split point) - * into the newly allocated blocks - * - initializes subtree - */ -static int ext4_ext_split(handle_t *handle, struct inode *inode, - struct ext4_ext_path *path, - struct ext4_extent *newext, int at) -{ - struct buffer_head *bh = NULL; - int depth = ext_depth(inode); - struct ext4_extent_header *neh; - struct ext4_extent_idx *fidx; - struct ext4_extent *ex; - int i = at, k, m, a; - ext4_fsblk_t newblock, oldblock; - __le32 border; - ext4_fsblk_t *ablocks = NULL; /* array of allocated blocks */ - int err = 0; - - /* make decision: where to split? */ - /* FIXME: now decision is simplest: at current extent */ - - /* if current leaf will be split, then we should use - * border from split point */ - BUG_ON(path[depth].p_ext > EXT_MAX_EXTENT(path[depth].p_hdr)); - if (path[depth].p_ext != EXT_MAX_EXTENT(path[depth].p_hdr)) { - border = path[depth].p_ext[1].ee_block; - ext_debug("leaf will be split." - " next leaf starts at %d\n", - le32_to_cpu(border)); - } else { - border = newext->ee_block; - ext_debug("leaf will be added." - " next leaf starts at %d\n", - le32_to_cpu(border)); - } - - /* - * If error occurs, then we break processing - * and mark filesystem read-only. index won't - * be inserted and tree will be in consistent - * state. Next mount will repair buffers too. - */ - - /* - * Get array to track all allocated blocks. - * We need this to handle errors and free blocks - * upon them. - */ - ablocks = kmalloc(sizeof(ext4_fsblk_t) * depth, GFP_NOFS); - if (!ablocks) - return -ENOMEM; - memset(ablocks, 0, sizeof(ext4_fsblk_t) * depth); - - /* allocate all needed blocks */ - ext_debug("allocate %d blocks for indexes/leaf\n", depth - at); - for (a = 0; a < depth - at; a++) { - newblock = ext4_ext_new_block(handle, inode, path, newext, &err); - if (newblock == 0) - goto cleanup; - ablocks[a] = newblock; - } - - /* initialize new leaf */ - newblock = ablocks[--a]; - BUG_ON(newblock == 0); - bh = sb_getblk(inode->i_sb, newblock); - if (!bh) { - err = -EIO; - goto cleanup; - } - lock_buffer(bh); - - if ((err = ext4_journal_get_create_access(handle, bh))) - goto cleanup; - - neh = ext_block_hdr(bh); - neh->eh_entries = 0; - neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode)); - neh->eh_magic = EXT4_EXT_MAGIC; - neh->eh_depth = 0; - ex = EXT_FIRST_EXTENT(neh); - - /* move remainder of path[depth] to the new leaf */ - BUG_ON(path[depth].p_hdr->eh_entries != path[depth].p_hdr->eh_max); - /* start copy from next extent */ - /* TODO: we could do it by single memmove */ - m = 0; - path[depth].p_ext++; - while (path[depth].p_ext <= - EXT_MAX_EXTENT(path[depth].p_hdr)) { - ext_debug("move %d:%llu:%d in new leaf %llu\n", - le32_to_cpu(path[depth].p_ext->ee_block), - ext_pblock(path[depth].p_ext), - le16_to_cpu(path[depth].p_ext->ee_len), - newblock); - /*memmove(ex++, path[depth].p_ext++, - sizeof(struct ext4_extent)); - neh->eh_entries++;*/ - path[depth].p_ext++; - m++; - } - if (m) { - memmove(ex, path[depth].p_ext-m, sizeof(struct ext4_extent)*m); - neh->eh_entries = cpu_to_le16(le16_to_cpu(neh->eh_entries)+m); - } - - set_buffer_uptodate(bh); - unlock_buffer(bh); - - if ((err = ext4_journal_dirty_metadata(handle, bh))) - goto cleanup; - brelse(bh); - bh = NULL; - - /* correct old leaf */ - if (m) { - if ((err = ext4_ext_get_access(handle, inode, path + depth))) - goto cleanup; - path[depth].p_hdr->eh_entries = - cpu_to_le16(le16_to_cpu(path[depth].p_hdr->eh_entries)-m); - if ((err = ext4_ext_dirty(handle, inode, path + depth))) - goto cleanup; - - } - - /* create intermediate indexes */ - k = depth - at - 1; - BUG_ON(k < 0); - if (k) - ext_debug("create %d intermediate indices\n", k); - /* insert new index into current index block */ - /* current depth stored in i var */ - i = depth - 1; - while (k--) { - oldblock = newblock; - newblock = ablocks[--a]; - bh = sb_getblk(inode->i_sb, (ext4_fsblk_t)newblock); - if (!bh) { - err = -EIO; - goto cleanup; - } - lock_buffer(bh); - - if ((err = ext4_journal_get_create_access(handle, bh))) - goto cleanup; - - neh = ext_block_hdr(bh); - neh->eh_entries = cpu_to_le16(1); - neh->eh_magic = EXT4_EXT_MAGIC; - neh->eh_max = cpu_to_le16(ext4_ext_space_block_idx(inode)); - neh->eh_depth = cpu_to_le16(depth - i); - fidx = EXT_FIRST_INDEX(neh); - fidx->ei_block = border; - ext4_idx_store_pblock(fidx, oldblock); - - ext_debug("int.index at %d (block %llu): %lu -> %llu\n", i, - newblock, (unsigned long) le32_to_cpu(border), - oldblock); - /* copy indexes */ - m = 0; - path[i].p_idx++; - - ext_debug("cur 0x%p, last 0x%p\n", path[i].p_idx, - EXT_MAX_INDEX(path[i].p_hdr)); - BUG_ON(EXT_MAX_INDEX(path[i].p_hdr) != - EXT_LAST_INDEX(path[i].p_hdr)); - while (path[i].p_idx <= EXT_MAX_INDEX(path[i].p_hdr)) { - ext_debug("%d: move %d:%d in new index %llu\n", i, - le32_to_cpu(path[i].p_idx->ei_block), - idx_pblock(path[i].p_idx), - newblock); - /*memmove(++fidx, path[i].p_idx++, - sizeof(struct ext4_extent_idx)); - neh->eh_entries++; - BUG_ON(neh->eh_entries > neh->eh_max);*/ - path[i].p_idx++; - m++; - } - if (m) { - memmove(++fidx, path[i].p_idx - m, - sizeof(struct ext4_extent_idx) * m); - neh->eh_entries = - cpu_to_le16(le16_to_cpu(neh->eh_entries) + m); - } - set_buffer_uptodate(bh); - unlock_buffer(bh); - - if ((err = ext4_journal_dirty_metadata(handle, bh))) - goto cleanup; - brelse(bh); - bh = NULL; - - /* correct old index */ - if (m) { - err = ext4_ext_get_access(handle, inode, path + i); - if (err) - goto cleanup; - path[i].p_hdr->eh_entries = cpu_to_le16(le16_to_cpu(path[i].p_hdr->eh_entries)-m); - err = ext4_ext_dirty(handle, inode, path + i); - if (err) - goto cleanup; - } - - i--; - } - - /* insert new index */ - if (err) - goto cleanup; - - err = ext4_ext_insert_index(handle, inode, path + at, - le32_to_cpu(border), newblock); - -cleanup: - if (bh) { - if (buffer_locked(bh)) - unlock_buffer(bh); - brelse(bh); - } - - if (err) { - /* free all allocated blocks in error case */ - for (i = 0; i < depth; i++) { - if (!ablocks[i]) - continue; - ext4_free_blocks(handle, inode, ablocks[i], 1); - } - } - kfree(ablocks); - - return err; -} - -/* - * ext4_ext_grow_indepth: - * implements tree growing procedure: - * - allocates new block - * - moves top-level data (index block or leaf) into the new block - * - initializes new top-level, creating index that points to the - * just created block - */ -static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode, - struct ext4_ext_path *path, - struct ext4_extent *newext) -{ - struct ext4_ext_path *curp = path; - struct ext4_extent_header *neh; - struct ext4_extent_idx *fidx; - struct buffer_head *bh; - ext4_fsblk_t newblock; - int err = 0; - - newblock = ext4_ext_new_block(handle, inode, path, newext, &err); - if (newblock == 0) - return err; - - bh = sb_getblk(inode->i_sb, newblock); - if (!bh) { - err = -EIO; - ext4_std_error(inode->i_sb, err); - return err; - } - lock_buffer(bh); - - if ((err = ext4_journal_get_create_access(handle, bh))) { - unlock_buffer(bh); - goto out; - } - - /* move top-level index/leaf into new block */ - memmove(bh->b_data, curp->p_hdr, sizeof(EXT4_I(inode)->i_data)); - - /* set size of new block */ - neh = ext_block_hdr(bh); - /* old root could have indexes or leaves - * so calculate e_max right way */ - if (ext_depth(inode)) - neh->eh_max = cpu_to_le16(ext4_ext_space_block_idx(inode)); - else - neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode)); - neh->eh_magic = EXT4_EXT_MAGIC; - set_buffer_uptodate(bh); - unlock_buffer(bh); - - if ((err = ext4_journal_dirty_metadata(handle, bh))) - goto out; - - /* create index in new top-level index: num,max,pointer */ - if ((err = ext4_ext_get_access(handle, inode, curp))) - goto out; - - curp->p_hdr->eh_magic = EXT4_EXT_MAGIC; - curp->p_hdr->eh_max = cpu_to_le16(ext4_ext_space_root_idx(inode)); - curp->p_hdr->eh_entries = cpu_to_le16(1); - curp->p_idx = EXT_FIRST_INDEX(curp->p_hdr); - /* FIXME: it works, but actually path[0] can be index */ - curp->p_idx->ei_block = EXT_FIRST_EXTENT(path[0].p_hdr)->ee_block; - ext4_idx_store_pblock(curp->p_idx, newblock); - - neh = ext_inode_hdr(inode); - fidx = EXT_FIRST_INDEX(neh); - ext_debug("new root: num %d(%d), lblock %d, ptr %llu\n", - le16_to_cpu(neh->eh_entries), le16_to_cpu(neh->eh_max), - le32_to_cpu(fidx->ei_block), idx_pblock(fidx)); - - neh->eh_depth = cpu_to_le16(path->p_depth + 1); - err = ext4_ext_dirty(handle, inode, curp); -out: - brelse(bh); - - return err; -} - -/* - * ext4_ext_create_new_leaf: - * finds empty index and adds new leaf. - * if no free index is found, then it requests in-depth growing. - */ -static int ext4_ext_create_new_leaf(handle_t *handle, struct inode *inode, - struct ext4_ext_path *path, - struct ext4_extent *newext) -{ - struct ext4_ext_path *curp; - int depth, i, err = 0; - -repeat: - i = depth = ext_depth(inode); - - /* walk up to the tree and look for free index entry */ - curp = path + depth; - while (i > 0 && !EXT_HAS_FREE_INDEX(curp)) { - i--; - curp--; - } - - /* we use already allocated block for index block, - * so subsequent data blocks should be contiguous */ - if (EXT_HAS_FREE_INDEX(curp)) { - /* if we found index with free entry, then use that - * entry: create all needed subtree and add new leaf */ - err = ext4_ext_split(handle, inode, path, newext, i); - - /* refill path */ - ext4_ext_drop_refs(path); - path = ext4_ext_find_extent(inode, - le32_to_cpu(newext->ee_block), - path); - if (IS_ERR(path)) - err = PTR_ERR(path); - } else { - /* tree is full, time to grow in depth */ - err = ext4_ext_grow_indepth(handle, inode, path, newext); - if (err) - goto out; - - /* refill path */ - ext4_ext_drop_refs(path); - path = ext4_ext_find_extent(inode, - le32_to_cpu(newext->ee_block), - path); - if (IS_ERR(path)) { - err = PTR_ERR(path); - goto out; - } - - /* - * only first (depth 0 -> 1) produces free space; - * in all other cases we have to split the grown tree - */ - depth = ext_depth(inode); - if (path[depth].p_hdr->eh_entries == path[depth].p_hdr->eh_max) { - /* now we need to split */ - goto repeat; - } - } - -out: - return err; -} - -/* - * ext4_ext_next_allocated_block: - * returns allocated block in subsequent extent or EXT_MAX_BLOCK. - * NOTE: it considers block number from index entry as - * allocated block. Thus, index entries have to be consistent - * with leaves. - */ -static unsigned long -ext4_ext_next_allocated_block(struct ext4_ext_path *path) -{ - int depth; - - BUG_ON(path == NULL); - depth = path->p_depth; - - if (depth == 0 && path->p_ext == NULL) - return EXT_MAX_BLOCK; - - while (depth >= 0) { - if (depth == path->p_depth) { - /* leaf */ - if (path[depth].p_ext != - EXT_LAST_EXTENT(path[depth].p_hdr)) - return le32_to_cpu(path[depth].p_ext[1].ee_block); - } else { - /* index */ - if (path[depth].p_idx != - EXT_LAST_INDEX(path[depth].p_hdr)) - return le32_to_cpu(path[depth].p_idx[1].ei_block); - } - depth--; - } - - return EXT_MAX_BLOCK; -} - -/* - * ext4_ext_next_leaf_block: - * returns first allocated block from next leaf or EXT_MAX_BLOCK - */ -static unsigned ext4_ext_next_leaf_block(struct inode *inode, - struct ext4_ext_path *path) -{ - int depth; - - BUG_ON(path == NULL); - depth = path->p_depth; - - /* zero-tree has no leaf blocks at all */ - if (depth == 0) - return EXT_MAX_BLOCK; - - /* go to index block */ - depth--; - - while (depth >= 0) { - if (path[depth].p_idx != - EXT_LAST_INDEX(path[depth].p_hdr)) - return le32_to_cpu(path[depth].p_idx[1].ei_block); - depth--; - } - - return EXT_MAX_BLOCK; -} - -/* - * ext4_ext_correct_indexes: - * if leaf gets modified and modified extent is first in the leaf, - * then we have to correct all indexes above. - * TODO: do we need to correct tree in all cases? - */ -int ext4_ext_correct_indexes(handle_t *handle, struct inode *inode, - struct ext4_ext_path *path) -{ - struct ext4_extent_header *eh; - int depth = ext_depth(inode); - struct ext4_extent *ex; - __le32 border; - int k, err = 0; - - eh = path[depth].p_hdr; - ex = path[depth].p_ext; - BUG_ON(ex == NULL); - BUG_ON(eh == NULL); - - if (depth == 0) { - /* there is no tree at all */ - return 0; - } - - if (ex != EXT_FIRST_EXTENT(eh)) { - /* we correct tree if first leaf got modified only */ - return 0; - } - - /* - * TODO: we need correction if border is smaller than current one - */ - k = depth - 1; - border = path[depth].p_ext->ee_block; - if ((err = ext4_ext_get_access(handle, inode, path + k))) - return err; - path[k].p_idx->ei_block = border; - if ((err = ext4_ext_dirty(handle, inode, path + k))) - return err; - - while (k--) { - /* change all left-side indexes */ - if (path[k+1].p_idx != EXT_FIRST_INDEX(path[k+1].p_hdr)) - break; - if ((err = ext4_ext_get_access(handle, inode, path + k))) - break; - path[k].p_idx->ei_block = border; - if ((err = ext4_ext_dirty(handle, inode, path + k))) - break; - } - - return err; -} - -static int inline -ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1, - struct ext4_extent *ex2) -{ - if (le32_to_cpu(ex1->ee_block) + le16_to_cpu(ex1->ee_len) != - le32_to_cpu(ex2->ee_block)) - return 0; - - /* - * To allow future support for preallocated extents to be added - * as an RO_COMPAT feature, refuse to merge to extents if - * this can result in the top bit of ee_len being set. - */ - if (le16_to_cpu(ex1->ee_len) + le16_to_cpu(ex2->ee_len) > EXT_MAX_LEN) - return 0; -#ifdef AGRESSIVE_TEST - if (le16_to_cpu(ex1->ee_len) >= 4) - return 0; -#endif - - if (ext_pblock(ex1) + le16_to_cpu(ex1->ee_len) == ext_pblock(ex2)) - return 1; - return 0; -} - -/* - * ext4_ext_insert_extent: - * tries to merge requsted extent into the existing extent or - * inserts requested extent as new one into the tree, - * creating new leaf in the no-space case. - */ -int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, - struct ext4_ext_path *path, - struct ext4_extent *newext) -{ - struct ext4_extent_header * eh; - struct ext4_extent *ex, *fex; - struct ext4_extent *nearex; /* nearest extent */ - struct ext4_ext_path *npath = NULL; - int depth, len, err, next; - - BUG_ON(newext->ee_len == 0); - depth = ext_depth(inode); - ex = path[depth].p_ext; - BUG_ON(path[depth].p_hdr == NULL); - - /* try to insert block into found extent and return */ - if (ex && ext4_can_extents_be_merged(inode, ex, newext)) { - ext_debug("append %d block to %d:%d (from %llu)\n", - le16_to_cpu(newext->ee_len), - le32_to_cpu(ex->ee_block), - le16_to_cpu(ex->ee_len), ext_pblock(ex)); - if ((err = ext4_ext_get_access(handle, inode, path + depth))) - return err; - ex->ee_len = cpu_to_le16(le16_to_cpu(ex->ee_len) - + le16_to_cpu(newext->ee_len)); - eh = path[depth].p_hdr; - nearex = ex; - goto merge; - } - -repeat: - depth = ext_depth(inode); - eh = path[depth].p_hdr; - if (le16_to_cpu(eh->eh_entries) < le16_to_cpu(eh->eh_max)) - goto has_space; - - /* probably next leaf has space for us? */ - fex = EXT_LAST_EXTENT(eh); - next = ext4_ext_next_leaf_block(inode, path); - if (le32_to_cpu(newext->ee_block) > le32_to_cpu(fex->ee_block) - && next != EXT_MAX_BLOCK) { - ext_debug("next leaf block - %d\n", next); - BUG_ON(npath != NULL); - npath = ext4_ext_find_extent(inode, next, NULL); - if (IS_ERR(npath)) - return PTR_ERR(npath); - BUG_ON(npath->p_depth != path->p_depth); - eh = npath[depth].p_hdr; - if (le16_to_cpu(eh->eh_entries) < le16_to_cpu(eh->eh_max)) { - ext_debug("next leaf isnt full(%d)\n", - le16_to_cpu(eh->eh_entries)); - path = npath; - goto repeat; - } - ext_debug("next leaf has no free space(%d,%d)\n", - le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max)); - } - - /* - * There is no free space in the found leaf. - * We're gonna add a new leaf in the tree. - */ - err = ext4_ext_create_new_leaf(handle, inode, path, newext); - if (err) - goto cleanup; - depth = ext_depth(inode); - eh = path[depth].p_hdr; - -has_space: - nearex = path[depth].p_ext; - - if ((err = ext4_ext_get_access(handle, inode, path + depth))) - goto cleanup; - - if (!nearex) { - /* there is no extent in this leaf, create first one */ - ext_debug("first extent in the leaf: %d:%llu:%d\n", - le32_to_cpu(newext->ee_block), - ext_pblock(newext), - le16_to_cpu(newext->ee_len)); - path[depth].p_ext = EXT_FIRST_EXTENT(eh); - } else if (le32_to_cpu(newext->ee_block) - > le32_to_cpu(nearex->ee_block)) { -/* BUG_ON(newext->ee_block == nearex->ee_block); */ - if (nearex != EXT_LAST_EXTENT(eh)) { - len = EXT_MAX_EXTENT(eh) - nearex; - len = (len - 1) * sizeof(struct ext4_extent); - len = len < 0 ? 0 : len; - ext_debug("insert %d:%llu:%d after: nearest 0x%p, " - "move %d from 0x%p to 0x%p\n", - le32_to_cpu(newext->ee_block), - ext_pblock(newext), - le16_to_cpu(newext->ee_len), - nearex, len, nearex + 1, nearex + 2); - memmove(nearex + 2, nearex + 1, len); - } - path[depth].p_ext = nearex + 1; - } else { - BUG_ON(newext->ee_block == nearex->ee_block); - len = (EXT_MAX_EXTENT(eh) - nearex) * sizeof(struct ext4_extent); - len = len < 0 ? 0 : len; - ext_debug("insert %d:%llu:%d before: nearest 0x%p, " - "move %d from 0x%p to 0x%p\n", - le32_to_cpu(newext->ee_block), - ext_pblock(newext), - le16_to_cpu(newext->ee_len), - nearex, len, nearex + 1, nearex + 2); - memmove(nearex + 1, nearex, len); - path[depth].p_ext = nearex; - } - - eh->eh_entries = cpu_to_le16(le16_to_cpu(eh->eh_entries)+1); - nearex = path[depth].p_ext; - nearex->ee_block = newext->ee_block; - nearex->ee_start = newext->ee_start; - nearex->ee_start_hi = newext->ee_start_hi; - nearex->ee_len = newext->ee_len; - -merge: - /* try to merge extents to the right */ - while (nearex < EXT_LAST_EXTENT(eh)) { - if (!ext4_can_extents_be_merged(inode, nearex, nearex + 1)) - break; - /* merge with next extent! */ - nearex->ee_len = cpu_to_le16(le16_to_cpu(nearex->ee_len) - + le16_to_cpu(nearex[1].ee_len)); - if (nearex + 1 < EXT_LAST_EXTENT(eh)) { - len = (EXT_LAST_EXTENT(eh) - nearex - 1) - * sizeof(struct ext4_extent); - memmove(nearex + 1, nearex + 2, len); - } - eh->eh_entries = cpu_to_le16(le16_to_cpu(eh->eh_entries)-1); - BUG_ON(eh->eh_entries == 0); - } - - /* try to merge extents to the left */ - - /* time to correct all indexes above */ - err = ext4_ext_correct_indexes(handle, inode, path); - if (err) - goto cleanup; - - err = ext4_ext_dirty(handle, inode, path + depth); - -cleanup: - if (npath) { - ext4_ext_drop_refs(npath); - kfree(npath); - } - ext4_ext_tree_changed(inode); - ext4_ext_invalidate_cache(inode); - return err; -} - -int ext4_ext_walk_space(struct inode *inode, unsigned long block, - unsigned long num, ext_prepare_callback func, - void *cbdata) -{ - struct ext4_ext_path *path = NULL; - struct ext4_ext_cache cbex; - struct ext4_extent *ex; - unsigned long next, start = 0, end = 0; - unsigned long last = block + num; - int depth, exists, err = 0; - - BUG_ON(func == NULL); - BUG_ON(inode == NULL); - - while (block < last && block != EXT_MAX_BLOCK) { - num = last - block; - /* find extent for this block */ - path = ext4_ext_find_extent(inode, block, path); - if (IS_ERR(path)) { - err = PTR_ERR(path); - path = NULL; - break; - } - - depth = ext_depth(inode); - BUG_ON(path[depth].p_hdr == NULL); - ex = path[depth].p_ext; - next = ext4_ext_next_allocated_block(path); - - exists = 0; - if (!ex) { - /* there is no extent yet, so try to allocate - * all requested space */ - start = block; - end = block + num; - } else if (le32_to_cpu(ex->ee_block) > block) { - /* need to allocate space before found extent */ - start = block; - end = le32_to_cpu(ex->ee_block); - if (block + num < end) - end = block + num; - } else if (block >= - le32_to_cpu(ex->ee_block) + le16_to_cpu(ex->ee_len)) { - /* need to allocate space after found extent */ - start = block; - end = block + num; - if (end >= next) - end = next; - } else if (block >= le32_to_cpu(ex->ee_block)) { - /* - * some part of requested space is covered - * by found extent - */ - start = block; - end = le32_to_cpu(ex->ee_block) + le16_to_cpu(ex->ee_len); - if (block + num < end) - end = block + num; - exists = 1; - } else { - BUG(); - } - BUG_ON(end <= start); - - if (!exists) { - cbex.ec_block = start; - cbex.ec_len = end - start; - cbex.ec_start = 0; - cbex.ec_type = EXT4_EXT_CACHE_GAP; - } else { - cbex.ec_block = le32_to_cpu(ex->ee_block); - cbex.ec_len = le16_to_cpu(ex->ee_len); - cbex.ec_start = ext_pblock(ex); - cbex.ec_type = EXT4_EXT_CACHE_EXTENT; - } - - BUG_ON(cbex.ec_len == 0); - err = func(inode, path, &cbex, cbdata); - ext4_ext_drop_refs(path); - - if (err < 0) - break; - if (err == EXT_REPEAT) - continue; - else if (err == EXT_BREAK) { - err = 0; - break; - } - - if (ext_depth(inode) != depth) { - /* depth was changed. we have to realloc path */ - kfree(path); - path = NULL; - } - - block = cbex.ec_block + cbex.ec_len; - } - - if (path) { - ext4_ext_drop_refs(path); - kfree(path); - } - - return err; -} - -static inline void -ext4_ext_put_in_cache(struct inode *inode, __u32 block, - __u32 len, __u32 start, int type) -{ - struct ext4_ext_cache *cex; - BUG_ON(len == 0); - cex = &EXT4_I(inode)->i_cached_extent; - cex->ec_type = type; - cex->ec_block = block; - cex->ec_len = len; - cex->ec_start = start; -} - -/* - * ext4_ext_put_gap_in_cache: - * calculate boundaries of the gap that the requested block fits into - * and cache this gap - */ -static inline void -ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path, - unsigned long block) -{ - int depth = ext_depth(inode); - unsigned long lblock, len; - struct ext4_extent *ex; - - ex = path[depth].p_ext; - if (ex == NULL) { - /* there is no extent yet, so gap is [0;-] */ - lblock = 0; - len = EXT_MAX_BLOCK; - ext_debug("cache gap(whole file):"); - } else if (block < le32_to_cpu(ex->ee_block)) { - lblock = block; - len = le32_to_cpu(ex->ee_block) - block; - ext_debug("cache gap(before): %lu [%lu:%lu]", - (unsigned long) block, - (unsigned long) le32_to_cpu(ex->ee_block), - (unsigned long) le16_to_cpu(ex->ee_len)); - } else if (block >= le32_to_cpu(ex->ee_block) - + le16_to_cpu(ex->ee_len)) { - lblock = le32_to_cpu(ex->ee_block) - + le16_to_cpu(ex->ee_len); - len = ext4_ext_next_allocated_block(path); - ext_debug("cache gap(after): [%lu:%lu] %lu", - (unsigned long) le32_to_cpu(ex->ee_block), - (unsigned long) le16_to_cpu(ex->ee_len), - (unsigned long) block); - BUG_ON(len == lblock); - len = len - lblock; - } else { - lblock = len = 0; - BUG(); - } - - ext_debug(" -> %lu:%lu\n", (unsigned long) lblock, len); - ext4_ext_put_in_cache(inode, lblock, len, 0, EXT4_EXT_CACHE_GAP); -} - -static inline int -ext4_ext_in_cache(struct inode *inode, unsigned long block, - struct ext4_extent *ex) -{ - struct ext4_ext_cache *cex; - - cex = &EXT4_I(inode)->i_cached_extent; - - /* has cache valid data? */ - if (cex->ec_type == EXT4_EXT_CACHE_NO) - return EXT4_EXT_CACHE_NO; - - BUG_ON(cex->ec_type != EXT4_EXT_CACHE_GAP && - cex->ec_type != EXT4_EXT_CACHE_EXTENT); - if (block >= cex->ec_block && block < cex->ec_block + cex->ec_len) { - ex->ee_block = cpu_to_le32(cex->ec_block); - ext4_ext_store_pblock(ex, cex->ec_start); - ex->ee_len = cpu_to_le16(cex->ec_len); - ext_debug("%lu cached by %lu:%lu:%llu\n", - (unsigned long) block, - (unsigned long) cex->ec_block, - (unsigned long) cex->ec_len, - cex->ec_start); - return cex->ec_type; - } - - /* not in cache */ - return EXT4_EXT_CACHE_NO; -} - -/* - * ext4_ext_rm_idx: - * removes index from the index block. - * It's used in truncate case only, thus all requests are for - * last index in the block only. - */ -int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, - struct ext4_ext_path *path) -{ - struct buffer_head *bh; - int err; - ext4_fsblk_t leaf; - - /* free index block */ - path--; - leaf = idx_pblock(path->p_idx); - BUG_ON(path->p_hdr->eh_entries == 0); - if ((err = ext4_ext_get_access(handle, inode, path))) - return err; - path->p_hdr->eh_entries = cpu_to_le16(le16_to_cpu(path->p_hdr->eh_entries)-1); - if ((err = ext4_ext_dirty(handle, inode, path))) - return err; - ext_debug("index is empty, remove it, free block %llu\n", leaf); - bh = sb_find_get_block(inode->i_sb, leaf); - ext4_forget(handle, 1, inode, bh, leaf); - ext4_free_blocks(handle, inode, leaf, 1); - return err; -} - -/* - * ext4_ext_calc_credits_for_insert: - * This routine returns max. credits that the extent tree can consume. - * It should be OK for low-performance paths like ->writepage() - * To allow many writing processes to fit into a single transaction, - * the caller should calculate credits under truncate_mutex and - * pass the actual path. - */ -int inline ext4_ext_calc_credits_for_insert(struct inode *inode, - struct ext4_ext_path *path) -{ - int depth, needed; - - if (path) { - /* probably there is space in leaf? */ - depth = ext_depth(inode); - if (le16_to_cpu(path[depth].p_hdr->eh_entries) - < le16_to_cpu(path[depth].p_hdr->eh_max)) - return 1; - } - - /* - * given 32-bit logical block (4294967296 blocks), max. tree - * can be 4 levels in depth -- 4 * 340^4 == 53453440000. - * Let's also add one more level for imbalance. - */ - depth = 5; - - /* allocation of new data block(s) */ - needed = 2; - - /* - * tree can be full, so it would need to grow in depth: - * allocation + old root + new root - */ - needed += 2 + 1 + 1; - - /* - * Index split can happen, we would need: - * allocate intermediate indexes (bitmap + group) - * + change two blocks at each level, but root (already included) - */ - needed = (depth * 2) + (depth * 2); - - /* any allocation modifies superblock */ - needed += 1; - - return needed; -} - -static int ext4_remove_blocks(handle_t *handle, struct inode *inode, - struct ext4_extent *ex, - unsigned long from, unsigned long to) -{ - struct buffer_head *bh; - int i; - -#ifdef EXTENTS_STATS - { - struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); - unsigned short ee_len = le16_to_cpu(ex->ee_len); - spin_lock(&sbi->s_ext_stats_lock); - sbi->s_ext_blocks += ee_len; - sbi->s_ext_extents++; - if (ee_len < sbi->s_ext_min) - sbi->s_ext_min = ee_len; - if (ee_len > sbi->s_ext_max) - sbi->s_ext_max = ee_len; - if (ext_depth(inode) > sbi->s_depth_max) - sbi->s_depth_max = ext_depth(inode); - spin_unlock(&sbi->s_ext_stats_lock); - } -#endif - if (from >= le32_to_cpu(ex->ee_block) - && to == le32_to_cpu(ex->ee_block) + le16_to_cpu(ex->ee_len) - 1) { - /* tail removal */ - unsigned long num; - ext4_fsblk_t start; - num = le32_to_cpu(ex->ee_block) + le16_to_cpu(ex->ee_len) - from; - start = ext_pblock(ex) + le16_to_cpu(ex->ee_len) - num; - ext_debug("free last %lu blocks starting %llu\n", num, start); - for (i = 0; i < num; i++) { - bh = sb_find_get_block(inode->i_sb, start + i); - ext4_forget(handle, 0, inode, bh, start + i); - } - ext4_free_blocks(handle, inode, start, num); - } else if (from == le32_to_cpu(ex->ee_block) - && to <= le32_to_cpu(ex->ee_block) + le16_to_cpu(ex->ee_len) - 1) { - printk("strange request: removal %lu-%lu from %u:%u\n", - from, to, le32_to_cpu(ex->ee_block), le16_to_cpu(ex->ee_len)); - } else { - printk("strange request: removal(2) %lu-%lu from %u:%u\n", - from, to, le32_to_cpu(ex->ee_block), le16_to_cpu(ex->ee_len)); - } - return 0; -} - -static int -ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, - struct ext4_ext_path *path, unsigned long start) -{ - int err = 0, correct_index = 0; - int depth = ext_depth(inode), credits; - struct ext4_extent_header *eh; - unsigned a, b, block, num; - unsigned long ex_ee_block; - unsigned short ex_ee_len; - struct ext4_extent *ex; - - ext_debug("truncate since %lu in leaf\n", start); - if (!path[depth].p_hdr) - path[depth].p_hdr = ext_block_hdr(path[depth].p_bh); - eh = path[depth].p_hdr; - BUG_ON(eh == NULL); - BUG_ON(le16_to_cpu(eh->eh_entries) > le16_to_cpu(eh->eh_max)); - BUG_ON(eh->eh_magic != EXT4_EXT_MAGIC); - - /* find where to start removing */ - ex = EXT_LAST_EXTENT(eh); - - ex_ee_block = le32_to_cpu(ex->ee_block); - ex_ee_len = le16_to_cpu(ex->ee_len); - - while (ex >= EXT_FIRST_EXTENT(eh) && - ex_ee_block + ex_ee_len > start) { - ext_debug("remove ext %lu:%u\n", ex_ee_block, ex_ee_len); - path[depth].p_ext = ex; - - a = ex_ee_block > start ? ex_ee_block : start; - b = ex_ee_block + ex_ee_len - 1 < EXT_MAX_BLOCK ? - ex_ee_block + ex_ee_len - 1 : EXT_MAX_BLOCK; - - ext_debug(" border %u:%u\n", a, b); - - if (a != ex_ee_block && b != ex_ee_block + ex_ee_len - 1) { - block = 0; - num = 0; - BUG(); - } else if (a != ex_ee_block) { - /* remove tail of the extent */ - block = ex_ee_block; - num = a - block; - } else if (b != ex_ee_block + ex_ee_len - 1) { - /* remove head of the extent */ - block = a; - num = b - a; - /* there is no "make a hole" API yet */ - BUG(); - } else { - /* remove whole extent: excellent! */ - block = ex_ee_block; - num = 0; - BUG_ON(a != ex_ee_block); - BUG_ON(b != ex_ee_block + ex_ee_len - 1); - } - - /* at present, extent can't cross block group: */ - /* leaf + bitmap + group desc + sb + inode */ - credits = 5; - if (ex == EXT_FIRST_EXTENT(eh)) { - correct_index = 1; - credits += (ext_depth(inode)) + 1; - } -#ifdef CONFIG_QUOTA - credits += 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb); -#endif - - handle = ext4_ext_journal_restart(handle, credits); - if (IS_ERR(handle)) { - err = PTR_ERR(handle); - goto out; - } - - err = ext4_ext_get_access(handle, inode, path + depth); - if (err) - goto out; - - err = ext4_remove_blocks(handle, inode, ex, a, b); - if (err) - goto out; - - if (num == 0) { - /* this extent is removed; mark slot entirely unused */ - ext4_ext_store_pblock(ex, 0); - eh->eh_entries = cpu_to_le16(le16_to_cpu(eh->eh_entries)-1); - } - - ex->ee_block = cpu_to_le32(block); - ex->ee_len = cpu_to_le16(num); - - err = ext4_ext_dirty(handle, inode, path + depth); - if (err) - goto out; - - ext_debug("new extent: %u:%u:%llu\n", block, num, - ext_pblock(ex)); - ex--; - ex_ee_block = le32_to_cpu(ex->ee_block); - ex_ee_len = le16_to_cpu(ex->ee_len); - } - - if (correct_index && eh->eh_entries) - err = ext4_ext_correct_indexes(handle, inode, path); - - /* if this leaf is free, then we should - * remove it from index block above */ - if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL) - err = ext4_ext_rm_idx(handle, inode, path + depth); - -out: - return err; -} - -/* - * ext4_ext_more_to_rm: - * returns 1 if current index has to be freed (even partial) - */ -static int inline -ext4_ext_more_to_rm(struct ext4_ext_path *path) -{ - BUG_ON(path->p_idx == NULL); - - if (path->p_idx < EXT_FIRST_INDEX(path->p_hdr)) - return 0; - - /* - * if truncate on deeper level happened, it wasn't partial, - * so we have to consider current index for truncation - */ - if (le16_to_cpu(path->p_hdr->eh_entries) == path->p_block) - return 0; - return 1; -} - -int ext4_ext_remove_space(struct inode *inode, unsigned long start) -{ - struct super_block *sb = inode->i_sb; - int depth = ext_depth(inode); - struct ext4_ext_path *path; - handle_t *handle; - int i = 0, err = 0; - - ext_debug("truncate since %lu\n", start); - - /* probably first extent we're gonna free will be last in block */ - handle = ext4_journal_start(inode, depth + 1); - if (IS_ERR(handle)) - return PTR_ERR(handle); - - ext4_ext_invalidate_cache(inode); - - /* - * We start scanning from right side, freeing all the blocks - * after i_size and walking into the tree depth-wise. - */ - path = kmalloc(sizeof(struct ext4_ext_path) * (depth + 1), GFP_KERNEL); - if (path == NULL) { - ext4_journal_stop(handle); - return -ENOMEM; - } - memset(path, 0, sizeof(struct ext4_ext_path) * (depth + 1)); - path[0].p_hdr = ext_inode_hdr(inode); - if (ext4_ext_check_header(__FUNCTION__, inode, path[0].p_hdr)) { - err = -EIO; - goto out; - } - path[0].p_depth = depth; - - while (i >= 0 && err == 0) { - if (i == depth) { - /* this is leaf block */ - err = ext4_ext_rm_leaf(handle, inode, path, start); - /* root level has p_bh == NULL, brelse() eats this */ - brelse(path[i].p_bh); - path[i].p_bh = NULL; - i--; - continue; - } - - /* this is index block */ - if (!path[i].p_hdr) { - ext_debug("initialize header\n"); - path[i].p_hdr = ext_block_hdr(path[i].p_bh); - if (ext4_ext_check_header(__FUNCTION__, inode, - path[i].p_hdr)) { - err = -EIO; - goto out; - } - } - - BUG_ON(le16_to_cpu(path[i].p_hdr->eh_entries) - > le16_to_cpu(path[i].p_hdr->eh_max)); - BUG_ON(path[i].p_hdr->eh_magic != EXT4_EXT_MAGIC); - - if (!path[i].p_idx) { - /* this level hasn't been touched yet */ - path[i].p_idx = EXT_LAST_INDEX(path[i].p_hdr); - path[i].p_block = le16_to_cpu(path[i].p_hdr->eh_entries)+1; - ext_debug("init index ptr: hdr 0x%p, num %d\n", - path[i].p_hdr, - le16_to_cpu(path[i].p_hdr->eh_entries)); - } else { - /* we were already here, see at next index */ - path[i].p_idx--; - } - - ext_debug("level %d - index, first 0x%p, cur 0x%p\n", - i, EXT_FIRST_INDEX(path[i].p_hdr), - path[i].p_idx); - if (ext4_ext_more_to_rm(path + i)) { - /* go to the next level */ - ext_debug("move to level %d (block %llu)\n", - i + 1, idx_pblock(path[i].p_idx)); - memset(path + i + 1, 0, sizeof(*path)); - path[i+1].p_bh = - sb_bread(sb, idx_pblock(path[i].p_idx)); - if (!path[i+1].p_bh) { - /* should we reset i_size? */ - err = -EIO; - break; - } - - /* save actual number of indexes since this - * number is changed at the next iteration */ - path[i].p_block = le16_to_cpu(path[i].p_hdr->eh_entries); - i++; - } else { - /* we finished processing this index, go up */ - if (path[i].p_hdr->eh_entries == 0 && i > 0) { - /* index is empty, remove it; - * handle must be already prepared by the - * truncatei_leaf() */ - err = ext4_ext_rm_idx(handle, inode, path + i); - } - /* root level has p_bh == NULL, brelse() eats this */ - brelse(path[i].p_bh); - path[i].p_bh = NULL; - i--; - ext_debug("return to level %d\n", i); - } - } - - /* TODO: flexible tree reduction should be here */ - if (path->p_hdr->eh_entries == 0) { - /* - * truncate to zero freed all the tree, - * so we need to correct eh_depth - */ - err = ext4_ext_get_access(handle, inode, path); - if (err == 0) { - ext_inode_hdr(inode)->eh_depth = 0; - ext_inode_hdr(inode)->eh_max = - cpu_to_le16(ext4_ext_space_root(inode)); - err = ext4_ext_dirty(handle, inode, path); - } - } -out: - ext4_ext_tree_changed(inode); - ext4_ext_drop_refs(path); - kfree(path); - ext4_journal_stop(handle); - - return err; -} - -/* - * called at mount time - */ -void ext4_ext_init(struct super_block *sb) -{ - /* - * possible initialization would be here - */ - - if (test_opt(sb, EXTENTS)) { - printk("EXT4-fs: file extents enabled"); -#ifdef AGRESSIVE_TEST - printk(", agressive tests"); -#endif -#ifdef CHECK_BINSEARCH - printk(", check binsearch"); -#endif -#ifdef EXTENTS_STATS - printk(", stats"); -#endif - printk("\n"); -#ifdef EXTENTS_STATS - spin_lock_init(&EXT4_SB(sb)->s_ext_stats_lock); - EXT4_SB(sb)->s_ext_min = 1 << 30; - EXT4_SB(sb)->s_ext_max = 0; -#endif - } -} - -/* - * called at umount time - */ -void ext4_ext_release(struct super_block *sb) -{ - if (!test_opt(sb, EXTENTS)) - return; - -#ifdef EXTENTS_STATS - if (EXT4_SB(sb)->s_ext_blocks && EXT4_SB(sb)->s_ext_extents) { - struct ext4_sb_info *sbi = EXT4_SB(sb); - printk(KERN_ERR "EXT4-fs: %lu blocks in %lu extents (%lu ave)\n", - sbi->s_ext_blocks, sbi->s_ext_extents, - sbi->s_ext_blocks / sbi->s_ext_extents); - printk(KERN_ERR "EXT4-fs: extents: %lu min, %lu max, max depth %lu\n", - sbi->s_ext_min, sbi->s_ext_max, sbi->s_depth_max); - } -#endif -} - -int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, - ext4_fsblk_t iblock, - unsigned long max_blocks, struct buffer_head *bh_result, - int create, int extend_disksize) -{ - struct ext4_ext_path *path = NULL; - struct ext4_extent newex, *ex; - ext4_fsblk_t goal, newblock; - int err = 0, depth; - unsigned long allocated = 0; - - __clear_bit(BH_New, &bh_result->b_state); - ext_debug("blocks %d/%lu requested for inode %u\n", (int) iblock, - max_blocks, (unsigned) inode->i_ino); - mutex_lock(&EXT4_I(inode)->truncate_mutex); - - /* check in cache */ - if ((goal = ext4_ext_in_cache(inode, iblock, &newex))) { - if (goal == EXT4_EXT_CACHE_GAP) { - if (!create) { - /* block isn't allocated yet and - * user doesn't want to allocate it */ - goto out2; - } - /* we should allocate requested block */ - } else if (goal == EXT4_EXT_CACHE_EXTENT) { - /* block is already allocated */ - newblock = iblock - - le32_to_cpu(newex.ee_block) - + ext_pblock(&newex); - /* number of remaining blocks in the extent */ - allocated = le16_to_cpu(newex.ee_len) - - (iblock - le32_to_cpu(newex.ee_block)); - goto out; - } else { - BUG(); - } - } - - /* find extent for this block */ - path = ext4_ext_find_extent(inode, iblock, NULL); - if (IS_ERR(path)) { - err = PTR_ERR(path); - path = NULL; - goto out2; - } - - depth = ext_depth(inode); - - /* - * consistent leaf must not be empty; - * this situation is possible, though, _during_ tree modification; - * this is why assert can't be put in ext4_ext_find_extent() - */ - BUG_ON(path[depth].p_ext == NULL && depth != 0); - - if ((ex = path[depth].p_ext)) { - unsigned long ee_block = le32_to_cpu(ex->ee_block); - ext4_fsblk_t ee_start = ext_pblock(ex); - unsigned short ee_len = le16_to_cpu(ex->ee_len); - - /* - * Allow future support for preallocated extents to be added - * as an RO_COMPAT feature: - * Uninitialized extents are treated as holes, except that - * we avoid (fail) allocating new blocks during a write. - */ - if (ee_len > EXT_MAX_LEN) - goto out2; - /* if found extent covers block, simply return it */ - if (iblock >= ee_block && iblock < ee_block + ee_len) { - newblock = iblock - ee_block + ee_start; - /* number of remaining blocks in the extent */ - allocated = ee_len - (iblock - ee_block); - ext_debug("%d fit into %lu:%d -> %llu\n", (int) iblock, - ee_block, ee_len, newblock); - ext4_ext_put_in_cache(inode, ee_block, ee_len, - ee_start, EXT4_EXT_CACHE_EXTENT); - goto out; - } - } - - /* - * requested block isn't allocated yet; - * we couldn't try to create block if create flag is zero - */ - if (!create) { - /* put just found gap into cache to speed up - * subsequent requests */ - ext4_ext_put_gap_in_cache(inode, path, iblock); - goto out2; - } - /* - * Okay, we need to do block allocation. Lazily initialize the block - * allocation info here if necessary. - */ - if (S_ISREG(inode->i_mode) && (!EXT4_I(inode)->i_block_alloc_info)) - ext4_init_block_alloc_info(inode); - - /* allocate new block */ - goal = ext4_ext_find_goal(inode, path, iblock); - allocated = max_blocks; - newblock = ext4_new_blocks(handle, inode, goal, &allocated, &err); - if (!newblock) - goto out2; - ext_debug("allocate new block: goal %llu, found %llu/%lu\n", - goal, newblock, allocated); - - /* try to insert new extent into found leaf and return */ - newex.ee_block = cpu_to_le32(iblock); - ext4_ext_store_pblock(&newex, newblock); - newex.ee_len = cpu_to_le16(allocated); - err = ext4_ext_insert_extent(handle, inode, path, &newex); - if (err) - goto out2; - - if (extend_disksize && inode->i_size > EXT4_I(inode)->i_disksize) - EXT4_I(inode)->i_disksize = inode->i_size; - - /* previous routine could use block we allocated */ - newblock = ext_pblock(&newex); - __set_bit(BH_New, &bh_result->b_state); - - ext4_ext_put_in_cache(inode, iblock, allocated, newblock, - EXT4_EXT_CACHE_EXTENT); -out: - if (allocated > max_blocks) - allocated = max_blocks; - ext4_ext_show_leaf(inode, path); - __set_bit(BH_Mapped, &bh_result->b_state); - bh_result->b_bdev = inode->i_sb->s_bdev; - bh_result->b_blocknr = newblock; -out2: - if (path) { - ext4_ext_drop_refs(path); - kfree(path); - } - mutex_unlock(&EXT4_I(inode)->truncate_mutex); - - return err ? err : allocated; -} - -void ext4_ext_truncate(struct inode * inode, struct page *page) -{ - struct address_space *mapping = inode->i_mapping; - struct super_block *sb = inode->i_sb; - unsigned long last_block; - handle_t *handle; - int err = 0; - - /* - * probably first extent we're gonna free will be last in block - */ - err = ext4_writepage_trans_blocks(inode) + 3; - handle = ext4_journal_start(inode, err); - if (IS_ERR(handle)) { - if (page) { - clear_highpage(page); - flush_dcache_page(page); - unlock_page(page); - page_cache_release(page); - } - return; - } - - if (page) - ext4_block_truncate_page(handle, page, mapping, inode->i_size); - - mutex_lock(&EXT4_I(inode)->truncate_mutex); - ext4_ext_invalidate_cache(inode); - - /* - * TODO: optimization is possible here. - * Probably we need not scan at all, - * because page truncation is enough. - */ - if (ext4_orphan_add(handle, inode)) - goto out_stop; - - /* we have to know where to truncate from in crash case */ - EXT4_I(inode)->i_disksize = inode->i_size; - ext4_mark_inode_dirty(handle, inode); - - last_block = (inode->i_size + sb->s_blocksize - 1) - >> EXT4_BLOCK_SIZE_BITS(sb); - err = ext4_ext_remove_space(inode, last_block); - - /* In a multi-transaction truncate, we only make the final - * transaction synchronous. */ - if (IS_SYNC(inode)) - handle->h_sync = 1; - -out_stop: - /* - * If this was a simple ftruncate() and the file will remain alive, - * then we need to clear up the orphan record which we created above. - * However, if this was a real unlink then we were called by - * ext4_delete_inode(), and we allow that function to clean up the - * orphan info for us. - */ - if (inode->i_nlink) - ext4_orphan_del(handle, inode); - - mutex_unlock(&EXT4_I(inode)->truncate_mutex); - ext4_journal_stop(handle); -} - -/* - * ext4_ext_writepage_trans_blocks: - * calculate max number of blocks we could modify - * in order to allocate new block for an inode - */ -int ext4_ext_writepage_trans_blocks(struct inode *inode, int num) -{ - int needed; - - needed = ext4_ext_calc_credits_for_insert(inode, NULL); - - /* caller wants to allocate num blocks, but note it includes sb */ - needed = needed * num - (num - 1); - -#ifdef CONFIG_QUOTA - needed += 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb); -#endif - - return needed; -} - -EXPORT_SYMBOL(ext4_mark_inode_dirty); -EXPORT_SYMBOL(ext4_ext_invalidate_cache); -EXPORT_SYMBOL(ext4_ext_insert_extent); -EXPORT_SYMBOL(ext4_ext_walk_space); -EXPORT_SYMBOL(ext4_ext_find_goal); -EXPORT_SYMBOL(ext4_ext_calc_credits_for_insert); - diff --git a/trunk/fs/ext4/file.c b/trunk/fs/ext4/file.c deleted file mode 100644 index 0b622c0624b7..000000000000 --- a/trunk/fs/ext4/file.c +++ /dev/null @@ -1,139 +0,0 @@ -/* - * linux/fs/ext4/file.c - * - * Copyright (C) 1992, 1993, 1994, 1995 - * Remy Card (card@masi.ibp.fr) - * Laboratoire MASI - Institut Blaise Pascal - * Universite Pierre et Marie Curie (Paris VI) - * - * from - * - * linux/fs/minix/file.c - * - * Copyright (C) 1991, 1992 Linus Torvalds - * - * ext4 fs regular file handling primitives - * - * 64-bit file support on 64-bit platforms by Jakub Jelinek - * (jj@sunsite.ms.mff.cuni.cz) - */ - -#include -#include -#include -#include -#include -#include "xattr.h" -#include "acl.h" - -/* - * Called when an inode is released. Note that this is different - * from ext4_file_open: open gets called at every open, but release - * gets called only when /all/ the files are closed. - */ -static int ext4_release_file (struct inode * inode, struct file * filp) -{ - /* if we are the last writer on the inode, drop the block reservation */ - if ((filp->f_mode & FMODE_WRITE) && - (atomic_read(&inode->i_writecount) == 1)) - { - mutex_lock(&EXT4_I(inode)->truncate_mutex); - ext4_discard_reservation(inode); - mutex_unlock(&EXT4_I(inode)->truncate_mutex); - } - if (is_dx(inode) && filp->private_data) - ext4_htree_free_dir_info(filp->private_data); - - return 0; -} - -static ssize_t -ext4_file_write(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) -{ - struct file *file = iocb->ki_filp; - struct inode *inode = file->f_dentry->d_inode; - ssize_t ret; - int err; - - ret = generic_file_aio_write(iocb, iov, nr_segs, pos); - - /* - * Skip flushing if there was an error, or if nothing was written. - */ - if (ret <= 0) - return ret; - - /* - * If the inode is IS_SYNC, or is O_SYNC and we are doing data - * journalling then we need to make sure that we force the transaction - * to disk to keep all metadata uptodate synchronously. - */ - if (file->f_flags & O_SYNC) { - /* - * If we are non-data-journaled, then the dirty data has - * already been flushed to backing store by generic_osync_inode, - * and the inode has been flushed too if there have been any - * modifications other than mere timestamp updates. - * - * Open question --- do we care about flushing timestamps too - * if the inode is IS_SYNC? - */ - if (!ext4_should_journal_data(inode)) - return ret; - - goto force_commit; - } - - /* - * So we know that there has been no forced data flush. If the inode - * is marked IS_SYNC, we need to force one ourselves. - */ - if (!IS_SYNC(inode)) - return ret; - - /* - * Open question #2 --- should we force data to disk here too? If we - * don't, the only impact is that data=writeback filesystems won't - * flush data to disk automatically on IS_SYNC, only metadata (but - * historically, that is what ext2 has done.) - */ - -force_commit: - err = ext4_force_commit(inode->i_sb); - if (err) - return err; - return ret; -} - -const struct file_operations ext4_file_operations = { - .llseek = generic_file_llseek, - .read = do_sync_read, - .write = do_sync_write, - .aio_read = generic_file_aio_read, - .aio_write = ext4_file_write, - .ioctl = ext4_ioctl, -#ifdef CONFIG_COMPAT - .compat_ioctl = ext4_compat_ioctl, -#endif - .mmap = generic_file_mmap, - .open = generic_file_open, - .release = ext4_release_file, - .fsync = ext4_sync_file, - .sendfile = generic_file_sendfile, - .splice_read = generic_file_splice_read, - .splice_write = generic_file_splice_write, -}; - -struct inode_operations ext4_file_inode_operations = { - .truncate = ext4_truncate, - .setattr = ext4_setattr, -#ifdef CONFIG_EXT4DEV_FS_XATTR - .setxattr = generic_setxattr, - .getxattr = generic_getxattr, - .listxattr = ext4_listxattr, - .removexattr = generic_removexattr, -#endif - .permission = ext4_permission, -}; - diff --git a/trunk/fs/ext4/fsync.c b/trunk/fs/ext4/fsync.c deleted file mode 100644 index 2a167d7131fa..000000000000 --- a/trunk/fs/ext4/fsync.c +++ /dev/null @@ -1,88 +0,0 @@ -/* - * linux/fs/ext4/fsync.c - * - * Copyright (C) 1993 Stephen Tweedie (sct@redhat.com) - * from - * Copyright (C) 1992 Remy Card (card@masi.ibp.fr) - * Laboratoire MASI - Institut Blaise Pascal - * Universite Pierre et Marie Curie (Paris VI) - * from - * linux/fs/minix/truncate.c Copyright (C) 1991, 1992 Linus Torvalds - * - * ext4fs fsync primitive - * - * Big-endian to little-endian byte-swapping/bitmaps by - * David S. Miller (davem@caip.rutgers.edu), 1995 - * - * Removed unnecessary code duplication for little endian machines - * and excessive __inline__s. - * Andi Kleen, 1997 - * - * Major simplications and cleanup - we only need to do the metadata, because - * we can depend on generic_block_fdatasync() to sync the data blocks. - */ - -#include -#include -#include -#include -#include -#include -#include - -/* - * akpm: A new design for ext4_sync_file(). - * - * This is only called from sys_fsync(), sys_fdatasync() and sys_msync(). - * There cannot be a transaction open by this task. - * Another task could have dirtied this inode. Its data can be in any - * state in the journalling system. - * - * What we do is just kick off a commit and wait on it. This will snapshot the - * inode to disk. - */ - -int ext4_sync_file(struct file * file, struct dentry *dentry, int datasync) -{ - struct inode *inode = dentry->d_inode; - int ret = 0; - - J_ASSERT(ext4_journal_current_handle() == 0); - - /* - * data=writeback: - * The caller's filemap_fdatawrite()/wait will sync the data. - * sync_inode() will sync the metadata - * - * data=ordered: - * The caller's filemap_fdatawrite() will write the data and - * sync_inode() will write the inode if it is dirty. Then the caller's - * filemap_fdatawait() will wait on the pages. - * - * data=journal: - * filemap_fdatawrite won't do anything (the buffers are clean). - * ext4_force_commit will write the file data into the journal and - * will wait on that. - * filemap_fdatawait() will encounter a ton of newly-dirtied pages - * (they were dirtied by commit). But that's OK - the blocks are - * safe in-journal, which is all fsync() needs to ensure. - */ - if (ext4_should_journal_data(inode)) { - ret = ext4_force_commit(inode->i_sb); - goto out; - } - - /* - * The VFS has written the file data. If the inode is unaltered - * then we need not start a commit. - */ - if (inode->i_state & (I_DIRTY_SYNC|I_DIRTY_DATASYNC)) { - struct writeback_control wbc = { - .sync_mode = WB_SYNC_ALL, - .nr_to_write = 0, /* sys_fsync did this */ - }; - ret = sync_inode(inode, &wbc); - } -out: - return ret; -} diff --git a/trunk/fs/ext4/hash.c b/trunk/fs/ext4/hash.c deleted file mode 100644 index a67966385e06..000000000000 --- a/trunk/fs/ext4/hash.c +++ /dev/null @@ -1,152 +0,0 @@ -/* - * linux/fs/ext4/hash.c - * - * Copyright (C) 2002 by Theodore Ts'o - * - * This file is released under the GPL v2. - * - * This file may be redistributed under the terms of the GNU Public - * License. - */ - -#include -#include -#include -#include -#include - -#define DELTA 0x9E3779B9 - -static void TEA_transform(__u32 buf[4], __u32 const in[]) -{ - __u32 sum = 0; - __u32 b0 = buf[0], b1 = buf[1]; - __u32 a = in[0], b = in[1], c = in[2], d = in[3]; - int n = 16; - - do { - sum += DELTA; - b0 += ((b1 << 4)+a) ^ (b1+sum) ^ ((b1 >> 5)+b); - b1 += ((b0 << 4)+c) ^ (b0+sum) ^ ((b0 >> 5)+d); - } while(--n); - - buf[0] += b0; - buf[1] += b1; -} - - -/* The old legacy hash */ -static __u32 dx_hack_hash (const char *name, int len) -{ - __u32 hash0 = 0x12a3fe2d, hash1 = 0x37abe8f9; - while (len--) { - __u32 hash = hash1 + (hash0 ^ (*name++ * 7152373)); - - if (hash & 0x80000000) hash -= 0x7fffffff; - hash1 = hash0; - hash0 = hash; - } - return (hash0 << 1); -} - -static void str2hashbuf(const char *msg, int len, __u32 *buf, int num) -{ - __u32 pad, val; - int i; - - pad = (__u32)len | ((__u32)len << 8); - pad |= pad << 16; - - val = pad; - if (len > num*4) - len = num * 4; - for (i=0; i < len; i++) { - if ((i % 4) == 0) - val = pad; - val = msg[i] + (val << 8); - if ((i % 4) == 3) { - *buf++ = val; - val = pad; - num--; - } - } - if (--num >= 0) - *buf++ = val; - while (--num >= 0) - *buf++ = pad; -} - -/* - * Returns the hash of a filename. If len is 0 and name is NULL, then - * this function can be used to test whether or not a hash version is - * supported. - * - * The seed is an 4 longword (32 bits) "secret" which can be used to - * uniquify a hash. If the seed is all zero's, then some default seed - * may be used. - * - * A particular hash version specifies whether or not the seed is - * represented, and whether or not the returned hash is 32 bits or 64 - * bits. 32 bit hashes will return 0 for the minor hash. - */ -int ext4fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo) -{ - __u32 hash; - __u32 minor_hash = 0; - const char *p; - int i; - __u32 in[8], buf[4]; - - /* Initialize the default seed for the hash checksum functions */ - buf[0] = 0x67452301; - buf[1] = 0xefcdab89; - buf[2] = 0x98badcfe; - buf[3] = 0x10325476; - - /* Check to see if the seed is all zero's */ - if (hinfo->seed) { - for (i=0; i < 4; i++) { - if (hinfo->seed[i]) - break; - } - if (i < 4) - memcpy(buf, hinfo->seed, sizeof(buf)); - } - - switch (hinfo->hash_version) { - case DX_HASH_LEGACY: - hash = dx_hack_hash(name, len); - break; - case DX_HASH_HALF_MD4: - p = name; - while (len > 0) { - str2hashbuf(p, len, in, 8); - half_md4_transform(buf, in); - len -= 32; - p += 32; - } - minor_hash = buf[2]; - hash = buf[1]; - break; - case DX_HASH_TEA: - p = name; - while (len > 0) { - str2hashbuf(p, len, in, 4); - TEA_transform(buf, in); - len -= 16; - p += 16; - } - hash = buf[0]; - minor_hash = buf[1]; - break; - default: - hinfo->hash = 0; - return -1; - } - hash = hash & ~1; - if (hash == (EXT4_HTREE_EOF << 1)) - hash = (EXT4_HTREE_EOF-1) << 1; - hinfo->hash = hash; - hinfo->minor_hash = minor_hash; - return 0; -} diff --git a/trunk/fs/ext4/ialloc.c b/trunk/fs/ext4/ialloc.c deleted file mode 100644 index c88b439ba5cd..000000000000 --- a/trunk/fs/ext4/ialloc.c +++ /dev/null @@ -1,772 +0,0 @@ -/* - * linux/fs/ext4/ialloc.c - * - * Copyright (C) 1992, 1993, 1994, 1995 - * Remy Card (card@masi.ibp.fr) - * Laboratoire MASI - Institut Blaise Pascal - * Universite Pierre et Marie Curie (Paris VI) - * - * BSD ufs-inspired inode and directory allocation by - * Stephen Tweedie (sct@redhat.com), 1993 - * Big-endian to little-endian byte-swapping/bitmaps by - * David S. Miller (davem@caip.rutgers.edu), 1995 - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "xattr.h" -#include "acl.h" - -/* - * ialloc.c contains the inodes allocation and deallocation routines - */ - -/* - * The free inodes are managed by bitmaps. A file system contains several - * blocks groups. Each group contains 1 bitmap block for blocks, 1 bitmap - * block for inodes, N blocks for the inode table and data blocks. - * - * The file system contains group descriptors which are located after the - * super block. Each descriptor contains the number of the bitmap block and - * the free blocks count in the block. - */ - - -/* - * Read the inode allocation bitmap for a given block_group, reading - * into the specified slot in the superblock's bitmap cache. - * - * Return buffer_head of bitmap on success or NULL. - */ -static struct buffer_head * -read_inode_bitmap(struct super_block * sb, unsigned long block_group) -{ - struct ext4_group_desc *desc; - struct buffer_head *bh = NULL; - - desc = ext4_get_group_desc(sb, block_group, NULL); - if (!desc) - goto error_out; - - bh = sb_bread(sb, ext4_inode_bitmap(sb, desc)); - if (!bh) - ext4_error(sb, "read_inode_bitmap", - "Cannot read inode bitmap - " - "block_group = %lu, inode_bitmap = %llu", - block_group, ext4_inode_bitmap(sb, desc)); -error_out: - return bh; -} - -/* - * NOTE! When we get the inode, we're the only people - * that have access to it, and as such there are no - * race conditions we have to worry about. The inode - * is not on the hash-lists, and it cannot be reached - * through the filesystem because the directory entry - * has been deleted earlier. - * - * HOWEVER: we must make sure that we get no aliases, - * which means that we have to call "clear_inode()" - * _before_ we mark the inode not in use in the inode - * bitmaps. Otherwise a newly created file might use - * the same inode number (not actually the same pointer - * though), and then we'd have two inodes sharing the - * same inode number and space on the harddisk. - */ -void ext4_free_inode (handle_t *handle, struct inode * inode) -{ - struct super_block * sb = inode->i_sb; - int is_directory; - unsigned long ino; - struct buffer_head *bitmap_bh = NULL; - struct buffer_head *bh2; - unsigned long block_group; - unsigned long bit; - struct ext4_group_desc * gdp; - struct ext4_super_block * es; - struct ext4_sb_info *sbi; - int fatal = 0, err; - - if (atomic_read(&inode->i_count) > 1) { - printk ("ext4_free_inode: inode has count=%d\n", - atomic_read(&inode->i_count)); - return; - } - if (inode->i_nlink) { - printk ("ext4_free_inode: inode has nlink=%d\n", - inode->i_nlink); - return; - } - if (!sb) { - printk("ext4_free_inode: inode on nonexistent device\n"); - return; - } - sbi = EXT4_SB(sb); - - ino = inode->i_ino; - ext4_debug ("freeing inode %lu\n", ino); - - /* - * Note: we must free any quota before locking the superblock, - * as writing the quota to disk may need the lock as well. - */ - DQUOT_INIT(inode); - ext4_xattr_delete_inode(handle, inode); - DQUOT_FREE_INODE(inode); - DQUOT_DROP(inode); - - is_directory = S_ISDIR(inode->i_mode); - - /* Do this BEFORE marking the inode not in use or returning an error */ - clear_inode (inode); - - es = EXT4_SB(sb)->s_es; - if (ino < EXT4_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) { - ext4_error (sb, "ext4_free_inode", - "reserved or nonexistent inode %lu", ino); - goto error_return; - } - block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb); - bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb); - bitmap_bh = read_inode_bitmap(sb, block_group); - if (!bitmap_bh) - goto error_return; - - BUFFER_TRACE(bitmap_bh, "get_write_access"); - fatal = ext4_journal_get_write_access(handle, bitmap_bh); - if (fatal) - goto error_return; - - /* Ok, now we can actually update the inode bitmaps.. */ - if (!ext4_clear_bit_atomic(sb_bgl_lock(sbi, block_group), - bit, bitmap_bh->b_data)) - ext4_error (sb, "ext4_free_inode", - "bit already cleared for inode %lu", ino); - else { - gdp = ext4_get_group_desc (sb, block_group, &bh2); - - BUFFER_TRACE(bh2, "get_write_access"); - fatal = ext4_journal_get_write_access(handle, bh2); - if (fatal) goto error_return; - - if (gdp) { - spin_lock(sb_bgl_lock(sbi, block_group)); - gdp->bg_free_inodes_count = cpu_to_le16( - le16_to_cpu(gdp->bg_free_inodes_count) + 1); - if (is_directory) - gdp->bg_used_dirs_count = cpu_to_le16( - le16_to_cpu(gdp->bg_used_dirs_count) - 1); - spin_unlock(sb_bgl_lock(sbi, block_group)); - percpu_counter_inc(&sbi->s_freeinodes_counter); - if (is_directory) - percpu_counter_dec(&sbi->s_dirs_counter); - - } - BUFFER_TRACE(bh2, "call ext4_journal_dirty_metadata"); - err = ext4_journal_dirty_metadata(handle, bh2); - if (!fatal) fatal = err; - } - BUFFER_TRACE(bitmap_bh, "call ext4_journal_dirty_metadata"); - err = ext4_journal_dirty_metadata(handle, bitmap_bh); - if (!fatal) - fatal = err; - sb->s_dirt = 1; -error_return: - brelse(bitmap_bh); - ext4_std_error(sb, fatal); -} - -/* - * There are two policies for allocating an inode. If the new inode is - * a directory, then a forward search is made for a block group with both - * free space and a low directory-to-inode ratio; if that fails, then of - * the groups with above-average free space, that group with the fewest - * directories already is chosen. - * - * For other inodes, search forward from the parent directory\'s block - * group to find a free inode. - */ -static int find_group_dir(struct super_block *sb, struct inode *parent) -{ - int ngroups = EXT4_SB(sb)->s_groups_count; - unsigned int freei, avefreei; - struct ext4_group_desc *desc, *best_desc = NULL; - struct buffer_head *bh; - int group, best_group = -1; - - freei = percpu_counter_read_positive(&EXT4_SB(sb)->s_freeinodes_counter); - avefreei = freei / ngroups; - - for (group = 0; group < ngroups; group++) { - desc = ext4_get_group_desc (sb, group, &bh); - if (!desc || !desc->bg_free_inodes_count) - continue; - if (le16_to_cpu(desc->bg_free_inodes_count) < avefreei) - continue; - if (!best_desc || - (le16_to_cpu(desc->bg_free_blocks_count) > - le16_to_cpu(best_desc->bg_free_blocks_count))) { - best_group = group; - best_desc = desc; - } - } - return best_group; -} - -/* - * Orlov's allocator for directories. - * - * We always try to spread first-level directories. - * - * If there are blockgroups with both free inodes and free blocks counts - * not worse than average we return one with smallest directory count. - * Otherwise we simply return a random group. - * - * For the rest rules look so: - * - * It's OK to put directory into a group unless - * it has too many directories already (max_dirs) or - * it has too few free inodes left (min_inodes) or - * it has too few free blocks left (min_blocks) or - * it's already running too large debt (max_debt). - * Parent's group is prefered, if it doesn't satisfy these - * conditions we search cyclically through the rest. If none - * of the groups look good we just look for a group with more - * free inodes than average (starting at parent's group). - * - * Debt is incremented each time we allocate a directory and decremented - * when we allocate an inode, within 0--255. - */ - -#define INODE_COST 64 -#define BLOCK_COST 256 - -static int find_group_orlov(struct super_block *sb, struct inode *parent) -{ - int parent_group = EXT4_I(parent)->i_block_group; - struct ext4_sb_info *sbi = EXT4_SB(sb); - struct ext4_super_block *es = sbi->s_es; - int ngroups = sbi->s_groups_count; - int inodes_per_group = EXT4_INODES_PER_GROUP(sb); - unsigned int freei, avefreei; - ext4_fsblk_t freeb, avefreeb; - ext4_fsblk_t blocks_per_dir; - unsigned int ndirs; - int max_debt, max_dirs, min_inodes; - ext4_grpblk_t min_blocks; - int group = -1, i; - struct ext4_group_desc *desc; - struct buffer_head *bh; - - freei = percpu_counter_read_positive(&sbi->s_freeinodes_counter); - avefreei = freei / ngroups; - freeb = percpu_counter_read_positive(&sbi->s_freeblocks_counter); - avefreeb = freeb; - do_div(avefreeb, ngroups); - ndirs = percpu_counter_read_positive(&sbi->s_dirs_counter); - - if ((parent == sb->s_root->d_inode) || - (EXT4_I(parent)->i_flags & EXT4_TOPDIR_FL)) { - int best_ndir = inodes_per_group; - int best_group = -1; - - get_random_bytes(&group, sizeof(group)); - parent_group = (unsigned)group % ngroups; - for (i = 0; i < ngroups; i++) { - group = (parent_group + i) % ngroups; - desc = ext4_get_group_desc (sb, group, &bh); - if (!desc || !desc->bg_free_inodes_count) - continue; - if (le16_to_cpu(desc->bg_used_dirs_count) >= best_ndir) - continue; - if (le16_to_cpu(desc->bg_free_inodes_count) < avefreei) - continue; - if (le16_to_cpu(desc->bg_free_blocks_count) < avefreeb) - continue; - best_group = group; - best_ndir = le16_to_cpu(desc->bg_used_dirs_count); - } - if (best_group >= 0) - return best_group; - goto fallback; - } - - blocks_per_dir = ext4_blocks_count(es) - freeb; - do_div(blocks_per_dir, ndirs); - - max_dirs = ndirs / ngroups + inodes_per_group / 16; - min_inodes = avefreei - inodes_per_group / 4; - min_blocks = avefreeb - EXT4_BLOCKS_PER_GROUP(sb) / 4; - - max_debt = EXT4_BLOCKS_PER_GROUP(sb); - max_debt /= max_t(int, blocks_per_dir, BLOCK_COST); - if (max_debt * INODE_COST > inodes_per_group) - max_debt = inodes_per_group / INODE_COST; - if (max_debt > 255) - max_debt = 255; - if (max_debt == 0) - max_debt = 1; - - for (i = 0; i < ngroups; i++) { - group = (parent_group + i) % ngroups; - desc = ext4_get_group_desc (sb, group, &bh); - if (!desc || !desc->bg_free_inodes_count) - continue; - if (le16_to_cpu(desc->bg_used_dirs_count) >= max_dirs) - continue; - if (le16_to_cpu(desc->bg_free_inodes_count) < min_inodes) - continue; - if (le16_to_cpu(desc->bg_free_blocks_count) < min_blocks) - continue; - return group; - } - -fallback: - for (i = 0; i < ngroups; i++) { - group = (parent_group + i) % ngroups; - desc = ext4_get_group_desc (sb, group, &bh); - if (!desc || !desc->bg_free_inodes_count) - continue; - if (le16_to_cpu(desc->bg_free_inodes_count) >= avefreei) - return group; - } - - if (avefreei) { - /* - * The free-inodes counter is approximate, and for really small - * filesystems the above test can fail to find any blockgroups - */ - avefreei = 0; - goto fallback; - } - - return -1; -} - -static int find_group_other(struct super_block *sb, struct inode *parent) -{ - int parent_group = EXT4_I(parent)->i_block_group; - int ngroups = EXT4_SB(sb)->s_groups_count; - struct ext4_group_desc *desc; - struct buffer_head *bh; - int group, i; - - /* - * Try to place the inode in its parent directory - */ - group = parent_group; - desc = ext4_get_group_desc (sb, group, &bh); - if (desc && le16_to_cpu(desc->bg_free_inodes_count) && - le16_to_cpu(desc->bg_free_blocks_count)) - return group; - - /* - * We're going to place this inode in a different blockgroup from its - * parent. We want to cause files in a common directory to all land in - * the same blockgroup. But we want files which are in a different - * directory which shares a blockgroup with our parent to land in a - * different blockgroup. - * - * So add our directory's i_ino into the starting point for the hash. - */ - group = (group + parent->i_ino) % ngroups; - - /* - * Use a quadratic hash to find a group with a free inode and some free - * blocks. - */ - for (i = 1; i < ngroups; i <<= 1) { - group += i; - if (group >= ngroups) - group -= ngroups; - desc = ext4_get_group_desc (sb, group, &bh); - if (desc && le16_to_cpu(desc->bg_free_inodes_count) && - le16_to_cpu(desc->bg_free_blocks_count)) - return group; - } - - /* - * That failed: try linear search for a free inode, even if that group - * has no free blocks. - */ - group = parent_group; - for (i = 0; i < ngroups; i++) { - if (++group >= ngroups) - group = 0; - desc = ext4_get_group_desc (sb, group, &bh); - if (desc && le16_to_cpu(desc->bg_free_inodes_count)) - return group; - } - - return -1; -} - -/* - * There are two policies for allocating an inode. If the new inode is - * a directory, then a forward search is made for a block group with both - * free space and a low directory-to-inode ratio; if that fails, then of - * the groups with above-average free space, that group with the fewest - * directories already is chosen. - * - * For other inodes, search forward from the parent directory's block - * group to find a free inode. - */ -struct inode *ext4_new_inode(handle_t *handle, struct inode * dir, int mode) -{ - struct super_block *sb; - struct buffer_head *bitmap_bh = NULL; - struct buffer_head *bh2; - int group; - unsigned long ino = 0; - struct inode * inode; - struct ext4_group_desc * gdp = NULL; - struct ext4_super_block * es; - struct ext4_inode_info *ei; - struct ext4_sb_info *sbi; - int err = 0; - struct inode *ret; - int i; - - /* Cannot create files in a deleted directory */ - if (!dir || !dir->i_nlink) - return ERR_PTR(-EPERM); - - sb = dir->i_sb; - inode = new_inode(sb); - if (!inode) - return ERR_PTR(-ENOMEM); - ei = EXT4_I(inode); - - sbi = EXT4_SB(sb); - es = sbi->s_es; - if (S_ISDIR(mode)) { - if (test_opt (sb, OLDALLOC)) - group = find_group_dir(sb, dir); - else - group = find_group_orlov(sb, dir); - } else - group = find_group_other(sb, dir); - - err = -ENOSPC; - if (group == -1) - goto out; - - for (i = 0; i < sbi->s_groups_count; i++) { - err = -EIO; - - gdp = ext4_get_group_desc(sb, group, &bh2); - if (!gdp) - goto fail; - - brelse(bitmap_bh); - bitmap_bh = read_inode_bitmap(sb, group); - if (!bitmap_bh) - goto fail; - - ino = 0; - -repeat_in_this_group: - ino = ext4_find_next_zero_bit((unsigned long *) - bitmap_bh->b_data, EXT4_INODES_PER_GROUP(sb), ino); - if (ino < EXT4_INODES_PER_GROUP(sb)) { - - BUFFER_TRACE(bitmap_bh, "get_write_access"); - err = ext4_journal_get_write_access(handle, bitmap_bh); - if (err) - goto fail; - - if (!ext4_set_bit_atomic(sb_bgl_lock(sbi, group), - ino, bitmap_bh->b_data)) { - /* we won it */ - BUFFER_TRACE(bitmap_bh, - "call ext4_journal_dirty_metadata"); - err = ext4_journal_dirty_metadata(handle, - bitmap_bh); - if (err) - goto fail; - goto got; - } - /* we lost it */ - jbd2_journal_release_buffer(handle, bitmap_bh); - - if (++ino < EXT4_INODES_PER_GROUP(sb)) - goto repeat_in_this_group; - } - - /* - * This case is possible in concurrent environment. It is very - * rare. We cannot repeat the find_group_xxx() call because - * that will simply return the same blockgroup, because the - * group descriptor metadata has not yet been updated. - * So we just go onto the next blockgroup. - */ - if (++group == sbi->s_groups_count) - group = 0; - } - err = -ENOSPC; - goto out; - -got: - ino += group * EXT4_INODES_PER_GROUP(sb) + 1; - if (ino < EXT4_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) { - ext4_error (sb, "ext4_new_inode", - "reserved inode or inode > inodes count - " - "block_group = %d, inode=%lu", group, ino); - err = -EIO; - goto fail; - } - - BUFFER_TRACE(bh2, "get_write_access"); - err = ext4_journal_get_write_access(handle, bh2); - if (err) goto fail; - spin_lock(sb_bgl_lock(sbi, group)); - gdp->bg_free_inodes_count = - cpu_to_le16(le16_to_cpu(gdp->bg_free_inodes_count) - 1); - if (S_ISDIR(mode)) { - gdp->bg_used_dirs_count = - cpu_to_le16(le16_to_cpu(gdp->bg_used_dirs_count) + 1); - } - spin_unlock(sb_bgl_lock(sbi, group)); - BUFFER_TRACE(bh2, "call ext4_journal_dirty_metadata"); - err = ext4_journal_dirty_metadata(handle, bh2); - if (err) goto fail; - - percpu_counter_dec(&sbi->s_freeinodes_counter); - if (S_ISDIR(mode)) - percpu_counter_inc(&sbi->s_dirs_counter); - sb->s_dirt = 1; - - inode->i_uid = current->fsuid; - if (test_opt (sb, GRPID)) - inode->i_gid = dir->i_gid; - else if (dir->i_mode & S_ISGID) { - inode->i_gid = dir->i_gid; - if (S_ISDIR(mode)) - mode |= S_ISGID; - } else - inode->i_gid = current->fsgid; - inode->i_mode = mode; - - inode->i_ino = ino; - /* This is the optimal IO size (for stat), not the fs block size */ - inode->i_blocks = 0; - inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; - - memset(ei->i_data, 0, sizeof(ei->i_data)); - ei->i_dir_start_lookup = 0; - ei->i_disksize = 0; - - ei->i_flags = EXT4_I(dir)->i_flags & ~EXT4_INDEX_FL; - if (S_ISLNK(mode)) - ei->i_flags &= ~(EXT4_IMMUTABLE_FL|EXT4_APPEND_FL); - /* dirsync only applies to directories */ - if (!S_ISDIR(mode)) - ei->i_flags &= ~EXT4_DIRSYNC_FL; -#ifdef EXT4_FRAGMENTS - ei->i_faddr = 0; - ei->i_frag_no = 0; - ei->i_frag_size = 0; -#endif - ei->i_file_acl = 0; - ei->i_dir_acl = 0; - ei->i_dtime = 0; - ei->i_block_alloc_info = NULL; - ei->i_block_group = group; - - ext4_set_inode_flags(inode); - if (IS_DIRSYNC(inode)) - handle->h_sync = 1; - insert_inode_hash(inode); - spin_lock(&sbi->s_next_gen_lock); - inode->i_generation = sbi->s_next_generation++; - spin_unlock(&sbi->s_next_gen_lock); - - ei->i_state = EXT4_STATE_NEW; - ei->i_extra_isize = - (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) ? - sizeof(struct ext4_inode) - EXT4_GOOD_OLD_INODE_SIZE : 0; - - ret = inode; - if(DQUOT_ALLOC_INODE(inode)) { - err = -EDQUOT; - goto fail_drop; - } - - err = ext4_init_acl(handle, inode, dir); - if (err) - goto fail_free_drop; - - err = ext4_init_security(handle,inode, dir); - if (err) - goto fail_free_drop; - - err = ext4_mark_inode_dirty(handle, inode); - if (err) { - ext4_std_error(sb, err); - goto fail_free_drop; - } - if (test_opt(sb, EXTENTS)) { - EXT4_I(inode)->i_flags |= EXT4_EXTENTS_FL; - ext4_ext_tree_init(handle, inode); - if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) { - err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh); - if (err) goto fail; - EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS); - BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "call ext4_journal_dirty_metadata"); - err = ext4_journal_dirty_metadata(handle, EXT4_SB(sb)->s_sbh); - } - } - - ext4_debug("allocating inode %lu\n", inode->i_ino); - goto really_out; -fail: - ext4_std_error(sb, err); -out: - iput(inode); - ret = ERR_PTR(err); -really_out: - brelse(bitmap_bh); - return ret; - -fail_free_drop: - DQUOT_FREE_INODE(inode); - -fail_drop: - DQUOT_DROP(inode); - inode->i_flags |= S_NOQUOTA; - inode->i_nlink = 0; - iput(inode); - brelse(bitmap_bh); - return ERR_PTR(err); -} - -/* Verify that we are loading a valid orphan from disk */ -struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino) -{ - unsigned long max_ino = le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count); - unsigned long block_group; - int bit; - struct buffer_head *bitmap_bh = NULL; - struct inode *inode = NULL; - - /* Error cases - e2fsck has already cleaned up for us */ - if (ino > max_ino) { - ext4_warning(sb, __FUNCTION__, - "bad orphan ino %lu! e2fsck was run?", ino); - goto out; - } - - block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb); - bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb); - bitmap_bh = read_inode_bitmap(sb, block_group); - if (!bitmap_bh) { - ext4_warning(sb, __FUNCTION__, - "inode bitmap error for orphan %lu", ino); - goto out; - } - - /* Having the inode bit set should be a 100% indicator that this - * is a valid orphan (no e2fsck run on fs). Orphans also include - * inodes that were being truncated, so we can't check i_nlink==0. - */ - if (!ext4_test_bit(bit, bitmap_bh->b_data) || - !(inode = iget(sb, ino)) || is_bad_inode(inode) || - NEXT_ORPHAN(inode) > max_ino) { - ext4_warning(sb, __FUNCTION__, - "bad orphan inode %lu! e2fsck was run?", ino); - printk(KERN_NOTICE "ext4_test_bit(bit=%d, block=%llu) = %d\n", - bit, (unsigned long long)bitmap_bh->b_blocknr, - ext4_test_bit(bit, bitmap_bh->b_data)); - printk(KERN_NOTICE "inode=%p\n", inode); - if (inode) { - printk(KERN_NOTICE "is_bad_inode(inode)=%d\n", - is_bad_inode(inode)); - printk(KERN_NOTICE "NEXT_ORPHAN(inode)=%u\n", - NEXT_ORPHAN(inode)); - printk(KERN_NOTICE "max_ino=%lu\n", max_ino); - } - /* Avoid freeing blocks if we got a bad deleted inode */ - if (inode && inode->i_nlink == 0) - inode->i_blocks = 0; - iput(inode); - inode = NULL; - } -out: - brelse(bitmap_bh); - return inode; -} - -unsigned long ext4_count_free_inodes (struct super_block * sb) -{ - unsigned long desc_count; - struct ext4_group_desc *gdp; - int i; -#ifdef EXT4FS_DEBUG - struct ext4_super_block *es; - unsigned long bitmap_count, x; - struct buffer_head *bitmap_bh = NULL; - - es = EXT4_SB(sb)->s_es; - desc_count = 0; - bitmap_count = 0; - gdp = NULL; - for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) { - gdp = ext4_get_group_desc (sb, i, NULL); - if (!gdp) - continue; - desc_count += le16_to_cpu(gdp->bg_free_inodes_count); - brelse(bitmap_bh); - bitmap_bh = read_inode_bitmap(sb, i); - if (!bitmap_bh) - continue; - - x = ext4_count_free(bitmap_bh, EXT4_INODES_PER_GROUP(sb) / 8); - printk("group %d: stored = %d, counted = %lu\n", - i, le16_to_cpu(gdp->bg_free_inodes_count), x); - bitmap_count += x; - } - brelse(bitmap_bh); - printk("ext4_count_free_inodes: stored = %u, computed = %lu, %lu\n", - le32_to_cpu(es->s_free_inodes_count), desc_count, bitmap_count); - return desc_count; -#else - desc_count = 0; - for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) { - gdp = ext4_get_group_desc (sb, i, NULL); - if (!gdp) - continue; - desc_count += le16_to_cpu(gdp->bg_free_inodes_count); - cond_resched(); - } - return desc_count; -#endif -} - -/* Called at mount-time, super-block is locked */ -unsigned long ext4_count_dirs (struct super_block * sb) -{ - unsigned long count = 0; - int i; - - for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) { - struct ext4_group_desc *gdp = ext4_get_group_desc (sb, i, NULL); - if (!gdp) - continue; - count += le16_to_cpu(gdp->bg_used_dirs_count); - } - return count; -} - diff --git a/trunk/fs/ext4/inode.c b/trunk/fs/ext4/inode.c deleted file mode 100644 index 0a60ec5a16db..000000000000 --- a/trunk/fs/ext4/inode.c +++ /dev/null @@ -1,3233 +0,0 @@ -/* - * linux/fs/ext4/inode.c - * - * Copyright (C) 1992, 1993, 1994, 1995 - * Remy Card (card@masi.ibp.fr) - * Laboratoire MASI - Institut Blaise Pascal - * Universite Pierre et Marie Curie (Paris VI) - * - * from - * - * linux/fs/minix/inode.c - * - * Copyright (C) 1991, 1992 Linus Torvalds - * - * Goal-directed block allocation by Stephen Tweedie - * (sct@redhat.com), 1993, 1998 - * Big-endian to little-endian byte-swapping/bitmaps by - * David S. Miller (davem@caip.rutgers.edu), 1995 - * 64-bit file support on 64-bit platforms by Jakub Jelinek - * (jj@sunsite.ms.mff.cuni.cz) - * - * Assorted race fixes, rewrite of ext4_get_block() by Al Viro, 2000 - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "xattr.h" -#include "acl.h" - -/* - * Test whether an inode is a fast symlink. - */ -static int ext4_inode_is_fast_symlink(struct inode *inode) -{ - int ea_blocks = EXT4_I(inode)->i_file_acl ? - (inode->i_sb->s_blocksize >> 9) : 0; - - return (S_ISLNK(inode->i_mode) && inode->i_blocks - ea_blocks == 0); -} - -/* - * The ext4 forget function must perform a revoke if we are freeing data - * which has been journaled. Metadata (eg. indirect blocks) must be - * revoked in all cases. - * - * "bh" may be NULL: a metadata block may have been freed from memory - * but there may still be a record of it in the journal, and that record - * still needs to be revoked. - */ -int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode, - struct buffer_head *bh, ext4_fsblk_t blocknr) -{ - int err; - - might_sleep(); - - BUFFER_TRACE(bh, "enter"); - - jbd_debug(4, "forgetting bh %p: is_metadata = %d, mode %o, " - "data mode %lx\n", - bh, is_metadata, inode->i_mode, - test_opt(inode->i_sb, DATA_FLAGS)); - - /* Never use the revoke function if we are doing full data - * journaling: there is no need to, and a V1 superblock won't - * support it. Otherwise, only skip the revoke on un-journaled - * data blocks. */ - - if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA || - (!is_metadata && !ext4_should_journal_data(inode))) { - if (bh) { - BUFFER_TRACE(bh, "call jbd2_journal_forget"); - return ext4_journal_forget(handle, bh); - } - return 0; - } - - /* - * data!=journal && (is_metadata || should_journal_data(inode)) - */ - BUFFER_TRACE(bh, "call ext4_journal_revoke"); - err = ext4_journal_revoke(handle, blocknr, bh); - if (err) - ext4_abort(inode->i_sb, __FUNCTION__, - "error %d when attempting revoke", err); - BUFFER_TRACE(bh, "exit"); - return err; -} - -/* - * Work out how many blocks we need to proceed with the next chunk of a - * truncate transaction. - */ -static unsigned long blocks_for_truncate(struct inode *inode) -{ - unsigned long needed; - - needed = inode->i_blocks >> (inode->i_sb->s_blocksize_bits - 9); - - /* Give ourselves just enough room to cope with inodes in which - * i_blocks is corrupt: we've seen disk corruptions in the past - * which resulted in random data in an inode which looked enough - * like a regular file for ext4 to try to delete it. Things - * will go a bit crazy if that happens, but at least we should - * try not to panic the whole kernel. */ - if (needed < 2) - needed = 2; - - /* But we need to bound the transaction so we don't overflow the - * journal. */ - if (needed > EXT4_MAX_TRANS_DATA) - needed = EXT4_MAX_TRANS_DATA; - - return EXT4_DATA_TRANS_BLOCKS(inode->i_sb) + needed; -} - -/* - * Truncate transactions can be complex and absolutely huge. So we need to - * be able to restart the transaction at a conventient checkpoint to make - * sure we don't overflow the journal. - * - * start_transaction gets us a new handle for a truncate transaction, - * and extend_transaction tries to extend the existing one a bit. If - * extend fails, we need to propagate the failure up and restart the - * transaction in the top-level truncate loop. --sct - */ -static handle_t *start_transaction(struct inode *inode) -{ - handle_t *result; - - result = ext4_journal_start(inode, blocks_for_truncate(inode)); - if (!IS_ERR(result)) - return result; - - ext4_std_error(inode->i_sb, PTR_ERR(result)); - return result; -} - -/* - * Try to extend this transaction for the purposes of truncation. - * - * Returns 0 if we managed to create more room. If we can't create more - * room, and the transaction must be restarted we return 1. - */ -static int try_to_extend_transaction(handle_t *handle, struct inode *inode) -{ - if (handle->h_buffer_credits > EXT4_RESERVE_TRANS_BLOCKS) - return 0; - if (!ext4_journal_extend(handle, blocks_for_truncate(inode))) - return 0; - return 1; -} - -/* - * Restart the transaction associated with *handle. This does a commit, - * so before we call here everything must be consistently dirtied against - * this transaction. - */ -static int ext4_journal_test_restart(handle_t *handle, struct inode *inode) -{ - jbd_debug(2, "restarting handle %p\n", handle); - return ext4_journal_restart(handle, blocks_for_truncate(inode)); -} - -/* - * Called at the last iput() if i_nlink is zero. - */ -void ext4_delete_inode (struct inode * inode) -{ - handle_t *handle; - - truncate_inode_pages(&inode->i_data, 0); - - if (is_bad_inode(inode)) - goto no_delete; - - handle = start_transaction(inode); - if (IS_ERR(handle)) { - /* - * If we're going to skip the normal cleanup, we still need to - * make sure that the in-core orphan linked list is properly - * cleaned up. - */ - ext4_orphan_del(NULL, inode); - goto no_delete; - } - - if (IS_SYNC(inode)) - handle->h_sync = 1; - inode->i_size = 0; - if (inode->i_blocks) - ext4_truncate(inode); - /* - * Kill off the orphan record which ext4_truncate created. - * AKPM: I think this can be inside the above `if'. - * Note that ext4_orphan_del() has to be able to cope with the - * deletion of a non-existent orphan - this is because we don't - * know if ext4_truncate() actually created an orphan record. - * (Well, we could do this if we need to, but heck - it works) - */ - ext4_orphan_del(handle, inode); - EXT4_I(inode)->i_dtime = get_seconds(); - - /* - * One subtle ordering requirement: if anything has gone wrong - * (transaction abort, IO errors, whatever), then we can still - * do these next steps (the fs will already have been marked as - * having errors), but we can't free the inode if the mark_dirty - * fails. - */ - if (ext4_mark_inode_dirty(handle, inode)) - /* If that failed, just do the required in-core inode clear. */ - clear_inode(inode); - else - ext4_free_inode(handle, inode); - ext4_journal_stop(handle); - return; -no_delete: - clear_inode(inode); /* We must guarantee clearing of inode... */ -} - -typedef struct { - __le32 *p; - __le32 key; - struct buffer_head *bh; -} Indirect; - -static inline void add_chain(Indirect *p, struct buffer_head *bh, __le32 *v) -{ - p->key = *(p->p = v); - p->bh = bh; -} - -static int verify_chain(Indirect *from, Indirect *to) -{ - while (from <= to && from->key == *from->p) - from++; - return (from > to); -} - -/** - * ext4_block_to_path - parse the block number into array of offsets - * @inode: inode in question (we are only interested in its superblock) - * @i_block: block number to be parsed - * @offsets: array to store the offsets in - * @boundary: set this non-zero if the referred-to block is likely to be - * followed (on disk) by an indirect block. - * - * To store the locations of file's data ext4 uses a data structure common - * for UNIX filesystems - tree of pointers anchored in the inode, with - * data blocks at leaves and indirect blocks in intermediate nodes. - * This function translates the block number into path in that tree - - * return value is the path length and @offsets[n] is the offset of - * pointer to (n+1)th node in the nth one. If @block is out of range - * (negative or too large) warning is printed and zero returned. - * - * Note: function doesn't find node addresses, so no IO is needed. All - * we need to know is the capacity of indirect blocks (taken from the - * inode->i_sb). - */ - -/* - * Portability note: the last comparison (check that we fit into triple - * indirect block) is spelled differently, because otherwise on an - * architecture with 32-bit longs and 8Kb pages we might get into trouble - * if our filesystem had 8Kb blocks. We might use long long, but that would - * kill us on x86. Oh, well, at least the sign propagation does not matter - - * i_block would have to be negative in the very beginning, so we would not - * get there at all. - */ - -static int ext4_block_to_path(struct inode *inode, - long i_block, int offsets[4], int *boundary) -{ - int ptrs = EXT4_ADDR_PER_BLOCK(inode->i_sb); - int ptrs_bits = EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb); - const long direct_blocks = EXT4_NDIR_BLOCKS, - indirect_blocks = ptrs, - double_blocks = (1 << (ptrs_bits * 2)); - int n = 0; - int final = 0; - - if (i_block < 0) { - ext4_warning (inode->i_sb, "ext4_block_to_path", "block < 0"); - } else if (i_block < direct_blocks) { - offsets[n++] = i_block; - final = direct_blocks; - } else if ( (i_block -= direct_blocks) < indirect_blocks) { - offsets[n++] = EXT4_IND_BLOCK; - offsets[n++] = i_block; - final = ptrs; - } else if ((i_block -= indirect_blocks) < double_blocks) { - offsets[n++] = EXT4_DIND_BLOCK; - offsets[n++] = i_block >> ptrs_bits; - offsets[n++] = i_block & (ptrs - 1); - final = ptrs; - } else if (((i_block -= double_blocks) >> (ptrs_bits * 2)) < ptrs) { - offsets[n++] = EXT4_TIND_BLOCK; - offsets[n++] = i_block >> (ptrs_bits * 2); - offsets[n++] = (i_block >> ptrs_bits) & (ptrs - 1); - offsets[n++] = i_block & (ptrs - 1); - final = ptrs; - } else { - ext4_warning(inode->i_sb, "ext4_block_to_path", "block > big"); - } - if (boundary) - *boundary = final - 1 - (i_block & (ptrs - 1)); - return n; -} - -/** - * ext4_get_branch - read the chain of indirect blocks leading to data - * @inode: inode in question - * @depth: depth of the chain (1 - direct pointer, etc.) - * @offsets: offsets of pointers in inode/indirect blocks - * @chain: place to store the result - * @err: here we store the error value - * - * Function fills the array of triples and returns %NULL - * if everything went OK or the pointer to the last filled triple - * (incomplete one) otherwise. Upon the return chain[i].key contains - * the number of (i+1)-th block in the chain (as it is stored in memory, - * i.e. little-endian 32-bit), chain[i].p contains the address of that - * number (it points into struct inode for i==0 and into the bh->b_data - * for i>0) and chain[i].bh points to the buffer_head of i-th indirect - * block for i>0 and NULL for i==0. In other words, it holds the block - * numbers of the chain, addresses they were taken from (and where we can - * verify that chain did not change) and buffer_heads hosting these - * numbers. - * - * Function stops when it stumbles upon zero pointer (absent block) - * (pointer to last triple returned, *@err == 0) - * or when it gets an IO error reading an indirect block - * (ditto, *@err == -EIO) - * or when it notices that chain had been changed while it was reading - * (ditto, *@err == -EAGAIN) - * or when it reads all @depth-1 indirect blocks successfully and finds - * the whole chain, all way to the data (returns %NULL, *err == 0). - */ -static Indirect *ext4_get_branch(struct inode *inode, int depth, int *offsets, - Indirect chain[4], int *err) -{ - struct super_block *sb = inode->i_sb; - Indirect *p = chain; - struct buffer_head *bh; - - *err = 0; - /* i_data is not going away, no lock needed */ - add_chain (chain, NULL, EXT4_I(inode)->i_data + *offsets); - if (!p->key) - goto no_block; - while (--depth) { - bh = sb_bread(sb, le32_to_cpu(p->key)); - if (!bh) - goto failure; - /* Reader: pointers */ - if (!verify_chain(chain, p)) - goto changed; - add_chain(++p, bh, (__le32*)bh->b_data + *++offsets); - /* Reader: end */ - if (!p->key) - goto no_block; - } - return NULL; - -changed: - brelse(bh); - *err = -EAGAIN; - goto no_block; -failure: - *err = -EIO; -no_block: - return p; -} - -/** - * ext4_find_near - find a place for allocation with sufficient locality - * @inode: owner - * @ind: descriptor of indirect block. - * - * This function returns the prefered place for block allocation. - * It is used when heuristic for sequential allocation fails. - * Rules are: - * + if there is a block to the left of our position - allocate near it. - * + if pointer will live in indirect block - allocate near that block. - * + if pointer will live in inode - allocate in the same - * cylinder group. - * - * In the latter case we colour the starting block by the callers PID to - * prevent it from clashing with concurrent allocations for a different inode - * in the same block group. The PID is used here so that functionally related - * files will be close-by on-disk. - * - * Caller must make sure that @ind is valid and will stay that way. - */ -static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind) -{ - struct ext4_inode_info *ei = EXT4_I(inode); - __le32 *start = ind->bh ? (__le32*) ind->bh->b_data : ei->i_data; - __le32 *p; - ext4_fsblk_t bg_start; - ext4_grpblk_t colour; - - /* Try to find previous block */ - for (p = ind->p - 1; p >= start; p--) { - if (*p) - return le32_to_cpu(*p); - } - - /* No such thing, so let's try location of indirect block */ - if (ind->bh) - return ind->bh->b_blocknr; - - /* - * It is going to be referred to from the inode itself? OK, just put it - * into the same cylinder group then. - */ - bg_start = ext4_group_first_block_no(inode->i_sb, ei->i_block_group); - colour = (current->pid % 16) * - (EXT4_BLOCKS_PER_GROUP(inode->i_sb) / 16); - return bg_start + colour; -} - -/** - * ext4_find_goal - find a prefered place for allocation. - * @inode: owner - * @block: block we want - * @chain: chain of indirect blocks - * @partial: pointer to the last triple within a chain - * @goal: place to store the result. - * - * Normally this function find the prefered place for block allocation, - * stores it in *@goal and returns zero. - */ - -static ext4_fsblk_t ext4_find_goal(struct inode *inode, long block, - Indirect chain[4], Indirect *partial) -{ - struct ext4_block_alloc_info *block_i; - - block_i = EXT4_I(inode)->i_block_alloc_info; - - /* - * try the heuristic for sequential allocation, - * failing that at least try to get decent locality. - */ - if (block_i && (block == block_i->last_alloc_logical_block + 1) - && (block_i->last_alloc_physical_block != 0)) { - return block_i->last_alloc_physical_block + 1; - } - - return ext4_find_near(inode, partial); -} - -/** - * ext4_blks_to_allocate: Look up the block map and count the number - * of direct blocks need to be allocated for the given branch. - * - * @branch: chain of indirect blocks - * @k: number of blocks need for indirect blocks - * @blks: number of data blocks to be mapped. - * @blocks_to_boundary: the offset in the indirect block - * - * return the total number of blocks to be allocate, including the - * direct and indirect blocks. - */ -static int ext4_blks_to_allocate(Indirect *branch, int k, unsigned long blks, - int blocks_to_boundary) -{ - unsigned long count = 0; - - /* - * Simple case, [t,d]Indirect block(s) has not allocated yet - * then it's clear blocks on that path have not allocated - */ - if (k > 0) { - /* right now we don't handle cross boundary allocation */ - if (blks < blocks_to_boundary + 1) - count += blks; - else - count += blocks_to_boundary + 1; - return count; - } - - count++; - while (count < blks && count <= blocks_to_boundary && - le32_to_cpu(*(branch[0].p + count)) == 0) { - count++; - } - return count; -} - -/** - * ext4_alloc_blocks: multiple allocate blocks needed for a branch - * @indirect_blks: the number of blocks need to allocate for indirect - * blocks - * - * @new_blocks: on return it will store the new block numbers for - * the indirect blocks(if needed) and the first direct block, - * @blks: on return it will store the total number of allocated - * direct blocks - */ -static int ext4_alloc_blocks(handle_t *handle, struct inode *inode, - ext4_fsblk_t goal, int indirect_blks, int blks, - ext4_fsblk_t new_blocks[4], int *err) -{ - int target, i; - unsigned long count = 0; - int index = 0; - ext4_fsblk_t current_block = 0; - int ret = 0; - - /* - * Here we try to allocate the requested multiple blocks at once, - * on a best-effort basis. - * To build a branch, we should allocate blocks for - * the indirect blocks(if not allocated yet), and at least - * the first direct block of this branch. That's the - * minimum number of blocks need to allocate(required) - */ - target = blks + indirect_blks; - - while (1) { - count = target; - /* allocating blocks for indirect blocks and direct blocks */ - current_block = ext4_new_blocks(handle,inode,goal,&count,err); - if (*err) - goto failed_out; - - target -= count; - /* allocate blocks for indirect blocks */ - while (index < indirect_blks && count) { - new_blocks[index++] = current_block++; - count--; - } - - if (count > 0) - break; - } - - /* save the new block number for the first direct block */ - new_blocks[index] = current_block; - - /* total number of blocks allocated for direct blocks */ - ret = count; - *err = 0; - return ret; -failed_out: - for (i = 0; i key). Upon the exit we have the same - * picture as after the successful ext4_get_block(), except that in one - * place chain is disconnected - *branch->p is still zero (we did not - * set the last link), but branch->key contains the number that should - * be placed into *branch->p to fill that gap. - * - * If allocation fails we free all blocks we've allocated (and forget - * their buffer_heads) and return the error value the from failed - * ext4_alloc_block() (normally -ENOSPC). Otherwise we set the chain - * as described above and return 0. - */ -static int ext4_alloc_branch(handle_t *handle, struct inode *inode, - int indirect_blks, int *blks, ext4_fsblk_t goal, - int *offsets, Indirect *branch) -{ - int blocksize = inode->i_sb->s_blocksize; - int i, n = 0; - int err = 0; - struct buffer_head *bh; - int num; - ext4_fsblk_t new_blocks[4]; - ext4_fsblk_t current_block; - - num = ext4_alloc_blocks(handle, inode, goal, indirect_blks, - *blks, new_blocks, &err); - if (err) - return err; - - branch[0].key = cpu_to_le32(new_blocks[0]); - /* - * metadata blocks and data blocks are allocated. - */ - for (n = 1; n <= indirect_blks; n++) { - /* - * Get buffer_head for parent block, zero it out - * and set the pointer to new one, then send - * parent to disk. - */ - bh = sb_getblk(inode->i_sb, new_blocks[n-1]); - branch[n].bh = bh; - lock_buffer(bh); - BUFFER_TRACE(bh, "call get_create_access"); - err = ext4_journal_get_create_access(handle, bh); - if (err) { - unlock_buffer(bh); - brelse(bh); - goto failed; - } - - memset(bh->b_data, 0, blocksize); - branch[n].p = (__le32 *) bh->b_data + offsets[n]; - branch[n].key = cpu_to_le32(new_blocks[n]); - *branch[n].p = branch[n].key; - if ( n == indirect_blks) { - current_block = new_blocks[n]; - /* - * End of chain, update the last new metablock of - * the chain to point to the new allocated - * data blocks numbers - */ - for (i=1; i < num; i++) - *(branch[n].p + i) = cpu_to_le32(++current_block); - } - BUFFER_TRACE(bh, "marking uptodate"); - set_buffer_uptodate(bh); - unlock_buffer(bh); - - BUFFER_TRACE(bh, "call ext4_journal_dirty_metadata"); - err = ext4_journal_dirty_metadata(handle, bh); - if (err) - goto failed; - } - *blks = num; - return err; -failed: - /* Allocation failed, free what we already allocated */ - for (i = 1; i <= n ; i++) { - BUFFER_TRACE(branch[i].bh, "call jbd2_journal_forget"); - ext4_journal_forget(handle, branch[i].bh); - } - for (i = 0; i i_blocks, etc.). In case of success we end up with the full - * chain to new block and return 0. - */ -static int ext4_splice_branch(handle_t *handle, struct inode *inode, - long block, Indirect *where, int num, int blks) -{ - int i; - int err = 0; - struct ext4_block_alloc_info *block_i; - ext4_fsblk_t current_block; - - block_i = EXT4_I(inode)->i_block_alloc_info; - /* - * If we're splicing into a [td]indirect block (as opposed to the - * inode) then we need to get write access to the [td]indirect block - * before the splice. - */ - if (where->bh) { - BUFFER_TRACE(where->bh, "get_write_access"); - err = ext4_journal_get_write_access(handle, where->bh); - if (err) - goto err_out; - } - /* That's it */ - - *where->p = where->key; - - /* - * Update the host buffer_head or inode to point to more just allocated - * direct blocks blocks - */ - if (num == 0 && blks > 1) { - current_block = le32_to_cpu(where->key) + 1; - for (i = 1; i < blks; i++) - *(where->p + i ) = cpu_to_le32(current_block++); - } - - /* - * update the most recently allocated logical & physical block - * in i_block_alloc_info, to assist find the proper goal block for next - * allocation - */ - if (block_i) { - block_i->last_alloc_logical_block = block + blks - 1; - block_i->last_alloc_physical_block = - le32_to_cpu(where[num].key) + blks - 1; - } - - /* We are done with atomic stuff, now do the rest of housekeeping */ - - inode->i_ctime = CURRENT_TIME_SEC; - ext4_mark_inode_dirty(handle, inode); - - /* had we spliced it onto indirect block? */ - if (where->bh) { - /* - * If we spliced it onto an indirect block, we haven't - * altered the inode. Note however that if it is being spliced - * onto an indirect block at the very end of the file (the - * file is growing) then we *will* alter the inode to reflect - * the new i_size. But that is not done here - it is done in - * generic_commit_write->__mark_inode_dirty->ext4_dirty_inode. - */ - jbd_debug(5, "splicing indirect only\n"); - BUFFER_TRACE(where->bh, "call ext4_journal_dirty_metadata"); - err = ext4_journal_dirty_metadata(handle, where->bh); - if (err) - goto err_out; - } else { - /* - * OK, we spliced it into the inode itself on a direct block. - * Inode was dirtied above. - */ - jbd_debug(5, "splicing direct\n"); - } - return err; - -err_out: - for (i = 1; i <= num; i++) { - BUFFER_TRACE(where[i].bh, "call jbd2_journal_forget"); - ext4_journal_forget(handle, where[i].bh); - ext4_free_blocks(handle,inode,le32_to_cpu(where[i-1].key),1); - } - ext4_free_blocks(handle, inode, le32_to_cpu(where[num].key), blks); - - return err; -} - -/* - * Allocation strategy is simple: if we have to allocate something, we will - * have to go the whole way to leaf. So let's do it before attaching anything - * to tree, set linkage between the newborn blocks, write them if sync is - * required, recheck the path, free and repeat if check fails, otherwise - * set the last missing link (that will protect us from any truncate-generated - * removals - all blocks on the path are immune now) and possibly force the - * write on the parent block. - * That has a nice additional property: no special recovery from the failed - * allocations is needed - we simply release blocks and do not touch anything - * reachable from inode. - * - * `handle' can be NULL if create == 0. - * - * The BKL may not be held on entry here. Be sure to take it early. - * return > 0, # of blocks mapped or allocated. - * return = 0, if plain lookup failed. - * return < 0, error case. - */ -int ext4_get_blocks_handle(handle_t *handle, struct inode *inode, - sector_t iblock, unsigned long maxblocks, - struct buffer_head *bh_result, - int create, int extend_disksize) -{ - int err = -EIO; - int offsets[4]; - Indirect chain[4]; - Indirect *partial; - ext4_fsblk_t goal; - int indirect_blks; - int blocks_to_boundary = 0; - int depth; - struct ext4_inode_info *ei = EXT4_I(inode); - int count = 0; - ext4_fsblk_t first_block = 0; - - - J_ASSERT(!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)); - J_ASSERT(handle != NULL || create == 0); - depth = ext4_block_to_path(inode,iblock,offsets,&blocks_to_boundary); - - if (depth == 0) - goto out; - - partial = ext4_get_branch(inode, depth, offsets, chain, &err); - - /* Simplest case - block found, no allocation needed */ - if (!partial) { - first_block = le32_to_cpu(chain[depth - 1].key); - clear_buffer_new(bh_result); - count++; - /*map more blocks*/ - while (count < maxblocks && count <= blocks_to_boundary) { - ext4_fsblk_t blk; - - if (!verify_chain(chain, partial)) { - /* - * Indirect block might be removed by - * truncate while we were reading it. - * Handling of that case: forget what we've - * got now. Flag the err as EAGAIN, so it - * will reread. - */ - err = -EAGAIN; - count = 0; - break; - } - blk = le32_to_cpu(*(chain[depth-1].p + count)); - - if (blk == first_block + count) - count++; - else - break; - } - if (err != -EAGAIN) - goto got_it; - } - - /* Next simple case - plain lookup or failed read of indirect block */ - if (!create || err == -EIO) - goto cleanup; - - mutex_lock(&ei->truncate_mutex); - - /* - * If the indirect block is missing while we are reading - * the chain(ext4_get_branch() returns -EAGAIN err), or - * if the chain has been changed after we grab the semaphore, - * (either because another process truncated this branch, or - * another get_block allocated this branch) re-grab the chain to see if - * the request block has been allocated or not. - * - * Since we already block the truncate/other get_block - * at this point, we will have the current copy of the chain when we - * splice the branch into the tree. - */ - if (err == -EAGAIN || !verify_chain(chain, partial)) { - while (partial > chain) { - brelse(partial->bh); - partial--; - } - partial = ext4_get_branch(inode, depth, offsets, chain, &err); - if (!partial) { - count++; - mutex_unlock(&ei->truncate_mutex); - if (err) - goto cleanup; - clear_buffer_new(bh_result); - goto got_it; - } - } - - /* - * Okay, we need to do block allocation. Lazily initialize the block - * allocation info here if necessary - */ - if (S_ISREG(inode->i_mode) && (!ei->i_block_alloc_info)) - ext4_init_block_alloc_info(inode); - - goal = ext4_find_goal(inode, iblock, chain, partial); - - /* the number of blocks need to allocate for [d,t]indirect blocks */ - indirect_blks = (chain + depth) - partial - 1; - - /* - * Next look up the indirect map to count the totoal number of - * direct blocks to allocate for this branch. - */ - count = ext4_blks_to_allocate(partial, indirect_blks, - maxblocks, blocks_to_boundary); - /* - * Block out ext4_truncate while we alter the tree - */ - err = ext4_alloc_branch(handle, inode, indirect_blks, &count, goal, - offsets + (partial - chain), partial); - - /* - * The ext4_splice_branch call will free and forget any buffers - * on the new chain if there is a failure, but that risks using - * up transaction credits, especially for bitmaps where the - * credits cannot be returned. Can we handle this somehow? We - * may need to return -EAGAIN upwards in the worst case. --sct - */ - if (!err) - err = ext4_splice_branch(handle, inode, iblock, - partial, indirect_blks, count); - /* - * i_disksize growing is protected by truncate_mutex. Don't forget to - * protect it if you're about to implement concurrent - * ext4_get_block() -bzzz - */ - if (!err && extend_disksize && inode->i_size > ei->i_disksize) - ei->i_disksize = inode->i_size; - mutex_unlock(&ei->truncate_mutex); - if (err) - goto cleanup; - - set_buffer_new(bh_result); -got_it: - map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key)); - if (count > blocks_to_boundary) - set_buffer_boundary(bh_result); - err = count; - /* Clean up and exit */ - partial = chain + depth - 1; /* the whole chain */ -cleanup: - while (partial > chain) { - BUFFER_TRACE(partial->bh, "call brelse"); - brelse(partial->bh); - partial--; - } - BUFFER_TRACE(bh_result, "returned"); -out: - return err; -} - -#define DIO_CREDITS (EXT4_RESERVE_TRANS_BLOCKS + 32) - -static int ext4_get_block(struct inode *inode, sector_t iblock, - struct buffer_head *bh_result, int create) -{ - handle_t *handle = journal_current_handle(); - int ret = 0; - unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; - - if (!create) - goto get_block; /* A read */ - - if (max_blocks == 1) - goto get_block; /* A single block get */ - - if (handle->h_transaction->t_state == T_LOCKED) { - /* - * Huge direct-io writes can hold off commits for long - * periods of time. Let this commit run. - */ - ext4_journal_stop(handle); - handle = ext4_journal_start(inode, DIO_CREDITS); - if (IS_ERR(handle)) - ret = PTR_ERR(handle); - goto get_block; - } - - if (handle->h_buffer_credits <= EXT4_RESERVE_TRANS_BLOCKS) { - /* - * Getting low on buffer credits... - */ - ret = ext4_journal_extend(handle, DIO_CREDITS); - if (ret > 0) { - /* - * Couldn't extend the transaction. Start a new one. - */ - ret = ext4_journal_restart(handle, DIO_CREDITS); - } - } - -get_block: - if (ret == 0) { - ret = ext4_get_blocks_wrap(handle, inode, iblock, - max_blocks, bh_result, create, 0); - if (ret > 0) { - bh_result->b_size = (ret << inode->i_blkbits); - ret = 0; - } - } - return ret; -} - -/* - * `handle' can be NULL if create is zero - */ -struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode, - long block, int create, int *errp) -{ - struct buffer_head dummy; - int fatal = 0, err; - - J_ASSERT(handle != NULL || create == 0); - - dummy.b_state = 0; - dummy.b_blocknr = -1000; - buffer_trace_init(&dummy.b_history); - err = ext4_get_blocks_wrap(handle, inode, block, 1, - &dummy, create, 1); - /* - * ext4_get_blocks_handle() returns number of blocks - * mapped. 0 in case of a HOLE. - */ - if (err > 0) { - if (err > 1) - WARN_ON(1); - err = 0; - } - *errp = err; - if (!err && buffer_mapped(&dummy)) { - struct buffer_head *bh; - bh = sb_getblk(inode->i_sb, dummy.b_blocknr); - if (!bh) { - *errp = -EIO; - goto err; - } - if (buffer_new(&dummy)) { - J_ASSERT(create != 0); - J_ASSERT(handle != 0); - - /* - * Now that we do not always journal data, we should - * keep in mind whether this should always journal the - * new buffer as metadata. For now, regular file - * writes use ext4_get_block instead, so it's not a - * problem. - */ - lock_buffer(bh); - BUFFER_TRACE(bh, "call get_create_access"); - fatal = ext4_journal_get_create_access(handle, bh); - if (!fatal && !buffer_uptodate(bh)) { - memset(bh->b_data,0,inode->i_sb->s_blocksize); - set_buffer_uptodate(bh); - } - unlock_buffer(bh); - BUFFER_TRACE(bh, "call ext4_journal_dirty_metadata"); - err = ext4_journal_dirty_metadata(handle, bh); - if (!fatal) - fatal = err; - } else { - BUFFER_TRACE(bh, "not a new buffer"); - } - if (fatal) { - *errp = fatal; - brelse(bh); - bh = NULL; - } - return bh; - } -err: - return NULL; -} - -struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode, - int block, int create, int *err) -{ - struct buffer_head * bh; - - bh = ext4_getblk(handle, inode, block, create, err); - if (!bh) - return bh; - if (buffer_uptodate(bh)) - return bh; - ll_rw_block(READ_META, 1, &bh); - wait_on_buffer(bh); - if (buffer_uptodate(bh)) - return bh; - put_bh(bh); - *err = -EIO; - return NULL; -} - -static int walk_page_buffers( handle_t *handle, - struct buffer_head *head, - unsigned from, - unsigned to, - int *partial, - int (*fn)( handle_t *handle, - struct buffer_head *bh)) -{ - struct buffer_head *bh; - unsigned block_start, block_end; - unsigned blocksize = head->b_size; - int err, ret = 0; - struct buffer_head *next; - - for ( bh = head, block_start = 0; - ret == 0 && (bh != head || !block_start); - block_start = block_end, bh = next) - { - next = bh->b_this_page; - block_end = block_start + blocksize; - if (block_end <= from || block_start >= to) { - if (partial && !buffer_uptodate(bh)) - *partial = 1; - continue; - } - err = (*fn)(handle, bh); - if (!ret) - ret = err; - } - return ret; -} - -/* - * To preserve ordering, it is essential that the hole instantiation and - * the data write be encapsulated in a single transaction. We cannot - * close off a transaction and start a new one between the ext4_get_block() - * and the commit_write(). So doing the jbd2_journal_start at the start of - * prepare_write() is the right place. - * - * Also, this function can nest inside ext4_writepage() -> - * block_write_full_page(). In that case, we *know* that ext4_writepage() - * has generated enough buffer credits to do the whole page. So we won't - * block on the journal in that case, which is good, because the caller may - * be PF_MEMALLOC. - * - * By accident, ext4 can be reentered when a transaction is open via - * quota file writes. If we were to commit the transaction while thus - * reentered, there can be a deadlock - we would be holding a quota - * lock, and the commit would never complete if another thread had a - * transaction open and was blocking on the quota lock - a ranking - * violation. - * - * So what we do is to rely on the fact that jbd2_journal_stop/journal_start - * will _not_ run commit under these circumstances because handle->h_ref - * is elevated. We'll still have enough credits for the tiny quotafile - * write. - */ -static int do_journal_get_write_access(handle_t *handle, - struct buffer_head *bh) -{ - if (!buffer_mapped(bh) || buffer_freed(bh)) - return 0; - return ext4_journal_get_write_access(handle, bh); -} - -static int ext4_prepare_write(struct file *file, struct page *page, - unsigned from, unsigned to) -{ - struct inode *inode = page->mapping->host; - int ret, needed_blocks = ext4_writepage_trans_blocks(inode); - handle_t *handle; - int retries = 0; - -retry: - handle = ext4_journal_start(inode, needed_blocks); - if (IS_ERR(handle)) { - ret = PTR_ERR(handle); - goto out; - } - if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode)) - ret = nobh_prepare_write(page, from, to, ext4_get_block); - else - ret = block_prepare_write(page, from, to, ext4_get_block); - if (ret) - goto prepare_write_failed; - - if (ext4_should_journal_data(inode)) { - ret = walk_page_buffers(handle, page_buffers(page), - from, to, NULL, do_journal_get_write_access); - } -prepare_write_failed: - if (ret) - ext4_journal_stop(handle); - if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) - goto retry; -out: - return ret; -} - -int ext4_journal_dirty_data(handle_t *handle, struct buffer_head *bh) -{ - int err = jbd2_journal_dirty_data(handle, bh); - if (err) - ext4_journal_abort_handle(__FUNCTION__, __FUNCTION__, - bh, handle,err); - return err; -} - -/* For commit_write() in data=journal mode */ -static int commit_write_fn(handle_t *handle, struct buffer_head *bh) -{ - if (!buffer_mapped(bh) || buffer_freed(bh)) - return 0; - set_buffer_uptodate(bh); - return ext4_journal_dirty_metadata(handle, bh); -} - -/* - * We need to pick up the new inode size which generic_commit_write gave us - * `file' can be NULL - eg, when called from page_symlink(). - * - * ext4 never places buffers on inode->i_mapping->private_list. metadata - * buffers are managed internally. - */ -static int ext4_ordered_commit_write(struct file *file, struct page *page, - unsigned from, unsigned to) -{ - handle_t *handle = ext4_journal_current_handle(); - struct inode *inode = page->mapping->host; - int ret = 0, ret2; - - ret = walk_page_buffers(handle, page_buffers(page), - from, to, NULL, ext4_journal_dirty_data); - - if (ret == 0) { - /* - * generic_commit_write() will run mark_inode_dirty() if i_size - * changes. So let's piggyback the i_disksize mark_inode_dirty - * into that. - */ - loff_t new_i_size; - - new_i_size = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; - if (new_i_size > EXT4_I(inode)->i_disksize) - EXT4_I(inode)->i_disksize = new_i_size; - ret = generic_commit_write(file, page, from, to); - } - ret2 = ext4_journal_stop(handle); - if (!ret) - ret = ret2; - return ret; -} - -static int ext4_writeback_commit_write(struct file *file, struct page *page, - unsigned from, unsigned to) -{ - handle_t *handle = ext4_journal_current_handle(); - struct inode *inode = page->mapping->host; - int ret = 0, ret2; - loff_t new_i_size; - - new_i_size = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; - if (new_i_size > EXT4_I(inode)->i_disksize) - EXT4_I(inode)->i_disksize = new_i_size; - - if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode)) - ret = nobh_commit_write(file, page, from, to); - else - ret = generic_commit_write(file, page, from, to); - - ret2 = ext4_journal_stop(handle); - if (!ret) - ret = ret2; - return ret; -} - -static int ext4_journalled_commit_write(struct file *file, - struct page *page, unsigned from, unsigned to) -{ - handle_t *handle = ext4_journal_current_handle(); - struct inode *inode = page->mapping->host; - int ret = 0, ret2; - int partial = 0; - loff_t pos; - - /* - * Here we duplicate the generic_commit_write() functionality - */ - pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; - - ret = walk_page_buffers(handle, page_buffers(page), from, - to, &partial, commit_write_fn); - if (!partial) - SetPageUptodate(page); - if (pos > inode->i_size) - i_size_write(inode, pos); - EXT4_I(inode)->i_state |= EXT4_STATE_JDATA; - if (inode->i_size > EXT4_I(inode)->i_disksize) { - EXT4_I(inode)->i_disksize = inode->i_size; - ret2 = ext4_mark_inode_dirty(handle, inode); - if (!ret) - ret = ret2; - } - ret2 = ext4_journal_stop(handle); - if (!ret) - ret = ret2; - return ret; -} - -/* - * bmap() is special. It gets used by applications such as lilo and by - * the swapper to find the on-disk block of a specific piece of data. - * - * Naturally, this is dangerous if the block concerned is still in the - * journal. If somebody makes a swapfile on an ext4 data-journaling - * filesystem and enables swap, then they may get a nasty shock when the - * data getting swapped to that swapfile suddenly gets overwritten by - * the original zero's written out previously to the journal and - * awaiting writeback in the kernel's buffer cache. - * - * So, if we see any bmap calls here on a modified, data-journaled file, - * take extra steps to flush any blocks which might be in the cache. - */ -static sector_t ext4_bmap(struct address_space *mapping, sector_t block) -{ - struct inode *inode = mapping->host; - journal_t *journal; - int err; - - if (EXT4_I(inode)->i_state & EXT4_STATE_JDATA) { - /* - * This is a REALLY heavyweight approach, but the use of - * bmap on dirty files is expected to be extremely rare: - * only if we run lilo or swapon on a freshly made file - * do we expect this to happen. - * - * (bmap requires CAP_SYS_RAWIO so this does not - * represent an unprivileged user DOS attack --- we'd be - * in trouble if mortal users could trigger this path at - * will.) - * - * NB. EXT4_STATE_JDATA is not set on files other than - * regular files. If somebody wants to bmap a directory - * or symlink and gets confused because the buffer - * hasn't yet been flushed to disk, they deserve - * everything they get. - */ - - EXT4_I(inode)->i_state &= ~EXT4_STATE_JDATA; - journal = EXT4_JOURNAL(inode); - jbd2_journal_lock_updates(journal); - err = jbd2_journal_flush(journal); - jbd2_journal_unlock_updates(journal); - - if (err) - return 0; - } - - return generic_block_bmap(mapping,block,ext4_get_block); -} - -static int bget_one(handle_t *handle, struct buffer_head *bh) -{ - get_bh(bh); - return 0; -} - -static int bput_one(handle_t *handle, struct buffer_head *bh) -{ - put_bh(bh); - return 0; -} - -static int jbd2_journal_dirty_data_fn(handle_t *handle, struct buffer_head *bh) -{ - if (buffer_mapped(bh)) - return ext4_journal_dirty_data(handle, bh); - return 0; -} - -/* - * Note that we always start a transaction even if we're not journalling - * data. This is to preserve ordering: any hole instantiation within - * __block_write_full_page -> ext4_get_block() should be journalled - * along with the data so we don't crash and then get metadata which - * refers to old data. - * - * In all journalling modes block_write_full_page() will start the I/O. - * - * Problem: - * - * ext4_writepage() -> kmalloc() -> __alloc_pages() -> page_launder() -> - * ext4_writepage() - * - * Similar for: - * - * ext4_file_write() -> generic_file_write() -> __alloc_pages() -> ... - * - * Same applies to ext4_get_block(). We will deadlock on various things like - * lock_journal and i_truncate_mutex. - * - * Setting PF_MEMALLOC here doesn't work - too many internal memory - * allocations fail. - * - * 16May01: If we're reentered then journal_current_handle() will be - * non-zero. We simply *return*. - * - * 1 July 2001: @@@ FIXME: - * In journalled data mode, a data buffer may be metadata against the - * current transaction. But the same file is part of a shared mapping - * and someone does a writepage() on it. - * - * We will move the buffer onto the async_data list, but *after* it has - * been dirtied. So there's a small window where we have dirty data on - * BJ_Metadata. - * - * Note that this only applies to the last partial page in the file. The - * bit which block_write_full_page() uses prepare/commit for. (That's - * broken code anyway: it's wrong for msync()). - * - * It's a rare case: affects the final partial page, for journalled data - * where the file is subject to bith write() and writepage() in the same - * transction. To fix it we'll need a custom block_write_full_page(). - * We'll probably need that anyway for journalling writepage() output. - * - * We don't honour synchronous mounts for writepage(). That would be - * disastrous. Any write() or metadata operation will sync the fs for - * us. - * - * AKPM2: if all the page's buffers are mapped to disk and !data=journal, - * we don't need to open a transaction here. - */ -static int ext4_ordered_writepage(struct page *page, - struct writeback_control *wbc) -{ - struct inode *inode = page->mapping->host; - struct buffer_head *page_bufs; - handle_t *handle = NULL; - int ret = 0; - int err; - - J_ASSERT(PageLocked(page)); - - /* - * We give up here if we're reentered, because it might be for a - * different filesystem. - */ - if (ext4_journal_current_handle()) - goto out_fail; - - handle = ext4_journal_start(inode, ext4_writepage_trans_blocks(inode)); - - if (IS_ERR(handle)) { - ret = PTR_ERR(handle); - goto out_fail; - } - - if (!page_has_buffers(page)) { - create_empty_buffers(page, inode->i_sb->s_blocksize, - (1 << BH_Dirty)|(1 << BH_Uptodate)); - } - page_bufs = page_buffers(page); - walk_page_buffers(handle, page_bufs, 0, - PAGE_CACHE_SIZE, NULL, bget_one); - - ret = block_write_full_page(page, ext4_get_block, wbc); - - /* - * The page can become unlocked at any point now, and - * truncate can then come in and change things. So we - * can't touch *page from now on. But *page_bufs is - * safe due to elevated refcount. - */ - - /* - * And attach them to the current transaction. But only if - * block_write_full_page() succeeded. Otherwise they are unmapped, - * and generally junk. - */ - if (ret == 0) { - err = walk_page_buffers(handle, page_bufs, 0, PAGE_CACHE_SIZE, - NULL, jbd2_journal_dirty_data_fn); - if (!ret) - ret = err; - } - walk_page_buffers(handle, page_bufs, 0, - PAGE_CACHE_SIZE, NULL, bput_one); - err = ext4_journal_stop(handle); - if (!ret) - ret = err; - return ret; - -out_fail: - redirty_page_for_writepage(wbc, page); - unlock_page(page); - return ret; -} - -static int ext4_writeback_writepage(struct page *page, - struct writeback_control *wbc) -{ - struct inode *inode = page->mapping->host; - handle_t *handle = NULL; - int ret = 0; - int err; - - if (ext4_journal_current_handle()) - goto out_fail; - - handle = ext4_journal_start(inode, ext4_writepage_trans_blocks(inode)); - if (IS_ERR(handle)) { - ret = PTR_ERR(handle); - goto out_fail; - } - - if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode)) - ret = nobh_writepage(page, ext4_get_block, wbc); - else - ret = block_write_full_page(page, ext4_get_block, wbc); - - err = ext4_journal_stop(handle); - if (!ret) - ret = err; - return ret; - -out_fail: - redirty_page_for_writepage(wbc, page); - unlock_page(page); - return ret; -} - -static int ext4_journalled_writepage(struct page *page, - struct writeback_control *wbc) -{ - struct inode *inode = page->mapping->host; - handle_t *handle = NULL; - int ret = 0; - int err; - - if (ext4_journal_current_handle()) - goto no_write; - - handle = ext4_journal_start(inode, ext4_writepage_trans_blocks(inode)); - if (IS_ERR(handle)) { - ret = PTR_ERR(handle); - goto no_write; - } - - if (!page_has_buffers(page) || PageChecked(page)) { - /* - * It's mmapped pagecache. Add buffers and journal it. There - * doesn't seem much point in redirtying the page here. - */ - ClearPageChecked(page); - ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE, - ext4_get_block); - if (ret != 0) { - ext4_journal_stop(handle); - goto out_unlock; - } - ret = walk_page_buffers(handle, page_buffers(page), 0, - PAGE_CACHE_SIZE, NULL, do_journal_get_write_access); - - err = walk_page_buffers(handle, page_buffers(page), 0, - PAGE_CACHE_SIZE, NULL, commit_write_fn); - if (ret == 0) - ret = err; - EXT4_I(inode)->i_state |= EXT4_STATE_JDATA; - unlock_page(page); - } else { - /* - * It may be a page full of checkpoint-mode buffers. We don't - * really know unless we go poke around in the buffer_heads. - * But block_write_full_page will do the right thing. - */ - ret = block_write_full_page(page, ext4_get_block, wbc); - } - err = ext4_journal_stop(handle); - if (!ret) - ret = err; -out: - return ret; - -no_write: - redirty_page_for_writepage(wbc, page); -out_unlock: - unlock_page(page); - goto out; -} - -static int ext4_readpage(struct file *file, struct page *page) -{ - return mpage_readpage(page, ext4_get_block); -} - -static int -ext4_readpages(struct file *file, struct address_space *mapping, - struct list_head *pages, unsigned nr_pages) -{ - return mpage_readpages(mapping, pages, nr_pages, ext4_get_block); -} - -static void ext4_invalidatepage(struct page *page, unsigned long offset) -{ - journal_t *journal = EXT4_JOURNAL(page->mapping->host); - - /* - * If it's a full truncate we just forget about the pending dirtying - */ - if (offset == 0) - ClearPageChecked(page); - - jbd2_journal_invalidatepage(journal, page, offset); -} - -static int ext4_releasepage(struct page *page, gfp_t wait) -{ - journal_t *journal = EXT4_JOURNAL(page->mapping->host); - - WARN_ON(PageChecked(page)); - if (!page_has_buffers(page)) - return 0; - return jbd2_journal_try_to_free_buffers(journal, page, wait); -} - -/* - * If the O_DIRECT write will extend the file then add this inode to the - * orphan list. So recovery will truncate it back to the original size - * if the machine crashes during the write. - * - * If the O_DIRECT write is intantiating holes inside i_size and the machine - * crashes then stale disk data _may_ be exposed inside the file. - */ -static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb, - const struct iovec *iov, loff_t offset, - unsigned long nr_segs) -{ - struct file *file = iocb->ki_filp; - struct inode *inode = file->f_mapping->host; - struct ext4_inode_info *ei = EXT4_I(inode); - handle_t *handle = NULL; - ssize_t ret; - int orphan = 0; - size_t count = iov_length(iov, nr_segs); - - if (rw == WRITE) { - loff_t final_size = offset + count; - - handle = ext4_journal_start(inode, DIO_CREDITS); - if (IS_ERR(handle)) { - ret = PTR_ERR(handle); - goto out; - } - if (final_size > inode->i_size) { - ret = ext4_orphan_add(handle, inode); - if (ret) - goto out_stop; - orphan = 1; - ei->i_disksize = inode->i_size; - } - } - - ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, - offset, nr_segs, - ext4_get_block, NULL); - - /* - * Reacquire the handle: ext4_get_block() can restart the transaction - */ - handle = journal_current_handle(); - -out_stop: - if (handle) { - int err; - - if (orphan && inode->i_nlink) - ext4_orphan_del(handle, inode); - if (orphan && ret > 0) { - loff_t end = offset + ret; - if (end > inode->i_size) { - ei->i_disksize = end; - i_size_write(inode, end); - /* - * We're going to return a positive `ret' - * here due to non-zero-length I/O, so there's - * no way of reporting error returns from - * ext4_mark_inode_dirty() to userspace. So - * ignore it. - */ - ext4_mark_inode_dirty(handle, inode); - } - } - err = ext4_journal_stop(handle); - if (ret == 0) - ret = err; - } -out: - return ret; -} - -/* - * Pages can be marked dirty completely asynchronously from ext4's journalling - * activity. By filemap_sync_pte(), try_to_unmap_one(), etc. We cannot do - * much here because ->set_page_dirty is called under VFS locks. The page is - * not necessarily locked. - * - * We cannot just dirty the page and leave attached buffers clean, because the - * buffers' dirty state is "definitive". We cannot just set the buffers dirty - * or jbddirty because all the journalling code will explode. - * - * So what we do is to mark the page "pending dirty" and next time writepage - * is called, propagate that into the buffers appropriately. - */ -static int ext4_journalled_set_page_dirty(struct page *page) -{ - SetPageChecked(page); - return __set_page_dirty_nobuffers(page); -} - -static const struct address_space_operations ext4_ordered_aops = { - .readpage = ext4_readpage, - .readpages = ext4_readpages, - .writepage = ext4_ordered_writepage, - .sync_page = block_sync_page, - .prepare_write = ext4_prepare_write, - .commit_write = ext4_ordered_commit_write, - .bmap = ext4_bmap, - .invalidatepage = ext4_invalidatepage, - .releasepage = ext4_releasepage, - .direct_IO = ext4_direct_IO, - .migratepage = buffer_migrate_page, -}; - -static const struct address_space_operations ext4_writeback_aops = { - .readpage = ext4_readpage, - .readpages = ext4_readpages, - .writepage = ext4_writeback_writepage, - .sync_page = block_sync_page, - .prepare_write = ext4_prepare_write, - .commit_write = ext4_writeback_commit_write, - .bmap = ext4_bmap, - .invalidatepage = ext4_invalidatepage, - .releasepage = ext4_releasepage, - .direct_IO = ext4_direct_IO, - .migratepage = buffer_migrate_page, -}; - -static const struct address_space_operations ext4_journalled_aops = { - .readpage = ext4_readpage, - .readpages = ext4_readpages, - .writepage = ext4_journalled_writepage, - .sync_page = block_sync_page, - .prepare_write = ext4_prepare_write, - .commit_write = ext4_journalled_commit_write, - .set_page_dirty = ext4_journalled_set_page_dirty, - .bmap = ext4_bmap, - .invalidatepage = ext4_invalidatepage, - .releasepage = ext4_releasepage, -}; - -void ext4_set_aops(struct inode *inode) -{ - if (ext4_should_order_data(inode)) - inode->i_mapping->a_ops = &ext4_ordered_aops; - else if (ext4_should_writeback_data(inode)) - inode->i_mapping->a_ops = &ext4_writeback_aops; - else - inode->i_mapping->a_ops = &ext4_journalled_aops; -} - -/* - * ext4_block_truncate_page() zeroes out a mapping from file offset `from' - * up to the end of the block which corresponds to `from'. - * This required during truncate. We need to physically zero the tail end - * of that block so it doesn't yield old data if the file is later grown. - */ -int ext4_block_truncate_page(handle_t *handle, struct page *page, - struct address_space *mapping, loff_t from) -{ - ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT; - unsigned offset = from & (PAGE_CACHE_SIZE-1); - unsigned blocksize, iblock, length, pos; - struct inode *inode = mapping->host; - struct buffer_head *bh; - int err = 0; - void *kaddr; - - blocksize = inode->i_sb->s_blocksize; - length = blocksize - (offset & (blocksize - 1)); - iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits); - - /* - * For "nobh" option, we can only work if we don't need to - * read-in the page - otherwise we create buffers to do the IO. - */ - if (!page_has_buffers(page) && test_opt(inode->i_sb, NOBH) && - ext4_should_writeback_data(inode) && PageUptodate(page)) { - kaddr = kmap_atomic(page, KM_USER0); - memset(kaddr + offset, 0, length); - flush_dcache_page(page); - kunmap_atomic(kaddr, KM_USER0); - set_page_dirty(page); - goto unlock; - } - - if (!page_has_buffers(page)) - create_empty_buffers(page, blocksize, 0); - - /* Find the buffer that contains "offset" */ - bh = page_buffers(page); - pos = blocksize; - while (offset >= pos) { - bh = bh->b_this_page; - iblock++; - pos += blocksize; - } - - err = 0; - if (buffer_freed(bh)) { - BUFFER_TRACE(bh, "freed: skip"); - goto unlock; - } - - if (!buffer_mapped(bh)) { - BUFFER_TRACE(bh, "unmapped"); - ext4_get_block(inode, iblock, bh, 0); - /* unmapped? It's a hole - nothing to do */ - if (!buffer_mapped(bh)) { - BUFFER_TRACE(bh, "still unmapped"); - goto unlock; - } - } - - /* Ok, it's mapped. Make sure it's up-to-date */ - if (PageUptodate(page)) - set_buffer_uptodate(bh); - - if (!buffer_uptodate(bh)) { - err = -EIO; - ll_rw_block(READ, 1, &bh); - wait_on_buffer(bh); - /* Uhhuh. Read error. Complain and punt. */ - if (!buffer_uptodate(bh)) - goto unlock; - } - - if (ext4_should_journal_data(inode)) { - BUFFER_TRACE(bh, "get write access"); - err = ext4_journal_get_write_access(handle, bh); - if (err) - goto unlock; - } - - kaddr = kmap_atomic(page, KM_USER0); - memset(kaddr + offset, 0, length); - flush_dcache_page(page); - kunmap_atomic(kaddr, KM_USER0); - - BUFFER_TRACE(bh, "zeroed end of block"); - - err = 0; - if (ext4_should_journal_data(inode)) { - err = ext4_journal_dirty_metadata(handle, bh); - } else { - if (ext4_should_order_data(inode)) - err = ext4_journal_dirty_data(handle, bh); - mark_buffer_dirty(bh); - } - -unlock: - unlock_page(page); - page_cache_release(page); - return err; -} - -/* - * Probably it should be a library function... search for first non-zero word - * or memcmp with zero_page, whatever is better for particular architecture. - * Linus? - */ -static inline int all_zeroes(__le32 *p, __le32 *q) -{ - while (p < q) - if (*p++) - return 0; - return 1; -} - -/** - * ext4_find_shared - find the indirect blocks for partial truncation. - * @inode: inode in question - * @depth: depth of the affected branch - * @offsets: offsets of pointers in that branch (see ext4_block_to_path) - * @chain: place to store the pointers to partial indirect blocks - * @top: place to the (detached) top of branch - * - * This is a helper function used by ext4_truncate(). - * - * When we do truncate() we may have to clean the ends of several - * indirect blocks but leave the blocks themselves alive. Block is - * partially truncated if some data below the new i_size is refered - * from it (and it is on the path to the first completely truncated - * data block, indeed). We have to free the top of that path along - * with everything to the right of the path. Since no allocation - * past the truncation point is possible until ext4_truncate() - * finishes, we may safely do the latter, but top of branch may - * require special attention - pageout below the truncation point - * might try to populate it. - * - * We atomically detach the top of branch from the tree, store the - * block number of its root in *@top, pointers to buffer_heads of - * partially truncated blocks - in @chain[].bh and pointers to - * their last elements that should not be removed - in - * @chain[].p. Return value is the pointer to last filled element - * of @chain. - * - * The work left to caller to do the actual freeing of subtrees: - * a) free the subtree starting from *@top - * b) free the subtrees whose roots are stored in - * (@chain[i].p+1 .. end of @chain[i].bh->b_data) - * c) free the subtrees growing from the inode past the @chain[0]. - * (no partially truncated stuff there). */ - -static Indirect *ext4_find_shared(struct inode *inode, int depth, - int offsets[4], Indirect chain[4], __le32 *top) -{ - Indirect *partial, *p; - int k, err; - - *top = 0; - /* Make k index the deepest non-null offest + 1 */ - for (k = depth; k > 1 && !offsets[k-1]; k--) - ; - partial = ext4_get_branch(inode, k, offsets, chain, &err); - /* Writer: pointers */ - if (!partial) - partial = chain + k-1; - /* - * If the branch acquired continuation since we've looked at it - - * fine, it should all survive and (new) top doesn't belong to us. - */ - if (!partial->key && *partial->p) - /* Writer: end */ - goto no_top; - for (p=partial; p>chain && all_zeroes((__le32*)p->bh->b_data,p->p); p--) - ; - /* - * OK, we've found the last block that must survive. The rest of our - * branch should be detached before unlocking. However, if that rest - * of branch is all ours and does not grow immediately from the inode - * it's easier to cheat and just decrement partial->p. - */ - if (p == chain + k - 1 && p > chain) { - p->p--; - } else { - *top = *p->p; - /* Nope, don't do this in ext4. Must leave the tree intact */ -#if 0 - *p->p = 0; -#endif - } - /* Writer: end */ - - while(partial > p) { - brelse(partial->bh); - partial--; - } -no_top: - return partial; -} - -/* - * Zero a number of block pointers in either an inode or an indirect block. - * If we restart the transaction we must again get write access to the - * indirect block for further modification. - * - * We release `count' blocks on disk, but (last - first) may be greater - * than `count' because there can be holes in there. - */ -static void ext4_clear_blocks(handle_t *handle, struct inode *inode, - struct buffer_head *bh, ext4_fsblk_t block_to_free, - unsigned long count, __le32 *first, __le32 *last) -{ - __le32 *p; - if (try_to_extend_transaction(handle, inode)) { - if (bh) { - BUFFER_TRACE(bh, "call ext4_journal_dirty_metadata"); - ext4_journal_dirty_metadata(handle, bh); - } - ext4_mark_inode_dirty(handle, inode); - ext4_journal_test_restart(handle, inode); - if (bh) { - BUFFER_TRACE(bh, "retaking write access"); - ext4_journal_get_write_access(handle, bh); - } - } - - /* - * Any buffers which are on the journal will be in memory. We find - * them on the hash table so jbd2_journal_revoke() will run jbd2_journal_forget() - * on them. We've already detached each block from the file, so - * bforget() in jbd2_journal_forget() should be safe. - * - * AKPM: turn on bforget in jbd2_journal_forget()!!! - */ - for (p = first; p < last; p++) { - u32 nr = le32_to_cpu(*p); - if (nr) { - struct buffer_head *bh; - - *p = 0; - bh = sb_find_get_block(inode->i_sb, nr); - ext4_forget(handle, 0, inode, bh, nr); - } - } - - ext4_free_blocks(handle, inode, block_to_free, count); -} - -/** - * ext4_free_data - free a list of data blocks - * @handle: handle for this transaction - * @inode: inode we are dealing with - * @this_bh: indirect buffer_head which contains *@first and *@last - * @first: array of block numbers - * @last: points immediately past the end of array - * - * We are freeing all blocks refered from that array (numbers are stored as - * little-endian 32-bit) and updating @inode->i_blocks appropriately. - * - * We accumulate contiguous runs of blocks to free. Conveniently, if these - * blocks are contiguous then releasing them at one time will only affect one - * or two bitmap blocks (+ group descriptor(s) and superblock) and we won't - * actually use a lot of journal space. - * - * @this_bh will be %NULL if @first and @last point into the inode's direct - * block pointers. - */ -static void ext4_free_data(handle_t *handle, struct inode *inode, - struct buffer_head *this_bh, - __le32 *first, __le32 *last) -{ - ext4_fsblk_t block_to_free = 0; /* Starting block # of a run */ - unsigned long count = 0; /* Number of blocks in the run */ - __le32 *block_to_free_p = NULL; /* Pointer into inode/ind - corresponding to - block_to_free */ - ext4_fsblk_t nr; /* Current block # */ - __le32 *p; /* Pointer into inode/ind - for current block */ - int err; - - if (this_bh) { /* For indirect block */ - BUFFER_TRACE(this_bh, "get_write_access"); - err = ext4_journal_get_write_access(handle, this_bh); - /* Important: if we can't update the indirect pointers - * to the blocks, we can't free them. */ - if (err) - return; - } - - for (p = first; p < last; p++) { - nr = le32_to_cpu(*p); - if (nr) { - /* accumulate blocks to free if they're contiguous */ - if (count == 0) { - block_to_free = nr; - block_to_free_p = p; - count = 1; - } else if (nr == block_to_free + count) { - count++; - } else { - ext4_clear_blocks(handle, inode, this_bh, - block_to_free, - count, block_to_free_p, p); - block_to_free = nr; - block_to_free_p = p; - count = 1; - } - } - } - - if (count > 0) - ext4_clear_blocks(handle, inode, this_bh, block_to_free, - count, block_to_free_p, p); - - if (this_bh) { - BUFFER_TRACE(this_bh, "call ext4_journal_dirty_metadata"); - ext4_journal_dirty_metadata(handle, this_bh); - } -} - -/** - * ext4_free_branches - free an array of branches - * @handle: JBD handle for this transaction - * @inode: inode we are dealing with - * @parent_bh: the buffer_head which contains *@first and *@last - * @first: array of block numbers - * @last: pointer immediately past the end of array - * @depth: depth of the branches to free - * - * We are freeing all blocks refered from these branches (numbers are - * stored as little-endian 32-bit) and updating @inode->i_blocks - * appropriately. - */ -static void ext4_free_branches(handle_t *handle, struct inode *inode, - struct buffer_head *parent_bh, - __le32 *first, __le32 *last, int depth) -{ - ext4_fsblk_t nr; - __le32 *p; - - if (is_handle_aborted(handle)) - return; - - if (depth--) { - struct buffer_head *bh; - int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb); - p = last; - while (--p >= first) { - nr = le32_to_cpu(*p); - if (!nr) - continue; /* A hole */ - - /* Go read the buffer for the next level down */ - bh = sb_bread(inode->i_sb, nr); - - /* - * A read failure? Report error and clear slot - * (should be rare). - */ - if (!bh) { - ext4_error(inode->i_sb, "ext4_free_branches", - "Read failure, inode=%lu, block=%llu", - inode->i_ino, nr); - continue; - } - - /* This zaps the entire block. Bottom up. */ - BUFFER_TRACE(bh, "free child branches"); - ext4_free_branches(handle, inode, bh, - (__le32*)bh->b_data, - (__le32*)bh->b_data + addr_per_block, - depth); - - /* - * We've probably journalled the indirect block several - * times during the truncate. But it's no longer - * needed and we now drop it from the transaction via - * jbd2_journal_revoke(). - * - * That's easy if it's exclusively part of this - * transaction. But if it's part of the committing - * transaction then jbd2_journal_forget() will simply - * brelse() it. That means that if the underlying - * block is reallocated in ext4_get_block(), - * unmap_underlying_metadata() will find this block - * and will try to get rid of it. damn, damn. - * - * If this block has already been committed to the - * journal, a revoke record will be written. And - * revoke records must be emitted *before* clearing - * this block's bit in the bitmaps. - */ - ext4_forget(handle, 1, inode, bh, bh->b_blocknr); - - /* - * Everything below this this pointer has been - * released. Now let this top-of-subtree go. - * - * We want the freeing of this indirect block to be - * atomic in the journal with the updating of the - * bitmap block which owns it. So make some room in - * the journal. - * - * We zero the parent pointer *after* freeing its - * pointee in the bitmaps, so if extend_transaction() - * for some reason fails to put the bitmap changes and - * the release into the same transaction, recovery - * will merely complain about releasing a free block, - * rather than leaking blocks. - */ - if (is_handle_aborted(handle)) - return; - if (try_to_extend_transaction(handle, inode)) { - ext4_mark_inode_dirty(handle, inode); - ext4_journal_test_restart(handle, inode); - } - - ext4_free_blocks(handle, inode, nr, 1); - - if (parent_bh) { - /* - * The block which we have just freed is - * pointed to by an indirect block: journal it - */ - BUFFER_TRACE(parent_bh, "get_write_access"); - if (!ext4_journal_get_write_access(handle, - parent_bh)){ - *p = 0; - BUFFER_TRACE(parent_bh, - "call ext4_journal_dirty_metadata"); - ext4_journal_dirty_metadata(handle, - parent_bh); - } - } - } - } else { - /* We have reached the bottom of the tree. */ - BUFFER_TRACE(parent_bh, "free data blocks"); - ext4_free_data(handle, inode, parent_bh, first, last); - } -} - -/* - * ext4_truncate() - * - * We block out ext4_get_block() block instantiations across the entire - * transaction, and VFS/VM ensures that ext4_truncate() cannot run - * simultaneously on behalf of the same inode. - * - * As we work through the truncate and commmit bits of it to the journal there - * is one core, guiding principle: the file's tree must always be consistent on - * disk. We must be able to restart the truncate after a crash. - * - * The file's tree may be transiently inconsistent in memory (although it - * probably isn't), but whenever we close off and commit a journal transaction, - * the contents of (the filesystem + the journal) must be consistent and - * restartable. It's pretty simple, really: bottom up, right to left (although - * left-to-right works OK too). - * - * Note that at recovery time, journal replay occurs *before* the restart of - * truncate against the orphan inode list. - * - * The committed inode has the new, desired i_size (which is the same as - * i_disksize in this case). After a crash, ext4_orphan_cleanup() will see - * that this inode's truncate did not complete and it will again call - * ext4_truncate() to have another go. So there will be instantiated blocks - * to the right of the truncation point in a crashed ext4 filesystem. But - * that's fine - as long as they are linked from the inode, the post-crash - * ext4_truncate() run will find them and release them. - */ -void ext4_truncate(struct inode *inode) -{ - handle_t *handle; - struct ext4_inode_info *ei = EXT4_I(inode); - __le32 *i_data = ei->i_data; - int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb); - struct address_space *mapping = inode->i_mapping; - int offsets[4]; - Indirect chain[4]; - Indirect *partial; - __le32 nr = 0; - int n; - long last_block; - unsigned blocksize = inode->i_sb->s_blocksize; - struct page *page; - - if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || - S_ISLNK(inode->i_mode))) - return; - if (ext4_inode_is_fast_symlink(inode)) - return; - if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) - return; - - /* - * We have to lock the EOF page here, because lock_page() nests - * outside jbd2_journal_start(). - */ - if ((inode->i_size & (blocksize - 1)) == 0) { - /* Block boundary? Nothing to do */ - page = NULL; - } else { - page = grab_cache_page(mapping, - inode->i_size >> PAGE_CACHE_SHIFT); - if (!page) - return; - } - - if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) - return ext4_ext_truncate(inode, page); - - handle = start_transaction(inode); - if (IS_ERR(handle)) { - if (page) { - clear_highpage(page); - flush_dcache_page(page); - unlock_page(page); - page_cache_release(page); - } - return; /* AKPM: return what? */ - } - - last_block = (inode->i_size + blocksize-1) - >> EXT4_BLOCK_SIZE_BITS(inode->i_sb); - - if (page) - ext4_block_truncate_page(handle, page, mapping, inode->i_size); - - n = ext4_block_to_path(inode, last_block, offsets, NULL); - if (n == 0) - goto out_stop; /* error */ - - /* - * OK. This truncate is going to happen. We add the inode to the - * orphan list, so that if this truncate spans multiple transactions, - * and we crash, we will resume the truncate when the filesystem - * recovers. It also marks the inode dirty, to catch the new size. - * - * Implication: the file must always be in a sane, consistent - * truncatable state while each transaction commits. - */ - if (ext4_orphan_add(handle, inode)) - goto out_stop; - - /* - * The orphan list entry will now protect us from any crash which - * occurs before the truncate completes, so it is now safe to propagate - * the new, shorter inode size (held for now in i_size) into the - * on-disk inode. We do this via i_disksize, which is the value which - * ext4 *really* writes onto the disk inode. - */ - ei->i_disksize = inode->i_size; - - /* - * From here we block out all ext4_get_block() callers who want to - * modify the block allocation tree. - */ - mutex_lock(&ei->truncate_mutex); - - if (n == 1) { /* direct blocks */ - ext4_free_data(handle, inode, NULL, i_data+offsets[0], - i_data + EXT4_NDIR_BLOCKS); - goto do_indirects; - } - - partial = ext4_find_shared(inode, n, offsets, chain, &nr); - /* Kill the top of shared branch (not detached) */ - if (nr) { - if (partial == chain) { - /* Shared branch grows from the inode */ - ext4_free_branches(handle, inode, NULL, - &nr, &nr+1, (chain+n-1) - partial); - *partial->p = 0; - /* - * We mark the inode dirty prior to restart, - * and prior to stop. No need for it here. - */ - } else { - /* Shared branch grows from an indirect block */ - BUFFER_TRACE(partial->bh, "get_write_access"); - ext4_free_branches(handle, inode, partial->bh, - partial->p, - partial->p+1, (chain+n-1) - partial); - } - } - /* Clear the ends of indirect blocks on the shared branch */ - while (partial > chain) { - ext4_free_branches(handle, inode, partial->bh, partial->p + 1, - (__le32*)partial->bh->b_data+addr_per_block, - (chain+n-1) - partial); - BUFFER_TRACE(partial->bh, "call brelse"); - brelse (partial->bh); - partial--; - } -do_indirects: - /* Kill the remaining (whole) subtrees */ - switch (offsets[0]) { - default: - nr = i_data[EXT4_IND_BLOCK]; - if (nr) { - ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 1); - i_data[EXT4_IND_BLOCK] = 0; - } - case EXT4_IND_BLOCK: - nr = i_data[EXT4_DIND_BLOCK]; - if (nr) { - ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 2); - i_data[EXT4_DIND_BLOCK] = 0; - } - case EXT4_DIND_BLOCK: - nr = i_data[EXT4_TIND_BLOCK]; - if (nr) { - ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 3); - i_data[EXT4_TIND_BLOCK] = 0; - } - case EXT4_TIND_BLOCK: - ; - } - - ext4_discard_reservation(inode); - - mutex_unlock(&ei->truncate_mutex); - inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC; - ext4_mark_inode_dirty(handle, inode); - - /* - * In a multi-transaction truncate, we only make the final transaction - * synchronous - */ - if (IS_SYNC(inode)) - handle->h_sync = 1; -out_stop: - /* - * If this was a simple ftruncate(), and the file will remain alive - * then we need to clear up the orphan record which we created above. - * However, if this was a real unlink then we were called by - * ext4_delete_inode(), and we allow that function to clean up the - * orphan info for us. - */ - if (inode->i_nlink) - ext4_orphan_del(handle, inode); - - ext4_journal_stop(handle); -} - -static ext4_fsblk_t ext4_get_inode_block(struct super_block *sb, - unsigned long ino, struct ext4_iloc *iloc) -{ - unsigned long desc, group_desc, block_group; - unsigned long offset; - ext4_fsblk_t block; - struct buffer_head *bh; - struct ext4_group_desc * gdp; - - if (!ext4_valid_inum(sb, ino)) { - /* - * This error is already checked for in namei.c unless we are - * looking at an NFS filehandle, in which case no error - * report is needed - */ - return 0; - } - - block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb); - if (block_group >= EXT4_SB(sb)->s_groups_count) { - ext4_error(sb,"ext4_get_inode_block","group >= groups count"); - return 0; - } - smp_rmb(); - group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb); - desc = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1); - bh = EXT4_SB(sb)->s_group_desc[group_desc]; - if (!bh) { - ext4_error (sb, "ext4_get_inode_block", - "Descriptor not loaded"); - return 0; - } - - gdp = (struct ext4_group_desc *)((__u8 *)bh->b_data + - desc * EXT4_DESC_SIZE(sb)); - /* - * Figure out the offset within the block group inode table - */ - offset = ((ino - 1) % EXT4_INODES_PER_GROUP(sb)) * - EXT4_INODE_SIZE(sb); - block = ext4_inode_table(sb, gdp) + - (offset >> EXT4_BLOCK_SIZE_BITS(sb)); - - iloc->block_group = block_group; - iloc->offset = offset & (EXT4_BLOCK_SIZE(sb) - 1); - return block; -} - -/* - * ext4_get_inode_loc returns with an extra refcount against the inode's - * underlying buffer_head on success. If 'in_mem' is true, we have all - * data in memory that is needed to recreate the on-disk version of this - * inode. - */ -static int __ext4_get_inode_loc(struct inode *inode, - struct ext4_iloc *iloc, int in_mem) -{ - ext4_fsblk_t block; - struct buffer_head *bh; - - block = ext4_get_inode_block(inode->i_sb, inode->i_ino, iloc); - if (!block) - return -EIO; - - bh = sb_getblk(inode->i_sb, block); - if (!bh) { - ext4_error (inode->i_sb, "ext4_get_inode_loc", - "unable to read inode block - " - "inode=%lu, block=%llu", - inode->i_ino, block); - return -EIO; - } - if (!buffer_uptodate(bh)) { - lock_buffer(bh); - if (buffer_uptodate(bh)) { - /* someone brought it uptodate while we waited */ - unlock_buffer(bh); - goto has_buffer; - } - - /* - * If we have all information of the inode in memory and this - * is the only valid inode in the block, we need not read the - * block. - */ - if (in_mem) { - struct buffer_head *bitmap_bh; - struct ext4_group_desc *desc; - int inodes_per_buffer; - int inode_offset, i; - int block_group; - int start; - - block_group = (inode->i_ino - 1) / - EXT4_INODES_PER_GROUP(inode->i_sb); - inodes_per_buffer = bh->b_size / - EXT4_INODE_SIZE(inode->i_sb); - inode_offset = ((inode->i_ino - 1) % - EXT4_INODES_PER_GROUP(inode->i_sb)); - start = inode_offset & ~(inodes_per_buffer - 1); - - /* Is the inode bitmap in cache? */ - desc = ext4_get_group_desc(inode->i_sb, - block_group, NULL); - if (!desc) - goto make_io; - - bitmap_bh = sb_getblk(inode->i_sb, - ext4_inode_bitmap(inode->i_sb, desc)); - if (!bitmap_bh) - goto make_io; - - /* - * If the inode bitmap isn't in cache then the - * optimisation may end up performing two reads instead - * of one, so skip it. - */ - if (!buffer_uptodate(bitmap_bh)) { - brelse(bitmap_bh); - goto make_io; - } - for (i = start; i < start + inodes_per_buffer; i++) { - if (i == inode_offset) - continue; - if (ext4_test_bit(i, bitmap_bh->b_data)) - break; - } - brelse(bitmap_bh); - if (i == start + inodes_per_buffer) { - /* all other inodes are free, so skip I/O */ - memset(bh->b_data, 0, bh->b_size); - set_buffer_uptodate(bh); - unlock_buffer(bh); - goto has_buffer; - } - } - -make_io: - /* - * There are other valid inodes in the buffer, this inode - * has in-inode xattrs, or we don't have this inode in memory. - * Read the block from disk. - */ - get_bh(bh); - bh->b_end_io = end_buffer_read_sync; - submit_bh(READ_META, bh); - wait_on_buffer(bh); - if (!buffer_uptodate(bh)) { - ext4_error(inode->i_sb, "ext4_get_inode_loc", - "unable to read inode block - " - "inode=%lu, block=%llu", - inode->i_ino, block); - brelse(bh); - return -EIO; - } - } -has_buffer: - iloc->bh = bh; - return 0; -} - -int ext4_get_inode_loc(struct inode *inode, struct ext4_iloc *iloc) -{ - /* We have all inode data except xattrs in memory here. */ - return __ext4_get_inode_loc(inode, iloc, - !(EXT4_I(inode)->i_state & EXT4_STATE_XATTR)); -} - -void ext4_set_inode_flags(struct inode *inode) -{ - unsigned int flags = EXT4_I(inode)->i_flags; - - inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC); - if (flags & EXT4_SYNC_FL) - inode->i_flags |= S_SYNC; - if (flags & EXT4_APPEND_FL) - inode->i_flags |= S_APPEND; - if (flags & EXT4_IMMUTABLE_FL) - inode->i_flags |= S_IMMUTABLE; - if (flags & EXT4_NOATIME_FL) - inode->i_flags |= S_NOATIME; - if (flags & EXT4_DIRSYNC_FL) - inode->i_flags |= S_DIRSYNC; -} - -void ext4_read_inode(struct inode * inode) -{ - struct ext4_iloc iloc; - struct ext4_inode *raw_inode; - struct ext4_inode_info *ei = EXT4_I(inode); - struct buffer_head *bh; - int block; - -#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL - ei->i_acl = EXT4_ACL_NOT_CACHED; - ei->i_default_acl = EXT4_ACL_NOT_CACHED; -#endif - ei->i_block_alloc_info = NULL; - - if (__ext4_get_inode_loc(inode, &iloc, 0)) - goto bad_inode; - bh = iloc.bh; - raw_inode = ext4_raw_inode(&iloc); - inode->i_mode = le16_to_cpu(raw_inode->i_mode); - inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); - inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low); - if(!(test_opt (inode->i_sb, NO_UID32))) { - inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16; - inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16; - } - inode->i_nlink = le16_to_cpu(raw_inode->i_links_count); - inode->i_size = le32_to_cpu(raw_inode->i_size); - inode->i_atime.tv_sec = le32_to_cpu(raw_inode->i_atime); - inode->i_ctime.tv_sec = le32_to_cpu(raw_inode->i_ctime); - inode->i_mtime.tv_sec = le32_to_cpu(raw_inode->i_mtime); - inode->i_atime.tv_nsec = inode->i_ctime.tv_nsec = inode->i_mtime.tv_nsec = 0; - - ei->i_state = 0; - ei->i_dir_start_lookup = 0; - ei->i_dtime = le32_to_cpu(raw_inode->i_dtime); - /* We now have enough fields to check if the inode was active or not. - * This is needed because nfsd might try to access dead inodes - * the test is that same one that e2fsck uses - * NeilBrown 1999oct15 - */ - if (inode->i_nlink == 0) { - if (inode->i_mode == 0 || - !(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS)) { - /* this inode is deleted */ - brelse (bh); - goto bad_inode; - } - /* The only unlinked inodes we let through here have - * valid i_mode and are being read by the orphan - * recovery code: that's fine, we're about to complete - * the process of deleting those. */ - } - inode->i_blocks = le32_to_cpu(raw_inode->i_blocks); - ei->i_flags = le32_to_cpu(raw_inode->i_flags); -#ifdef EXT4_FRAGMENTS - ei->i_faddr = le32_to_cpu(raw_inode->i_faddr); - ei->i_frag_no = raw_inode->i_frag; - ei->i_frag_size = raw_inode->i_fsize; -#endif - ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl); - if (EXT4_SB(inode->i_sb)->s_es->s_creator_os != - cpu_to_le32(EXT4_OS_HURD)) - ei->i_file_acl |= - ((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32; - if (!S_ISREG(inode->i_mode)) { - ei->i_dir_acl = le32_to_cpu(raw_inode->i_dir_acl); - } else { - inode->i_size |= - ((__u64)le32_to_cpu(raw_inode->i_size_high)) << 32; - } - ei->i_disksize = inode->i_size; - inode->i_generation = le32_to_cpu(raw_inode->i_generation); - ei->i_block_group = iloc.block_group; - /* - * NOTE! The in-memory inode i_data array is in little-endian order - * even on big-endian machines: we do NOT byteswap the block numbers! - */ - for (block = 0; block < EXT4_N_BLOCKS; block++) - ei->i_data[block] = raw_inode->i_block[block]; - INIT_LIST_HEAD(&ei->i_orphan); - - if (inode->i_ino >= EXT4_FIRST_INO(inode->i_sb) + 1 && - EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) { - /* - * When mke2fs creates big inodes it does not zero out - * the unused bytes above EXT4_GOOD_OLD_INODE_SIZE, - * so ignore those first few inodes. - */ - ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize); - if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize > - EXT4_INODE_SIZE(inode->i_sb)) - goto bad_inode; - if (ei->i_extra_isize == 0) { - /* The extra space is currently unused. Use it. */ - ei->i_extra_isize = sizeof(struct ext4_inode) - - EXT4_GOOD_OLD_INODE_SIZE; - } else { - __le32 *magic = (void *)raw_inode + - EXT4_GOOD_OLD_INODE_SIZE + - ei->i_extra_isize; - if (*magic == cpu_to_le32(EXT4_XATTR_MAGIC)) - ei->i_state |= EXT4_STATE_XATTR; - } - } else - ei->i_extra_isize = 0; - - if (S_ISREG(inode->i_mode)) { - inode->i_op = &ext4_file_inode_operations; - inode->i_fop = &ext4_file_operations; - ext4_set_aops(inode); - } else if (S_ISDIR(inode->i_mode)) { - inode->i_op = &ext4_dir_inode_operations; - inode->i_fop = &ext4_dir_operations; - } else if (S_ISLNK(inode->i_mode)) { - if (ext4_inode_is_fast_symlink(inode)) - inode->i_op = &ext4_fast_symlink_inode_operations; - else { - inode->i_op = &ext4_symlink_inode_operations; - ext4_set_aops(inode); - } - } else { - inode->i_op = &ext4_special_inode_operations; - if (raw_inode->i_block[0]) - init_special_inode(inode, inode->i_mode, - old_decode_dev(le32_to_cpu(raw_inode->i_block[0]))); - else - init_special_inode(inode, inode->i_mode, - new_decode_dev(le32_to_cpu(raw_inode->i_block[1]))); - } - brelse (iloc.bh); - ext4_set_inode_flags(inode); - return; - -bad_inode: - make_bad_inode(inode); - return; -} - -/* - * Post the struct inode info into an on-disk inode location in the - * buffer-cache. This gobbles the caller's reference to the - * buffer_head in the inode location struct. - * - * The caller must have write access to iloc->bh. - */ -static int ext4_do_update_inode(handle_t *handle, - struct inode *inode, - struct ext4_iloc *iloc) -{ - struct ext4_inode *raw_inode = ext4_raw_inode(iloc); - struct ext4_inode_info *ei = EXT4_I(inode); - struct buffer_head *bh = iloc->bh; - int err = 0, rc, block; - - /* For fields not not tracking in the in-memory inode, - * initialise them to zero for new inodes. */ - if (ei->i_state & EXT4_STATE_NEW) - memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size); - - raw_inode->i_mode = cpu_to_le16(inode->i_mode); - if(!(test_opt(inode->i_sb, NO_UID32))) { - raw_inode->i_uid_low = cpu_to_le16(low_16_bits(inode->i_uid)); - raw_inode->i_gid_low = cpu_to_le16(low_16_bits(inode->i_gid)); -/* - * Fix up interoperability with old kernels. Otherwise, old inodes get - * re-used with the upper 16 bits of the uid/gid intact - */ - if(!ei->i_dtime) { - raw_inode->i_uid_high = - cpu_to_le16(high_16_bits(inode->i_uid)); - raw_inode->i_gid_high = - cpu_to_le16(high_16_bits(inode->i_gid)); - } else { - raw_inode->i_uid_high = 0; - raw_inode->i_gid_high = 0; - } - } else { - raw_inode->i_uid_low = - cpu_to_le16(fs_high2lowuid(inode->i_uid)); - raw_inode->i_gid_low = - cpu_to_le16(fs_high2lowgid(inode->i_gid)); - raw_inode->i_uid_high = 0; - raw_inode->i_gid_high = 0; - } - raw_inode->i_links_count = cpu_to_le16(inode->i_nlink); - raw_inode->i_size = cpu_to_le32(ei->i_disksize); - raw_inode->i_atime = cpu_to_le32(inode->i_atime.tv_sec); - raw_inode->i_ctime = cpu_to_le32(inode->i_ctime.tv_sec); - raw_inode->i_mtime = cpu_to_le32(inode->i_mtime.tv_sec); - raw_inode->i_blocks = cpu_to_le32(inode->i_blocks); - raw_inode->i_dtime = cpu_to_le32(ei->i_dtime); - raw_inode->i_flags = cpu_to_le32(ei->i_flags); -#ifdef EXT4_FRAGMENTS - raw_inode->i_faddr = cpu_to_le32(ei->i_faddr); - raw_inode->i_frag = ei->i_frag_no; - raw_inode->i_fsize = ei->i_frag_size; -#endif - if (EXT4_SB(inode->i_sb)->s_es->s_creator_os != - cpu_to_le32(EXT4_OS_HURD)) - raw_inode->i_file_acl_high = - cpu_to_le16(ei->i_file_acl >> 32); - raw_inode->i_file_acl = cpu_to_le32(ei->i_file_acl); - if (!S_ISREG(inode->i_mode)) { - raw_inode->i_dir_acl = cpu_to_le32(ei->i_dir_acl); - } else { - raw_inode->i_size_high = - cpu_to_le32(ei->i_disksize >> 32); - if (ei->i_disksize > 0x7fffffffULL) { - struct super_block *sb = inode->i_sb; - if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_LARGE_FILE) || - EXT4_SB(sb)->s_es->s_rev_level == - cpu_to_le32(EXT4_GOOD_OLD_REV)) { - /* If this is the first large file - * created, add a flag to the superblock. - */ - err = ext4_journal_get_write_access(handle, - EXT4_SB(sb)->s_sbh); - if (err) - goto out_brelse; - ext4_update_dynamic_rev(sb); - EXT4_SET_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_LARGE_FILE); - sb->s_dirt = 1; - handle->h_sync = 1; - err = ext4_journal_dirty_metadata(handle, - EXT4_SB(sb)->s_sbh); - } - } - } - raw_inode->i_generation = cpu_to_le32(inode->i_generation); - if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) { - if (old_valid_dev(inode->i_rdev)) { - raw_inode->i_block[0] = - cpu_to_le32(old_encode_dev(inode->i_rdev)); - raw_inode->i_block[1] = 0; - } else { - raw_inode->i_block[0] = 0; - raw_inode->i_block[1] = - cpu_to_le32(new_encode_dev(inode->i_rdev)); - raw_inode->i_block[2] = 0; - } - } else for (block = 0; block < EXT4_N_BLOCKS; block++) - raw_inode->i_block[block] = ei->i_data[block]; - - if (ei->i_extra_isize) - raw_inode->i_extra_isize = cpu_to_le16(ei->i_extra_isize); - - BUFFER_TRACE(bh, "call ext4_journal_dirty_metadata"); - rc = ext4_journal_dirty_metadata(handle, bh); - if (!err) - err = rc; - ei->i_state &= ~EXT4_STATE_NEW; - -out_brelse: - brelse (bh); - ext4_std_error(inode->i_sb, err); - return err; -} - -/* - * ext4_write_inode() - * - * We are called from a few places: - * - * - Within generic_file_write() for O_SYNC files. - * Here, there will be no transaction running. We wait for any running - * trasnaction to commit. - * - * - Within sys_sync(), kupdate and such. - * We wait on commit, if tol to. - * - * - Within prune_icache() (PF_MEMALLOC == true) - * Here we simply return. We can't afford to block kswapd on the - * journal commit. - * - * In all cases it is actually safe for us to return without doing anything, - * because the inode has been copied into a raw inode buffer in - * ext4_mark_inode_dirty(). This is a correctness thing for O_SYNC and for - * knfsd. - * - * Note that we are absolutely dependent upon all inode dirtiers doing the - * right thing: they *must* call mark_inode_dirty() after dirtying info in - * which we are interested. - * - * It would be a bug for them to not do this. The code: - * - * mark_inode_dirty(inode) - * stuff(); - * inode->i_size = expr; - * - * is in error because a kswapd-driven write_inode() could occur while - * `stuff()' is running, and the new i_size will be lost. Plus the inode - * will no longer be on the superblock's dirty inode list. - */ -int ext4_write_inode(struct inode *inode, int wait) -{ - if (current->flags & PF_MEMALLOC) - return 0; - - if (ext4_journal_current_handle()) { - jbd_debug(0, "called recursively, non-PF_MEMALLOC!\n"); - dump_stack(); - return -EIO; - } - - if (!wait) - return 0; - - return ext4_force_commit(inode->i_sb); -} - -/* - * ext4_setattr() - * - * Called from notify_change. - * - * We want to trap VFS attempts to truncate the file as soon as - * possible. In particular, we want to make sure that when the VFS - * shrinks i_size, we put the inode on the orphan list and modify - * i_disksize immediately, so that during the subsequent flushing of - * dirty pages and freeing of disk blocks, we can guarantee that any - * commit will leave the blocks being flushed in an unused state on - * disk. (On recovery, the inode will get truncated and the blocks will - * be freed, so we have a strong guarantee that no future commit will - * leave these blocks visible to the user.) - * - * Called with inode->sem down. - */ -int ext4_setattr(struct dentry *dentry, struct iattr *attr) -{ - struct inode *inode = dentry->d_inode; - int error, rc = 0; - const unsigned int ia_valid = attr->ia_valid; - - error = inode_change_ok(inode, attr); - if (error) - return error; - - if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || - (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { - handle_t *handle; - - /* (user+group)*(old+new) structure, inode write (sb, - * inode block, ? - but truncate inode update has it) */ - handle = ext4_journal_start(inode, 2*(EXT4_QUOTA_INIT_BLOCKS(inode->i_sb)+ - EXT4_QUOTA_DEL_BLOCKS(inode->i_sb))+3); - if (IS_ERR(handle)) { - error = PTR_ERR(handle); - goto err_out; - } - error = DQUOT_TRANSFER(inode, attr) ? -EDQUOT : 0; - if (error) { - ext4_journal_stop(handle); - return error; - } - /* Update corresponding info in inode so that everything is in - * one transaction */ - if (attr->ia_valid & ATTR_UID) - inode->i_uid = attr->ia_uid; - if (attr->ia_valid & ATTR_GID) - inode->i_gid = attr->ia_gid; - error = ext4_mark_inode_dirty(handle, inode); - ext4_journal_stop(handle); - } - - if (S_ISREG(inode->i_mode) && - attr->ia_valid & ATTR_SIZE && attr->ia_size < inode->i_size) { - handle_t *handle; - - handle = ext4_journal_start(inode, 3); - if (IS_ERR(handle)) { - error = PTR_ERR(handle); - goto err_out; - } - - error = ext4_orphan_add(handle, inode); - EXT4_I(inode)->i_disksize = attr->ia_size; - rc = ext4_mark_inode_dirty(handle, inode); - if (!error) - error = rc; - ext4_journal_stop(handle); - } - - rc = inode_setattr(inode, attr); - - /* If inode_setattr's call to ext4_truncate failed to get a - * transaction handle at all, we need to clean up the in-core - * orphan list manually. */ - if (inode->i_nlink) - ext4_orphan_del(NULL, inode); - - if (!rc && (ia_valid & ATTR_MODE)) - rc = ext4_acl_chmod(inode); - -err_out: - ext4_std_error(inode->i_sb, error); - if (!error) - error = rc; - return error; -} - - -/* - * How many blocks doth make a writepage()? - * - * With N blocks per page, it may be: - * N data blocks - * 2 indirect block - * 2 dindirect - * 1 tindirect - * N+5 bitmap blocks (from the above) - * N+5 group descriptor summary blocks - * 1 inode block - * 1 superblock. - * 2 * EXT4_SINGLEDATA_TRANS_BLOCKS for the quote files - * - * 3 * (N + 5) + 2 + 2 * EXT4_SINGLEDATA_TRANS_BLOCKS - * - * With ordered or writeback data it's the same, less the N data blocks. - * - * If the inode's direct blocks can hold an integral number of pages then a - * page cannot straddle two indirect blocks, and we can only touch one indirect - * and dindirect block, and the "5" above becomes "3". - * - * This still overestimates under most circumstances. If we were to pass the - * start and end offsets in here as well we could do block_to_path() on each - * block and work out the exact number of indirects which are touched. Pah. - */ - -int ext4_writepage_trans_blocks(struct inode *inode) -{ - int bpp = ext4_journal_blocks_per_page(inode); - int indirects = (EXT4_NDIR_BLOCKS % bpp) ? 5 : 3; - int ret; - - if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) - return ext4_ext_writepage_trans_blocks(inode, bpp); - - if (ext4_should_journal_data(inode)) - ret = 3 * (bpp + indirects) + 2; - else - ret = 2 * (bpp + indirects) + 2; - -#ifdef CONFIG_QUOTA - /* We know that structure was already allocated during DQUOT_INIT so - * we will be updating only the data blocks + inodes */ - ret += 2*EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb); -#endif - - return ret; -} - -/* - * The caller must have previously called ext4_reserve_inode_write(). - * Give this, we know that the caller already has write access to iloc->bh. - */ -int ext4_mark_iloc_dirty(handle_t *handle, - struct inode *inode, struct ext4_iloc *iloc) -{ - int err = 0; - - /* the do_update_inode consumes one bh->b_count */ - get_bh(iloc->bh); - - /* ext4_do_update_inode() does jbd2_journal_dirty_metadata */ - err = ext4_do_update_inode(handle, inode, iloc); - put_bh(iloc->bh); - return err; -} - -/* - * On success, We end up with an outstanding reference count against - * iloc->bh. This _must_ be cleaned up later. - */ - -int -ext4_reserve_inode_write(handle_t *handle, struct inode *inode, - struct ext4_iloc *iloc) -{ - int err = 0; - if (handle) { - err = ext4_get_inode_loc(inode, iloc); - if (!err) { - BUFFER_TRACE(iloc->bh, "get_write_access"); - err = ext4_journal_get_write_access(handle, iloc->bh); - if (err) { - brelse(iloc->bh); - iloc->bh = NULL; - } - } - } - ext4_std_error(inode->i_sb, err); - return err; -} - -/* - * What we do here is to mark the in-core inode as clean with respect to inode - * dirtiness (it may still be data-dirty). - * This means that the in-core inode may be reaped by prune_icache - * without having to perform any I/O. This is a very good thing, - * because *any* task may call prune_icache - even ones which - * have a transaction open against a different journal. - * - * Is this cheating? Not really. Sure, we haven't written the - * inode out, but prune_icache isn't a user-visible syncing function. - * Whenever the user wants stuff synced (sys_sync, sys_msync, sys_fsync) - * we start and wait on commits. - * - * Is this efficient/effective? Well, we're being nice to the system - * by cleaning up our inodes proactively so they can be reaped - * without I/O. But we are potentially leaving up to five seconds' - * worth of inodes floating about which prune_icache wants us to - * write out. One way to fix that would be to get prune_icache() - * to do a write_super() to free up some memory. It has the desired - * effect. - */ -int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode) -{ - struct ext4_iloc iloc; - int err; - - might_sleep(); - err = ext4_reserve_inode_write(handle, inode, &iloc); - if (!err) - err = ext4_mark_iloc_dirty(handle, inode, &iloc); - return err; -} - -/* - * ext4_dirty_inode() is called from __mark_inode_dirty() - * - * We're really interested in the case where a file is being extended. - * i_size has been changed by generic_commit_write() and we thus need - * to include the updated inode in the current transaction. - * - * Also, DQUOT_ALLOC_SPACE() will always dirty the inode when blocks - * are allocated to the file. - * - * If the inode is marked synchronous, we don't honour that here - doing - * so would cause a commit on atime updates, which we don't bother doing. - * We handle synchronous inodes at the highest possible level. - */ -void ext4_dirty_inode(struct inode *inode) -{ - handle_t *current_handle = ext4_journal_current_handle(); - handle_t *handle; - - handle = ext4_journal_start(inode, 2); - if (IS_ERR(handle)) - goto out; - if (current_handle && - current_handle->h_transaction != handle->h_transaction) { - /* This task has a transaction open against a different fs */ - printk(KERN_EMERG "%s: transactions do not match!\n", - __FUNCTION__); - } else { - jbd_debug(5, "marking dirty. outer handle=%p\n", - current_handle); - ext4_mark_inode_dirty(handle, inode); - } - ext4_journal_stop(handle); -out: - return; -} - -#if 0 -/* - * Bind an inode's backing buffer_head into this transaction, to prevent - * it from being flushed to disk early. Unlike - * ext4_reserve_inode_write, this leaves behind no bh reference and - * returns no iloc structure, so the caller needs to repeat the iloc - * lookup to mark the inode dirty later. - */ -static int ext4_pin_inode(handle_t *handle, struct inode *inode) -{ - struct ext4_iloc iloc; - - int err = 0; - if (handle) { - err = ext4_get_inode_loc(inode, &iloc); - if (!err) { - BUFFER_TRACE(iloc.bh, "get_write_access"); - err = jbd2_journal_get_write_access(handle, iloc.bh); - if (!err) - err = ext4_journal_dirty_metadata(handle, - iloc.bh); - brelse(iloc.bh); - } - } - ext4_std_error(inode->i_sb, err); - return err; -} -#endif - -int ext4_change_inode_journal_flag(struct inode *inode, int val) -{ - journal_t *journal; - handle_t *handle; - int err; - - /* - * We have to be very careful here: changing a data block's - * journaling status dynamically is dangerous. If we write a - * data block to the journal, change the status and then delete - * that block, we risk forgetting to revoke the old log record - * from the journal and so a subsequent replay can corrupt data. - * So, first we make sure that the journal is empty and that - * nobody is changing anything. - */ - - journal = EXT4_JOURNAL(inode); - if (is_journal_aborted(journal) || IS_RDONLY(inode)) - return -EROFS; - - jbd2_journal_lock_updates(journal); - jbd2_journal_flush(journal); - - /* - * OK, there are no updates running now, and all cached data is - * synced to disk. We are now in a completely consistent state - * which doesn't have anything in the journal, and we know that - * no filesystem updates are running, so it is safe to modify - * the inode's in-core data-journaling state flag now. - */ - - if (val) - EXT4_I(inode)->i_flags |= EXT4_JOURNAL_DATA_FL; - else - EXT4_I(inode)->i_flags &= ~EXT4_JOURNAL_DATA_FL; - ext4_set_aops(inode); - - jbd2_journal_unlock_updates(journal); - - /* Finally we can mark the inode as dirty. */ - - handle = ext4_journal_start(inode, 1); - if (IS_ERR(handle)) - return PTR_ERR(handle); - - err = ext4_mark_inode_dirty(handle, inode); - handle->h_sync = 1; - ext4_journal_stop(handle); - ext4_std_error(inode->i_sb, err); - - return err; -} diff --git a/trunk/fs/ext4/ioctl.c b/trunk/fs/ext4/ioctl.c deleted file mode 100644 index 22a737c306c7..000000000000 --- a/trunk/fs/ext4/ioctl.c +++ /dev/null @@ -1,306 +0,0 @@ -/* - * linux/fs/ext4/ioctl.c - * - * Copyright (C) 1993, 1994, 1995 - * Remy Card (card@masi.ibp.fr) - * Laboratoire MASI - Institut Blaise Pascal - * Universite Pierre et Marie Curie (Paris VI) - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -int ext4_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, - unsigned long arg) -{ - struct ext4_inode_info *ei = EXT4_I(inode); - unsigned int flags; - unsigned short rsv_window_size; - - ext4_debug ("cmd = %u, arg = %lu\n", cmd, arg); - - switch (cmd) { - case EXT4_IOC_GETFLAGS: - flags = ei->i_flags & EXT4_FL_USER_VISIBLE; - return put_user(flags, (int __user *) arg); - case EXT4_IOC_SETFLAGS: { - handle_t *handle = NULL; - int err; - struct ext4_iloc iloc; - unsigned int oldflags; - unsigned int jflag; - - if (IS_RDONLY(inode)) - return -EROFS; - - if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER)) - return -EACCES; - - if (get_user(flags, (int __user *) arg)) - return -EFAULT; - - if (!S_ISDIR(inode->i_mode)) - flags &= ~EXT4_DIRSYNC_FL; - - mutex_lock(&inode->i_mutex); - oldflags = ei->i_flags; - - /* The JOURNAL_DATA flag is modifiable only by root */ - jflag = flags & EXT4_JOURNAL_DATA_FL; - - /* - * The IMMUTABLE and APPEND_ONLY flags can only be changed by - * the relevant capability. - * - * This test looks nicer. Thanks to Pauline Middelink - */ - if ((flags ^ oldflags) & (EXT4_APPEND_FL | EXT4_IMMUTABLE_FL)) { - if (!capable(CAP_LINUX_IMMUTABLE)) { - mutex_unlock(&inode->i_mutex); - return -EPERM; - } - } - - /* - * The JOURNAL_DATA flag can only be changed by - * the relevant capability. - */ - if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) { - if (!capable(CAP_SYS_RESOURCE)) { - mutex_unlock(&inode->i_mutex); - return -EPERM; - } - } - - - handle = ext4_journal_start(inode, 1); - if (IS_ERR(handle)) { - mutex_unlock(&inode->i_mutex); - return PTR_ERR(handle); - } - if (IS_SYNC(inode)) - handle->h_sync = 1; - err = ext4_reserve_inode_write(handle, inode, &iloc); - if (err) - goto flags_err; - - flags = flags & EXT4_FL_USER_MODIFIABLE; - flags |= oldflags & ~EXT4_FL_USER_MODIFIABLE; - ei->i_flags = flags; - - ext4_set_inode_flags(inode); - inode->i_ctime = CURRENT_TIME_SEC; - - err = ext4_mark_iloc_dirty(handle, inode, &iloc); -flags_err: - ext4_journal_stop(handle); - if (err) { - mutex_unlock(&inode->i_mutex); - return err; - } - - if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) - err = ext4_change_inode_journal_flag(inode, jflag); - mutex_unlock(&inode->i_mutex); - return err; - } - case EXT4_IOC_GETVERSION: - case EXT4_IOC_GETVERSION_OLD: - return put_user(inode->i_generation, (int __user *) arg); - case EXT4_IOC_SETVERSION: - case EXT4_IOC_SETVERSION_OLD: { - handle_t *handle; - struct ext4_iloc iloc; - __u32 generation; - int err; - - if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER)) - return -EPERM; - if (IS_RDONLY(inode)) - return -EROFS; - if (get_user(generation, (int __user *) arg)) - return -EFAULT; - - handle = ext4_journal_start(inode, 1); - if (IS_ERR(handle)) - return PTR_ERR(handle); - err = ext4_reserve_inode_write(handle, inode, &iloc); - if (err == 0) { - inode->i_ctime = CURRENT_TIME_SEC; - inode->i_generation = generation; - err = ext4_mark_iloc_dirty(handle, inode, &iloc); - } - ext4_journal_stop(handle); - return err; - } -#ifdef CONFIG_JBD_DEBUG - case EXT4_IOC_WAIT_FOR_READONLY: - /* - * This is racy - by the time we're woken up and running, - * the superblock could be released. And the module could - * have been unloaded. So sue me. - * - * Returns 1 if it slept, else zero. - */ - { - struct super_block *sb = inode->i_sb; - DECLARE_WAITQUEUE(wait, current); - int ret = 0; - - set_current_state(TASK_INTERRUPTIBLE); - add_wait_queue(&EXT4_SB(sb)->ro_wait_queue, &wait); - if (timer_pending(&EXT4_SB(sb)->turn_ro_timer)) { - schedule(); - ret = 1; - } - remove_wait_queue(&EXT4_SB(sb)->ro_wait_queue, &wait); - return ret; - } -#endif - case EXT4_IOC_GETRSVSZ: - if (test_opt(inode->i_sb, RESERVATION) - && S_ISREG(inode->i_mode) - && ei->i_block_alloc_info) { - rsv_window_size = ei->i_block_alloc_info->rsv_window_node.rsv_goal_size; - return put_user(rsv_window_size, (int __user *)arg); - } - return -ENOTTY; - case EXT4_IOC_SETRSVSZ: { - - if (!test_opt(inode->i_sb, RESERVATION) ||!S_ISREG(inode->i_mode)) - return -ENOTTY; - - if (IS_RDONLY(inode)) - return -EROFS; - - if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER)) - return -EACCES; - - if (get_user(rsv_window_size, (int __user *)arg)) - return -EFAULT; - - if (rsv_window_size > EXT4_MAX_RESERVE_BLOCKS) - rsv_window_size = EXT4_MAX_RESERVE_BLOCKS; - - /* - * need to allocate reservation structure for this inode - * before set the window size - */ - mutex_lock(&ei->truncate_mutex); - if (!ei->i_block_alloc_info) - ext4_init_block_alloc_info(inode); - - if (ei->i_block_alloc_info){ - struct ext4_reserve_window_node *rsv = &ei->i_block_alloc_info->rsv_window_node; - rsv->rsv_goal_size = rsv_window_size; - } - mutex_unlock(&ei->truncate_mutex); - return 0; - } - case EXT4_IOC_GROUP_EXTEND: { - ext4_fsblk_t n_blocks_count; - struct super_block *sb = inode->i_sb; - int err; - - if (!capable(CAP_SYS_RESOURCE)) - return -EPERM; - - if (IS_RDONLY(inode)) - return -EROFS; - - if (get_user(n_blocks_count, (__u32 __user *)arg)) - return -EFAULT; - - err = ext4_group_extend(sb, EXT4_SB(sb)->s_es, n_blocks_count); - jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); - jbd2_journal_flush(EXT4_SB(sb)->s_journal); - jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); - - return err; - } - case EXT4_IOC_GROUP_ADD: { - struct ext4_new_group_data input; - struct super_block *sb = inode->i_sb; - int err; - - if (!capable(CAP_SYS_RESOURCE)) - return -EPERM; - - if (IS_RDONLY(inode)) - return -EROFS; - - if (copy_from_user(&input, (struct ext4_new_group_input __user *)arg, - sizeof(input))) - return -EFAULT; - - err = ext4_group_add(sb, &input); - jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); - jbd2_journal_flush(EXT4_SB(sb)->s_journal); - jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); - - return err; - } - - default: - return -ENOTTY; - } -} - -#ifdef CONFIG_COMPAT -long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) -{ - struct inode *inode = file->f_dentry->d_inode; - int ret; - - /* These are just misnamed, they actually get/put from/to user an int */ - switch (cmd) { - case EXT4_IOC32_GETFLAGS: - cmd = EXT4_IOC_GETFLAGS; - break; - case EXT4_IOC32_SETFLAGS: - cmd = EXT4_IOC_SETFLAGS; - break; - case EXT4_IOC32_GETVERSION: - cmd = EXT4_IOC_GETVERSION; - break; - case EXT4_IOC32_SETVERSION: - cmd = EXT4_IOC_SETVERSION; - break; - case EXT4_IOC32_GROUP_EXTEND: - cmd = EXT4_IOC_GROUP_EXTEND; - break; - case EXT4_IOC32_GETVERSION_OLD: - cmd = EXT4_IOC_GETVERSION_OLD; - break; - case EXT4_IOC32_SETVERSION_OLD: - cmd = EXT4_IOC_SETVERSION_OLD; - break; -#ifdef CONFIG_JBD_DEBUG - case EXT4_IOC32_WAIT_FOR_READONLY: - cmd = EXT4_IOC_WAIT_FOR_READONLY; - break; -#endif - case EXT4_IOC32_GETRSVSZ: - cmd = EXT4_IOC_GETRSVSZ; - break; - case EXT4_IOC32_SETRSVSZ: - cmd = EXT4_IOC_SETRSVSZ; - break; - case EXT4_IOC_GROUP_ADD: - break; - default: - return -ENOIOCTLCMD; - } - lock_kernel(); - ret = ext4_ioctl(inode, file, cmd, (unsigned long) compat_ptr(arg)); - unlock_kernel(); - return ret; -} -#endif diff --git a/trunk/fs/ext4/namei.c b/trunk/fs/ext4/namei.c deleted file mode 100644 index 8b1bd03d20f5..000000000000 --- a/trunk/fs/ext4/namei.c +++ /dev/null @@ -1,2395 +0,0 @@ -/* - * linux/fs/ext4/namei.c - * - * Copyright (C) 1992, 1993, 1994, 1995 - * Remy Card (card@masi.ibp.fr) - * Laboratoire MASI - Institut Blaise Pascal - * Universite Pierre et Marie Curie (Paris VI) - * - * from - * - * linux/fs/minix/namei.c - * - * Copyright (C) 1991, 1992 Linus Torvalds - * - * Big-endian to little-endian byte-swapping/bitmaps by - * David S. Miller (davem@caip.rutgers.edu), 1995 - * Directory entry file type support and forward compatibility hooks - * for B-tree directories by Theodore Ts'o (tytso@mit.edu), 1998 - * Hash Tree Directory indexing (c) - * Daniel Phillips, 2001 - * Hash Tree Directory indexing porting - * Christopher Li, 2002 - * Hash Tree Directory indexing cleanup - * Theodore Ts'o, 2002 - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "namei.h" -#include "xattr.h" -#include "acl.h" - -/* - * define how far ahead to read directories while searching them. - */ -#define NAMEI_RA_CHUNKS 2 -#define NAMEI_RA_BLOCKS 4 -#define NAMEI_RA_SIZE (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS) -#define NAMEI_RA_INDEX(c,b) (((c) * NAMEI_RA_BLOCKS) + (b)) - -static struct buffer_head *ext4_append(handle_t *handle, - struct inode *inode, - u32 *block, int *err) -{ - struct buffer_head *bh; - - *block = inode->i_size >> inode->i_sb->s_blocksize_bits; - - if ((bh = ext4_bread(handle, inode, *block, 1, err))) { - inode->i_size += inode->i_sb->s_blocksize; - EXT4_I(inode)->i_disksize = inode->i_size; - ext4_journal_get_write_access(handle,bh); - } - return bh; -} - -#ifndef assert -#define assert(test) J_ASSERT(test) -#endif - -#ifndef swap -#define swap(x, y) do { typeof(x) z = x; x = y; y = z; } while (0) -#endif - -#ifdef DX_DEBUG -#define dxtrace(command) command -#else -#define dxtrace(command) -#endif - -struct fake_dirent -{ - __le32 inode; - __le16 rec_len; - u8 name_len; - u8 file_type; -}; - -struct dx_countlimit -{ - __le16 limit; - __le16 count; -}; - -struct dx_entry -{ - __le32 hash; - __le32 block; -}; - -/* - * dx_root_info is laid out so that if it should somehow get overlaid by a - * dirent the two low bits of the hash version will be zero. Therefore, the - * hash version mod 4 should never be 0. Sincerely, the paranoia department. - */ - -struct dx_root -{ - struct fake_dirent dot; - char dot_name[4]; - struct fake_dirent dotdot; - char dotdot_name[4]; - struct dx_root_info - { - __le32 reserved_zero; - u8 hash_version; - u8 info_length; /* 8 */ - u8 indirect_levels; - u8 unused_flags; - } - info; - struct dx_entry entries[0]; -}; - -struct dx_node -{ - struct fake_dirent fake; - struct dx_entry entries[0]; -}; - - -struct dx_frame -{ - struct buffer_head *bh; - struct dx_entry *entries; - struct dx_entry *at; -}; - -struct dx_map_entry -{ - u32 hash; - u32 offs; -}; - -#ifdef CONFIG_EXT4_INDEX -static inline unsigned dx_get_block (struct dx_entry *entry); -static void dx_set_block (struct dx_entry *entry, unsigned value); -static inline unsigned dx_get_hash (struct dx_entry *entry); -static void dx_set_hash (struct dx_entry *entry, unsigned value); -static unsigned dx_get_count (struct dx_entry *entries); -static unsigned dx_get_limit (struct dx_entry *entries); -static void dx_set_count (struct dx_entry *entries, unsigned value); -static void dx_set_limit (struct dx_entry *entries, unsigned value); -static unsigned dx_root_limit (struct inode *dir, unsigned infosize); -static unsigned dx_node_limit (struct inode *dir); -static struct dx_frame *dx_probe(struct dentry *dentry, - struct inode *dir, - struct dx_hash_info *hinfo, - struct dx_frame *frame, - int *err); -static void dx_release (struct dx_frame *frames); -static int dx_make_map (struct ext4_dir_entry_2 *de, int size, - struct dx_hash_info *hinfo, struct dx_map_entry map[]); -static void dx_sort_map(struct dx_map_entry *map, unsigned count); -static struct ext4_dir_entry_2 *dx_move_dirents (char *from, char *to, - struct dx_map_entry *offsets, int count); -static struct ext4_dir_entry_2* dx_pack_dirents (char *base, int size); -static void dx_insert_block (struct dx_frame *frame, u32 hash, u32 block); -static int ext4_htree_next_block(struct inode *dir, __u32 hash, - struct dx_frame *frame, - struct dx_frame *frames, - __u32 *start_hash); -static struct buffer_head * ext4_dx_find_entry(struct dentry *dentry, - struct ext4_dir_entry_2 **res_dir, int *err); -static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, - struct inode *inode); - -/* - * Future: use high four bits of block for coalesce-on-delete flags - * Mask them off for now. - */ - -static inline unsigned dx_get_block (struct dx_entry *entry) -{ - return le32_to_cpu(entry->block) & 0x00ffffff; -} - -static inline void dx_set_block (struct dx_entry *entry, unsigned value) -{ - entry->block = cpu_to_le32(value); -} - -static inline unsigned dx_get_hash (struct dx_entry *entry) -{ - return le32_to_cpu(entry->hash); -} - -static inline void dx_set_hash (struct dx_entry *entry, unsigned value) -{ - entry->hash = cpu_to_le32(value); -} - -static inline unsigned dx_get_count (struct dx_entry *entries) -{ - return le16_to_cpu(((struct dx_countlimit *) entries)->count); -} - -static inline unsigned dx_get_limit (struct dx_entry *entries) -{ - return le16_to_cpu(((struct dx_countlimit *) entries)->limit); -} - -static inline void dx_set_count (struct dx_entry *entries, unsigned value) -{ - ((struct dx_countlimit *) entries)->count = cpu_to_le16(value); -} - -static inline void dx_set_limit (struct dx_entry *entries, unsigned value) -{ - ((struct dx_countlimit *) entries)->limit = cpu_to_le16(value); -} - -static inline unsigned dx_root_limit (struct inode *dir, unsigned infosize) -{ - unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(1) - - EXT4_DIR_REC_LEN(2) - infosize; - return 0? 20: entry_space / sizeof(struct dx_entry); -} - -static inline unsigned dx_node_limit (struct inode *dir) -{ - unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(0); - return 0? 22: entry_space / sizeof(struct dx_entry); -} - -/* - * Debug - */ -#ifdef DX_DEBUG -static void dx_show_index (char * label, struct dx_entry *entries) -{ - int i, n = dx_get_count (entries); - printk("%s index ", label); - for (i = 0; i < n; i++) { - printk("%x->%u ", i? dx_get_hash(entries + i) : - 0, dx_get_block(entries + i)); - } - printk("\n"); -} - -struct stats -{ - unsigned names; - unsigned space; - unsigned bcount; -}; - -static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext4_dir_entry_2 *de, - int size, int show_names) -{ - unsigned names = 0, space = 0; - char *base = (char *) de; - struct dx_hash_info h = *hinfo; - - printk("names: "); - while ((char *) de < base + size) - { - if (de->inode) - { - if (show_names) - { - int len = de->name_len; - char *name = de->name; - while (len--) printk("%c", *name++); - ext4fs_dirhash(de->name, de->name_len, &h); - printk(":%x.%u ", h.hash, - ((char *) de - base)); - } - space += EXT4_DIR_REC_LEN(de->name_len); - names++; - } - de = (struct ext4_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len)); - } - printk("(%i)\n", names); - return (struct stats) { names, space, 1 }; -} - -struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir, - struct dx_entry *entries, int levels) -{ - unsigned blocksize = dir->i_sb->s_blocksize; - unsigned count = dx_get_count (entries), names = 0, space = 0, i; - unsigned bcount = 0; - struct buffer_head *bh; - int err; - printk("%i indexed blocks...\n", count); - for (i = 0; i < count; i++, entries++) - { - u32 block = dx_get_block(entries), hash = i? dx_get_hash(entries): 0; - u32 range = i < count - 1? (dx_get_hash(entries + 1) - hash): ~hash; - struct stats stats; - printk("%s%3u:%03u hash %8x/%8x ",levels?"":" ", i, block, hash, range); - if (!(bh = ext4_bread (NULL,dir, block, 0,&err))) continue; - stats = levels? - dx_show_entries(hinfo, dir, ((struct dx_node *) bh->b_data)->entries, levels - 1): - dx_show_leaf(hinfo, (struct ext4_dir_entry_2 *) bh->b_data, blocksize, 0); - names += stats.names; - space += stats.space; - bcount += stats.bcount; - brelse (bh); - } - if (bcount) - printk("%snames %u, fullness %u (%u%%)\n", levels?"":" ", - names, space/bcount,(space/bcount)*100/blocksize); - return (struct stats) { names, space, bcount}; -} -#endif /* DX_DEBUG */ - -/* - * Probe for a directory leaf block to search. - * - * dx_probe can return ERR_BAD_DX_DIR, which means there was a format - * error in the directory index, and the caller should fall back to - * searching the directory normally. The callers of dx_probe **MUST** - * check for this error code, and make sure it never gets reflected - * back to userspace. - */ -static struct dx_frame * -dx_probe(struct dentry *dentry, struct inode *dir, - struct dx_hash_info *hinfo, struct dx_frame *frame_in, int *err) -{ - unsigned count, indirect; - struct dx_entry *at, *entries, *p, *q, *m; - struct dx_root *root; - struct buffer_head *bh; - struct dx_frame *frame = frame_in; - u32 hash; - - frame->bh = NULL; - if (dentry) - dir = dentry->d_parent->d_inode; - if (!(bh = ext4_bread (NULL,dir, 0, 0, err))) - goto fail; - root = (struct dx_root *) bh->b_data; - if (root->info.hash_version != DX_HASH_TEA && - root->info.hash_version != DX_HASH_HALF_MD4 && - root->info.hash_version != DX_HASH_LEGACY) { - ext4_warning(dir->i_sb, __FUNCTION__, - "Unrecognised inode hash code %d", - root->info.hash_version); - brelse(bh); - *err = ERR_BAD_DX_DIR; - goto fail; - } - hinfo->hash_version = root->info.hash_version; - hinfo->seed = EXT4_SB(dir->i_sb)->s_hash_seed; - if (dentry) - ext4fs_dirhash(dentry->d_name.name, dentry->d_name.len, hinfo); - hash = hinfo->hash; - - if (root->info.unused_flags & 1) { - ext4_warning(dir->i_sb, __FUNCTION__, - "Unimplemented inode hash flags: %#06x", - root->info.unused_flags); - brelse(bh); - *err = ERR_BAD_DX_DIR; - goto fail; - } - - if ((indirect = root->info.indirect_levels) > 1) { - ext4_warning(dir->i_sb, __FUNCTION__, - "Unimplemented inode hash depth: %#06x", - root->info.indirect_levels); - brelse(bh); - *err = ERR_BAD_DX_DIR; - goto fail; - } - - entries = (struct dx_entry *) (((char *)&root->info) + - root->info.info_length); - assert(dx_get_limit(entries) == dx_root_limit(dir, - root->info.info_length)); - dxtrace (printk("Look up %x", hash)); - while (1) - { - count = dx_get_count(entries); - assert (count && count <= dx_get_limit(entries)); - p = entries + 1; - q = entries + count - 1; - while (p <= q) - { - m = p + (q - p)/2; - dxtrace(printk(".")); - if (dx_get_hash(m) > hash) - q = m - 1; - else - p = m + 1; - } - - if (0) // linear search cross check - { - unsigned n = count - 1; - at = entries; - while (n--) - { - dxtrace(printk(",")); - if (dx_get_hash(++at) > hash) - { - at--; - break; - } - } - assert (at == p - 1); - } - - at = p - 1; - dxtrace(printk(" %x->%u\n", at == entries? 0: dx_get_hash(at), dx_get_block(at))); - frame->bh = bh; - frame->entries = entries; - frame->at = at; - if (!indirect--) return frame; - if (!(bh = ext4_bread (NULL,dir, dx_get_block(at), 0, err))) - goto fail2; - at = entries = ((struct dx_node *) bh->b_data)->entries; - assert (dx_get_limit(entries) == dx_node_limit (dir)); - frame++; - } -fail2: - while (frame >= frame_in) { - brelse(frame->bh); - frame--; - } -fail: - return NULL; -} - -static void dx_release (struct dx_frame *frames) -{ - if (frames[0].bh == NULL) - return; - - if (((struct dx_root *) frames[0].bh->b_data)->info.indirect_levels) - brelse(frames[1].bh); - brelse(frames[0].bh); -} - -/* - * This function increments the frame pointer to search the next leaf - * block, and reads in the necessary intervening nodes if the search - * should be necessary. Whether or not the search is necessary is - * controlled by the hash parameter. If the hash value is even, then - * the search is only continued if the next block starts with that - * hash value. This is used if we are searching for a specific file. - * - * If the hash value is HASH_NB_ALWAYS, then always go to the next block. - * - * This function returns 1 if the caller should continue to search, - * or 0 if it should not. If there is an error reading one of the - * index blocks, it will a negative error code. - * - * If start_hash is non-null, it will be filled in with the starting - * hash of the next page. - */ -static int ext4_htree_next_block(struct inode *dir, __u32 hash, - struct dx_frame *frame, - struct dx_frame *frames, - __u32 *start_hash) -{ - struct dx_frame *p; - struct buffer_head *bh; - int err, num_frames = 0; - __u32 bhash; - - p = frame; - /* - * Find the next leaf page by incrementing the frame pointer. - * If we run out of entries in the interior node, loop around and - * increment pointer in the parent node. When we break out of - * this loop, num_frames indicates the number of interior - * nodes need to be read. - */ - while (1) { - if (++(p->at) < p->entries + dx_get_count(p->entries)) - break; - if (p == frames) - return 0; - num_frames++; - p--; - } - - /* - * If the hash is 1, then continue only if the next page has a - * continuation hash of any value. This is used for readdir - * handling. Otherwise, check to see if the hash matches the - * desired contiuation hash. If it doesn't, return since - * there's no point to read in the successive index pages. - */ - bhash = dx_get_hash(p->at); - if (start_hash) - *start_hash = bhash; - if ((hash & 1) == 0) { - if ((bhash & ~1) != hash) - return 0; - } - /* - * If the hash is HASH_NB_ALWAYS, we always go to the next - * block so no check is necessary - */ - while (num_frames--) { - if (!(bh = ext4_bread(NULL, dir, dx_get_block(p->at), - 0, &err))) - return err; /* Failure */ - p++; - brelse (p->bh); - p->bh = bh; - p->at = p->entries = ((struct dx_node *) bh->b_data)->entries; - } - return 1; -} - - -/* - * p is at least 6 bytes before the end of page - */ -static inline struct ext4_dir_entry_2 *ext4_next_entry(struct ext4_dir_entry_2 *p) -{ - return (struct ext4_dir_entry_2 *)((char*)p + le16_to_cpu(p->rec_len)); -} - -/* - * This function fills a red-black tree with information from a - * directory block. It returns the number directory entries loaded - * into the tree. If there is an error it is returned in err. - */ -static int htree_dirblock_to_tree(struct file *dir_file, - struct inode *dir, int block, - struct dx_hash_info *hinfo, - __u32 start_hash, __u32 start_minor_hash) -{ - struct buffer_head *bh; - struct ext4_dir_entry_2 *de, *top; - int err, count = 0; - - dxtrace(printk("In htree dirblock_to_tree: block %d\n", block)); - if (!(bh = ext4_bread (NULL, dir, block, 0, &err))) - return err; - - de = (struct ext4_dir_entry_2 *) bh->b_data; - top = (struct ext4_dir_entry_2 *) ((char *) de + - dir->i_sb->s_blocksize - - EXT4_DIR_REC_LEN(0)); - for (; de < top; de = ext4_next_entry(de)) { - ext4fs_dirhash(de->name, de->name_len, hinfo); - if ((hinfo->hash < start_hash) || - ((hinfo->hash == start_hash) && - (hinfo->minor_hash < start_minor_hash))) - continue; - if (de->inode == 0) - continue; - if ((err = ext4_htree_store_dirent(dir_file, - hinfo->hash, hinfo->minor_hash, de)) != 0) { - brelse(bh); - return err; - } - count++; - } - brelse(bh); - return count; -} - - -/* - * This function fills a red-black tree with information from a - * directory. We start scanning the directory in hash order, starting - * at start_hash and start_minor_hash. - * - * This function returns the number of entries inserted into the tree, - * or a negative error code. - */ -int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash, - __u32 start_minor_hash, __u32 *next_hash) -{ - struct dx_hash_info hinfo; - struct ext4_dir_entry_2 *de; - struct dx_frame frames[2], *frame; - struct inode *dir; - int block, err; - int count = 0; - int ret; - __u32 hashval; - - dxtrace(printk("In htree_fill_tree, start hash: %x:%x\n", start_hash, - start_minor_hash)); - dir = dir_file->f_dentry->d_inode; - if (!(EXT4_I(dir)->i_flags & EXT4_INDEX_FL)) { - hinfo.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version; - hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed; - count = htree_dirblock_to_tree(dir_file, dir, 0, &hinfo, - start_hash, start_minor_hash); - *next_hash = ~0; - return count; - } - hinfo.hash = start_hash; - hinfo.minor_hash = 0; - frame = dx_probe(NULL, dir_file->f_dentry->d_inode, &hinfo, frames, &err); - if (!frame) - return err; - - /* Add '.' and '..' from the htree header */ - if (!start_hash && !start_minor_hash) { - de = (struct ext4_dir_entry_2 *) frames[0].bh->b_data; - if ((err = ext4_htree_store_dirent(dir_file, 0, 0, de)) != 0) - goto errout; - count++; - } - if (start_hash < 2 || (start_hash ==2 && start_minor_hash==0)) { - de = (struct ext4_dir_entry_2 *) frames[0].bh->b_data; - de = ext4_next_entry(de); - if ((err = ext4_htree_store_dirent(dir_file, 2, 0, de)) != 0) - goto errout; - count++; - } - - while (1) { - block = dx_get_block(frame->at); - ret = htree_dirblock_to_tree(dir_file, dir, block, &hinfo, - start_hash, start_minor_hash); - if (ret < 0) { - err = ret; - goto errout; - } - count += ret; - hashval = ~0; - ret = ext4_htree_next_block(dir, HASH_NB_ALWAYS, - frame, frames, &hashval); - *next_hash = hashval; - if (ret < 0) { - err = ret; - goto errout; - } - /* - * Stop if: (a) there are no more entries, or - * (b) we have inserted at least one entry and the - * next hash value is not a continuation - */ - if ((ret == 0) || - (count && ((hashval & 1) == 0))) - break; - } - dx_release(frames); - dxtrace(printk("Fill tree: returned %d entries, next hash: %x\n", - count, *next_hash)); - return count; -errout: - dx_release(frames); - return (err); -} - - -/* - * Directory block splitting, compacting - */ - -static int dx_make_map (struct ext4_dir_entry_2 *de, int size, - struct dx_hash_info *hinfo, struct dx_map_entry *map_tail) -{ - int count = 0; - char *base = (char *) de; - struct dx_hash_info h = *hinfo; - - while ((char *) de < base + size) - { - if (de->name_len && de->inode) { - ext4fs_dirhash(de->name, de->name_len, &h); - map_tail--; - map_tail->hash = h.hash; - map_tail->offs = (u32) ((char *) de - base); - count++; - cond_resched(); - } - /* XXX: do we need to check rec_len == 0 case? -Chris */ - de = (struct ext4_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len)); - } - return count; -} - -static void dx_sort_map (struct dx_map_entry *map, unsigned count) -{ - struct dx_map_entry *p, *q, *top = map + count - 1; - int more; - /* Combsort until bubble sort doesn't suck */ - while (count > 2) { - count = count*10/13; - if (count - 9 < 2) /* 9, 10 -> 11 */ - count = 11; - for (p = top, q = p - count; q >= map; p--, q--) - if (p->hash < q->hash) - swap(*p, *q); - } - /* Garden variety bubble sort */ - do { - more = 0; - q = top; - while (q-- > map) { - if (q[1].hash >= q[0].hash) - continue; - swap(*(q+1), *q); - more = 1; - } - } while(more); -} - -static void dx_insert_block(struct dx_frame *frame, u32 hash, u32 block) -{ - struct dx_entry *entries = frame->entries; - struct dx_entry *old = frame->at, *new = old + 1; - int count = dx_get_count(entries); - - assert(count < dx_get_limit(entries)); - assert(old < entries + count); - memmove(new + 1, new, (char *)(entries + count) - (char *)(new)); - dx_set_hash(new, hash); - dx_set_block(new, block); - dx_set_count(entries, count + 1); -} -#endif - - -static void ext4_update_dx_flag(struct inode *inode) -{ - if (!EXT4_HAS_COMPAT_FEATURE(inode->i_sb, - EXT4_FEATURE_COMPAT_DIR_INDEX)) - EXT4_I(inode)->i_flags &= ~EXT4_INDEX_FL; -} - -/* - * NOTE! unlike strncmp, ext4_match returns 1 for success, 0 for failure. - * - * `len <= EXT4_NAME_LEN' is guaranteed by caller. - * `de != NULL' is guaranteed by caller. - */ -static inline int ext4_match (int len, const char * const name, - struct ext4_dir_entry_2 * de) -{ - if (len != de->name_len) - return 0; - if (!de->inode) - return 0; - return !memcmp(name, de->name, len); -} - -/* - * Returns 0 if not found, -1 on failure, and 1 on success - */ -static inline int search_dirblock(struct buffer_head * bh, - struct inode *dir, - struct dentry *dentry, - unsigned long offset, - struct ext4_dir_entry_2 ** res_dir) -{ - struct ext4_dir_entry_2 * de; - char * dlimit; - int de_len; - const char *name = dentry->d_name.name; - int namelen = dentry->d_name.len; - - de = (struct ext4_dir_entry_2 *) bh->b_data; - dlimit = bh->b_data + dir->i_sb->s_blocksize; - while ((char *) de < dlimit) { - /* this code is executed quadratically often */ - /* do minimal checking `by hand' */ - - if ((char *) de + namelen <= dlimit && - ext4_match (namelen, name, de)) { - /* found a match - just to be sure, do a full check */ - if (!ext4_check_dir_entry("ext4_find_entry", - dir, de, bh, offset)) - return -1; - *res_dir = de; - return 1; - } - /* prevent looping on a bad block */ - de_len = le16_to_cpu(de->rec_len); - if (de_len <= 0) - return -1; - offset += de_len; - de = (struct ext4_dir_entry_2 *) ((char *) de + de_len); - } - return 0; -} - - -/* - * ext4_find_entry() - * - * finds an entry in the specified directory with the wanted name. It - * returns the cache buffer in which the entry was found, and the entry - * itself (as a parameter - res_dir). It does NOT read the inode of the - * entry - you'll have to do that yourself if you want to. - * - * The returned buffer_head has ->b_count elevated. The caller is expected - * to brelse() it when appropriate. - */ -static struct buffer_head * ext4_find_entry (struct dentry *dentry, - struct ext4_dir_entry_2 ** res_dir) -{ - struct super_block * sb; - struct buffer_head * bh_use[NAMEI_RA_SIZE]; - struct buffer_head * bh, *ret = NULL; - unsigned long start, block, b; - int ra_max = 0; /* Number of bh's in the readahead - buffer, bh_use[] */ - int ra_ptr = 0; /* Current index into readahead - buffer */ - int num = 0; - int nblocks, i, err; - struct inode *dir = dentry->d_parent->d_inode; - int namelen; - const u8 *name; - unsigned blocksize; - - *res_dir = NULL; - sb = dir->i_sb; - blocksize = sb->s_blocksize; - namelen = dentry->d_name.len; - name = dentry->d_name.name; - if (namelen > EXT4_NAME_LEN) - return NULL; -#ifdef CONFIG_EXT4_INDEX - if (is_dx(dir)) { - bh = ext4_dx_find_entry(dentry, res_dir, &err); - /* - * On success, or if the error was file not found, - * return. Otherwise, fall back to doing a search the - * old fashioned way. - */ - if (bh || (err != ERR_BAD_DX_DIR)) - return bh; - dxtrace(printk("ext4_find_entry: dx failed, falling back\n")); - } -#endif - nblocks = dir->i_size >> EXT4_BLOCK_SIZE_BITS(sb); - start = EXT4_I(dir)->i_dir_start_lookup; - if (start >= nblocks) - start = 0; - block = start; -restart: - do { - /* - * We deal with the read-ahead logic here. - */ - if (ra_ptr >= ra_max) { - /* Refill the readahead buffer */ - ra_ptr = 0; - b = block; - for (ra_max = 0; ra_max < NAMEI_RA_SIZE; ra_max++) { - /* - * Terminate if we reach the end of the - * directory and must wrap, or if our - * search has finished at this block. - */ - if (b >= nblocks || (num && block == start)) { - bh_use[ra_max] = NULL; - break; - } - num++; - bh = ext4_getblk(NULL, dir, b++, 0, &err); - bh_use[ra_max] = bh; - if (bh) - ll_rw_block(READ_META, 1, &bh); - } - } - if ((bh = bh_use[ra_ptr++]) == NULL) - goto next; - wait_on_buffer(bh); - if (!buffer_uptodate(bh)) { - /* read error, skip block & hope for the best */ - ext4_error(sb, __FUNCTION__, "reading directory #%lu " - "offset %lu", dir->i_ino, block); - brelse(bh); - goto next; - } - i = search_dirblock(bh, dir, dentry, - block << EXT4_BLOCK_SIZE_BITS(sb), res_dir); - if (i == 1) { - EXT4_I(dir)->i_dir_start_lookup = block; - ret = bh; - goto cleanup_and_exit; - } else { - brelse(bh); - if (i < 0) - goto cleanup_and_exit; - } - next: - if (++block >= nblocks) - block = 0; - } while (block != start); - - /* - * If the directory has grown while we were searching, then - * search the last part of the directory before giving up. - */ - block = nblocks; - nblocks = dir->i_size >> EXT4_BLOCK_SIZE_BITS(sb); - if (block < nblocks) { - start = 0; - goto restart; - } - -cleanup_and_exit: - /* Clean up the read-ahead blocks */ - for (; ra_ptr < ra_max; ra_ptr++) - brelse (bh_use[ra_ptr]); - return ret; -} - -#ifdef CONFIG_EXT4_INDEX -static struct buffer_head * ext4_dx_find_entry(struct dentry *dentry, - struct ext4_dir_entry_2 **res_dir, int *err) -{ - struct super_block * sb; - struct dx_hash_info hinfo; - u32 hash; - struct dx_frame frames[2], *frame; - struct ext4_dir_entry_2 *de, *top; - struct buffer_head *bh; - unsigned long block; - int retval; - int namelen = dentry->d_name.len; - const u8 *name = dentry->d_name.name; - struct inode *dir = dentry->d_parent->d_inode; - - sb = dir->i_sb; - /* NFS may look up ".." - look at dx_root directory block */ - if (namelen > 2 || name[0] != '.'||(name[1] != '.' && name[1] != '\0')){ - if (!(frame = dx_probe(dentry, NULL, &hinfo, frames, err))) - return NULL; - } else { - frame = frames; - frame->bh = NULL; /* for dx_release() */ - frame->at = (struct dx_entry *)frames; /* hack for zero entry*/ - dx_set_block(frame->at, 0); /* dx_root block is 0 */ - } - hash = hinfo.hash; - do { - block = dx_get_block(frame->at); - if (!(bh = ext4_bread (NULL,dir, block, 0, err))) - goto errout; - de = (struct ext4_dir_entry_2 *) bh->b_data; - top = (struct ext4_dir_entry_2 *) ((char *) de + sb->s_blocksize - - EXT4_DIR_REC_LEN(0)); - for (; de < top; de = ext4_next_entry(de)) - if (ext4_match (namelen, name, de)) { - if (!ext4_check_dir_entry("ext4_find_entry", - dir, de, bh, - (block<b_data))) { - brelse (bh); - goto errout; - } - *res_dir = de; - dx_release (frames); - return bh; - } - brelse (bh); - /* Check to see if we should continue to search */ - retval = ext4_htree_next_block(dir, hash, frame, - frames, NULL); - if (retval < 0) { - ext4_warning(sb, __FUNCTION__, - "error reading index page in directory #%lu", - dir->i_ino); - *err = retval; - goto errout; - } - } while (retval == 1); - - *err = -ENOENT; -errout: - dxtrace(printk("%s not found\n", name)); - dx_release (frames); - return NULL; -} -#endif - -static struct dentry *ext4_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd) -{ - struct inode * inode; - struct ext4_dir_entry_2 * de; - struct buffer_head * bh; - - if (dentry->d_name.len > EXT4_NAME_LEN) - return ERR_PTR(-ENAMETOOLONG); - - bh = ext4_find_entry(dentry, &de); - inode = NULL; - if (bh) { - unsigned long ino = le32_to_cpu(de->inode); - brelse (bh); - if (!ext4_valid_inum(dir->i_sb, ino)) { - ext4_error(dir->i_sb, "ext4_lookup", - "bad inode number: %lu", ino); - inode = NULL; - } else - inode = iget(dir->i_sb, ino); - - if (!inode) - return ERR_PTR(-EACCES); - } - return d_splice_alias(inode, dentry); -} - - -struct dentry *ext4_get_parent(struct dentry *child) -{ - unsigned long ino; - struct dentry *parent; - struct inode *inode; - struct dentry dotdot; - struct ext4_dir_entry_2 * de; - struct buffer_head *bh; - - dotdot.d_name.name = ".."; - dotdot.d_name.len = 2; - dotdot.d_parent = child; /* confusing, isn't it! */ - - bh = ext4_find_entry(&dotdot, &de); - inode = NULL; - if (!bh) - return ERR_PTR(-ENOENT); - ino = le32_to_cpu(de->inode); - brelse(bh); - - if (!ext4_valid_inum(child->d_inode->i_sb, ino)) { - ext4_error(child->d_inode->i_sb, "ext4_get_parent", - "bad inode number: %lu", ino); - inode = NULL; - } else - inode = iget(child->d_inode->i_sb, ino); - - if (!inode) - return ERR_PTR(-EACCES); - - parent = d_alloc_anon(inode); - if (!parent) { - iput(inode); - parent = ERR_PTR(-ENOMEM); - } - return parent; -} - -#define S_SHIFT 12 -static unsigned char ext4_type_by_mode[S_IFMT >> S_SHIFT] = { - [S_IFREG >> S_SHIFT] = EXT4_FT_REG_FILE, - [S_IFDIR >> S_SHIFT] = EXT4_FT_DIR, - [S_IFCHR >> S_SHIFT] = EXT4_FT_CHRDEV, - [S_IFBLK >> S_SHIFT] = EXT4_FT_BLKDEV, - [S_IFIFO >> S_SHIFT] = EXT4_FT_FIFO, - [S_IFSOCK >> S_SHIFT] = EXT4_FT_SOCK, - [S_IFLNK >> S_SHIFT] = EXT4_FT_SYMLINK, -}; - -static inline void ext4_set_de_type(struct super_block *sb, - struct ext4_dir_entry_2 *de, - umode_t mode) { - if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FILETYPE)) - de->file_type = ext4_type_by_mode[(mode & S_IFMT)>>S_SHIFT]; -} - -#ifdef CONFIG_EXT4_INDEX -static struct ext4_dir_entry_2 * -dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count) -{ - unsigned rec_len = 0; - - while (count--) { - struct ext4_dir_entry_2 *de = (struct ext4_dir_entry_2 *) (from + map->offs); - rec_len = EXT4_DIR_REC_LEN(de->name_len); - memcpy (to, de, rec_len); - ((struct ext4_dir_entry_2 *) to)->rec_len = - cpu_to_le16(rec_len); - de->inode = 0; - map++; - to += rec_len; - } - return (struct ext4_dir_entry_2 *) (to - rec_len); -} - -static struct ext4_dir_entry_2* dx_pack_dirents(char *base, int size) -{ - struct ext4_dir_entry_2 *next, *to, *prev, *de = (struct ext4_dir_entry_2 *) base; - unsigned rec_len = 0; - - prev = to = de; - while ((char*)de < base + size) { - next = (struct ext4_dir_entry_2 *) ((char *) de + - le16_to_cpu(de->rec_len)); - if (de->inode && de->name_len) { - rec_len = EXT4_DIR_REC_LEN(de->name_len); - if (de > to) - memmove(to, de, rec_len); - to->rec_len = cpu_to_le16(rec_len); - prev = to; - to = (struct ext4_dir_entry_2 *) (((char *) to) + rec_len); - } - de = next; - } - return prev; -} - -static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, - struct buffer_head **bh,struct dx_frame *frame, - struct dx_hash_info *hinfo, int *error) -{ - unsigned blocksize = dir->i_sb->s_blocksize; - unsigned count, continued; - struct buffer_head *bh2; - u32 newblock; - u32 hash2; - struct dx_map_entry *map; - char *data1 = (*bh)->b_data, *data2; - unsigned split; - struct ext4_dir_entry_2 *de = NULL, *de2; - int err; - - bh2 = ext4_append (handle, dir, &newblock, error); - if (!(bh2)) { - brelse(*bh); - *bh = NULL; - goto errout; - } - - BUFFER_TRACE(*bh, "get_write_access"); - err = ext4_journal_get_write_access(handle, *bh); - if (err) { - journal_error: - brelse(*bh); - brelse(bh2); - *bh = NULL; - ext4_std_error(dir->i_sb, err); - goto errout; - } - BUFFER_TRACE(frame->bh, "get_write_access"); - err = ext4_journal_get_write_access(handle, frame->bh); - if (err) - goto journal_error; - - data2 = bh2->b_data; - - /* create map in the end of data2 block */ - map = (struct dx_map_entry *) (data2 + blocksize); - count = dx_make_map ((struct ext4_dir_entry_2 *) data1, - blocksize, hinfo, map); - map -= count; - split = count/2; // need to adjust to actual middle - dx_sort_map (map, count); - hash2 = map[split].hash; - continued = hash2 == map[split - 1].hash; - dxtrace(printk("Split block %i at %x, %i/%i\n", - dx_get_block(frame->at), hash2, split, count-split)); - - /* Fancy dance to stay within two buffers */ - de2 = dx_move_dirents(data1, data2, map + split, count - split); - de = dx_pack_dirents(data1,blocksize); - de->rec_len = cpu_to_le16(data1 + blocksize - (char *) de); - de2->rec_len = cpu_to_le16(data2 + blocksize - (char *) de2); - dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data1, blocksize, 1)); - dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data2, blocksize, 1)); - - /* Which block gets the new entry? */ - if (hinfo->hash >= hash2) - { - swap(*bh, bh2); - de = de2; - } - dx_insert_block (frame, hash2 + continued, newblock); - err = ext4_journal_dirty_metadata (handle, bh2); - if (err) - goto journal_error; - err = ext4_journal_dirty_metadata (handle, frame->bh); - if (err) - goto journal_error; - brelse (bh2); - dxtrace(dx_show_index ("frame", frame->entries)); -errout: - return de; -} -#endif - - -/* - * Add a new entry into a directory (leaf) block. If de is non-NULL, - * it points to a directory entry which is guaranteed to be large - * enough for new directory entry. If de is NULL, then - * add_dirent_to_buf will attempt search the directory block for - * space. It will return -ENOSPC if no space is available, and -EIO - * and -EEXIST if directory entry already exists. - * - * NOTE! bh is NOT released in the case where ENOSPC is returned. In - * all other cases bh is released. - */ -static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, - struct inode *inode, struct ext4_dir_entry_2 *de, - struct buffer_head * bh) -{ - struct inode *dir = dentry->d_parent->d_inode; - const char *name = dentry->d_name.name; - int namelen = dentry->d_name.len; - unsigned long offset = 0; - unsigned short reclen; - int nlen, rlen, err; - char *top; - - reclen = EXT4_DIR_REC_LEN(namelen); - if (!de) { - de = (struct ext4_dir_entry_2 *)bh->b_data; - top = bh->b_data + dir->i_sb->s_blocksize - reclen; - while ((char *) de <= top) { - if (!ext4_check_dir_entry("ext4_add_entry", dir, de, - bh, offset)) { - brelse (bh); - return -EIO; - } - if (ext4_match (namelen, name, de)) { - brelse (bh); - return -EEXIST; - } - nlen = EXT4_DIR_REC_LEN(de->name_len); - rlen = le16_to_cpu(de->rec_len); - if ((de->inode? rlen - nlen: rlen) >= reclen) - break; - de = (struct ext4_dir_entry_2 *)((char *)de + rlen); - offset += rlen; - } - if ((char *) de > top) - return -ENOSPC; - } - BUFFER_TRACE(bh, "get_write_access"); - err = ext4_journal_get_write_access(handle, bh); - if (err) { - ext4_std_error(dir->i_sb, err); - brelse(bh); - return err; - } - - /* By now the buffer is marked for journaling */ - nlen = EXT4_DIR_REC_LEN(de->name_len); - rlen = le16_to_cpu(de->rec_len); - if (de->inode) { - struct ext4_dir_entry_2 *de1 = (struct ext4_dir_entry_2 *)((char *)de + nlen); - de1->rec_len = cpu_to_le16(rlen - nlen); - de->rec_len = cpu_to_le16(nlen); - de = de1; - } - de->file_type = EXT4_FT_UNKNOWN; - if (inode) { - de->inode = cpu_to_le32(inode->i_ino); - ext4_set_de_type(dir->i_sb, de, inode->i_mode); - } else - de->inode = 0; - de->name_len = namelen; - memcpy (de->name, name, namelen); - /* - * XXX shouldn't update any times until successful - * completion of syscall, but too many callers depend - * on this. - * - * XXX similarly, too many callers depend on - * ext4_new_inode() setting the times, but error - * recovery deletes the inode, so the worst that can - * happen is that the times are slightly out of date - * and/or different from the directory change time. - */ - dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC; - ext4_update_dx_flag(dir); - dir->i_version++; - ext4_mark_inode_dirty(handle, dir); - BUFFER_TRACE(bh, "call ext4_journal_dirty_metadata"); - err = ext4_journal_dirty_metadata(handle, bh); - if (err) - ext4_std_error(dir->i_sb, err); - brelse(bh); - return 0; -} - -#ifdef CONFIG_EXT4_INDEX -/* - * This converts a one block unindexed directory to a 3 block indexed - * directory, and adds the dentry to the indexed directory. - */ -static int make_indexed_dir(handle_t *handle, struct dentry *dentry, - struct inode *inode, struct buffer_head *bh) -{ - struct inode *dir = dentry->d_parent->d_inode; - const char *name = dentry->d_name.name; - int namelen = dentry->d_name.len; - struct buffer_head *bh2; - struct dx_root *root; - struct dx_frame frames[2], *frame; - struct dx_entry *entries; - struct ext4_dir_entry_2 *de, *de2; - char *data1, *top; - unsigned len; - int retval; - unsigned blocksize; - struct dx_hash_info hinfo; - u32 block; - struct fake_dirent *fde; - - blocksize = dir->i_sb->s_blocksize; - dxtrace(printk("Creating index\n")); - retval = ext4_journal_get_write_access(handle, bh); - if (retval) { - ext4_std_error(dir->i_sb, retval); - brelse(bh); - return retval; - } - root = (struct dx_root *) bh->b_data; - - bh2 = ext4_append (handle, dir, &block, &retval); - if (!(bh2)) { - brelse(bh); - return retval; - } - EXT4_I(dir)->i_flags |= EXT4_INDEX_FL; - data1 = bh2->b_data; - - /* The 0th block becomes the root, move the dirents out */ - fde = &root->dotdot; - de = (struct ext4_dir_entry_2 *)((char *)fde + le16_to_cpu(fde->rec_len)); - len = ((char *) root) + blocksize - (char *) de; - memcpy (data1, de, len); - de = (struct ext4_dir_entry_2 *) data1; - top = data1 + len; - while ((char *)(de2=(void*)de+le16_to_cpu(de->rec_len)) < top) - de = de2; - de->rec_len = cpu_to_le16(data1 + blocksize - (char *) de); - /* Initialize the root; the dot dirents already exist */ - de = (struct ext4_dir_entry_2 *) (&root->dotdot); - de->rec_len = cpu_to_le16(blocksize - EXT4_DIR_REC_LEN(2)); - memset (&root->info, 0, sizeof(root->info)); - root->info.info_length = sizeof(root->info); - root->info.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version; - entries = root->entries; - dx_set_block (entries, 1); - dx_set_count (entries, 1); - dx_set_limit (entries, dx_root_limit(dir, sizeof(root->info))); - - /* Initialize as for dx_probe */ - hinfo.hash_version = root->info.hash_version; - hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed; - ext4fs_dirhash(name, namelen, &hinfo); - frame = frames; - frame->entries = entries; - frame->at = entries; - frame->bh = bh; - bh = bh2; - de = do_split(handle,dir, &bh, frame, &hinfo, &retval); - dx_release (frames); - if (!(de)) - return retval; - - return add_dirent_to_buf(handle, dentry, inode, de, bh); -} -#endif - -/* - * ext4_add_entry() - * - * adds a file entry to the specified directory, using the same - * semantics as ext4_find_entry(). It returns NULL if it failed. - * - * NOTE!! The inode part of 'de' is left at 0 - which means you - * may not sleep between calling this and putting something into - * the entry, as someone else might have used it while you slept. - */ -static int ext4_add_entry (handle_t *handle, struct dentry *dentry, - struct inode *inode) -{ - struct inode *dir = dentry->d_parent->d_inode; - unsigned long offset; - struct buffer_head * bh; - struct ext4_dir_entry_2 *de; - struct super_block * sb; - int retval; -#ifdef CONFIG_EXT4_INDEX - int dx_fallback=0; -#endif - unsigned blocksize; - u32 block, blocks; - - sb = dir->i_sb; - blocksize = sb->s_blocksize; - if (!dentry->d_name.len) - return -EINVAL; -#ifdef CONFIG_EXT4_INDEX - if (is_dx(dir)) { - retval = ext4_dx_add_entry(handle, dentry, inode); - if (!retval || (retval != ERR_BAD_DX_DIR)) - return retval; - EXT4_I(dir)->i_flags &= ~EXT4_INDEX_FL; - dx_fallback++; - ext4_mark_inode_dirty(handle, dir); - } -#endif - blocks = dir->i_size >> sb->s_blocksize_bits; - for (block = 0, offset = 0; block < blocks; block++) { - bh = ext4_bread(handle, dir, block, 0, &retval); - if(!bh) - return retval; - retval = add_dirent_to_buf(handle, dentry, inode, NULL, bh); - if (retval != -ENOSPC) - return retval; - -#ifdef CONFIG_EXT4_INDEX - if (blocks == 1 && !dx_fallback && - EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_DIR_INDEX)) - return make_indexed_dir(handle, dentry, inode, bh); -#endif - brelse(bh); - } - bh = ext4_append(handle, dir, &block, &retval); - if (!bh) - return retval; - de = (struct ext4_dir_entry_2 *) bh->b_data; - de->inode = 0; - de->rec_len = cpu_to_le16(blocksize); - return add_dirent_to_buf(handle, dentry, inode, de, bh); -} - -#ifdef CONFIG_EXT4_INDEX -/* - * Returns 0 for success, or a negative error value - */ -static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, - struct inode *inode) -{ - struct dx_frame frames[2], *frame; - struct dx_entry *entries, *at; - struct dx_hash_info hinfo; - struct buffer_head * bh; - struct inode *dir = dentry->d_parent->d_inode; - struct super_block * sb = dir->i_sb; - struct ext4_dir_entry_2 *de; - int err; - - frame = dx_probe(dentry, NULL, &hinfo, frames, &err); - if (!frame) - return err; - entries = frame->entries; - at = frame->at; - - if (!(bh = ext4_bread(handle,dir, dx_get_block(frame->at), 0, &err))) - goto cleanup; - - BUFFER_TRACE(bh, "get_write_access"); - err = ext4_journal_get_write_access(handle, bh); - if (err) - goto journal_error; - - err = add_dirent_to_buf(handle, dentry, inode, NULL, bh); - if (err != -ENOSPC) { - bh = NULL; - goto cleanup; - } - - /* Block full, should compress but for now just split */ - dxtrace(printk("using %u of %u node entries\n", - dx_get_count(entries), dx_get_limit(entries))); - /* Need to split index? */ - if (dx_get_count(entries) == dx_get_limit(entries)) { - u32 newblock; - unsigned icount = dx_get_count(entries); - int levels = frame - frames; - struct dx_entry *entries2; - struct dx_node *node2; - struct buffer_head *bh2; - - if (levels && (dx_get_count(frames->entries) == - dx_get_limit(frames->entries))) { - ext4_warning(sb, __FUNCTION__, - "Directory index full!"); - err = -ENOSPC; - goto cleanup; - } - bh2 = ext4_append (handle, dir, &newblock, &err); - if (!(bh2)) - goto cleanup; - node2 = (struct dx_node *)(bh2->b_data); - entries2 = node2->entries; - node2->fake.rec_len = cpu_to_le16(sb->s_blocksize); - node2->fake.inode = 0; - BUFFER_TRACE(frame->bh, "get_write_access"); - err = ext4_journal_get_write_access(handle, frame->bh); - if (err) - goto journal_error; - if (levels) { - unsigned icount1 = icount/2, icount2 = icount - icount1; - unsigned hash2 = dx_get_hash(entries + icount1); - dxtrace(printk("Split index %i/%i\n", icount1, icount2)); - - BUFFER_TRACE(frame->bh, "get_write_access"); /* index root */ - err = ext4_journal_get_write_access(handle, - frames[0].bh); - if (err) - goto journal_error; - - memcpy ((char *) entries2, (char *) (entries + icount1), - icount2 * sizeof(struct dx_entry)); - dx_set_count (entries, icount1); - dx_set_count (entries2, icount2); - dx_set_limit (entries2, dx_node_limit(dir)); - - /* Which index block gets the new entry? */ - if (at - entries >= icount1) { - frame->at = at = at - entries - icount1 + entries2; - frame->entries = entries = entries2; - swap(frame->bh, bh2); - } - dx_insert_block (frames + 0, hash2, newblock); - dxtrace(dx_show_index ("node", frames[1].entries)); - dxtrace(dx_show_index ("node", - ((struct dx_node *) bh2->b_data)->entries)); - err = ext4_journal_dirty_metadata(handle, bh2); - if (err) - goto journal_error; - brelse (bh2); - } else { - dxtrace(printk("Creating second level index...\n")); - memcpy((char *) entries2, (char *) entries, - icount * sizeof(struct dx_entry)); - dx_set_limit(entries2, dx_node_limit(dir)); - - /* Set up root */ - dx_set_count(entries, 1); - dx_set_block(entries + 0, newblock); - ((struct dx_root *) frames[0].bh->b_data)->info.indirect_levels = 1; - - /* Add new access path frame */ - frame = frames + 1; - frame->at = at = at - entries + entries2; - frame->entries = entries = entries2; - frame->bh = bh2; - err = ext4_journal_get_write_access(handle, - frame->bh); - if (err) - goto journal_error; - } - ext4_journal_dirty_metadata(handle, frames[0].bh); - } - de = do_split(handle, dir, &bh, frame, &hinfo, &err); - if (!de) - goto cleanup; - err = add_dirent_to_buf(handle, dentry, inode, de, bh); - bh = NULL; - goto cleanup; - -journal_error: - ext4_std_error(dir->i_sb, err); -cleanup: - if (bh) - brelse(bh); - dx_release(frames); - return err; -} -#endif - -/* - * ext4_delete_entry deletes a directory entry by merging it with the - * previous entry - */ -static int ext4_delete_entry (handle_t *handle, - struct inode * dir, - struct ext4_dir_entry_2 * de_del, - struct buffer_head * bh) -{ - struct ext4_dir_entry_2 * de, * pde; - int i; - - i = 0; - pde = NULL; - de = (struct ext4_dir_entry_2 *) bh->b_data; - while (i < bh->b_size) { - if (!ext4_check_dir_entry("ext4_delete_entry", dir, de, bh, i)) - return -EIO; - if (de == de_del) { - BUFFER_TRACE(bh, "get_write_access"); - ext4_journal_get_write_access(handle, bh); - if (pde) - pde->rec_len = - cpu_to_le16(le16_to_cpu(pde->rec_len) + - le16_to_cpu(de->rec_len)); - else - de->inode = 0; - dir->i_version++; - BUFFER_TRACE(bh, "call ext4_journal_dirty_metadata"); - ext4_journal_dirty_metadata(handle, bh); - return 0; - } - i += le16_to_cpu(de->rec_len); - pde = de; - de = (struct ext4_dir_entry_2 *) - ((char *) de + le16_to_cpu(de->rec_len)); - } - return -ENOENT; -} - -/* - * ext4_mark_inode_dirty is somewhat expensive, so unlike ext2 we - * do not perform it in these functions. We perform it at the call site, - * if it is needed. - */ -static inline void ext4_inc_count(handle_t *handle, struct inode *inode) -{ - inc_nlink(inode); -} - -static inline void ext4_dec_count(handle_t *handle, struct inode *inode) -{ - drop_nlink(inode); -} - -static int ext4_add_nondir(handle_t *handle, - struct dentry *dentry, struct inode *inode) -{ - int err = ext4_add_entry(handle, dentry, inode); - if (!err) { - ext4_mark_inode_dirty(handle, inode); - d_instantiate(dentry, inode); - return 0; - } - ext4_dec_count(handle, inode); - iput(inode); - return err; -} - -/* - * By the time this is called, we already have created - * the directory cache entry for the new file, but it - * is so far negative - it has no inode. - * - * If the create succeeds, we fill in the inode information - * with d_instantiate(). - */ -static int ext4_create (struct inode * dir, struct dentry * dentry, int mode, - struct nameidata *nd) -{ - handle_t *handle; - struct inode * inode; - int err, retries = 0; - -retry: - handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + - EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 + - 2*EXT4_QUOTA_INIT_BLOCKS(dir->i_sb)); - if (IS_ERR(handle)) - return PTR_ERR(handle); - - if (IS_DIRSYNC(dir)) - handle->h_sync = 1; - - inode = ext4_new_inode (handle, dir, mode); - err = PTR_ERR(inode); - if (!IS_ERR(inode)) { - inode->i_op = &ext4_file_inode_operations; - inode->i_fop = &ext4_file_operations; - ext4_set_aops(inode); - err = ext4_add_nondir(handle, dentry, inode); - } - ext4_journal_stop(handle); - if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries)) - goto retry; - return err; -} - -static int ext4_mknod (struct inode * dir, struct dentry *dentry, - int mode, dev_t rdev) -{ - handle_t *handle; - struct inode *inode; - int err, retries = 0; - - if (!new_valid_dev(rdev)) - return -EINVAL; - -retry: - handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + - EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 + - 2*EXT4_QUOTA_INIT_BLOCKS(dir->i_sb)); - if (IS_ERR(handle)) - return PTR_ERR(handle); - - if (IS_DIRSYNC(dir)) - handle->h_sync = 1; - - inode = ext4_new_inode (handle, dir, mode); - err = PTR_ERR(inode); - if (!IS_ERR(inode)) { - init_special_inode(inode, inode->i_mode, rdev); -#ifdef CONFIG_EXT4DEV_FS_XATTR - inode->i_op = &ext4_special_inode_operations; -#endif - err = ext4_add_nondir(handle, dentry, inode); - } - ext4_journal_stop(handle); - if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries)) - goto retry; - return err; -} - -static int ext4_mkdir(struct inode * dir, struct dentry * dentry, int mode) -{ - handle_t *handle; - struct inode * inode; - struct buffer_head * dir_block; - struct ext4_dir_entry_2 * de; - int err, retries = 0; - - if (dir->i_nlink >= EXT4_LINK_MAX) - return -EMLINK; - -retry: - handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + - EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 + - 2*EXT4_QUOTA_INIT_BLOCKS(dir->i_sb)); - if (IS_ERR(handle)) - return PTR_ERR(handle); - - if (IS_DIRSYNC(dir)) - handle->h_sync = 1; - - inode = ext4_new_inode (handle, dir, S_IFDIR | mode); - err = PTR_ERR(inode); - if (IS_ERR(inode)) - goto out_stop; - - inode->i_op = &ext4_dir_inode_operations; - inode->i_fop = &ext4_dir_operations; - inode->i_size = EXT4_I(inode)->i_disksize = inode->i_sb->s_blocksize; - dir_block = ext4_bread (handle, inode, 0, 1, &err); - if (!dir_block) { - drop_nlink(inode); /* is this nlink == 0? */ - ext4_mark_inode_dirty(handle, inode); - iput (inode); - goto out_stop; - } - BUFFER_TRACE(dir_block, "get_write_access"); - ext4_journal_get_write_access(handle, dir_block); - de = (struct ext4_dir_entry_2 *) dir_block->b_data; - de->inode = cpu_to_le32(inode->i_ino); - de->name_len = 1; - de->rec_len = cpu_to_le16(EXT4_DIR_REC_LEN(de->name_len)); - strcpy (de->name, "."); - ext4_set_de_type(dir->i_sb, de, S_IFDIR); - de = (struct ext4_dir_entry_2 *) - ((char *) de + le16_to_cpu(de->rec_len)); - de->inode = cpu_to_le32(dir->i_ino); - de->rec_len = cpu_to_le16(inode->i_sb->s_blocksize-EXT4_DIR_REC_LEN(1)); - de->name_len = 2; - strcpy (de->name, ".."); - ext4_set_de_type(dir->i_sb, de, S_IFDIR); - inode->i_nlink = 2; - BUFFER_TRACE(dir_block, "call ext4_journal_dirty_metadata"); - ext4_journal_dirty_metadata(handle, dir_block); - brelse (dir_block); - ext4_mark_inode_dirty(handle, inode); - err = ext4_add_entry (handle, dentry, inode); - if (err) { - inode->i_nlink = 0; - ext4_mark_inode_dirty(handle, inode); - iput (inode); - goto out_stop; - } - inc_nlink(dir); - ext4_update_dx_flag(dir); - ext4_mark_inode_dirty(handle, dir); - d_instantiate(dentry, inode); -out_stop: - ext4_journal_stop(handle); - if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries)) - goto retry; - return err; -} - -/* - * routine to check that the specified directory is empty (for rmdir) - */ -static int empty_dir (struct inode * inode) -{ - unsigned long offset; - struct buffer_head * bh; - struct ext4_dir_entry_2 * de, * de1; - struct super_block * sb; - int err = 0; - - sb = inode->i_sb; - if (inode->i_size < EXT4_DIR_REC_LEN(1) + EXT4_DIR_REC_LEN(2) || - !(bh = ext4_bread (NULL, inode, 0, 0, &err))) { - if (err) - ext4_error(inode->i_sb, __FUNCTION__, - "error %d reading directory #%lu offset 0", - err, inode->i_ino); - else - ext4_warning(inode->i_sb, __FUNCTION__, - "bad directory (dir #%lu) - no data block", - inode->i_ino); - return 1; - } - de = (struct ext4_dir_entry_2 *) bh->b_data; - de1 = (struct ext4_dir_entry_2 *) - ((char *) de + le16_to_cpu(de->rec_len)); - if (le32_to_cpu(de->inode) != inode->i_ino || - !le32_to_cpu(de1->inode) || - strcmp (".", de->name) || - strcmp ("..", de1->name)) { - ext4_warning (inode->i_sb, "empty_dir", - "bad directory (dir #%lu) - no `.' or `..'", - inode->i_ino); - brelse (bh); - return 1; - } - offset = le16_to_cpu(de->rec_len) + le16_to_cpu(de1->rec_len); - de = (struct ext4_dir_entry_2 *) - ((char *) de1 + le16_to_cpu(de1->rec_len)); - while (offset < inode->i_size ) { - if (!bh || - (void *) de >= (void *) (bh->b_data+sb->s_blocksize)) { - err = 0; - brelse (bh); - bh = ext4_bread (NULL, inode, - offset >> EXT4_BLOCK_SIZE_BITS(sb), 0, &err); - if (!bh) { - if (err) - ext4_error(sb, __FUNCTION__, - "error %d reading directory" - " #%lu offset %lu", - err, inode->i_ino, offset); - offset += sb->s_blocksize; - continue; - } - de = (struct ext4_dir_entry_2 *) bh->b_data; - } - if (!ext4_check_dir_entry("empty_dir", inode, de, bh, offset)) { - de = (struct ext4_dir_entry_2 *)(bh->b_data + - sb->s_blocksize); - offset = (offset | (sb->s_blocksize - 1)) + 1; - continue; - } - if (le32_to_cpu(de->inode)) { - brelse (bh); - return 0; - } - offset += le16_to_cpu(de->rec_len); - de = (struct ext4_dir_entry_2 *) - ((char *) de + le16_to_cpu(de->rec_len)); - } - brelse (bh); - return 1; -} - -/* ext4_orphan_add() links an unlinked or truncated inode into a list of - * such inodes, starting at the superblock, in case we crash before the - * file is closed/deleted, or in case the inode truncate spans multiple - * transactions and the last transaction is not recovered after a crash. - * - * At filesystem recovery time, we walk this list deleting unlinked - * inodes and truncating linked inodes in ext4_orphan_cleanup(). - */ -int ext4_orphan_add(handle_t *handle, struct inode *inode) -{ - struct super_block *sb = inode->i_sb; - struct ext4_iloc iloc; - int err = 0, rc; - - lock_super(sb); - if (!list_empty(&EXT4_I(inode)->i_orphan)) - goto out_unlock; - - /* Orphan handling is only valid for files with data blocks - * being truncated, or files being unlinked. */ - - /* @@@ FIXME: Observation from aviro: - * I think I can trigger J_ASSERT in ext4_orphan_add(). We block - * here (on lock_super()), so race with ext4_link() which might bump - * ->i_nlink. For, say it, character device. Not a regular file, - * not a directory, not a symlink and ->i_nlink > 0. - */ - J_ASSERT ((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || - S_ISLNK(inode->i_mode)) || inode->i_nlink == 0); - - BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get_write_access"); - err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh); - if (err) - goto out_unlock; - - err = ext4_reserve_inode_write(handle, inode, &iloc); - if (err) - goto out_unlock; - - /* Insert this inode at the head of the on-disk orphan list... */ - NEXT_ORPHAN(inode) = le32_to_cpu(EXT4_SB(sb)->s_es->s_last_orphan); - EXT4_SB(sb)->s_es->s_last_orphan = cpu_to_le32(inode->i_ino); - err = ext4_journal_dirty_metadata(handle, EXT4_SB(sb)->s_sbh); - rc = ext4_mark_iloc_dirty(handle, inode, &iloc); - if (!err) - err = rc; - - /* Only add to the head of the in-memory list if all the - * previous operations succeeded. If the orphan_add is going to - * fail (possibly taking the journal offline), we can't risk - * leaving the inode on the orphan list: stray orphan-list - * entries can cause panics at unmount time. - * - * This is safe: on error we're going to ignore the orphan list - * anyway on the next recovery. */ - if (!err) - list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan); - - jbd_debug(4, "superblock will point to %lu\n", inode->i_ino); - jbd_debug(4, "orphan inode %lu will point to %d\n", - inode->i_ino, NEXT_ORPHAN(inode)); -out_unlock: - unlock_super(sb); - ext4_std_error(inode->i_sb, err); - return err; -} - -/* - * ext4_orphan_del() removes an unlinked or truncated inode from the list - * of such inodes stored on disk, because it is finally being cleaned up. - */ -int ext4_orphan_del(handle_t *handle, struct inode *inode) -{ - struct list_head *prev; - struct ext4_inode_info *ei = EXT4_I(inode); - struct ext4_sb_info *sbi; - unsigned long ino_next; - struct ext4_iloc iloc; - int err = 0; - - lock_super(inode->i_sb); - if (list_empty(&ei->i_orphan)) { - unlock_super(inode->i_sb); - return 0; - } - - ino_next = NEXT_ORPHAN(inode); - prev = ei->i_orphan.prev; - sbi = EXT4_SB(inode->i_sb); - - jbd_debug(4, "remove inode %lu from orphan list\n", inode->i_ino); - - list_del_init(&ei->i_orphan); - - /* If we're on an error path, we may not have a valid - * transaction handle with which to update the orphan list on - * disk, but we still need to remove the inode from the linked - * list in memory. */ - if (!handle) - goto out; - - err = ext4_reserve_inode_write(handle, inode, &iloc); - if (err) - goto out_err; - - if (prev == &sbi->s_orphan) { - jbd_debug(4, "superblock will point to %lu\n", ino_next); - BUFFER_TRACE(sbi->s_sbh, "get_write_access"); - err = ext4_journal_get_write_access(handle, sbi->s_sbh); - if (err) - goto out_brelse; - sbi->s_es->s_last_orphan = cpu_to_le32(ino_next); - err = ext4_journal_dirty_metadata(handle, sbi->s_sbh); - } else { - struct ext4_iloc iloc2; - struct inode *i_prev = - &list_entry(prev, struct ext4_inode_info, i_orphan)->vfs_inode; - - jbd_debug(4, "orphan inode %lu will point to %lu\n", - i_prev->i_ino, ino_next); - err = ext4_reserve_inode_write(handle, i_prev, &iloc2); - if (err) - goto out_brelse; - NEXT_ORPHAN(i_prev) = ino_next; - err = ext4_mark_iloc_dirty(handle, i_prev, &iloc2); - } - if (err) - goto out_brelse; - NEXT_ORPHAN(inode) = 0; - err = ext4_mark_iloc_dirty(handle, inode, &iloc); - -out_err: - ext4_std_error(inode->i_sb, err); -out: - unlock_super(inode->i_sb); - return err; - -out_brelse: - brelse(iloc.bh); - goto out_err; -} - -static int ext4_rmdir (struct inode * dir, struct dentry *dentry) -{ - int retval; - struct inode * inode; - struct buffer_head * bh; - struct ext4_dir_entry_2 * de; - handle_t *handle; - - /* Initialize quotas before so that eventual writes go in - * separate transaction */ - DQUOT_INIT(dentry->d_inode); - handle = ext4_journal_start(dir, EXT4_DELETE_TRANS_BLOCKS(dir->i_sb)); - if (IS_ERR(handle)) - return PTR_ERR(handle); - - retval = -ENOENT; - bh = ext4_find_entry (dentry, &de); - if (!bh) - goto end_rmdir; - - if (IS_DIRSYNC(dir)) - handle->h_sync = 1; - - inode = dentry->d_inode; - - retval = -EIO; - if (le32_to_cpu(de->inode) != inode->i_ino) - goto end_rmdir; - - retval = -ENOTEMPTY; - if (!empty_dir (inode)) - goto end_rmdir; - - retval = ext4_delete_entry(handle, dir, de, bh); - if (retval) - goto end_rmdir; - if (inode->i_nlink != 2) - ext4_warning (inode->i_sb, "ext4_rmdir", - "empty directory has nlink!=2 (%d)", - inode->i_nlink); - inode->i_version++; - clear_nlink(inode); - /* There's no need to set i_disksize: the fact that i_nlink is - * zero will ensure that the right thing happens during any - * recovery. */ - inode->i_size = 0; - ext4_orphan_add(handle, inode); - inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC; - ext4_mark_inode_dirty(handle, inode); - drop_nlink(dir); - ext4_update_dx_flag(dir); - ext4_mark_inode_dirty(handle, dir); - -end_rmdir: - ext4_journal_stop(handle); - brelse (bh); - return retval; -} - -static int ext4_unlink(struct inode * dir, struct dentry *dentry) -{ - int retval; - struct inode * inode; - struct buffer_head * bh; - struct ext4_dir_entry_2 * de; - handle_t *handle; - - /* Initialize quotas before so that eventual writes go - * in separate transaction */ - DQUOT_INIT(dentry->d_inode); - handle = ext4_journal_start(dir, EXT4_DELETE_TRANS_BLOCKS(dir->i_sb)); - if (IS_ERR(handle)) - return PTR_ERR(handle); - - if (IS_DIRSYNC(dir)) - handle->h_sync = 1; - - retval = -ENOENT; - bh = ext4_find_entry (dentry, &de); - if (!bh) - goto end_unlink; - - inode = dentry->d_inode; - - retval = -EIO; - if (le32_to_cpu(de->inode) != inode->i_ino) - goto end_unlink; - - if (!inode->i_nlink) { - ext4_warning (inode->i_sb, "ext4_unlink", - "Deleting nonexistent file (%lu), %d", - inode->i_ino, inode->i_nlink); - inode->i_nlink = 1; - } - retval = ext4_delete_entry(handle, dir, de, bh); - if (retval) - goto end_unlink; - dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC; - ext4_update_dx_flag(dir); - ext4_mark_inode_dirty(handle, dir); - drop_nlink(inode); - if (!inode->i_nlink) - ext4_orphan_add(handle, inode); - inode->i_ctime = dir->i_ctime; - ext4_mark_inode_dirty(handle, inode); - retval = 0; - -end_unlink: - ext4_journal_stop(handle); - brelse (bh); - return retval; -} - -static int ext4_symlink (struct inode * dir, - struct dentry *dentry, const char * symname) -{ - handle_t *handle; - struct inode * inode; - int l, err, retries = 0; - - l = strlen(symname)+1; - if (l > dir->i_sb->s_blocksize) - return -ENAMETOOLONG; - -retry: - handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + - EXT4_INDEX_EXTRA_TRANS_BLOCKS + 5 + - 2*EXT4_QUOTA_INIT_BLOCKS(dir->i_sb)); - if (IS_ERR(handle)) - return PTR_ERR(handle); - - if (IS_DIRSYNC(dir)) - handle->h_sync = 1; - - inode = ext4_new_inode (handle, dir, S_IFLNK|S_IRWXUGO); - err = PTR_ERR(inode); - if (IS_ERR(inode)) - goto out_stop; - - if (l > sizeof (EXT4_I(inode)->i_data)) { - inode->i_op = &ext4_symlink_inode_operations; - ext4_set_aops(inode); - /* - * page_symlink() calls into ext4_prepare/commit_write. - * We have a transaction open. All is sweetness. It also sets - * i_size in generic_commit_write(). - */ - err = __page_symlink(inode, symname, l, - mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS); - if (err) { - ext4_dec_count(handle, inode); - ext4_mark_inode_dirty(handle, inode); - iput (inode); - goto out_stop; - } - } else { - inode->i_op = &ext4_fast_symlink_inode_operations; - memcpy((char*)&EXT4_I(inode)->i_data,symname,l); - inode->i_size = l-1; - } - EXT4_I(inode)->i_disksize = inode->i_size; - err = ext4_add_nondir(handle, dentry, inode); -out_stop: - ext4_journal_stop(handle); - if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries)) - goto retry; - return err; -} - -static int ext4_link (struct dentry * old_dentry, - struct inode * dir, struct dentry *dentry) -{ - handle_t *handle; - struct inode *inode = old_dentry->d_inode; - int err, retries = 0; - - if (inode->i_nlink >= EXT4_LINK_MAX) - return -EMLINK; - -retry: - handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + - EXT4_INDEX_EXTRA_TRANS_BLOCKS); - if (IS_ERR(handle)) - return PTR_ERR(handle); - - if (IS_DIRSYNC(dir)) - handle->h_sync = 1; - - inode->i_ctime = CURRENT_TIME_SEC; - ext4_inc_count(handle, inode); - atomic_inc(&inode->i_count); - - err = ext4_add_nondir(handle, dentry, inode); - ext4_journal_stop(handle); - if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries)) - goto retry; - return err; -} - -#define PARENT_INO(buffer) \ - ((struct ext4_dir_entry_2 *) ((char *) buffer + \ - le16_to_cpu(((struct ext4_dir_entry_2 *) buffer)->rec_len)))->inode - -/* - * Anybody can rename anything with this: the permission checks are left to the - * higher-level routines. - */ -static int ext4_rename (struct inode * old_dir, struct dentry *old_dentry, - struct inode * new_dir,struct dentry *new_dentry) -{ - handle_t *handle; - struct inode * old_inode, * new_inode; - struct buffer_head * old_bh, * new_bh, * dir_bh; - struct ext4_dir_entry_2 * old_de, * new_de; - int retval; - - old_bh = new_bh = dir_bh = NULL; - - /* Initialize quotas before so that eventual writes go - * in separate transaction */ - if (new_dentry->d_inode) - DQUOT_INIT(new_dentry->d_inode); - handle = ext4_journal_start(old_dir, 2 * - EXT4_DATA_TRANS_BLOCKS(old_dir->i_sb) + - EXT4_INDEX_EXTRA_TRANS_BLOCKS + 2); - if (IS_ERR(handle)) - return PTR_ERR(handle); - - if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir)) - handle->h_sync = 1; - - old_bh = ext4_find_entry (old_dentry, &old_de); - /* - * Check for inode number is _not_ due to possible IO errors. - * We might rmdir the source, keep it as pwd of some process - * and merrily kill the link to whatever was created under the - * same name. Goodbye sticky bit ;-< - */ - old_inode = old_dentry->d_inode; - retval = -ENOENT; - if (!old_bh || le32_to_cpu(old_de->inode) != old_inode->i_ino) - goto end_rename; - - new_inode = new_dentry->d_inode; - new_bh = ext4_find_entry (new_dentry, &new_de); - if (new_bh) { - if (!new_inode) { - brelse (new_bh); - new_bh = NULL; - } - } - if (S_ISDIR(old_inode->i_mode)) { - if (new_inode) { - retval = -ENOTEMPTY; - if (!empty_dir (new_inode)) - goto end_rename; - } - retval = -EIO; - dir_bh = ext4_bread (handle, old_inode, 0, 0, &retval); - if (!dir_bh) - goto end_rename; - if (le32_to_cpu(PARENT_INO(dir_bh->b_data)) != old_dir->i_ino) - goto end_rename; - retval = -EMLINK; - if (!new_inode && new_dir!=old_dir && - new_dir->i_nlink >= EXT4_LINK_MAX) - goto end_rename; - } - if (!new_bh) { - retval = ext4_add_entry (handle, new_dentry, old_inode); - if (retval) - goto end_rename; - } else { - BUFFER_TRACE(new_bh, "get write access"); - ext4_journal_get_write_access(handle, new_bh); - new_de->inode = cpu_to_le32(old_inode->i_ino); - if (EXT4_HAS_INCOMPAT_FEATURE(new_dir->i_sb, - EXT4_FEATURE_INCOMPAT_FILETYPE)) - new_de->file_type = old_de->file_type; - new_dir->i_version++; - BUFFER_TRACE(new_bh, "call ext4_journal_dirty_metadata"); - ext4_journal_dirty_metadata(handle, new_bh); - brelse(new_bh); - new_bh = NULL; - } - - /* - * Like most other Unix systems, set the ctime for inodes on a - * rename. - */ - old_inode->i_ctime = CURRENT_TIME_SEC; - ext4_mark_inode_dirty(handle, old_inode); - - /* - * ok, that's it - */ - if (le32_to_cpu(old_de->inode) != old_inode->i_ino || - old_de->name_len != old_dentry->d_name.len || - strncmp(old_de->name, old_dentry->d_name.name, old_de->name_len) || - (retval = ext4_delete_entry(handle, old_dir, - old_de, old_bh)) == -ENOENT) { - /* old_de could have moved from under us during htree split, so - * make sure that we are deleting the right entry. We might - * also be pointing to a stale entry in the unused part of - * old_bh so just checking inum and the name isn't enough. */ - struct buffer_head *old_bh2; - struct ext4_dir_entry_2 *old_de2; - - old_bh2 = ext4_find_entry(old_dentry, &old_de2); - if (old_bh2) { - retval = ext4_delete_entry(handle, old_dir, - old_de2, old_bh2); - brelse(old_bh2); - } - } - if (retval) { - ext4_warning(old_dir->i_sb, "ext4_rename", - "Deleting old file (%lu), %d, error=%d", - old_dir->i_ino, old_dir->i_nlink, retval); - } - - if (new_inode) { - drop_nlink(new_inode); - new_inode->i_ctime = CURRENT_TIME_SEC; - } - old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME_SEC; - ext4_update_dx_flag(old_dir); - if (dir_bh) { - BUFFER_TRACE(dir_bh, "get_write_access"); - ext4_journal_get_write_access(handle, dir_bh); - PARENT_INO(dir_bh->b_data) = cpu_to_le32(new_dir->i_ino); - BUFFER_TRACE(dir_bh, "call ext4_journal_dirty_metadata"); - ext4_journal_dirty_metadata(handle, dir_bh); - drop_nlink(old_dir); - if (new_inode) { - drop_nlink(new_inode); - } else { - inc_nlink(new_dir); - ext4_update_dx_flag(new_dir); - ext4_mark_inode_dirty(handle, new_dir); - } - } - ext4_mark_inode_dirty(handle, old_dir); - if (new_inode) { - ext4_mark_inode_dirty(handle, new_inode); - if (!new_inode->i_nlink) - ext4_orphan_add(handle, new_inode); - } - retval = 0; - -end_rename: - brelse (dir_bh); - brelse (old_bh); - brelse (new_bh); - ext4_journal_stop(handle); - return retval; -} - -/* - * directories can handle most operations... - */ -struct inode_operations ext4_dir_inode_operations = { - .create = ext4_create, - .lookup = ext4_lookup, - .link = ext4_link, - .unlink = ext4_unlink, - .symlink = ext4_symlink, - .mkdir = ext4_mkdir, - .rmdir = ext4_rmdir, - .mknod = ext4_mknod, - .rename = ext4_rename, - .setattr = ext4_setattr, -#ifdef CONFIG_EXT4DEV_FS_XATTR - .setxattr = generic_setxattr, - .getxattr = generic_getxattr, - .listxattr = ext4_listxattr, - .removexattr = generic_removexattr, -#endif - .permission = ext4_permission, -}; - -struct inode_operations ext4_special_inode_operations = { - .setattr = ext4_setattr, -#ifdef CONFIG_EXT4DEV_FS_XATTR - .setxattr = generic_setxattr, - .getxattr = generic_getxattr, - .listxattr = ext4_listxattr, - .removexattr = generic_removexattr, -#endif - .permission = ext4_permission, -}; diff --git a/trunk/fs/ext4/namei.h b/trunk/fs/ext4/namei.h deleted file mode 100644 index 5e4dfff36a00..000000000000 --- a/trunk/fs/ext4/namei.h +++ /dev/null @@ -1,8 +0,0 @@ -/* linux/fs/ext4/namei.h - * - * Copyright (C) 2005 Simtec Electronics - * Ben Dooks - * -*/ - -extern struct dentry *ext4_get_parent(struct dentry *child); diff --git a/trunk/fs/ext4/resize.c b/trunk/fs/ext4/resize.c deleted file mode 100644 index 1e9578052cd3..000000000000 --- a/trunk/fs/ext4/resize.c +++ /dev/null @@ -1,1045 +0,0 @@ -/* - * linux/fs/ext4/resize.c - * - * Support for resizing an ext4 filesystem while it is mounted. - * - * Copyright (C) 2001, 2002 Andreas Dilger - * - * This could probably be made into a module, because it is not often in use. - */ - - -#define EXT4FS_DEBUG - -#include -#include -#include - -#include -#include - - -#define outside(b, first, last) ((b) < (first) || (b) >= (last)) -#define inside(b, first, last) ((b) >= (first) && (b) < (last)) - -static int verify_group_input(struct super_block *sb, - struct ext4_new_group_data *input) -{ - struct ext4_sb_info *sbi = EXT4_SB(sb); - struct ext4_super_block *es = sbi->s_es; - ext4_fsblk_t start = ext4_blocks_count(es); - ext4_fsblk_t end = start + input->blocks_count; - unsigned group = input->group; - ext4_fsblk_t itend = input->inode_table + sbi->s_itb_per_group; - unsigned overhead = ext4_bg_has_super(sb, group) ? - (1 + ext4_bg_num_gdb(sb, group) + - le16_to_cpu(es->s_reserved_gdt_blocks)) : 0; - ext4_fsblk_t metaend = start + overhead; - struct buffer_head *bh = NULL; - ext4_grpblk_t free_blocks_count, offset; - int err = -EINVAL; - - input->free_blocks_count = free_blocks_count = - input->blocks_count - 2 - overhead - sbi->s_itb_per_group; - - if (test_opt(sb, DEBUG)) - printk(KERN_DEBUG "EXT4-fs: adding %s group %u: %u blocks " - "(%d free, %u reserved)\n", - ext4_bg_has_super(sb, input->group) ? "normal" : - "no-super", input->group, input->blocks_count, - free_blocks_count, input->reserved_blocks); - - ext4_get_group_no_and_offset(sb, start, NULL, &offset); - if (group != sbi->s_groups_count) - ext4_warning(sb, __FUNCTION__, - "Cannot add at group %u (only %lu groups)", - input->group, sbi->s_groups_count); - else if (offset != 0) - ext4_warning(sb, __FUNCTION__, "Last group not full"); - else if (input->reserved_blocks > input->blocks_count / 5) - ext4_warning(sb, __FUNCTION__, "Reserved blocks too high (%u)", - input->reserved_blocks); - else if (free_blocks_count < 0) - ext4_warning(sb, __FUNCTION__, "Bad blocks count %u", - input->blocks_count); - else if (!(bh = sb_bread(sb, end - 1))) - ext4_warning(sb, __FUNCTION__, - "Cannot read last block (%llu)", - end - 1); - else if (outside(input->block_bitmap, start, end)) - ext4_warning(sb, __FUNCTION__, - "Block bitmap not in group (block %llu)", - input->block_bitmap); - else if (outside(input->inode_bitmap, start, end)) - ext4_warning(sb, __FUNCTION__, - "Inode bitmap not in group (block %llu)", - input->inode_bitmap); - else if (outside(input->inode_table, start, end) || - outside(itend - 1, start, end)) - ext4_warning(sb, __FUNCTION__, - "Inode table not in group (blocks %llu-%llu)", - input->inode_table, itend - 1); - else if (input->inode_bitmap == input->block_bitmap) - ext4_warning(sb, __FUNCTION__, - "Block bitmap same as inode bitmap (%llu)", - input->block_bitmap); - else if (inside(input->block_bitmap, input->inode_table, itend)) - ext4_warning(sb, __FUNCTION__, - "Block bitmap (%llu) in inode table (%llu-%llu)", - input->block_bitmap, input->inode_table, itend-1); - else if (inside(input->inode_bitmap, input->inode_table, itend)) - ext4_warning(sb, __FUNCTION__, - "Inode bitmap (%llu) in inode table (%llu-%llu)", - input->inode_bitmap, input->inode_table, itend-1); - else if (inside(input->block_bitmap, start, metaend)) - ext4_warning(sb, __FUNCTION__, - "Block bitmap (%llu) in GDT table" - " (%llu-%llu)", - input->block_bitmap, start, metaend - 1); - else if (inside(input->inode_bitmap, start, metaend)) - ext4_warning(sb, __FUNCTION__, - "Inode bitmap (%llu) in GDT table" - " (%llu-%llu)", - input->inode_bitmap, start, metaend - 1); - else if (inside(input->inode_table, start, metaend) || - inside(itend - 1, start, metaend)) - ext4_warning(sb, __FUNCTION__, - "Inode table (%llu-%llu) overlaps" - "GDT table (%llu-%llu)", - input->inode_table, itend - 1, start, metaend - 1); - else - err = 0; - brelse(bh); - - return err; -} - -static struct buffer_head *bclean(handle_t *handle, struct super_block *sb, - ext4_fsblk_t blk) -{ - struct buffer_head *bh; - int err; - - bh = sb_getblk(sb, blk); - if (!bh) - return ERR_PTR(-EIO); - if ((err = ext4_journal_get_write_access(handle, bh))) { - brelse(bh); - bh = ERR_PTR(err); - } else { - lock_buffer(bh); - memset(bh->b_data, 0, sb->s_blocksize); - set_buffer_uptodate(bh); - unlock_buffer(bh); - } - - return bh; -} - -/* - * To avoid calling the atomic setbit hundreds or thousands of times, we only - * need to use it within a single byte (to ensure we get endianness right). - * We can use memset for the rest of the bitmap as there are no other users. - */ -static void mark_bitmap_end(int start_bit, int end_bit, char *bitmap) -{ - int i; - - if (start_bit >= end_bit) - return; - - ext4_debug("mark end bits +%d through +%d used\n", start_bit, end_bit); - for (i = start_bit; i < ((start_bit + 7) & ~7UL); i++) - ext4_set_bit(i, bitmap); - if (i < end_bit) - memset(bitmap + (i >> 3), 0xff, (end_bit - i) >> 3); -} - -/* - * Set up the block and inode bitmaps, and the inode table for the new group. - * This doesn't need to be part of the main transaction, since we are only - * changing blocks outside the actual filesystem. We still do journaling to - * ensure the recovery is correct in case of a failure just after resize. - * If any part of this fails, we simply abort the resize. - */ -static int setup_new_group_blocks(struct super_block *sb, - struct ext4_new_group_data *input) -{ - struct ext4_sb_info *sbi = EXT4_SB(sb); - ext4_fsblk_t start = ext4_group_first_block_no(sb, input->group); - int reserved_gdb = ext4_bg_has_super(sb, input->group) ? - le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) : 0; - unsigned long gdblocks = ext4_bg_num_gdb(sb, input->group); - struct buffer_head *bh; - handle_t *handle; - ext4_fsblk_t block; - ext4_grpblk_t bit; - int i; - int err = 0, err2; - - handle = ext4_journal_start_sb(sb, reserved_gdb + gdblocks + - 2 + sbi->s_itb_per_group); - if (IS_ERR(handle)) - return PTR_ERR(handle); - - lock_super(sb); - if (input->group != sbi->s_groups_count) { - err = -EBUSY; - goto exit_journal; - } - - if (IS_ERR(bh = bclean(handle, sb, input->block_bitmap))) { - err = PTR_ERR(bh); - goto exit_journal; - } - - if (ext4_bg_has_super(sb, input->group)) { - ext4_debug("mark backup superblock %#04lx (+0)\n", start); - ext4_set_bit(0, bh->b_data); - } - - /* Copy all of the GDT blocks into the backup in this group */ - for (i = 0, bit = 1, block = start + 1; - i < gdblocks; i++, block++, bit++) { - struct buffer_head *gdb; - - ext4_debug("update backup group %#04lx (+%d)\n", block, bit); - - gdb = sb_getblk(sb, block); - if (!gdb) { - err = -EIO; - goto exit_bh; - } - if ((err = ext4_journal_get_write_access(handle, gdb))) { - brelse(gdb); - goto exit_bh; - } - lock_buffer(bh); - memcpy(gdb->b_data, sbi->s_group_desc[i]->b_data, bh->b_size); - set_buffer_uptodate(gdb); - unlock_buffer(bh); - ext4_journal_dirty_metadata(handle, gdb); - ext4_set_bit(bit, bh->b_data); - brelse(gdb); - } - - /* Zero out all of the reserved backup group descriptor table blocks */ - for (i = 0, bit = gdblocks + 1, block = start + bit; - i < reserved_gdb; i++, block++, bit++) { - struct buffer_head *gdb; - - ext4_debug("clear reserved block %#04lx (+%d)\n", block, bit); - - if (IS_ERR(gdb = bclean(handle, sb, block))) { - err = PTR_ERR(bh); - goto exit_bh; - } - ext4_journal_dirty_metadata(handle, gdb); - ext4_set_bit(bit, bh->b_data); - brelse(gdb); - } - ext4_debug("mark block bitmap %#04x (+%ld)\n", input->block_bitmap, - input->block_bitmap - start); - ext4_set_bit(input->block_bitmap - start, bh->b_data); - ext4_debug("mark inode bitmap %#04x (+%ld)\n", input->inode_bitmap, - input->inode_bitmap - start); - ext4_set_bit(input->inode_bitmap - start, bh->b_data); - - /* Zero out all of the inode table blocks */ - for (i = 0, block = input->inode_table, bit = block - start; - i < sbi->s_itb_per_group; i++, bit++, block++) { - struct buffer_head *it; - - ext4_debug("clear inode block %#04lx (+%d)\n", block, bit); - if (IS_ERR(it = bclean(handle, sb, block))) { - err = PTR_ERR(it); - goto exit_bh; - } - ext4_journal_dirty_metadata(handle, it); - brelse(it); - ext4_set_bit(bit, bh->b_data); - } - mark_bitmap_end(input->blocks_count, EXT4_BLOCKS_PER_GROUP(sb), - bh->b_data); - ext4_journal_dirty_metadata(handle, bh); - brelse(bh); - - /* Mark unused entries in inode bitmap used */ - ext4_debug("clear inode bitmap %#04x (+%ld)\n", - input->inode_bitmap, input->inode_bitmap - start); - if (IS_ERR(bh = bclean(handle, sb, input->inode_bitmap))) { - err = PTR_ERR(bh); - goto exit_journal; - } - - mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), EXT4_BLOCKS_PER_GROUP(sb), - bh->b_data); - ext4_journal_dirty_metadata(handle, bh); -exit_bh: - brelse(bh); - -exit_journal: - unlock_super(sb); - if ((err2 = ext4_journal_stop(handle)) && !err) - err = err2; - - return err; -} - - -/* - * Iterate through the groups which hold BACKUP superblock/GDT copies in an - * ext4 filesystem. The counters should be initialized to 1, 5, and 7 before - * calling this for the first time. In a sparse filesystem it will be the - * sequence of powers of 3, 5, and 7: 1, 3, 5, 7, 9, 25, 27, 49, 81, ... - * For a non-sparse filesystem it will be every group: 1, 2, 3, 4, ... - */ -static unsigned ext4_list_backups(struct super_block *sb, unsigned *three, - unsigned *five, unsigned *seven) -{ - unsigned *min = three; - int mult = 3; - unsigned ret; - - if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER)) { - ret = *min; - *min += 1; - return ret; - } - - if (*five < *min) { - min = five; - mult = 5; - } - if (*seven < *min) { - min = seven; - mult = 7; - } - - ret = *min; - *min *= mult; - - return ret; -} - -/* - * Check that all of the backup GDT blocks are held in the primary GDT block. - * It is assumed that they are stored in group order. Returns the number of - * groups in current filesystem that have BACKUPS, or -ve error code. - */ -static int verify_reserved_gdb(struct super_block *sb, - struct buffer_head *primary) -{ - const ext4_fsblk_t blk = primary->b_blocknr; - const unsigned long end = EXT4_SB(sb)->s_groups_count; - unsigned three = 1; - unsigned five = 5; - unsigned seven = 7; - unsigned grp; - __le32 *p = (__le32 *)primary->b_data; - int gdbackups = 0; - - while ((grp = ext4_list_backups(sb, &three, &five, &seven)) < end) { - if (le32_to_cpu(*p++) != - grp * EXT4_BLOCKS_PER_GROUP(sb) + blk){ - ext4_warning(sb, __FUNCTION__, - "reserved GDT %llu" - " missing grp %d (%llu)", - blk, grp, - grp * - (ext4_fsblk_t)EXT4_BLOCKS_PER_GROUP(sb) + - blk); - return -EINVAL; - } - if (++gdbackups > EXT4_ADDR_PER_BLOCK(sb)) - return -EFBIG; - } - - return gdbackups; -} - -/* - * Called when we need to bring a reserved group descriptor table block into - * use from the resize inode. The primary copy of the new GDT block currently - * is an indirect block (under the double indirect block in the resize inode). - * The new backup GDT blocks will be stored as leaf blocks in this indirect - * block, in group order. Even though we know all the block numbers we need, - * we check to ensure that the resize inode has actually reserved these blocks. - * - * Don't need to update the block bitmaps because the blocks are still in use. - * - * We get all of the error cases out of the way, so that we are sure to not - * fail once we start modifying the data on disk, because JBD has no rollback. - */ -static int add_new_gdb(handle_t *handle, struct inode *inode, - struct ext4_new_group_data *input, - struct buffer_head **primary) -{ - struct super_block *sb = inode->i_sb; - struct ext4_super_block *es = EXT4_SB(sb)->s_es; - unsigned long gdb_num = input->group / EXT4_DESC_PER_BLOCK(sb); - ext4_fsblk_t gdblock = EXT4_SB(sb)->s_sbh->b_blocknr + 1 + gdb_num; - struct buffer_head **o_group_desc, **n_group_desc; - struct buffer_head *dind; - int gdbackups; - struct ext4_iloc iloc; - __le32 *data; - int err; - - if (test_opt(sb, DEBUG)) - printk(KERN_DEBUG - "EXT4-fs: ext4_add_new_gdb: adding group block %lu\n", - gdb_num); - - /* - * If we are not using the primary superblock/GDT copy don't resize, - * because the user tools have no way of handling this. Probably a - * bad time to do it anyways. - */ - if (EXT4_SB(sb)->s_sbh->b_blocknr != - le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) { - ext4_warning(sb, __FUNCTION__, - "won't resize using backup superblock at %llu", - (unsigned long long)EXT4_SB(sb)->s_sbh->b_blocknr); - return -EPERM; - } - - *primary = sb_bread(sb, gdblock); - if (!*primary) - return -EIO; - - if ((gdbackups = verify_reserved_gdb(sb, *primary)) < 0) { - err = gdbackups; - goto exit_bh; - } - - data = EXT4_I(inode)->i_data + EXT4_DIND_BLOCK; - dind = sb_bread(sb, le32_to_cpu(*data)); - if (!dind) { - err = -EIO; - goto exit_bh; - } - - data = (__le32 *)dind->b_data; - if (le32_to_cpu(data[gdb_num % EXT4_ADDR_PER_BLOCK(sb)]) != gdblock) { - ext4_warning(sb, __FUNCTION__, - "new group %u GDT block %llu not reserved", - input->group, gdblock); - err = -EINVAL; - goto exit_dind; - } - - if ((err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh))) - goto exit_dind; - - if ((err = ext4_journal_get_write_access(handle, *primary))) - goto exit_sbh; - - if ((err = ext4_journal_get_write_access(handle, dind))) - goto exit_primary; - - /* ext4_reserve_inode_write() gets a reference on the iloc */ - if ((err = ext4_reserve_inode_write(handle, inode, &iloc))) - goto exit_dindj; - - n_group_desc = kmalloc((gdb_num + 1) * sizeof(struct buffer_head *), - GFP_KERNEL); - if (!n_group_desc) { - err = -ENOMEM; - ext4_warning (sb, __FUNCTION__, - "not enough memory for %lu groups", gdb_num + 1); - goto exit_inode; - } - - /* - * Finally, we have all of the possible failures behind us... - * - * Remove new GDT block from inode double-indirect block and clear out - * the new GDT block for use (which also "frees" the backup GDT blocks - * from the reserved inode). We don't need to change the bitmaps for - * these blocks, because they are marked as in-use from being in the - * reserved inode, and will become GDT blocks (primary and backup). - */ - data[gdb_num % EXT4_ADDR_PER_BLOCK(sb)] = 0; - ext4_journal_dirty_metadata(handle, dind); - brelse(dind); - inode->i_blocks -= (gdbackups + 1) * sb->s_blocksize >> 9; - ext4_mark_iloc_dirty(handle, inode, &iloc); - memset((*primary)->b_data, 0, sb->s_blocksize); - ext4_journal_dirty_metadata(handle, *primary); - - o_group_desc = EXT4_SB(sb)->s_group_desc; - memcpy(n_group_desc, o_group_desc, - EXT4_SB(sb)->s_gdb_count * sizeof(struct buffer_head *)); - n_group_desc[gdb_num] = *primary; - EXT4_SB(sb)->s_group_desc = n_group_desc; - EXT4_SB(sb)->s_gdb_count++; - kfree(o_group_desc); - - es->s_reserved_gdt_blocks = - cpu_to_le16(le16_to_cpu(es->s_reserved_gdt_blocks) - 1); - ext4_journal_dirty_metadata(handle, EXT4_SB(sb)->s_sbh); - - return 0; - -exit_inode: - //ext4_journal_release_buffer(handle, iloc.bh); - brelse(iloc.bh); -exit_dindj: - //ext4_journal_release_buffer(handle, dind); -exit_primary: - //ext4_journal_release_buffer(handle, *primary); -exit_sbh: - //ext4_journal_release_buffer(handle, *primary); -exit_dind: - brelse(dind); -exit_bh: - brelse(*primary); - - ext4_debug("leaving with error %d\n", err); - return err; -} - -/* - * Called when we are adding a new group which has a backup copy of each of - * the GDT blocks (i.e. sparse group) and there are reserved GDT blocks. - * We need to add these reserved backup GDT blocks to the resize inode, so - * that they are kept for future resizing and not allocated to files. - * - * Each reserved backup GDT block will go into a different indirect block. - * The indirect blocks are actually the primary reserved GDT blocks, - * so we know in advance what their block numbers are. We only get the - * double-indirect block to verify it is pointing to the primary reserved - * GDT blocks so we don't overwrite a data block by accident. The reserved - * backup GDT blocks are stored in their reserved primary GDT block. - */ -static int reserve_backup_gdb(handle_t *handle, struct inode *inode, - struct ext4_new_group_data *input) -{ - struct super_block *sb = inode->i_sb; - int reserved_gdb =le16_to_cpu(EXT4_SB(sb)->s_es->s_reserved_gdt_blocks); - struct buffer_head **primary; - struct buffer_head *dind; - struct ext4_iloc iloc; - ext4_fsblk_t blk; - __le32 *data, *end; - int gdbackups = 0; - int res, i; - int err; - - primary = kmalloc(reserved_gdb * sizeof(*primary), GFP_KERNEL); - if (!primary) - return -ENOMEM; - - data = EXT4_I(inode)->i_data + EXT4_DIND_BLOCK; - dind = sb_bread(sb, le32_to_cpu(*data)); - if (!dind) { - err = -EIO; - goto exit_free; - } - - blk = EXT4_SB(sb)->s_sbh->b_blocknr + 1 + EXT4_SB(sb)->s_gdb_count; - data = (__le32 *)dind->b_data + EXT4_SB(sb)->s_gdb_count; - end = (__le32 *)dind->b_data + EXT4_ADDR_PER_BLOCK(sb); - - /* Get each reserved primary GDT block and verify it holds backups */ - for (res = 0; res < reserved_gdb; res++, blk++) { - if (le32_to_cpu(*data) != blk) { - ext4_warning(sb, __FUNCTION__, - "reserved block %llu" - " not at offset %ld", - blk, - (long)(data - (__le32 *)dind->b_data)); - err = -EINVAL; - goto exit_bh; - } - primary[res] = sb_bread(sb, blk); - if (!primary[res]) { - err = -EIO; - goto exit_bh; - } - if ((gdbackups = verify_reserved_gdb(sb, primary[res])) < 0) { - brelse(primary[res]); - err = gdbackups; - goto exit_bh; - } - if (++data >= end) - data = (__le32 *)dind->b_data; - } - - for (i = 0; i < reserved_gdb; i++) { - if ((err = ext4_journal_get_write_access(handle, primary[i]))) { - /* - int j; - for (j = 0; j < i; j++) - ext4_journal_release_buffer(handle, primary[j]); - */ - goto exit_bh; - } - } - - if ((err = ext4_reserve_inode_write(handle, inode, &iloc))) - goto exit_bh; - - /* - * Finally we can add each of the reserved backup GDT blocks from - * the new group to its reserved primary GDT block. - */ - blk = input->group * EXT4_BLOCKS_PER_GROUP(sb); - for (i = 0; i < reserved_gdb; i++) { - int err2; - data = (__le32 *)primary[i]->b_data; - /* printk("reserving backup %lu[%u] = %lu\n", - primary[i]->b_blocknr, gdbackups, - blk + primary[i]->b_blocknr); */ - data[gdbackups] = cpu_to_le32(blk + primary[i]->b_blocknr); - err2 = ext4_journal_dirty_metadata(handle, primary[i]); - if (!err) - err = err2; - } - inode->i_blocks += reserved_gdb * sb->s_blocksize >> 9; - ext4_mark_iloc_dirty(handle, inode, &iloc); - -exit_bh: - while (--res >= 0) - brelse(primary[res]); - brelse(dind); - -exit_free: - kfree(primary); - - return err; -} - -/* - * Update the backup copies of the ext4 metadata. These don't need to be part - * of the main resize transaction, because e2fsck will re-write them if there - * is a problem (basically only OOM will cause a problem). However, we - * _should_ update the backups if possible, in case the primary gets trashed - * for some reason and we need to run e2fsck from a backup superblock. The - * important part is that the new block and inode counts are in the backup - * superblocks, and the location of the new group metadata in the GDT backups. - * - * We do not need lock_super() for this, because these blocks are not - * otherwise touched by the filesystem code when it is mounted. We don't - * need to worry about last changing from sbi->s_groups_count, because the - * worst that can happen is that we do not copy the full number of backups - * at this time. The resize which changed s_groups_count will backup again. - */ -static void update_backups(struct super_block *sb, - int blk_off, char *data, int size) -{ - struct ext4_sb_info *sbi = EXT4_SB(sb); - const unsigned long last = sbi->s_groups_count; - const int bpg = EXT4_BLOCKS_PER_GROUP(sb); - unsigned three = 1; - unsigned five = 5; - unsigned seven = 7; - unsigned group; - int rest = sb->s_blocksize - size; - handle_t *handle; - int err = 0, err2; - - handle = ext4_journal_start_sb(sb, EXT4_MAX_TRANS_DATA); - if (IS_ERR(handle)) { - group = 1; - err = PTR_ERR(handle); - goto exit_err; - } - - while ((group = ext4_list_backups(sb, &three, &five, &seven)) < last) { - struct buffer_head *bh; - - /* Out of journal space, and can't get more - abort - so sad */ - if (handle->h_buffer_credits == 0 && - ext4_journal_extend(handle, EXT4_MAX_TRANS_DATA) && - (err = ext4_journal_restart(handle, EXT4_MAX_TRANS_DATA))) - break; - - bh = sb_getblk(sb, group * bpg + blk_off); - if (!bh) { - err = -EIO; - break; - } - ext4_debug("update metadata backup %#04lx\n", - (unsigned long)bh->b_blocknr); - if ((err = ext4_journal_get_write_access(handle, bh))) - break; - lock_buffer(bh); - memcpy(bh->b_data, data, size); - if (rest) - memset(bh->b_data + size, 0, rest); - set_buffer_uptodate(bh); - unlock_buffer(bh); - ext4_journal_dirty_metadata(handle, bh); - brelse(bh); - } - if ((err2 = ext4_journal_stop(handle)) && !err) - err = err2; - - /* - * Ugh! Need to have e2fsck write the backup copies. It is too - * late to revert the resize, we shouldn't fail just because of - * the backup copies (they are only needed in case of corruption). - * - * However, if we got here we have a journal problem too, so we - * can't really start a transaction to mark the superblock. - * Chicken out and just set the flag on the hope it will be written - * to disk, and if not - we will simply wait until next fsck. - */ -exit_err: - if (err) { - ext4_warning(sb, __FUNCTION__, - "can't update backup for group %d (err %d), " - "forcing fsck on next reboot", group, err); - sbi->s_mount_state &= ~EXT4_VALID_FS; - sbi->s_es->s_state &= cpu_to_le16(~EXT4_VALID_FS); - mark_buffer_dirty(sbi->s_sbh); - } -} - -/* Add group descriptor data to an existing or new group descriptor block. - * Ensure we handle all possible error conditions _before_ we start modifying - * the filesystem, because we cannot abort the transaction and not have it - * write the data to disk. - * - * If we are on a GDT block boundary, we need to get the reserved GDT block. - * Otherwise, we may need to add backup GDT blocks for a sparse group. - * - * We only need to hold the superblock lock while we are actually adding - * in the new group's counts to the superblock. Prior to that we have - * not really "added" the group at all. We re-check that we are still - * adding in the last group in case things have changed since verifying. - */ -int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) -{ - struct ext4_sb_info *sbi = EXT4_SB(sb); - struct ext4_super_block *es = sbi->s_es; - int reserved_gdb = ext4_bg_has_super(sb, input->group) ? - le16_to_cpu(es->s_reserved_gdt_blocks) : 0; - struct buffer_head *primary = NULL; - struct ext4_group_desc *gdp; - struct inode *inode = NULL; - handle_t *handle; - int gdb_off, gdb_num; - int err, err2; - - gdb_num = input->group / EXT4_DESC_PER_BLOCK(sb); - gdb_off = input->group % EXT4_DESC_PER_BLOCK(sb); - - if (gdb_off == 0 && !EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER)) { - ext4_warning(sb, __FUNCTION__, - "Can't resize non-sparse filesystem further"); - return -EPERM; - } - - if (ext4_blocks_count(es) + input->blocks_count < - ext4_blocks_count(es)) { - ext4_warning(sb, __FUNCTION__, "blocks_count overflow\n"); - return -EINVAL; - } - - if (le32_to_cpu(es->s_inodes_count) + EXT4_INODES_PER_GROUP(sb) < - le32_to_cpu(es->s_inodes_count)) { - ext4_warning(sb, __FUNCTION__, "inodes_count overflow\n"); - return -EINVAL; - } - - if (reserved_gdb || gdb_off == 0) { - if (!EXT4_HAS_COMPAT_FEATURE(sb, - EXT4_FEATURE_COMPAT_RESIZE_INODE)){ - ext4_warning(sb, __FUNCTION__, - "No reserved GDT blocks, can't resize"); - return -EPERM; - } - inode = iget(sb, EXT4_RESIZE_INO); - if (!inode || is_bad_inode(inode)) { - ext4_warning(sb, __FUNCTION__, - "Error opening resize inode"); - iput(inode); - return -ENOENT; - } - } - - if ((err = verify_group_input(sb, input))) - goto exit_put; - - if ((err = setup_new_group_blocks(sb, input))) - goto exit_put; - - /* - * We will always be modifying at least the superblock and a GDT - * block. If we are adding a group past the last current GDT block, - * we will also modify the inode and the dindirect block. If we - * are adding a group with superblock/GDT backups we will also - * modify each of the reserved GDT dindirect blocks. - */ - handle = ext4_journal_start_sb(sb, - ext4_bg_has_super(sb, input->group) ? - 3 + reserved_gdb : 4); - if (IS_ERR(handle)) { - err = PTR_ERR(handle); - goto exit_put; - } - - lock_super(sb); - if (input->group != sbi->s_groups_count) { - ext4_warning(sb, __FUNCTION__, - "multiple resizers run on filesystem!"); - err = -EBUSY; - goto exit_journal; - } - - if ((err = ext4_journal_get_write_access(handle, sbi->s_sbh))) - goto exit_journal; - - /* - * We will only either add reserved group blocks to a backup group - * or remove reserved blocks for the first group in a new group block. - * Doing both would be mean more complex code, and sane people don't - * use non-sparse filesystems anymore. This is already checked above. - */ - if (gdb_off) { - primary = sbi->s_group_desc[gdb_num]; - if ((err = ext4_journal_get_write_access(handle, primary))) - goto exit_journal; - - if (reserved_gdb && ext4_bg_num_gdb(sb, input->group) && - (err = reserve_backup_gdb(handle, inode, input))) - goto exit_journal; - } else if ((err = add_new_gdb(handle, inode, input, &primary))) - goto exit_journal; - - /* - * OK, now we've set up the new group. Time to make it active. - * - * Current kernels don't lock all allocations via lock_super(), - * so we have to be safe wrt. concurrent accesses the group - * data. So we need to be careful to set all of the relevant - * group descriptor data etc. *before* we enable the group. - * - * The key field here is sbi->s_groups_count: as long as - * that retains its old value, nobody is going to access the new - * group. - * - * So first we update all the descriptor metadata for the new - * group; then we update the total disk blocks count; then we - * update the groups count to enable the group; then finally we - * update the free space counts so that the system can start - * using the new disk blocks. - */ - - /* Update group descriptor block for new group */ - gdp = (struct ext4_group_desc *)primary->b_data + gdb_off; - - ext4_block_bitmap_set(sb, gdp, input->block_bitmap); /* LV FIXME */ - ext4_inode_bitmap_set(sb, gdp, input->inode_bitmap); /* LV FIXME */ - ext4_inode_table_set(sb, gdp, input->inode_table); /* LV FIXME */ - gdp->bg_free_blocks_count = cpu_to_le16(input->free_blocks_count); - gdp->bg_free_inodes_count = cpu_to_le16(EXT4_INODES_PER_GROUP(sb)); - - /* - * Make the new blocks and inodes valid next. We do this before - * increasing the group count so that once the group is enabled, - * all of its blocks and inodes are already valid. - * - * We always allocate group-by-group, then block-by-block or - * inode-by-inode within a group, so enabling these - * blocks/inodes before the group is live won't actually let us - * allocate the new space yet. - */ - ext4_blocks_count_set(es, ext4_blocks_count(es) + - input->blocks_count); - es->s_inodes_count = cpu_to_le32(le32_to_cpu(es->s_inodes_count) + - EXT4_INODES_PER_GROUP(sb)); - - /* - * We need to protect s_groups_count against other CPUs seeing - * inconsistent state in the superblock. - * - * The precise rules we use are: - * - * * Writers of s_groups_count *must* hold lock_super - * AND - * * Writers must perform a smp_wmb() after updating all dependent - * data and before modifying the groups count - * - * * Readers must hold lock_super() over the access - * OR - * * Readers must perform an smp_rmb() after reading the groups count - * and before reading any dependent data. - * - * NB. These rules can be relaxed when checking the group count - * while freeing data, as we can only allocate from a block - * group after serialising against the group count, and we can - * only then free after serialising in turn against that - * allocation. - */ - smp_wmb(); - - /* Update the global fs size fields */ - sbi->s_groups_count++; - - ext4_journal_dirty_metadata(handle, primary); - - /* Update the reserved block counts only once the new group is - * active. */ - ext4_r_blocks_count_set(es, ext4_r_blocks_count(es) + - input->reserved_blocks); - - /* Update the free space counts */ - percpu_counter_mod(&sbi->s_freeblocks_counter, - input->free_blocks_count); - percpu_counter_mod(&sbi->s_freeinodes_counter, - EXT4_INODES_PER_GROUP(sb)); - - ext4_journal_dirty_metadata(handle, sbi->s_sbh); - sb->s_dirt = 1; - -exit_journal: - unlock_super(sb); - if ((err2 = ext4_journal_stop(handle)) && !err) - err = err2; - if (!err) { - update_backups(sb, sbi->s_sbh->b_blocknr, (char *)es, - sizeof(struct ext4_super_block)); - update_backups(sb, primary->b_blocknr, primary->b_data, - primary->b_size); - } -exit_put: - iput(inode); - return err; -} /* ext4_group_add */ - -/* Extend the filesystem to the new number of blocks specified. This entry - * point is only used to extend the current filesystem to the end of the last - * existing group. It can be accessed via ioctl, or by "remount,resize=" - * for emergencies (because it has no dependencies on reserved blocks). - * - * If we _really_ wanted, we could use default values to call ext4_group_add() - * allow the "remount" trick to work for arbitrary resizing, assuming enough - * GDT blocks are reserved to grow to the desired size. - */ -int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, - ext4_fsblk_t n_blocks_count) -{ - ext4_fsblk_t o_blocks_count; - unsigned long o_groups_count; - ext4_grpblk_t last; - ext4_grpblk_t add; - struct buffer_head * bh; - handle_t *handle; - int err; - unsigned long freed_blocks; - - /* We don't need to worry about locking wrt other resizers just - * yet: we're going to revalidate es->s_blocks_count after - * taking lock_super() below. */ - o_blocks_count = ext4_blocks_count(es); - o_groups_count = EXT4_SB(sb)->s_groups_count; - - if (test_opt(sb, DEBUG)) - printk(KERN_DEBUG "EXT4-fs: extending last group from %llu uto %llu blocks\n", - o_blocks_count, n_blocks_count); - - if (n_blocks_count == 0 || n_blocks_count == o_blocks_count) - return 0; - - if (n_blocks_count > (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) { - printk(KERN_ERR "EXT4-fs: filesystem on %s:" - " too large to resize to %llu blocks safely\n", - sb->s_id, n_blocks_count); - if (sizeof(sector_t) < 8) - ext4_warning(sb, __FUNCTION__, - "CONFIG_LBD not enabled\n"); - return -EINVAL; - } - - if (n_blocks_count < o_blocks_count) { - ext4_warning(sb, __FUNCTION__, - "can't shrink FS - resize aborted"); - return -EBUSY; - } - - /* Handle the remaining blocks in the last group only. */ - ext4_get_group_no_and_offset(sb, o_blocks_count, NULL, &last); - - if (last == 0) { - ext4_warning(sb, __FUNCTION__, - "need to use ext2online to resize further"); - return -EPERM; - } - - add = EXT4_BLOCKS_PER_GROUP(sb) - last; - - if (o_blocks_count + add < o_blocks_count) { - ext4_warning(sb, __FUNCTION__, "blocks_count overflow"); - return -EINVAL; - } - - if (o_blocks_count + add > n_blocks_count) - add = n_blocks_count - o_blocks_count; - - if (o_blocks_count + add < n_blocks_count) - ext4_warning(sb, __FUNCTION__, - "will only finish group (%llu" - " blocks, %u new)", - o_blocks_count + add, add); - - /* See if the device is actually as big as what was requested */ - bh = sb_bread(sb, o_blocks_count + add -1); - if (!bh) { - ext4_warning(sb, __FUNCTION__, - "can't read last block, resize aborted"); - return -ENOSPC; - } - brelse(bh); - - /* We will update the superblock, one block bitmap, and - * one group descriptor via ext4_free_blocks(). - */ - handle = ext4_journal_start_sb(sb, 3); - if (IS_ERR(handle)) { - err = PTR_ERR(handle); - ext4_warning(sb, __FUNCTION__, "error %d on journal start",err); - goto exit_put; - } - - lock_super(sb); - if (o_blocks_count != ext4_blocks_count(es)) { - ext4_warning(sb, __FUNCTION__, - "multiple resizers run on filesystem!"); - unlock_super(sb); - err = -EBUSY; - goto exit_put; - } - - if ((err = ext4_journal_get_write_access(handle, - EXT4_SB(sb)->s_sbh))) { - ext4_warning(sb, __FUNCTION__, - "error %d on journal write access", err); - unlock_super(sb); - ext4_journal_stop(handle); - goto exit_put; - } - ext4_blocks_count_set(es, o_blocks_count + add); - ext4_journal_dirty_metadata(handle, EXT4_SB(sb)->s_sbh); - sb->s_dirt = 1; - unlock_super(sb); - ext4_debug("freeing blocks %lu through %llu\n", o_blocks_count, - o_blocks_count + add); - ext4_free_blocks_sb(handle, sb, o_blocks_count, add, &freed_blocks); - ext4_debug("freed blocks %llu through %llu\n", o_blocks_count, - o_blocks_count + add); - if ((err = ext4_journal_stop(handle))) - goto exit_put; - if (test_opt(sb, DEBUG)) - printk(KERN_DEBUG "EXT4-fs: extended group to %llu blocks\n", - ext4_blocks_count(es)); - update_backups(sb, EXT4_SB(sb)->s_sbh->b_blocknr, (char *)es, - sizeof(struct ext4_super_block)); -exit_put: - return err; -} /* ext4_group_extend */ diff --git a/trunk/fs/ext4/super.c b/trunk/fs/ext4/super.c deleted file mode 100644 index b4b022aa2bc2..000000000000 --- a/trunk/fs/ext4/super.c +++ /dev/null @@ -1,2829 +0,0 @@ -/* - * linux/fs/ext4/super.c - * - * Copyright (C) 1992, 1993, 1994, 1995 - * Remy Card (card@masi.ibp.fr) - * Laboratoire MASI - Institut Blaise Pascal - * Universite Pierre et Marie Curie (Paris VI) - * - * from - * - * linux/fs/minix/inode.c - * - * Copyright (C) 1991, 1992 Linus Torvalds - * - * Big-endian to little-endian byte-swapping/bitmaps by - * David S. Miller (davem@caip.rutgers.edu), 1995 - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include "xattr.h" -#include "acl.h" -#include "namei.h" - -static int ext4_load_journal(struct super_block *, struct ext4_super_block *, - unsigned long journal_devnum); -static int ext4_create_journal(struct super_block *, struct ext4_super_block *, - unsigned int); -static void ext4_commit_super (struct super_block * sb, - struct ext4_super_block * es, - int sync); -static void ext4_mark_recovery_complete(struct super_block * sb, - struct ext4_super_block * es); -static void ext4_clear_journal_err(struct super_block * sb, - struct ext4_super_block * es); -static int ext4_sync_fs(struct super_block *sb, int wait); -static const char *ext4_decode_error(struct super_block * sb, int errno, - char nbuf[16]); -static int ext4_remount (struct super_block * sb, int * flags, char * data); -static int ext4_statfs (struct dentry * dentry, struct kstatfs * buf); -static void ext4_unlockfs(struct super_block *sb); -static void ext4_write_super (struct super_block * sb); -static void ext4_write_super_lockfs(struct super_block *sb); - - -ext4_fsblk_t ext4_block_bitmap(struct super_block *sb, - struct ext4_group_desc *bg) -{ - return le32_to_cpu(bg->bg_block_bitmap) | - (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? - (ext4_fsblk_t)le32_to_cpu(bg->bg_block_bitmap_hi) << 32 : 0); -} - -ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb, - struct ext4_group_desc *bg) -{ - return le32_to_cpu(bg->bg_inode_bitmap) | - (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? - (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_bitmap_hi) << 32 : 0); -} - -ext4_fsblk_t ext4_inode_table(struct super_block *sb, - struct ext4_group_desc *bg) -{ - return le32_to_cpu(bg->bg_inode_table) | - (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? - (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0); -} - -void ext4_block_bitmap_set(struct super_block *sb, - struct ext4_group_desc *bg, ext4_fsblk_t blk) -{ - bg->bg_block_bitmap = cpu_to_le32((u32)blk); - if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) - bg->bg_block_bitmap_hi = cpu_to_le32(blk >> 32); -} - -void ext4_inode_bitmap_set(struct super_block *sb, - struct ext4_group_desc *bg, ext4_fsblk_t blk) -{ - bg->bg_inode_bitmap = cpu_to_le32((u32)blk); - if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) - bg->bg_inode_bitmap_hi = cpu_to_le32(blk >> 32); -} - -void ext4_inode_table_set(struct super_block *sb, - struct ext4_group_desc *bg, ext4_fsblk_t blk) -{ - bg->bg_inode_table = cpu_to_le32((u32)blk); - if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) - bg->bg_inode_table_hi = cpu_to_le32(blk >> 32); -} - -/* - * Wrappers for jbd2_journal_start/end. - * - * The only special thing we need to do here is to make sure that all - * journal_end calls result in the superblock being marked dirty, so - * that sync() will call the filesystem's write_super callback if - * appropriate. - */ -handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks) -{ - journal_t *journal; - - if (sb->s_flags & MS_RDONLY) - return ERR_PTR(-EROFS); - - /* Special case here: if the journal has aborted behind our - * backs (eg. EIO in the commit thread), then we still need to - * take the FS itself readonly cleanly. */ - journal = EXT4_SB(sb)->s_journal; - if (is_journal_aborted(journal)) { - ext4_abort(sb, __FUNCTION__, - "Detected aborted journal"); - return ERR_PTR(-EROFS); - } - - return jbd2_journal_start(journal, nblocks); -} - -/* - * The only special thing we need to do here is to make sure that all - * jbd2_journal_stop calls result in the superblock being marked dirty, so - * that sync() will call the filesystem's write_super callback if - * appropriate. - */ -int __ext4_journal_stop(const char *where, handle_t *handle) -{ - struct super_block *sb; - int err; - int rc; - - sb = handle->h_transaction->t_journal->j_private; - err = handle->h_err; - rc = jbd2_journal_stop(handle); - - if (!err) - err = rc; - if (err) - __ext4_std_error(sb, where, err); - return err; -} - -void ext4_journal_abort_handle(const char *caller, const char *err_fn, - struct buffer_head *bh, handle_t *handle, int err) -{ - char nbuf[16]; - const char *errstr = ext4_decode_error(NULL, err, nbuf); - - if (bh) - BUFFER_TRACE(bh, "abort"); - - if (!handle->h_err) - handle->h_err = err; - - if (is_handle_aborted(handle)) - return; - - printk(KERN_ERR "%s: aborting transaction: %s in %s\n", - caller, errstr, err_fn); - - jbd2_journal_abort_handle(handle); -} - -/* Deal with the reporting of failure conditions on a filesystem such as - * inconsistencies detected or read IO failures. - * - * On ext2, we can store the error state of the filesystem in the - * superblock. That is not possible on ext4, because we may have other - * write ordering constraints on the superblock which prevent us from - * writing it out straight away; and given that the journal is about to - * be aborted, we can't rely on the current, or future, transactions to - * write out the superblock safely. - * - * We'll just use the jbd2_journal_abort() error code to record an error in - * the journal instead. On recovery, the journal will compain about - * that error until we've noted it down and cleared it. - */ - -static void ext4_handle_error(struct super_block *sb) -{ - struct ext4_super_block *es = EXT4_SB(sb)->s_es; - - EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; - es->s_state |= cpu_to_le16(EXT4_ERROR_FS); - - if (sb->s_flags & MS_RDONLY) - return; - - if (!test_opt (sb, ERRORS_CONT)) { - journal_t *journal = EXT4_SB(sb)->s_journal; - - EXT4_SB(sb)->s_mount_opt |= EXT4_MOUNT_ABORT; - if (journal) - jbd2_journal_abort(journal, -EIO); - } - if (test_opt (sb, ERRORS_RO)) { - printk (KERN_CRIT "Remounting filesystem read-only\n"); - sb->s_flags |= MS_RDONLY; - } - ext4_commit_super(sb, es, 1); - if (test_opt(sb, ERRORS_PANIC)) - panic("EXT4-fs (device %s): panic forced after error\n", - sb->s_id); -} - -void ext4_error (struct super_block * sb, const char * function, - const char * fmt, ...) -{ - va_list args; - - va_start(args, fmt); - printk(KERN_CRIT "EXT4-fs error (device %s): %s: ",sb->s_id, function); - vprintk(fmt, args); - printk("\n"); - va_end(args); - - ext4_handle_error(sb); -} - -static const char *ext4_decode_error(struct super_block * sb, int errno, - char nbuf[16]) -{ - char *errstr = NULL; - - switch (errno) { - case -EIO: - errstr = "IO failure"; - break; - case -ENOMEM: - errstr = "Out of memory"; - break; - case -EROFS: - if (!sb || EXT4_SB(sb)->s_journal->j_flags & JBD2_ABORT) - errstr = "Journal has aborted"; - else - errstr = "Readonly filesystem"; - break; - default: - /* If the caller passed in an extra buffer for unknown - * errors, textualise them now. Else we just return - * NULL. */ - if (nbuf) { - /* Check for truncated error codes... */ - if (snprintf(nbuf, 16, "error %d", -errno) >= 0) - errstr = nbuf; - } - break; - } - - return errstr; -} - -/* __ext4_std_error decodes expected errors from journaling functions - * automatically and invokes the appropriate error response. */ - -void __ext4_std_error (struct super_block * sb, const char * function, - int errno) -{ - char nbuf[16]; - const char *errstr; - - /* Special case: if the error is EROFS, and we're not already - * inside a transaction, then there's really no point in logging - * an error. */ - if (errno == -EROFS && journal_current_handle() == NULL && - (sb->s_flags & MS_RDONLY)) - return; - - errstr = ext4_decode_error(sb, errno, nbuf); - printk (KERN_CRIT "EXT4-fs error (device %s) in %s: %s\n", - sb->s_id, function, errstr); - - ext4_handle_error(sb); -} - -/* - * ext4_abort is a much stronger failure handler than ext4_error. The - * abort function may be used to deal with unrecoverable failures such - * as journal IO errors or ENOMEM at a critical moment in log management. - * - * We unconditionally force the filesystem into an ABORT|READONLY state, - * unless the error response on the fs has been set to panic in which - * case we take the easy way out and panic immediately. - */ - -void ext4_abort (struct super_block * sb, const char * function, - const char * fmt, ...) -{ - va_list args; - - printk (KERN_CRIT "ext4_abort called.\n"); - - va_start(args, fmt); - printk(KERN_CRIT "EXT4-fs error (device %s): %s: ",sb->s_id, function); - vprintk(fmt, args); - printk("\n"); - va_end(args); - - if (test_opt(sb, ERRORS_PANIC)) - panic("EXT4-fs panic from previous error\n"); - - if (sb->s_flags & MS_RDONLY) - return; - - printk(KERN_CRIT "Remounting filesystem read-only\n"); - EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; - sb->s_flags |= MS_RDONLY; - EXT4_SB(sb)->s_mount_opt |= EXT4_MOUNT_ABORT; - jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO); -} - -void ext4_warning (struct super_block * sb, const char * function, - const char * fmt, ...) -{ - va_list args; - - va_start(args, fmt); - printk(KERN_WARNING "EXT4-fs warning (device %s): %s: ", - sb->s_id, function); - vprintk(fmt, args); - printk("\n"); - va_end(args); -} - -void ext4_update_dynamic_rev(struct super_block *sb) -{ - struct ext4_super_block *es = EXT4_SB(sb)->s_es; - - if (le32_to_cpu(es->s_rev_level) > EXT4_GOOD_OLD_REV) - return; - - ext4_warning(sb, __FUNCTION__, - "updating to rev %d because of new feature flag, " - "running e2fsck is recommended", - EXT4_DYNAMIC_REV); - - es->s_first_ino = cpu_to_le32(EXT4_GOOD_OLD_FIRST_INO); - es->s_inode_size = cpu_to_le16(EXT4_GOOD_OLD_INODE_SIZE); - es->s_rev_level = cpu_to_le32(EXT4_DYNAMIC_REV); - /* leave es->s_feature_*compat flags alone */ - /* es->s_uuid will be set by e2fsck if empty */ - - /* - * The rest of the superblock fields should be zero, and if not it - * means they are likely already in use, so leave them alone. We - * can leave it up to e2fsck to clean up any inconsistencies there. - */ -} - -/* - * Open the external journal device - */ -static struct block_device *ext4_blkdev_get(dev_t dev) -{ - struct block_device *bdev; - char b[BDEVNAME_SIZE]; - - bdev = open_by_devnum(dev, FMODE_READ|FMODE_WRITE); - if (IS_ERR(bdev)) - goto fail; - return bdev; - -fail: - printk(KERN_ERR "EXT4: failed to open journal device %s: %ld\n", - __bdevname(dev, b), PTR_ERR(bdev)); - return NULL; -} - -/* - * Release the journal device - */ -static int ext4_blkdev_put(struct block_device *bdev) -{ - bd_release(bdev); - return blkdev_put(bdev); -} - -static int ext4_blkdev_remove(struct ext4_sb_info *sbi) -{ - struct block_device *bdev; - int ret = -ENODEV; - - bdev = sbi->journal_bdev; - if (bdev) { - ret = ext4_blkdev_put(bdev); - sbi->journal_bdev = NULL; - } - return ret; -} - -static inline struct inode *orphan_list_entry(struct list_head *l) -{ - return &list_entry(l, struct ext4_inode_info, i_orphan)->vfs_inode; -} - -static void dump_orphan_list(struct super_block *sb, struct ext4_sb_info *sbi) -{ - struct list_head *l; - - printk(KERN_ERR "sb orphan head is %d\n", - le32_to_cpu(sbi->s_es->s_last_orphan)); - - printk(KERN_ERR "sb_info orphan list:\n"); - list_for_each(l, &sbi->s_orphan) { - struct inode *inode = orphan_list_entry(l); - printk(KERN_ERR " " - "inode %s:%lu at %p: mode %o, nlink %d, next %d\n", - inode->i_sb->s_id, inode->i_ino, inode, - inode->i_mode, inode->i_nlink, - NEXT_ORPHAN(inode)); - } -} - -static void ext4_put_super (struct super_block * sb) -{ - struct ext4_sb_info *sbi = EXT4_SB(sb); - struct ext4_super_block *es = sbi->s_es; - int i; - - ext4_ext_release(sb); - ext4_xattr_put_super(sb); - jbd2_journal_destroy(sbi->s_journal); - if (!(sb->s_flags & MS_RDONLY)) { - EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); - es->s_state = cpu_to_le16(sbi->s_mount_state); - BUFFER_TRACE(sbi->s_sbh, "marking dirty"); - mark_buffer_dirty(sbi->s_sbh); - ext4_commit_super(sb, es, 1); - } - - for (i = 0; i < sbi->s_gdb_count; i++) - brelse(sbi->s_group_desc[i]); - kfree(sbi->s_group_desc); - percpu_counter_destroy(&sbi->s_freeblocks_counter); - percpu_counter_destroy(&sbi->s_freeinodes_counter); - percpu_counter_destroy(&sbi->s_dirs_counter); - brelse(sbi->s_sbh); -#ifdef CONFIG_QUOTA - for (i = 0; i < MAXQUOTAS; i++) - kfree(sbi->s_qf_names[i]); -#endif - - /* Debugging code just in case the in-memory inode orphan list - * isn't empty. The on-disk one can be non-empty if we've - * detected an error and taken the fs readonly, but the - * in-memory list had better be clean by this point. */ - if (!list_empty(&sbi->s_orphan)) - dump_orphan_list(sb, sbi); - J_ASSERT(list_empty(&sbi->s_orphan)); - - invalidate_bdev(sb->s_bdev, 0); - if (sbi->journal_bdev && sbi->journal_bdev != sb->s_bdev) { - /* - * Invalidate the journal device's buffers. We don't want them - * floating about in memory - the physical journal device may - * hotswapped, and it breaks the `ro-after' testing code. - */ - sync_blockdev(sbi->journal_bdev); - invalidate_bdev(sbi->journal_bdev, 0); - ext4_blkdev_remove(sbi); - } - sb->s_fs_info = NULL; - kfree(sbi); - return; -} - -static kmem_cache_t *ext4_inode_cachep; - -/* - * Called inside transaction, so use GFP_NOFS - */ -static struct inode *ext4_alloc_inode(struct super_block *sb) -{ - struct ext4_inode_info *ei; - - ei = kmem_cache_alloc(ext4_inode_cachep, SLAB_NOFS); - if (!ei) - return NULL; -#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL - ei->i_acl = EXT4_ACL_NOT_CACHED; - ei->i_default_acl = EXT4_ACL_NOT_CACHED; -#endif - ei->i_block_alloc_info = NULL; - ei->vfs_inode.i_version = 1; - memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache)); - return &ei->vfs_inode; -} - -static void ext4_destroy_inode(struct inode *inode) -{ - kmem_cache_free(ext4_inode_cachep, EXT4_I(inode)); -} - -static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) -{ - struct ext4_inode_info *ei = (struct ext4_inode_info *) foo; - - if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == - SLAB_CTOR_CONSTRUCTOR) { - INIT_LIST_HEAD(&ei->i_orphan); -#ifdef CONFIG_EXT4DEV_FS_XATTR - init_rwsem(&ei->xattr_sem); -#endif - mutex_init(&ei->truncate_mutex); - inode_init_once(&ei->vfs_inode); - } -} - -static int init_inodecache(void) -{ - ext4_inode_cachep = kmem_cache_create("ext4_inode_cache", - sizeof(struct ext4_inode_info), - 0, (SLAB_RECLAIM_ACCOUNT| - SLAB_MEM_SPREAD), - init_once, NULL); - if (ext4_inode_cachep == NULL) - return -ENOMEM; - return 0; -} - -static void destroy_inodecache(void) -{ - kmem_cache_destroy(ext4_inode_cachep); -} - -static void ext4_clear_inode(struct inode *inode) -{ - struct ext4_block_alloc_info *rsv = EXT4_I(inode)->i_block_alloc_info; -#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL - if (EXT4_I(inode)->i_acl && - EXT4_I(inode)->i_acl != EXT4_ACL_NOT_CACHED) { - posix_acl_release(EXT4_I(inode)->i_acl); - EXT4_I(inode)->i_acl = EXT4_ACL_NOT_CACHED; - } - if (EXT4_I(inode)->i_default_acl && - EXT4_I(inode)->i_default_acl != EXT4_ACL_NOT_CACHED) { - posix_acl_release(EXT4_I(inode)->i_default_acl); - EXT4_I(inode)->i_default_acl = EXT4_ACL_NOT_CACHED; - } -#endif - ext4_discard_reservation(inode); - EXT4_I(inode)->i_block_alloc_info = NULL; - if (unlikely(rsv)) - kfree(rsv); -} - -static inline void ext4_show_quota_options(struct seq_file *seq, struct super_block *sb) -{ -#if defined(CONFIG_QUOTA) - struct ext4_sb_info *sbi = EXT4_SB(sb); - - if (sbi->s_jquota_fmt) - seq_printf(seq, ",jqfmt=%s", - (sbi->s_jquota_fmt == QFMT_VFS_OLD) ? "vfsold": "vfsv0"); - - if (sbi->s_qf_names[USRQUOTA]) - seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]); - - if (sbi->s_qf_names[GRPQUOTA]) - seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]); - - if (sbi->s_mount_opt & EXT4_MOUNT_USRQUOTA) - seq_puts(seq, ",usrquota"); - - if (sbi->s_mount_opt & EXT4_MOUNT_GRPQUOTA) - seq_puts(seq, ",grpquota"); -#endif -} - -static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) -{ - struct super_block *sb = vfs->mnt_sb; - - if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) - seq_puts(seq, ",data=journal"); - else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) - seq_puts(seq, ",data=ordered"); - else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA) - seq_puts(seq, ",data=writeback"); - - ext4_show_quota_options(seq, sb); - - return 0; -} - - -static struct dentry *ext4_get_dentry(struct super_block *sb, void *vobjp) -{ - __u32 *objp = vobjp; - unsigned long ino = objp[0]; - __u32 generation = objp[1]; - struct inode *inode; - struct dentry *result; - - if (ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO) - return ERR_PTR(-ESTALE); - if (ino > le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count)) - return ERR_PTR(-ESTALE); - - /* iget isn't really right if the inode is currently unallocated!! - * - * ext4_read_inode will return a bad_inode if the inode had been - * deleted, so we should be safe. - * - * Currently we don't know the generation for parent directory, so - * a generation of 0 means "accept any" - */ - inode = iget(sb, ino); - if (inode == NULL) - return ERR_PTR(-ENOMEM); - if (is_bad_inode(inode) || - (generation && inode->i_generation != generation)) { - iput(inode); - return ERR_PTR(-ESTALE); - } - /* now to find a dentry. - * If possible, get a well-connected one - */ - result = d_alloc_anon(inode); - if (!result) { - iput(inode); - return ERR_PTR(-ENOMEM); - } - return result; -} - -#ifdef CONFIG_QUOTA -#define QTYPE2NAME(t) ((t)==USRQUOTA?"user":"group") -#define QTYPE2MOPT(on, t) ((t)==USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA)) - -static int ext4_dquot_initialize(struct inode *inode, int type); -static int ext4_dquot_drop(struct inode *inode); -static int ext4_write_dquot(struct dquot *dquot); -static int ext4_acquire_dquot(struct dquot *dquot); -static int ext4_release_dquot(struct dquot *dquot); -static int ext4_mark_dquot_dirty(struct dquot *dquot); -static int ext4_write_info(struct super_block *sb, int type); -static int ext4_quota_on(struct super_block *sb, int type, int format_id, char *path); -static int ext4_quota_on_mount(struct super_block *sb, int type); -static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data, - size_t len, loff_t off); -static ssize_t ext4_quota_write(struct super_block *sb, int type, - const char *data, size_t len, loff_t off); - -static struct dquot_operations ext4_quota_operations = { - .initialize = ext4_dquot_initialize, - .drop = ext4_dquot_drop, - .alloc_space = dquot_alloc_space, - .alloc_inode = dquot_alloc_inode, - .free_space = dquot_free_space, - .free_inode = dquot_free_inode, - .transfer = dquot_transfer, - .write_dquot = ext4_write_dquot, - .acquire_dquot = ext4_acquire_dquot, - .release_dquot = ext4_release_dquot, - .mark_dirty = ext4_mark_dquot_dirty, - .write_info = ext4_write_info -}; - -static struct quotactl_ops ext4_qctl_operations = { - .quota_on = ext4_quota_on, - .quota_off = vfs_quota_off, - .quota_sync = vfs_quota_sync, - .get_info = vfs_get_dqinfo, - .set_info = vfs_set_dqinfo, - .get_dqblk = vfs_get_dqblk, - .set_dqblk = vfs_set_dqblk -}; -#endif - -static struct super_operations ext4_sops = { - .alloc_inode = ext4_alloc_inode, - .destroy_inode = ext4_destroy_inode, - .read_inode = ext4_read_inode, - .write_inode = ext4_write_inode, - .dirty_inode = ext4_dirty_inode, - .delete_inode = ext4_delete_inode, - .put_super = ext4_put_super, - .write_super = ext4_write_super, - .sync_fs = ext4_sync_fs, - .write_super_lockfs = ext4_write_super_lockfs, - .unlockfs = ext4_unlockfs, - .statfs = ext4_statfs, - .remount_fs = ext4_remount, - .clear_inode = ext4_clear_inode, - .show_options = ext4_show_options, -#ifdef CONFIG_QUOTA - .quota_read = ext4_quota_read, - .quota_write = ext4_quota_write, -#endif -}; - -static struct export_operations ext4_export_ops = { - .get_parent = ext4_get_parent, - .get_dentry = ext4_get_dentry, -}; - -enum { - Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid, - Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro, - Opt_nouid32, Opt_nocheck, Opt_debug, Opt_oldalloc, Opt_orlov, - Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, - Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, Opt_bh, - Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev, - Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, - Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, - Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, - Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota, - Opt_grpquota, Opt_extents, -}; - -static match_table_t tokens = { - {Opt_bsd_df, "bsddf"}, - {Opt_minix_df, "minixdf"}, - {Opt_grpid, "grpid"}, - {Opt_grpid, "bsdgroups"}, - {Opt_nogrpid, "nogrpid"}, - {Opt_nogrpid, "sysvgroups"}, - {Opt_resgid, "resgid=%u"}, - {Opt_resuid, "resuid=%u"}, - {Opt_sb, "sb=%u"}, - {Opt_err_cont, "errors=continue"}, - {Opt_err_panic, "errors=panic"}, - {Opt_err_ro, "errors=remount-ro"}, - {Opt_nouid32, "nouid32"}, - {Opt_nocheck, "nocheck"}, - {Opt_nocheck, "check=none"}, - {Opt_debug, "debug"}, - {Opt_oldalloc, "oldalloc"}, - {Opt_orlov, "orlov"}, - {Opt_user_xattr, "user_xattr"}, - {Opt_nouser_xattr, "nouser_xattr"}, - {Opt_acl, "acl"}, - {Opt_noacl, "noacl"}, - {Opt_reservation, "reservation"}, - {Opt_noreservation, "noreservation"}, - {Opt_noload, "noload"}, - {Opt_nobh, "nobh"}, - {Opt_bh, "bh"}, - {Opt_commit, "commit=%u"}, - {Opt_journal_update, "journal=update"}, - {Opt_journal_inum, "journal=%u"}, - {Opt_journal_dev, "journal_dev=%u"}, - {Opt_abort, "abort"}, - {Opt_data_journal, "data=journal"}, - {Opt_data_ordered, "data=ordered"}, - {Opt_data_writeback, "data=writeback"}, - {Opt_offusrjquota, "usrjquota="}, - {Opt_usrjquota, "usrjquota=%s"}, - {Opt_offgrpjquota, "grpjquota="}, - {Opt_grpjquota, "grpjquota=%s"}, - {Opt_jqfmt_vfsold, "jqfmt=vfsold"}, - {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"}, - {Opt_grpquota, "grpquota"}, - {Opt_noquota, "noquota"}, - {Opt_quota, "quota"}, - {Opt_usrquota, "usrquota"}, - {Opt_barrier, "barrier=%u"}, - {Opt_extents, "extents"}, - {Opt_err, NULL}, - {Opt_resize, "resize"}, -}; - -static ext4_fsblk_t get_sb_block(void **data) -{ - ext4_fsblk_t sb_block; - char *options = (char *) *data; - - if (!options || strncmp(options, "sb=", 3) != 0) - return 1; /* Default location */ - options += 3; - /*todo: use simple_strtoll with >32bit ext4 */ - sb_block = simple_strtoul(options, &options, 0); - if (*options && *options != ',') { - printk("EXT4-fs: Invalid sb specification: %s\n", - (char *) *data); - return 1; - } - if (*options == ',') - options++; - *data = (void *) options; - return sb_block; -} - -static int parse_options (char *options, struct super_block *sb, - unsigned int *inum, unsigned long *journal_devnum, - ext4_fsblk_t *n_blocks_count, int is_remount) -{ - struct ext4_sb_info *sbi = EXT4_SB(sb); - char * p; - substring_t args[MAX_OPT_ARGS]; - int data_opt = 0; - int option; -#ifdef CONFIG_QUOTA - int qtype; - char *qname; -#endif - - if (!options) - return 1; - - while ((p = strsep (&options, ",")) != NULL) { - int token; - if (!*p) - continue; - - token = match_token(p, tokens, args); - switch (token) { - case Opt_bsd_df: - clear_opt (sbi->s_mount_opt, MINIX_DF); - break; - case Opt_minix_df: - set_opt (sbi->s_mount_opt, MINIX_DF); - break; - case Opt_grpid: - set_opt (sbi->s_mount_opt, GRPID); - break; - case Opt_nogrpid: - clear_opt (sbi->s_mount_opt, GRPID); - break; - case Opt_resuid: - if (match_int(&args[0], &option)) - return 0; - sbi->s_resuid = option; - break; - case Opt_resgid: - if (match_int(&args[0], &option)) - return 0; - sbi->s_resgid = option; - break; - case Opt_sb: - /* handled by get_sb_block() instead of here */ - /* *sb_block = match_int(&args[0]); */ - break; - case Opt_err_panic: - clear_opt (sbi->s_mount_opt, ERRORS_CONT); - clear_opt (sbi->s_mount_opt, ERRORS_RO); - set_opt (sbi->s_mount_opt, ERRORS_PANIC); - break; - case Opt_err_ro: - clear_opt (sbi->s_mount_opt, ERRORS_CONT); - clear_opt (sbi->s_mount_opt, ERRORS_PANIC); - set_opt (sbi->s_mount_opt, ERRORS_RO); - break; - case Opt_err_cont: - clear_opt (sbi->s_mount_opt, ERRORS_RO); - clear_opt (sbi->s_mount_opt, ERRORS_PANIC); - set_opt (sbi->s_mount_opt, ERRORS_CONT); - break; - case Opt_nouid32: - set_opt (sbi->s_mount_opt, NO_UID32); - break; - case Opt_nocheck: - clear_opt (sbi->s_mount_opt, CHECK); - break; - case Opt_debug: - set_opt (sbi->s_mount_opt, DEBUG); - break; - case Opt_oldalloc: - set_opt (sbi->s_mount_opt, OLDALLOC); - break; - case Opt_orlov: - clear_opt (sbi->s_mount_opt, OLDALLOC); - break; -#ifdef CONFIG_EXT4DEV_FS_XATTR - case Opt_user_xattr: - set_opt (sbi->s_mount_opt, XATTR_USER); - break; - case Opt_nouser_xattr: - clear_opt (sbi->s_mount_opt, XATTR_USER); - break; -#else - case Opt_user_xattr: - case Opt_nouser_xattr: - printk("EXT4 (no)user_xattr options not supported\n"); - break; -#endif -#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL - case Opt_acl: - set_opt(sbi->s_mount_opt, POSIX_ACL); - break; - case Opt_noacl: - clear_opt(sbi->s_mount_opt, POSIX_ACL); - break; -#else - case Opt_acl: - case Opt_noacl: - printk("EXT4 (no)acl options not supported\n"); - break; -#endif - case Opt_reservation: - set_opt(sbi->s_mount_opt, RESERVATION); - break; - case Opt_noreservation: - clear_opt(sbi->s_mount_opt, RESERVATION); - break; - case Opt_journal_update: - /* @@@ FIXME */ - /* Eventually we will want to be able to create - a journal file here. For now, only allow the - user to specify an existing inode to be the - journal file. */ - if (is_remount) { - printk(KERN_ERR "EXT4-fs: cannot specify " - "journal on remount\n"); - return 0; - } - set_opt (sbi->s_mount_opt, UPDATE_JOURNAL); - break; - case Opt_journal_inum: - if (is_remount) { - printk(KERN_ERR "EXT4-fs: cannot specify " - "journal on remount\n"); - return 0; - } - if (match_int(&args[0], &option)) - return 0; - *inum = option; - break; - case Opt_journal_dev: - if (is_remount) { - printk(KERN_ERR "EXT4-fs: cannot specify " - "journal on remount\n"); - return 0; - } - if (match_int(&args[0], &option)) - return 0; - *journal_devnum = option; - break; - case Opt_noload: - set_opt (sbi->s_mount_opt, NOLOAD); - break; - case Opt_commit: - if (match_int(&args[0], &option)) - return 0; - if (option < 0) - return 0; - if (option == 0) - option = JBD_DEFAULT_MAX_COMMIT_AGE; - sbi->s_commit_interval = HZ * option; - break; - case Opt_data_journal: - data_opt = EXT4_MOUNT_JOURNAL_DATA; - goto datacheck; - case Opt_data_ordered: - data_opt = EXT4_MOUNT_ORDERED_DATA; - goto datacheck; - case Opt_data_writeback: - data_opt = EXT4_MOUNT_WRITEBACK_DATA; - datacheck: - if (is_remount) { - if ((sbi->s_mount_opt & EXT4_MOUNT_DATA_FLAGS) - != data_opt) { - printk(KERN_ERR - "EXT4-fs: cannot change data " - "mode on remount\n"); - return 0; - } - } else { - sbi->s_mount_opt &= ~EXT4_MOUNT_DATA_FLAGS; - sbi->s_mount_opt |= data_opt; - } - break; -#ifdef CONFIG_QUOTA - case Opt_usrjquota: - qtype = USRQUOTA; - goto set_qf_name; - case Opt_grpjquota: - qtype = GRPQUOTA; -set_qf_name: - if (sb_any_quota_enabled(sb)) { - printk(KERN_ERR - "EXT4-fs: Cannot change journalled " - "quota options when quota turned on.\n"); - return 0; - } - qname = match_strdup(&args[0]); - if (!qname) { - printk(KERN_ERR - "EXT4-fs: not enough memory for " - "storing quotafile name.\n"); - return 0; - } - if (sbi->s_qf_names[qtype] && - strcmp(sbi->s_qf_names[qtype], qname)) { - printk(KERN_ERR - "EXT4-fs: %s quota file already " - "specified.\n", QTYPE2NAME(qtype)); - kfree(qname); - return 0; - } - sbi->s_qf_names[qtype] = qname; - if (strchr(sbi->s_qf_names[qtype], '/')) { - printk(KERN_ERR - "EXT4-fs: quotafile must be on " - "filesystem root.\n"); - kfree(sbi->s_qf_names[qtype]); - sbi->s_qf_names[qtype] = NULL; - return 0; - } - set_opt(sbi->s_mount_opt, QUOTA); - break; - case Opt_offusrjquota: - qtype = USRQUOTA; - goto clear_qf_name; - case Opt_offgrpjquota: - qtype = GRPQUOTA; -clear_qf_name: - if (sb_any_quota_enabled(sb)) { - printk(KERN_ERR "EXT4-fs: Cannot change " - "journalled quota options when " - "quota turned on.\n"); - return 0; - } - /* - * The space will be released later when all options - * are confirmed to be correct - */ - sbi->s_qf_names[qtype] = NULL; - break; - case Opt_jqfmt_vfsold: - sbi->s_jquota_fmt = QFMT_VFS_OLD; - break; - case Opt_jqfmt_vfsv0: - sbi->s_jquota_fmt = QFMT_VFS_V0; - break; - case Opt_quota: - case Opt_usrquota: - set_opt(sbi->s_mount_opt, QUOTA); - set_opt(sbi->s_mount_opt, USRQUOTA); - break; - case Opt_grpquota: - set_opt(sbi->s_mount_opt, QUOTA); - set_opt(sbi->s_mount_opt, GRPQUOTA); - break; - case Opt_noquota: - if (sb_any_quota_enabled(sb)) { - printk(KERN_ERR "EXT4-fs: Cannot change quota " - "options when quota turned on.\n"); - return 0; - } - clear_opt(sbi->s_mount_opt, QUOTA); - clear_opt(sbi->s_mount_opt, USRQUOTA); - clear_opt(sbi->s_mount_opt, GRPQUOTA); - break; -#else - case Opt_quota: - case Opt_usrquota: - case Opt_grpquota: - case Opt_usrjquota: - case Opt_grpjquota: - case Opt_offusrjquota: - case Opt_offgrpjquota: - case Opt_jqfmt_vfsold: - case Opt_jqfmt_vfsv0: - printk(KERN_ERR - "EXT4-fs: journalled quota options not " - "supported.\n"); - break; - case Opt_noquota: - break; -#endif - case Opt_abort: - set_opt(sbi->s_mount_opt, ABORT); - break; - case Opt_barrier: - if (match_int(&args[0], &option)) - return 0; - if (option) - set_opt(sbi->s_mount_opt, BARRIER); - else - clear_opt(sbi->s_mount_opt, BARRIER); - break; - case Opt_ignore: - break; - case Opt_resize: - if (!is_remount) { - printk("EXT4-fs: resize option only available " - "for remount\n"); - return 0; - } - if (match_int(&args[0], &option) != 0) - return 0; - *n_blocks_count = option; - break; - case Opt_nobh: - set_opt(sbi->s_mount_opt, NOBH); - break; - case Opt_bh: - clear_opt(sbi->s_mount_opt, NOBH); - break; - case Opt_extents: - set_opt (sbi->s_mount_opt, EXTENTS); - break; - default: - printk (KERN_ERR - "EXT4-fs: Unrecognized mount option \"%s\" " - "or missing value\n", p); - return 0; - } - } -#ifdef CONFIG_QUOTA - if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) { - if ((sbi->s_mount_opt & EXT4_MOUNT_USRQUOTA) && - sbi->s_qf_names[USRQUOTA]) - clear_opt(sbi->s_mount_opt, USRQUOTA); - - if ((sbi->s_mount_opt & EXT4_MOUNT_GRPQUOTA) && - sbi->s_qf_names[GRPQUOTA]) - clear_opt(sbi->s_mount_opt, GRPQUOTA); - - if ((sbi->s_qf_names[USRQUOTA] && - (sbi->s_mount_opt & EXT4_MOUNT_GRPQUOTA)) || - (sbi->s_qf_names[GRPQUOTA] && - (sbi->s_mount_opt & EXT4_MOUNT_USRQUOTA))) { - printk(KERN_ERR "EXT4-fs: old and new quota " - "format mixing.\n"); - return 0; - } - - if (!sbi->s_jquota_fmt) { - printk(KERN_ERR "EXT4-fs: journalled quota format " - "not specified.\n"); - return 0; - } - } else { - if (sbi->s_jquota_fmt) { - printk(KERN_ERR "EXT4-fs: journalled quota format " - "specified with no journalling " - "enabled.\n"); - return 0; - } - } -#endif - return 1; -} - -static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, - int read_only) -{ - struct ext4_sb_info *sbi = EXT4_SB(sb); - int res = 0; - - if (le32_to_cpu(es->s_rev_level) > EXT4_MAX_SUPP_REV) { - printk (KERN_ERR "EXT4-fs warning: revision level too high, " - "forcing read-only mode\n"); - res = MS_RDONLY; - } - if (read_only) - return res; - if (!(sbi->s_mount_state & EXT4_VALID_FS)) - printk (KERN_WARNING "EXT4-fs warning: mounting unchecked fs, " - "running e2fsck is recommended\n"); - else if ((sbi->s_mount_state & EXT4_ERROR_FS)) - printk (KERN_WARNING - "EXT4-fs warning: mounting fs with errors, " - "running e2fsck is recommended\n"); - else if ((__s16) le16_to_cpu(es->s_max_mnt_count) >= 0 && - le16_to_cpu(es->s_mnt_count) >= - (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count)) - printk (KERN_WARNING - "EXT4-fs warning: maximal mount count reached, " - "running e2fsck is recommended\n"); - else if (le32_to_cpu(es->s_checkinterval) && - (le32_to_cpu(es->s_lastcheck) + - le32_to_cpu(es->s_checkinterval) <= get_seconds())) - printk (KERN_WARNING - "EXT4-fs warning: checktime reached, " - "running e2fsck is recommended\n"); -#if 0 - /* @@@ We _will_ want to clear the valid bit if we find - * inconsistencies, to force a fsck at reboot. But for - * a plain journaled filesystem we can keep it set as - * valid forever! :) - */ - es->s_state = cpu_to_le16(le16_to_cpu(es->s_state) & ~EXT4_VALID_FS); -#endif - if (!(__s16) le16_to_cpu(es->s_max_mnt_count)) - es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT); - es->s_mnt_count=cpu_to_le16(le16_to_cpu(es->s_mnt_count) + 1); - es->s_mtime = cpu_to_le32(get_seconds()); - ext4_update_dynamic_rev(sb); - EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); - - ext4_commit_super(sb, es, 1); - if (test_opt(sb, DEBUG)) - printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%lu, " - "bpg=%lu, ipg=%lu, mo=%04lx]\n", - sb->s_blocksize, - sbi->s_groups_count, - EXT4_BLOCKS_PER_GROUP(sb), - EXT4_INODES_PER_GROUP(sb), - sbi->s_mount_opt); - - printk(KERN_INFO "EXT4 FS on %s, ", sb->s_id); - if (EXT4_SB(sb)->s_journal->j_inode == NULL) { - char b[BDEVNAME_SIZE]; - - printk("external journal on %s\n", - bdevname(EXT4_SB(sb)->s_journal->j_dev, b)); - } else { - printk("internal journal\n"); - } - return res; -} - -/* Called at mount-time, super-block is locked */ -static int ext4_check_descriptors (struct super_block * sb) -{ - struct ext4_sb_info *sbi = EXT4_SB(sb); - ext4_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block); - ext4_fsblk_t last_block; - ext4_fsblk_t block_bitmap; - ext4_fsblk_t inode_bitmap; - ext4_fsblk_t inode_table; - struct ext4_group_desc * gdp = NULL; - int desc_block = 0; - int i; - - ext4_debug ("Checking group descriptors"); - - for (i = 0; i < sbi->s_groups_count; i++) - { - if (i == sbi->s_groups_count - 1) - last_block = ext4_blocks_count(sbi->s_es) - 1; - else - last_block = first_block + - (EXT4_BLOCKS_PER_GROUP(sb) - 1); - - if ((i % EXT4_DESC_PER_BLOCK(sb)) == 0) - gdp = (struct ext4_group_desc *) - sbi->s_group_desc[desc_block++]->b_data; - block_bitmap = ext4_block_bitmap(sb, gdp); - if (block_bitmap < first_block || block_bitmap > last_block) - { - ext4_error (sb, "ext4_check_descriptors", - "Block bitmap for group %d" - " not in group (block %llu)!", - i, block_bitmap); - return 0; - } - inode_bitmap = ext4_inode_bitmap(sb, gdp); - if (inode_bitmap < first_block || inode_bitmap > last_block) - { - ext4_error (sb, "ext4_check_descriptors", - "Inode bitmap for group %d" - " not in group (block %llu)!", - i, inode_bitmap); - return 0; - } - inode_table = ext4_inode_table(sb, gdp); - if (inode_table < first_block || - inode_table + sbi->s_itb_per_group > last_block) - { - ext4_error (sb, "ext4_check_descriptors", - "Inode table for group %d" - " not in group (block %llu)!", - i, inode_table); - return 0; - } - first_block += EXT4_BLOCKS_PER_GROUP(sb); - gdp = (struct ext4_group_desc *) - ((__u8 *)gdp + EXT4_DESC_SIZE(sb)); - } - - ext4_free_blocks_count_set(sbi->s_es, ext4_count_free_blocks(sb)); - sbi->s_es->s_free_inodes_count=cpu_to_le32(ext4_count_free_inodes(sb)); - return 1; -} - - -/* ext4_orphan_cleanup() walks a singly-linked list of inodes (starting at - * the superblock) which were deleted from all directories, but held open by - * a process at the time of a crash. We walk the list and try to delete these - * inodes at recovery time (only with a read-write filesystem). - * - * In order to keep the orphan inode chain consistent during traversal (in - * case of crash during recovery), we link each inode into the superblock - * orphan list_head and handle it the same way as an inode deletion during - * normal operation (which journals the operations for us). - * - * We only do an iget() and an iput() on each inode, which is very safe if we - * accidentally point at an in-use or already deleted inode. The worst that - * can happen in this case is that we get a "bit already cleared" message from - * ext4_free_inode(). The only reason we would point at a wrong inode is if - * e2fsck was run on this filesystem, and it must have already done the orphan - * inode cleanup for us, so we can safely abort without any further action. - */ -static void ext4_orphan_cleanup (struct super_block * sb, - struct ext4_super_block * es) -{ - unsigned int s_flags = sb->s_flags; - int nr_orphans = 0, nr_truncates = 0; -#ifdef CONFIG_QUOTA - int i; -#endif - if (!es->s_last_orphan) { - jbd_debug(4, "no orphan inodes to clean up\n"); - return; - } - - if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) { - if (es->s_last_orphan) - jbd_debug(1, "Errors on filesystem, " - "clearing orphan list.\n"); - es->s_last_orphan = 0; - jbd_debug(1, "Skipping orphan recovery on fs with errors.\n"); - return; - } - - if (s_flags & MS_RDONLY) { - printk(KERN_INFO "EXT4-fs: %s: orphan cleanup on readonly fs\n", - sb->s_id); - sb->s_flags &= ~MS_RDONLY; - } -#ifdef CONFIG_QUOTA - /* Needed for iput() to work correctly and not trash data */ - sb->s_flags |= MS_ACTIVE; - /* Turn on quotas so that they are updated correctly */ - for (i = 0; i < MAXQUOTAS; i++) { - if (EXT4_SB(sb)->s_qf_names[i]) { - int ret = ext4_quota_on_mount(sb, i); - if (ret < 0) - printk(KERN_ERR - "EXT4-fs: Cannot turn on journalled " - "quota: error %d\n", ret); - } - } -#endif - - while (es->s_last_orphan) { - struct inode *inode; - - if (!(inode = - ext4_orphan_get(sb, le32_to_cpu(es->s_last_orphan)))) { - es->s_last_orphan = 0; - break; - } - - list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan); - DQUOT_INIT(inode); - if (inode->i_nlink) { - printk(KERN_DEBUG - "%s: truncating inode %lu to %Ld bytes\n", - __FUNCTION__, inode->i_ino, inode->i_size); - jbd_debug(2, "truncating inode %lu to %Ld bytes\n", - inode->i_ino, inode->i_size); - ext4_truncate(inode); - nr_truncates++; - } else { - printk(KERN_DEBUG - "%s: deleting unreferenced inode %lu\n", - __FUNCTION__, inode->i_ino); - jbd_debug(2, "deleting unreferenced inode %lu\n", - inode->i_ino); - nr_orphans++; - } - iput(inode); /* The delete magic happens here! */ - } - -#define PLURAL(x) (x), ((x)==1) ? "" : "s" - - if (nr_orphans) - printk(KERN_INFO "EXT4-fs: %s: %d orphan inode%s deleted\n", - sb->s_id, PLURAL(nr_orphans)); - if (nr_truncates) - printk(KERN_INFO "EXT4-fs: %s: %d truncate%s cleaned up\n", - sb->s_id, PLURAL(nr_truncates)); -#ifdef CONFIG_QUOTA - /* Turn quotas off */ - for (i = 0; i < MAXQUOTAS; i++) { - if (sb_dqopt(sb)->files[i]) - vfs_quota_off(sb, i); - } -#endif - sb->s_flags = s_flags; /* Restore MS_RDONLY status */ -} - -#define log2(n) ffz(~(n)) - -/* - * Maximal file size. There is a direct, and {,double-,triple-}indirect - * block limit, and also a limit of (2^32 - 1) 512-byte sectors in i_blocks. - * We need to be 1 filesystem block less than the 2^32 sector limit. - */ -static loff_t ext4_max_size(int bits) -{ - loff_t res = EXT4_NDIR_BLOCKS; - /* This constant is calculated to be the largest file size for a - * dense, 4k-blocksize file such that the total number of - * sectors in the file, including data and all indirect blocks, - * does not exceed 2^32. */ - const loff_t upper_limit = 0x1ff7fffd000LL; - - res += 1LL << (bits-2); - res += 1LL << (2*(bits-2)); - res += 1LL << (3*(bits-2)); - res <<= bits; - if (res > upper_limit) - res = upper_limit; - return res; -} - -static ext4_fsblk_t descriptor_loc(struct super_block *sb, - ext4_fsblk_t logical_sb_block, int nr) -{ - struct ext4_sb_info *sbi = EXT4_SB(sb); - unsigned long bg, first_meta_bg; - int has_super = 0; - - first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg); - - if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG) || - nr < first_meta_bg) - return logical_sb_block + nr + 1; - bg = sbi->s_desc_per_block * nr; - if (ext4_bg_has_super(sb, bg)) - has_super = 1; - return (has_super + ext4_group_first_block_no(sb, bg)); -} - - -static int ext4_fill_super (struct super_block *sb, void *data, int silent) -{ - struct buffer_head * bh; - struct ext4_super_block *es = NULL; - struct ext4_sb_info *sbi; - ext4_fsblk_t block; - ext4_fsblk_t sb_block = get_sb_block(&data); - ext4_fsblk_t logical_sb_block; - unsigned long offset = 0; - unsigned int journal_inum = 0; - unsigned long journal_devnum = 0; - unsigned long def_mount_opts; - struct inode *root; - int blocksize; - int hblock; - int db_count; - int i; - int needs_recovery; - __le32 features; - __u64 blocks_count; - - sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); - if (!sbi) - return -ENOMEM; - sb->s_fs_info = sbi; - sbi->s_mount_opt = 0; - sbi->s_resuid = EXT4_DEF_RESUID; - sbi->s_resgid = EXT4_DEF_RESGID; - - unlock_kernel(); - - blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE); - if (!blocksize) { - printk(KERN_ERR "EXT4-fs: unable to set blocksize\n"); - goto out_fail; - } - - /* - * The ext4 superblock will not be buffer aligned for other than 1kB - * block sizes. We need to calculate the offset from buffer start. - */ - if (blocksize != EXT4_MIN_BLOCK_SIZE) { - logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE; - offset = do_div(logical_sb_block, blocksize); - } else { - logical_sb_block = sb_block; - } - - if (!(bh = sb_bread(sb, logical_sb_block))) { - printk (KERN_ERR "EXT4-fs: unable to read superblock\n"); - goto out_fail; - } - /* - * Note: s_es must be initialized as soon as possible because - * some ext4 macro-instructions depend on its value - */ - es = (struct ext4_super_block *) (((char *)bh->b_data) + offset); - sbi->s_es = es; - sb->s_magic = le16_to_cpu(es->s_magic); - if (sb->s_magic != EXT4_SUPER_MAGIC) - goto cantfind_ext4; - - /* Set defaults before we parse the mount options */ - def_mount_opts = le32_to_cpu(es->s_default_mount_opts); - if (def_mount_opts & EXT4_DEFM_DEBUG) - set_opt(sbi->s_mount_opt, DEBUG); - if (def_mount_opts & EXT4_DEFM_BSDGROUPS) - set_opt(sbi->s_mount_opt, GRPID); - if (def_mount_opts & EXT4_DEFM_UID16) - set_opt(sbi->s_mount_opt, NO_UID32); - if (def_mount_opts & EXT4_DEFM_XATTR_USER) - set_opt(sbi->s_mount_opt, XATTR_USER); - if (def_mount_opts & EXT4_DEFM_ACL) - set_opt(sbi->s_mount_opt, POSIX_ACL); - if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA) - sbi->s_mount_opt |= EXT4_MOUNT_JOURNAL_DATA; - else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED) - sbi->s_mount_opt |= EXT4_MOUNT_ORDERED_DATA; - else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_WBACK) - sbi->s_mount_opt |= EXT4_MOUNT_WRITEBACK_DATA; - - if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_PANIC) - set_opt(sbi->s_mount_opt, ERRORS_PANIC); - else if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_RO) - set_opt(sbi->s_mount_opt, ERRORS_RO); - else - set_opt(sbi->s_mount_opt, ERRORS_CONT); - - sbi->s_resuid = le16_to_cpu(es->s_def_resuid); - sbi->s_resgid = le16_to_cpu(es->s_def_resgid); - - set_opt(sbi->s_mount_opt, RESERVATION); - - if (!parse_options ((char *) data, sb, &journal_inum, &journal_devnum, - NULL, 0)) - goto failed_mount; - - sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | - ((sbi->s_mount_opt & EXT4_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0); - - if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV && - (EXT4_HAS_COMPAT_FEATURE(sb, ~0U) || - EXT4_HAS_RO_COMPAT_FEATURE(sb, ~0U) || - EXT4_HAS_INCOMPAT_FEATURE(sb, ~0U))) - printk(KERN_WARNING - "EXT4-fs warning: feature flags set on rev 0 fs, " - "running e2fsck is recommended\n"); - /* - * Check feature flags regardless of the revision level, since we - * previously didn't change the revision level when setting the flags, - * so there is a chance incompat flags are set on a rev 0 filesystem. - */ - features = EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT4_FEATURE_INCOMPAT_SUPP); - if (features) { - printk(KERN_ERR "EXT4-fs: %s: couldn't mount because of " - "unsupported optional features (%x).\n", - sb->s_id, le32_to_cpu(features)); - goto failed_mount; - } - features = EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT4_FEATURE_RO_COMPAT_SUPP); - if (!(sb->s_flags & MS_RDONLY) && features) { - printk(KERN_ERR "EXT4-fs: %s: couldn't mount RDWR because of " - "unsupported optional features (%x).\n", - sb->s_id, le32_to_cpu(features)); - goto failed_mount; - } - blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size); - - if (blocksize < EXT4_MIN_BLOCK_SIZE || - blocksize > EXT4_MAX_BLOCK_SIZE) { - printk(KERN_ERR - "EXT4-fs: Unsupported filesystem blocksize %d on %s.\n", - blocksize, sb->s_id); - goto failed_mount; - } - - hblock = bdev_hardsect_size(sb->s_bdev); - if (sb->s_blocksize != blocksize) { - /* - * Make sure the blocksize for the filesystem is larger - * than the hardware sectorsize for the machine. - */ - if (blocksize < hblock) { - printk(KERN_ERR "EXT4-fs: blocksize %d too small for " - "device blocksize %d.\n", blocksize, hblock); - goto failed_mount; - } - - brelse (bh); - sb_set_blocksize(sb, blocksize); - logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE; - offset = do_div(logical_sb_block, blocksize); - bh = sb_bread(sb, logical_sb_block); - if (!bh) { - printk(KERN_ERR - "EXT4-fs: Can't read superblock on 2nd try.\n"); - goto failed_mount; - } - es = (struct ext4_super_block *)(((char *)bh->b_data) + offset); - sbi->s_es = es; - if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) { - printk (KERN_ERR - "EXT4-fs: Magic mismatch, very weird !\n"); - goto failed_mount; - } - } - - sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits); - - if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) { - sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE; - sbi->s_first_ino = EXT4_GOOD_OLD_FIRST_INO; - } else { - sbi->s_inode_size = le16_to_cpu(es->s_inode_size); - sbi->s_first_ino = le32_to_cpu(es->s_first_ino); - if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) || - (sbi->s_inode_size & (sbi->s_inode_size - 1)) || - (sbi->s_inode_size > blocksize)) { - printk (KERN_ERR - "EXT4-fs: unsupported inode size: %d\n", - sbi->s_inode_size); - goto failed_mount; - } - } - sbi->s_frag_size = EXT4_MIN_FRAG_SIZE << - le32_to_cpu(es->s_log_frag_size); - if (blocksize != sbi->s_frag_size) { - printk(KERN_ERR - "EXT4-fs: fragsize %lu != blocksize %u (unsupported)\n", - sbi->s_frag_size, blocksize); - goto failed_mount; - } - sbi->s_desc_size = le16_to_cpu(es->s_desc_size); - if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT)) { - if (sbi->s_desc_size < EXT4_MIN_DESC_SIZE_64BIT || - sbi->s_desc_size > EXT4_MAX_DESC_SIZE || - sbi->s_desc_size & (sbi->s_desc_size - 1)) { - printk(KERN_ERR - "EXT4-fs: unsupported descriptor size %lu\n", - sbi->s_desc_size); - goto failed_mount; - } - } else - sbi->s_desc_size = EXT4_MIN_DESC_SIZE; - sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group); - sbi->s_frags_per_group = le32_to_cpu(es->s_frags_per_group); - sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group); - if (EXT4_INODE_SIZE(sb) == 0) - goto cantfind_ext4; - sbi->s_inodes_per_block = blocksize / EXT4_INODE_SIZE(sb); - if (sbi->s_inodes_per_block == 0) - goto cantfind_ext4; - sbi->s_itb_per_group = sbi->s_inodes_per_group / - sbi->s_inodes_per_block; - sbi->s_desc_per_block = blocksize / EXT4_DESC_SIZE(sb); - sbi->s_sbh = bh; - sbi->s_mount_state = le16_to_cpu(es->s_state); - sbi->s_addr_per_block_bits = log2(EXT4_ADDR_PER_BLOCK(sb)); - sbi->s_desc_per_block_bits = log2(EXT4_DESC_PER_BLOCK(sb)); - for (i=0; i < 4; i++) - sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]); - sbi->s_def_hash_version = es->s_def_hash_version; - - if (sbi->s_blocks_per_group > blocksize * 8) { - printk (KERN_ERR - "EXT4-fs: #blocks per group too big: %lu\n", - sbi->s_blocks_per_group); - goto failed_mount; - } - if (sbi->s_frags_per_group > blocksize * 8) { - printk (KERN_ERR - "EXT4-fs: #fragments per group too big: %lu\n", - sbi->s_frags_per_group); - goto failed_mount; - } - if (sbi->s_inodes_per_group > blocksize * 8) { - printk (KERN_ERR - "EXT4-fs: #inodes per group too big: %lu\n", - sbi->s_inodes_per_group); - goto failed_mount; - } - - if (ext4_blocks_count(es) > - (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) { - printk(KERN_ERR "EXT4-fs: filesystem on %s:" - " too large to mount safely\n", sb->s_id); - if (sizeof(sector_t) < 8) - printk(KERN_WARNING "EXT4-fs: CONFIG_LBD not " - "enabled\n"); - goto failed_mount; - } - - if (EXT4_BLOCKS_PER_GROUP(sb) == 0) - goto cantfind_ext4; - blocks_count = (ext4_blocks_count(es) - - le32_to_cpu(es->s_first_data_block) + - EXT4_BLOCKS_PER_GROUP(sb) - 1); - do_div(blocks_count, EXT4_BLOCKS_PER_GROUP(sb)); - sbi->s_groups_count = blocks_count; - db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) / - EXT4_DESC_PER_BLOCK(sb); - sbi->s_group_desc = kmalloc(db_count * sizeof (struct buffer_head *), - GFP_KERNEL); - if (sbi->s_group_desc == NULL) { - printk (KERN_ERR "EXT4-fs: not enough memory\n"); - goto failed_mount; - } - - bgl_lock_init(&sbi->s_blockgroup_lock); - - for (i = 0; i < db_count; i++) { - block = descriptor_loc(sb, logical_sb_block, i); - sbi->s_group_desc[i] = sb_bread(sb, block); - if (!sbi->s_group_desc[i]) { - printk (KERN_ERR "EXT4-fs: " - "can't read group descriptor %d\n", i); - db_count = i; - goto failed_mount2; - } - } - if (!ext4_check_descriptors (sb)) { - printk(KERN_ERR "EXT4-fs: group descriptors corrupted!\n"); - goto failed_mount2; - } - sbi->s_gdb_count = db_count; - get_random_bytes(&sbi->s_next_generation, sizeof(u32)); - spin_lock_init(&sbi->s_next_gen_lock); - - percpu_counter_init(&sbi->s_freeblocks_counter, - ext4_count_free_blocks(sb)); - percpu_counter_init(&sbi->s_freeinodes_counter, - ext4_count_free_inodes(sb)); - percpu_counter_init(&sbi->s_dirs_counter, - ext4_count_dirs(sb)); - - /* per fileystem reservation list head & lock */ - spin_lock_init(&sbi->s_rsv_window_lock); - sbi->s_rsv_window_root = RB_ROOT; - /* Add a single, static dummy reservation to the start of the - * reservation window list --- it gives us a placeholder for - * append-at-start-of-list which makes the allocation logic - * _much_ simpler. */ - sbi->s_rsv_window_head.rsv_start = EXT4_RESERVE_WINDOW_NOT_ALLOCATED; - sbi->s_rsv_window_head.rsv_end = EXT4_RESERVE_WINDOW_NOT_ALLOCATED; - sbi->s_rsv_window_head.rsv_alloc_hit = 0; - sbi->s_rsv_window_head.rsv_goal_size = 0; - ext4_rsv_window_add(sb, &sbi->s_rsv_window_head); - - /* - * set up enough so that it can read an inode - */ - sb->s_op = &ext4_sops; - sb->s_export_op = &ext4_export_ops; - sb->s_xattr = ext4_xattr_handlers; -#ifdef CONFIG_QUOTA - sb->s_qcop = &ext4_qctl_operations; - sb->dq_op = &ext4_quota_operations; -#endif - INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ - - sb->s_root = NULL; - - needs_recovery = (es->s_last_orphan != 0 || - EXT4_HAS_INCOMPAT_FEATURE(sb, - EXT4_FEATURE_INCOMPAT_RECOVER)); - - /* - * The first inode we look at is the journal inode. Don't try - * root first: it may be modified in the journal! - */ - if (!test_opt(sb, NOLOAD) && - EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) { - if (ext4_load_journal(sb, es, journal_devnum)) - goto failed_mount3; - } else if (journal_inum) { - if (ext4_create_journal(sb, es, journal_inum)) - goto failed_mount3; - } else { - if (!silent) - printk (KERN_ERR - "ext4: No journal on filesystem on %s\n", - sb->s_id); - goto failed_mount3; - } - - /* We have now updated the journal if required, so we can - * validate the data journaling mode. */ - switch (test_opt(sb, DATA_FLAGS)) { - case 0: - /* No mode set, assume a default based on the journal - * capabilities: ORDERED_DATA if the journal can - * cope, else JOURNAL_DATA - */ - if (jbd2_journal_check_available_features - (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) - set_opt(sbi->s_mount_opt, ORDERED_DATA); - else - set_opt(sbi->s_mount_opt, JOURNAL_DATA); - break; - - case EXT4_MOUNT_ORDERED_DATA: - case EXT4_MOUNT_WRITEBACK_DATA: - if (!jbd2_journal_check_available_features - (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) { - printk(KERN_ERR "EXT4-fs: Journal does not support " - "requested data journaling mode\n"); - goto failed_mount4; - } - default: - break; - } - - if (test_opt(sb, NOBH)) { - if (!(test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)) { - printk(KERN_WARNING "EXT4-fs: Ignoring nobh option - " - "its supported only with writeback mode\n"); - clear_opt(sbi->s_mount_opt, NOBH); - } - } - /* - * The jbd2_journal_load will have done any necessary log recovery, - * so we can safely mount the rest of the filesystem now. - */ - - root = iget(sb, EXT4_ROOT_INO); - sb->s_root = d_alloc_root(root); - if (!sb->s_root) { - printk(KERN_ERR "EXT4-fs: get root inode failed\n"); - iput(root); - goto failed_mount4; - } - if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) { - dput(sb->s_root); - sb->s_root = NULL; - printk(KERN_ERR "EXT4-fs: corrupt root inode, run e2fsck\n"); - goto failed_mount4; - } - - ext4_setup_super (sb, es, sb->s_flags & MS_RDONLY); - /* - * akpm: core read_super() calls in here with the superblock locked. - * That deadlocks, because orphan cleanup needs to lock the superblock - * in numerous places. Here we just pop the lock - it's relatively - * harmless, because we are now ready to accept write_super() requests, - * and aviro says that's the only reason for hanging onto the - * superblock lock. - */ - EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS; - ext4_orphan_cleanup(sb, es); - EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS; - if (needs_recovery) - printk (KERN_INFO "EXT4-fs: recovery complete.\n"); - ext4_mark_recovery_complete(sb, es); - printk (KERN_INFO "EXT4-fs: mounted filesystem with %s data mode.\n", - test_opt(sb,DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA ? "journal": - test_opt(sb,DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA ? "ordered": - "writeback"); - - ext4_ext_init(sb); - - lock_kernel(); - return 0; - -cantfind_ext4: - if (!silent) - printk(KERN_ERR "VFS: Can't find ext4 filesystem on dev %s.\n", - sb->s_id); - goto failed_mount; - -failed_mount4: - jbd2_journal_destroy(sbi->s_journal); -failed_mount3: - percpu_counter_destroy(&sbi->s_freeblocks_counter); - percpu_counter_destroy(&sbi->s_freeinodes_counter); - percpu_counter_destroy(&sbi->s_dirs_counter); -failed_mount2: - for (i = 0; i < db_count; i++) - brelse(sbi->s_group_desc[i]); - kfree(sbi->s_group_desc); -failed_mount: -#ifdef CONFIG_QUOTA - for (i = 0; i < MAXQUOTAS; i++) - kfree(sbi->s_qf_names[i]); -#endif - ext4_blkdev_remove(sbi); - brelse(bh); -out_fail: - sb->s_fs_info = NULL; - kfree(sbi); - lock_kernel(); - return -EINVAL; -} - -/* - * Setup any per-fs journal parameters now. We'll do this both on - * initial mount, once the journal has been initialised but before we've - * done any recovery; and again on any subsequent remount. - */ -static void ext4_init_journal_params(struct super_block *sb, journal_t *journal) -{ - struct ext4_sb_info *sbi = EXT4_SB(sb); - - if (sbi->s_commit_interval) - journal->j_commit_interval = sbi->s_commit_interval; - /* We could also set up an ext4-specific default for the commit - * interval here, but for now we'll just fall back to the jbd - * default. */ - - spin_lock(&journal->j_state_lock); - if (test_opt(sb, BARRIER)) - journal->j_flags |= JBD2_BARRIER; - else - journal->j_flags &= ~JBD2_BARRIER; - spin_unlock(&journal->j_state_lock); -} - -static journal_t *ext4_get_journal(struct super_block *sb, - unsigned int journal_inum) -{ - struct inode *journal_inode; - journal_t *journal; - - /* First, test for the existence of a valid inode on disk. Bad - * things happen if we iget() an unused inode, as the subsequent - * iput() will try to delete it. */ - - journal_inode = iget(sb, journal_inum); - if (!journal_inode) { - printk(KERN_ERR "EXT4-fs: no journal found.\n"); - return NULL; - } - if (!journal_inode->i_nlink) { - make_bad_inode(journal_inode); - iput(journal_inode); - printk(KERN_ERR "EXT4-fs: journal inode is deleted.\n"); - return NULL; - } - - jbd_debug(2, "Journal inode found at %p: %Ld bytes\n", - journal_inode, journal_inode->i_size); - if (is_bad_inode(journal_inode) || !S_ISREG(journal_inode->i_mode)) { - printk(KERN_ERR "EXT4-fs: invalid journal inode.\n"); - iput(journal_inode); - return NULL; - } - - journal = jbd2_journal_init_inode(journal_inode); - if (!journal) { - printk(KERN_ERR "EXT4-fs: Could not load journal inode\n"); - iput(journal_inode); - return NULL; - } - journal->j_private = sb; - ext4_init_journal_params(sb, journal); - return journal; -} - -static journal_t *ext4_get_dev_journal(struct super_block *sb, - dev_t j_dev) -{ - struct buffer_head * bh; - journal_t *journal; - ext4_fsblk_t start; - ext4_fsblk_t len; - int hblock, blocksize; - ext4_fsblk_t sb_block; - unsigned long offset; - struct ext4_super_block * es; - struct block_device *bdev; - - bdev = ext4_blkdev_get(j_dev); - if (bdev == NULL) - return NULL; - - if (bd_claim(bdev, sb)) { - printk(KERN_ERR - "EXT4: failed to claim external journal device.\n"); - blkdev_put(bdev); - return NULL; - } - - blocksize = sb->s_blocksize; - hblock = bdev_hardsect_size(bdev); - if (blocksize < hblock) { - printk(KERN_ERR - "EXT4-fs: blocksize too small for journal device.\n"); - goto out_bdev; - } - - sb_block = EXT4_MIN_BLOCK_SIZE / blocksize; - offset = EXT4_MIN_BLOCK_SIZE % blocksize; - set_blocksize(bdev, blocksize); - if (!(bh = __bread(bdev, sb_block, blocksize))) { - printk(KERN_ERR "EXT4-fs: couldn't read superblock of " - "external journal\n"); - goto out_bdev; - } - - es = (struct ext4_super_block *) (((char *)bh->b_data) + offset); - if ((le16_to_cpu(es->s_magic) != EXT4_SUPER_MAGIC) || - !(le32_to_cpu(es->s_feature_incompat) & - EXT4_FEATURE_INCOMPAT_JOURNAL_DEV)) { - printk(KERN_ERR "EXT4-fs: external journal has " - "bad superblock\n"); - brelse(bh); - goto out_bdev; - } - - if (memcmp(EXT4_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) { - printk(KERN_ERR "EXT4-fs: journal UUID does not match\n"); - brelse(bh); - goto out_bdev; - } - - len = ext4_blocks_count(es); - start = sb_block + 1; - brelse(bh); /* we're done with the superblock */ - - journal = jbd2_journal_init_dev(bdev, sb->s_bdev, - start, len, blocksize); - if (!journal) { - printk(KERN_ERR "EXT4-fs: failed to create device journal\n"); - goto out_bdev; - } - journal->j_private = sb; - ll_rw_block(READ, 1, &journal->j_sb_buffer); - wait_on_buffer(journal->j_sb_buffer); - if (!buffer_uptodate(journal->j_sb_buffer)) { - printk(KERN_ERR "EXT4-fs: I/O error on journal device\n"); - goto out_journal; - } - if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) { - printk(KERN_ERR "EXT4-fs: External journal has more than one " - "user (unsupported) - %d\n", - be32_to_cpu(journal->j_superblock->s_nr_users)); - goto out_journal; - } - EXT4_SB(sb)->journal_bdev = bdev; - ext4_init_journal_params(sb, journal); - return journal; -out_journal: - jbd2_journal_destroy(journal); -out_bdev: - ext4_blkdev_put(bdev); - return NULL; -} - -static int ext4_load_journal(struct super_block *sb, - struct ext4_super_block *es, - unsigned long journal_devnum) -{ - journal_t *journal; - unsigned int journal_inum = le32_to_cpu(es->s_journal_inum); - dev_t journal_dev; - int err = 0; - int really_read_only; - - if (journal_devnum && - journal_devnum != le32_to_cpu(es->s_journal_dev)) { - printk(KERN_INFO "EXT4-fs: external journal device major/minor " - "numbers have changed\n"); - journal_dev = new_decode_dev(journal_devnum); - } else - journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev)); - - really_read_only = bdev_read_only(sb->s_bdev); - - /* - * Are we loading a blank journal or performing recovery after a - * crash? For recovery, we need to check in advance whether we - * can get read-write access to the device. - */ - - if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) { - if (sb->s_flags & MS_RDONLY) { - printk(KERN_INFO "EXT4-fs: INFO: recovery " - "required on readonly filesystem.\n"); - if (really_read_only) { - printk(KERN_ERR "EXT4-fs: write access " - "unavailable, cannot proceed.\n"); - return -EROFS; - } - printk (KERN_INFO "EXT4-fs: write access will " - "be enabled during recovery.\n"); - } - } - - if (journal_inum && journal_dev) { - printk(KERN_ERR "EXT4-fs: filesystem has both journal " - "and inode journals!\n"); - return -EINVAL; - } - - if (journal_inum) { - if (!(journal = ext4_get_journal(sb, journal_inum))) - return -EINVAL; - } else { - if (!(journal = ext4_get_dev_journal(sb, journal_dev))) - return -EINVAL; - } - - if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) { - err = jbd2_journal_update_format(journal); - if (err) { - printk(KERN_ERR "EXT4-fs: error updating journal.\n"); - jbd2_journal_destroy(journal); - return err; - } - } - - if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) - err = jbd2_journal_wipe(journal, !really_read_only); - if (!err) - err = jbd2_journal_load(journal); - - if (err) { - printk(KERN_ERR "EXT4-fs: error loading journal.\n"); - jbd2_journal_destroy(journal); - return err; - } - - EXT4_SB(sb)->s_journal = journal; - ext4_clear_journal_err(sb, es); - - if (journal_devnum && - journal_devnum != le32_to_cpu(es->s_journal_dev)) { - es->s_journal_dev = cpu_to_le32(journal_devnum); - sb->s_dirt = 1; - - /* Make sure we flush the recovery flag to disk. */ - ext4_commit_super(sb, es, 1); - } - - return 0; -} - -static int ext4_create_journal(struct super_block * sb, - struct ext4_super_block * es, - unsigned int journal_inum) -{ - journal_t *journal; - - if (sb->s_flags & MS_RDONLY) { - printk(KERN_ERR "EXT4-fs: readonly filesystem when trying to " - "create journal.\n"); - return -EROFS; - } - - if (!(journal = ext4_get_journal(sb, journal_inum))) - return -EINVAL; - - printk(KERN_INFO "EXT4-fs: creating new journal on inode %u\n", - journal_inum); - - if (jbd2_journal_create(journal)) { - printk(KERN_ERR "EXT4-fs: error creating journal.\n"); - jbd2_journal_destroy(journal); - return -EIO; - } - - EXT4_SB(sb)->s_journal = journal; - - ext4_update_dynamic_rev(sb); - EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); - EXT4_SET_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL); - - es->s_journal_inum = cpu_to_le32(journal_inum); - sb->s_dirt = 1; - - /* Make sure we flush the recovery flag to disk. */ - ext4_commit_super(sb, es, 1); - - return 0; -} - -static void ext4_commit_super (struct super_block * sb, - struct ext4_super_block * es, - int sync) -{ - struct buffer_head *sbh = EXT4_SB(sb)->s_sbh; - - if (!sbh) - return; - es->s_wtime = cpu_to_le32(get_seconds()); - ext4_free_blocks_count_set(es, ext4_count_free_blocks(sb)); - es->s_free_inodes_count = cpu_to_le32(ext4_count_free_inodes(sb)); - BUFFER_TRACE(sbh, "marking dirty"); - mark_buffer_dirty(sbh); - if (sync) - sync_dirty_buffer(sbh); -} - - -/* - * Have we just finished recovery? If so, and if we are mounting (or - * remounting) the filesystem readonly, then we will end up with a - * consistent fs on disk. Record that fact. - */ -static void ext4_mark_recovery_complete(struct super_block * sb, - struct ext4_super_block * es) -{ - journal_t *journal = EXT4_SB(sb)->s_journal; - - jbd2_journal_lock_updates(journal); - jbd2_journal_flush(journal); - if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER) && - sb->s_flags & MS_RDONLY) { - EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); - sb->s_dirt = 0; - ext4_commit_super(sb, es, 1); - } - jbd2_journal_unlock_updates(journal); -} - -/* - * If we are mounting (or read-write remounting) a filesystem whose journal - * has recorded an error from a previous lifetime, move that error to the - * main filesystem now. - */ -static void ext4_clear_journal_err(struct super_block * sb, - struct ext4_super_block * es) -{ - journal_t *journal; - int j_errno; - const char *errstr; - - journal = EXT4_SB(sb)->s_journal; - - /* - * Now check for any error status which may have been recorded in the - * journal by a prior ext4_error() or ext4_abort() - */ - - j_errno = jbd2_journal_errno(journal); - if (j_errno) { - char nbuf[16]; - - errstr = ext4_decode_error(sb, j_errno, nbuf); - ext4_warning(sb, __FUNCTION__, "Filesystem error recorded " - "from previous mount: %s", errstr); - ext4_warning(sb, __FUNCTION__, "Marking fs in need of " - "filesystem check."); - - EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; - es->s_state |= cpu_to_le16(EXT4_ERROR_FS); - ext4_commit_super (sb, es, 1); - - jbd2_journal_clear_err(journal); - } -} - -/* - * Force the running and committing transactions to commit, - * and wait on the commit. - */ -int ext4_force_commit(struct super_block *sb) -{ - journal_t *journal; - int ret; - - if (sb->s_flags & MS_RDONLY) - return 0; - - journal = EXT4_SB(sb)->s_journal; - sb->s_dirt = 0; - ret = ext4_journal_force_commit(journal); - return ret; -} - -/* - * Ext4 always journals updates to the superblock itself, so we don't - * have to propagate any other updates to the superblock on disk at this - * point. Just start an async writeback to get the buffers on their way - * to the disk. - * - * This implicitly triggers the writebehind on sync(). - */ - -static void ext4_write_super (struct super_block * sb) -{ - if (mutex_trylock(&sb->s_lock) != 0) - BUG(); - sb->s_dirt = 0; -} - -static int ext4_sync_fs(struct super_block *sb, int wait) -{ - tid_t target; - - sb->s_dirt = 0; - if (jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, &target)) { - if (wait) - jbd2_log_wait_commit(EXT4_SB(sb)->s_journal, target); - } - return 0; -} - -/* - * LVM calls this function before a (read-only) snapshot is created. This - * gives us a chance to flush the journal completely and mark the fs clean. - */ -static void ext4_write_super_lockfs(struct super_block *sb) -{ - sb->s_dirt = 0; - - if (!(sb->s_flags & MS_RDONLY)) { - journal_t *journal = EXT4_SB(sb)->s_journal; - - /* Now we set up the journal barrier. */ - jbd2_journal_lock_updates(journal); - jbd2_journal_flush(journal); - - /* Journal blocked and flushed, clear needs_recovery flag. */ - EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); - ext4_commit_super(sb, EXT4_SB(sb)->s_es, 1); - } -} - -/* - * Called by LVM after the snapshot is done. We need to reset the RECOVER - * flag here, even though the filesystem is not technically dirty yet. - */ -static void ext4_unlockfs(struct super_block *sb) -{ - if (!(sb->s_flags & MS_RDONLY)) { - lock_super(sb); - /* Reser the needs_recovery flag before the fs is unlocked. */ - EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); - ext4_commit_super(sb, EXT4_SB(sb)->s_es, 1); - unlock_super(sb); - jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); - } -} - -static int ext4_remount (struct super_block * sb, int * flags, char * data) -{ - struct ext4_super_block * es; - struct ext4_sb_info *sbi = EXT4_SB(sb); - ext4_fsblk_t n_blocks_count = 0; - unsigned long old_sb_flags; - struct ext4_mount_options old_opts; - int err; -#ifdef CONFIG_QUOTA - int i; -#endif - - /* Store the original options */ - old_sb_flags = sb->s_flags; - old_opts.s_mount_opt = sbi->s_mount_opt; - old_opts.s_resuid = sbi->s_resuid; - old_opts.s_resgid = sbi->s_resgid; - old_opts.s_commit_interval = sbi->s_commit_interval; -#ifdef CONFIG_QUOTA - old_opts.s_jquota_fmt = sbi->s_jquota_fmt; - for (i = 0; i < MAXQUOTAS; i++) - old_opts.s_qf_names[i] = sbi->s_qf_names[i]; -#endif - - /* - * Allow the "check" option to be passed as a remount option. - */ - if (!parse_options(data, sb, NULL, NULL, &n_blocks_count, 1)) { - err = -EINVAL; - goto restore_opts; - } - - if (sbi->s_mount_opt & EXT4_MOUNT_ABORT) - ext4_abort(sb, __FUNCTION__, "Abort forced by user"); - - sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | - ((sbi->s_mount_opt & EXT4_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0); - - es = sbi->s_es; - - ext4_init_journal_params(sb, sbi->s_journal); - - if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY) || - n_blocks_count > ext4_blocks_count(es)) { - if (sbi->s_mount_opt & EXT4_MOUNT_ABORT) { - err = -EROFS; - goto restore_opts; - } - - if (*flags & MS_RDONLY) { - /* - * First of all, the unconditional stuff we have to do - * to disable replay of the journal when we next remount - */ - sb->s_flags |= MS_RDONLY; - - /* - * OK, test if we are remounting a valid rw partition - * readonly, and if so set the rdonly flag and then - * mark the partition as valid again. - */ - if (!(es->s_state & cpu_to_le16(EXT4_VALID_FS)) && - (sbi->s_mount_state & EXT4_VALID_FS)) - es->s_state = cpu_to_le16(sbi->s_mount_state); - - ext4_mark_recovery_complete(sb, es); - } else { - __le32 ret; - if ((ret = EXT4_HAS_RO_COMPAT_FEATURE(sb, - ~EXT4_FEATURE_RO_COMPAT_SUPP))) { - printk(KERN_WARNING "EXT4-fs: %s: couldn't " - "remount RDWR because of unsupported " - "optional features (%x).\n", - sb->s_id, le32_to_cpu(ret)); - err = -EROFS; - goto restore_opts; - } - /* - * Mounting a RDONLY partition read-write, so reread - * and store the current valid flag. (It may have - * been changed by e2fsck since we originally mounted - * the partition.) - */ - ext4_clear_journal_err(sb, es); - sbi->s_mount_state = le16_to_cpu(es->s_state); - if ((err = ext4_group_extend(sb, es, n_blocks_count))) - goto restore_opts; - if (!ext4_setup_super (sb, es, 0)) - sb->s_flags &= ~MS_RDONLY; - } - } -#ifdef CONFIG_QUOTA - /* Release old quota file names */ - for (i = 0; i < MAXQUOTAS; i++) - if (old_opts.s_qf_names[i] && - old_opts.s_qf_names[i] != sbi->s_qf_names[i]) - kfree(old_opts.s_qf_names[i]); -#endif - return 0; -restore_opts: - sb->s_flags = old_sb_flags; - sbi->s_mount_opt = old_opts.s_mount_opt; - sbi->s_resuid = old_opts.s_resuid; - sbi->s_resgid = old_opts.s_resgid; - sbi->s_commit_interval = old_opts.s_commit_interval; -#ifdef CONFIG_QUOTA - sbi->s_jquota_fmt = old_opts.s_jquota_fmt; - for (i = 0; i < MAXQUOTAS; i++) { - if (sbi->s_qf_names[i] && - old_opts.s_qf_names[i] != sbi->s_qf_names[i]) - kfree(sbi->s_qf_names[i]); - sbi->s_qf_names[i] = old_opts.s_qf_names[i]; - } -#endif - return err; -} - -static int ext4_statfs (struct dentry * dentry, struct kstatfs * buf) -{ - struct super_block *sb = dentry->d_sb; - struct ext4_sb_info *sbi = EXT4_SB(sb); - struct ext4_super_block *es = sbi->s_es; - ext4_fsblk_t overhead; - int i; - - if (test_opt (sb, MINIX_DF)) - overhead = 0; - else { - unsigned long ngroups; - ngroups = EXT4_SB(sb)->s_groups_count; - smp_rmb(); - - /* - * Compute the overhead (FS structures) - */ - - /* - * All of the blocks before first_data_block are - * overhead - */ - overhead = le32_to_cpu(es->s_first_data_block); - - /* - * Add the overhead attributed to the superblock and - * block group descriptors. If the sparse superblocks - * feature is turned on, then not all groups have this. - */ - for (i = 0; i < ngroups; i++) { - overhead += ext4_bg_has_super(sb, i) + - ext4_bg_num_gdb(sb, i); - cond_resched(); - } - - /* - * Every block group has an inode bitmap, a block - * bitmap, and an inode table. - */ - overhead += (ngroups * (2 + EXT4_SB(sb)->s_itb_per_group)); - } - - buf->f_type = EXT4_SUPER_MAGIC; - buf->f_bsize = sb->s_blocksize; - buf->f_blocks = ext4_blocks_count(es) - overhead; - buf->f_bfree = percpu_counter_sum(&sbi->s_freeblocks_counter); - buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es); - if (buf->f_bfree < ext4_r_blocks_count(es)) - buf->f_bavail = 0; - buf->f_files = le32_to_cpu(es->s_inodes_count); - buf->f_ffree = percpu_counter_sum(&sbi->s_freeinodes_counter); - buf->f_namelen = EXT4_NAME_LEN; - return 0; -} - -/* Helper function for writing quotas on sync - we need to start transaction before quota file - * is locked for write. Otherwise the are possible deadlocks: - * Process 1 Process 2 - * ext4_create() quota_sync() - * jbd2_journal_start() write_dquot() - * DQUOT_INIT() down(dqio_mutex) - * down(dqio_mutex) jbd2_journal_start() - * - */ - -#ifdef CONFIG_QUOTA - -static inline struct inode *dquot_to_inode(struct dquot *dquot) -{ - return sb_dqopt(dquot->dq_sb)->files[dquot->dq_type]; -} - -static int ext4_dquot_initialize(struct inode *inode, int type) -{ - handle_t *handle; - int ret, err; - - /* We may create quota structure so we need to reserve enough blocks */ - handle = ext4_journal_start(inode, 2*EXT4_QUOTA_INIT_BLOCKS(inode->i_sb)); - if (IS_ERR(handle)) - return PTR_ERR(handle); - ret = dquot_initialize(inode, type); - err = ext4_journal_stop(handle); - if (!ret) - ret = err; - return ret; -} - -static int ext4_dquot_drop(struct inode *inode) -{ - handle_t *handle; - int ret, err; - - /* We may delete quota structure so we need to reserve enough blocks */ - handle = ext4_journal_start(inode, 2*EXT4_QUOTA_DEL_BLOCKS(inode->i_sb)); - if (IS_ERR(handle)) - return PTR_ERR(handle); - ret = dquot_drop(inode); - err = ext4_journal_stop(handle); - if (!ret) - ret = err; - return ret; -} - -static int ext4_write_dquot(struct dquot *dquot) -{ - int ret, err; - handle_t *handle; - struct inode *inode; - - inode = dquot_to_inode(dquot); - handle = ext4_journal_start(inode, - EXT4_QUOTA_TRANS_BLOCKS(dquot->dq_sb)); - if (IS_ERR(handle)) - return PTR_ERR(handle); - ret = dquot_commit(dquot); - err = ext4_journal_stop(handle); - if (!ret) - ret = err; - return ret; -} - -static int ext4_acquire_dquot(struct dquot *dquot) -{ - int ret, err; - handle_t *handle; - - handle = ext4_journal_start(dquot_to_inode(dquot), - EXT4_QUOTA_INIT_BLOCKS(dquot->dq_sb)); - if (IS_ERR(handle)) - return PTR_ERR(handle); - ret = dquot_acquire(dquot); - err = ext4_journal_stop(handle); - if (!ret) - ret = err; - return ret; -} - -static int ext4_release_dquot(struct dquot *dquot) -{ - int ret, err; - handle_t *handle; - - handle = ext4_journal_start(dquot_to_inode(dquot), - EXT4_QUOTA_DEL_BLOCKS(dquot->dq_sb)); - if (IS_ERR(handle)) - return PTR_ERR(handle); - ret = dquot_release(dquot); - err = ext4_journal_stop(handle); - if (!ret) - ret = err; - return ret; -} - -static int ext4_mark_dquot_dirty(struct dquot *dquot) -{ - /* Are we journalling quotas? */ - if (EXT4_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] || - EXT4_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) { - dquot_mark_dquot_dirty(dquot); - return ext4_write_dquot(dquot); - } else { - return dquot_mark_dquot_dirty(dquot); - } -} - -static int ext4_write_info(struct super_block *sb, int type) -{ - int ret, err; - handle_t *handle; - - /* Data block + inode block */ - handle = ext4_journal_start(sb->s_root->d_inode, 2); - if (IS_ERR(handle)) - return PTR_ERR(handle); - ret = dquot_commit_info(sb, type); - err = ext4_journal_stop(handle); - if (!ret) - ret = err; - return ret; -} - -/* - * Turn on quotas during mount time - we need to find - * the quota file and such... - */ -static int ext4_quota_on_mount(struct super_block *sb, int type) -{ - return vfs_quota_on_mount(sb, EXT4_SB(sb)->s_qf_names[type], - EXT4_SB(sb)->s_jquota_fmt, type); -} - -/* - * Standard function to be called on quota_on - */ -static int ext4_quota_on(struct super_block *sb, int type, int format_id, - char *path) -{ - int err; - struct nameidata nd; - - if (!test_opt(sb, QUOTA)) - return -EINVAL; - /* Not journalling quota? */ - if (!EXT4_SB(sb)->s_qf_names[USRQUOTA] && - !EXT4_SB(sb)->s_qf_names[GRPQUOTA]) - return vfs_quota_on(sb, type, format_id, path); - err = path_lookup(path, LOOKUP_FOLLOW, &nd); - if (err) - return err; - /* Quotafile not on the same filesystem? */ - if (nd.mnt->mnt_sb != sb) { - path_release(&nd); - return -EXDEV; - } - /* Quotafile not of fs root? */ - if (nd.dentry->d_parent->d_inode != sb->s_root->d_inode) - printk(KERN_WARNING - "EXT4-fs: Quota file not on filesystem root. " - "Journalled quota will not work.\n"); - path_release(&nd); - return vfs_quota_on(sb, type, format_id, path); -} - -/* Read data from quotafile - avoid pagecache and such because we cannot afford - * acquiring the locks... As quota files are never truncated and quota code - * itself serializes the operations (and noone else should touch the files) - * we don't have to be afraid of races */ -static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data, - size_t len, loff_t off) -{ - struct inode *inode = sb_dqopt(sb)->files[type]; - sector_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb); - int err = 0; - int offset = off & (sb->s_blocksize - 1); - int tocopy; - size_t toread; - struct buffer_head *bh; - loff_t i_size = i_size_read(inode); - - if (off > i_size) - return 0; - if (off+len > i_size) - len = i_size-off; - toread = len; - while (toread > 0) { - tocopy = sb->s_blocksize - offset < toread ? - sb->s_blocksize - offset : toread; - bh = ext4_bread(NULL, inode, blk, 0, &err); - if (err) - return err; - if (!bh) /* A hole? */ - memset(data, 0, tocopy); - else - memcpy(data, bh->b_data+offset, tocopy); - brelse(bh); - offset = 0; - toread -= tocopy; - data += tocopy; - blk++; - } - return len; -} - -/* Write to quotafile (we know the transaction is already started and has - * enough credits) */ -static ssize_t ext4_quota_write(struct super_block *sb, int type, - const char *data, size_t len, loff_t off) -{ - struct inode *inode = sb_dqopt(sb)->files[type]; - sector_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb); - int err = 0; - int offset = off & (sb->s_blocksize - 1); - int tocopy; - int journal_quota = EXT4_SB(sb)->s_qf_names[type] != NULL; - size_t towrite = len; - struct buffer_head *bh; - handle_t *handle = journal_current_handle(); - - mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA); - while (towrite > 0) { - tocopy = sb->s_blocksize - offset < towrite ? - sb->s_blocksize - offset : towrite; - bh = ext4_bread(handle, inode, blk, 1, &err); - if (!bh) - goto out; - if (journal_quota) { - err = ext4_journal_get_write_access(handle, bh); - if (err) { - brelse(bh); - goto out; - } - } - lock_buffer(bh); - memcpy(bh->b_data+offset, data, tocopy); - flush_dcache_page(bh->b_page); - unlock_buffer(bh); - if (journal_quota) - err = ext4_journal_dirty_metadata(handle, bh); - else { - /* Always do at least ordered writes for quotas */ - err = ext4_journal_dirty_data(handle, bh); - mark_buffer_dirty(bh); - } - brelse(bh); - if (err) - goto out; - offset = 0; - towrite -= tocopy; - data += tocopy; - blk++; - } -out: - if (len == towrite) - return err; - if (inode->i_size < off+len-towrite) { - i_size_write(inode, off+len-towrite); - EXT4_I(inode)->i_disksize = inode->i_size; - } - inode->i_version++; - inode->i_mtime = inode->i_ctime = CURRENT_TIME; - ext4_mark_inode_dirty(handle, inode); - mutex_unlock(&inode->i_mutex); - return len - towrite; -} - -#endif - -static int ext4_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, struct vfsmount *mnt) -{ - return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super, mnt); -} - -static struct file_system_type ext4dev_fs_type = { - .owner = THIS_MODULE, - .name = "ext4dev", - .get_sb = ext4_get_sb, - .kill_sb = kill_block_super, - .fs_flags = FS_REQUIRES_DEV, -}; - -static int __init init_ext4_fs(void) -{ - int err = init_ext4_xattr(); - if (err) - return err; - err = init_inodecache(); - if (err) - goto out1; - err = register_filesystem(&ext4dev_fs_type); - if (err) - goto out; - return 0; -out: - destroy_inodecache(); -out1: - exit_ext4_xattr(); - return err; -} - -static void __exit exit_ext4_fs(void) -{ - unregister_filesystem(&ext4dev_fs_type); - destroy_inodecache(); - exit_ext4_xattr(); -} - -MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); -MODULE_DESCRIPTION("Fourth Extended Filesystem with extents"); -MODULE_LICENSE("GPL"); -module_init(init_ext4_fs) -module_exit(exit_ext4_fs) diff --git a/trunk/fs/ext4/symlink.c b/trunk/fs/ext4/symlink.c deleted file mode 100644 index fcf527286d75..000000000000 --- a/trunk/fs/ext4/symlink.c +++ /dev/null @@ -1,54 +0,0 @@ -/* - * linux/fs/ext4/symlink.c - * - * Only fast symlinks left here - the rest is done by generic code. AV, 1999 - * - * Copyright (C) 1992, 1993, 1994, 1995 - * Remy Card (card@masi.ibp.fr) - * Laboratoire MASI - Institut Blaise Pascal - * Universite Pierre et Marie Curie (Paris VI) - * - * from - * - * linux/fs/minix/symlink.c - * - * Copyright (C) 1991, 1992 Linus Torvalds - * - * ext4 symlink handling code - */ - -#include -#include -#include -#include -#include "xattr.h" - -static void * ext4_follow_link(struct dentry *dentry, struct nameidata *nd) -{ - struct ext4_inode_info *ei = EXT4_I(dentry->d_inode); - nd_set_link(nd, (char*)ei->i_data); - return NULL; -} - -struct inode_operations ext4_symlink_inode_operations = { - .readlink = generic_readlink, - .follow_link = page_follow_link_light, - .put_link = page_put_link, -#ifdef CONFIG_EXT4DEV_FS_XATTR - .setxattr = generic_setxattr, - .getxattr = generic_getxattr, - .listxattr = ext4_listxattr, - .removexattr = generic_removexattr, -#endif -}; - -struct inode_operations ext4_fast_symlink_inode_operations = { - .readlink = generic_readlink, - .follow_link = ext4_follow_link, -#ifdef CONFIG_EXT4DEV_FS_XATTR - .setxattr = generic_setxattr, - .getxattr = generic_getxattr, - .listxattr = ext4_listxattr, - .removexattr = generic_removexattr, -#endif -}; diff --git a/trunk/fs/ext4/xattr.c b/trunk/fs/ext4/xattr.c deleted file mode 100644 index 63233cd946a7..000000000000 --- a/trunk/fs/ext4/xattr.c +++ /dev/null @@ -1,1317 +0,0 @@ -/* - * linux/fs/ext4/xattr.c - * - * Copyright (C) 2001-2003 Andreas Gruenbacher, - * - * Fix by Harrison Xing . - * Ext4 code with a lot of help from Eric Jarman . - * Extended attributes for symlinks and special files added per - * suggestion of Luka Renko . - * xattr consolidation Copyright (c) 2004 James Morris , - * Red Hat Inc. - * ea-in-inode support by Alex Tomas aka bzzz - * and Andreas Gruenbacher . - */ - -/* - * Extended attributes are stored directly in inodes (on file systems with - * inodes bigger than 128 bytes) and on additional disk blocks. The i_file_acl - * field contains the block number if an inode uses an additional block. All - * attributes must fit in the inode and one additional block. Blocks that - * contain the identical set of attributes may be shared among several inodes. - * Identical blocks are detected by keeping a cache of blocks that have - * recently been accessed. - * - * The attributes in inodes and on blocks have a different header; the entries - * are stored in the same format: - * - * +------------------+ - * | header | - * | entry 1 | | - * | entry 2 | | growing downwards - * | entry 3 | v - * | four null bytes | - * | . . . | - * | value 1 | ^ - * | value 3 | | growing upwards - * | value 2 | | - * +------------------+ - * - * The header is followed by multiple entry descriptors. In disk blocks, the - * entry descriptors are kept sorted. In inodes, they are unsorted. The - * attribute values are aligned to the end of the block in no specific order. - * - * Locking strategy - * ---------------- - * EXT4_I(inode)->i_file_acl is protected by EXT4_I(inode)->xattr_sem. - * EA blocks are only changed if they are exclusive to an inode, so - * holding xattr_sem also means that nothing but the EA block's reference - * count can change. Multiple writers to the same block are synchronized - * by the buffer lock. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include "xattr.h" -#include "acl.h" - -#define BHDR(bh) ((struct ext4_xattr_header *)((bh)->b_data)) -#define ENTRY(ptr) ((struct ext4_xattr_entry *)(ptr)) -#define BFIRST(bh) ENTRY(BHDR(bh)+1) -#define IS_LAST_ENTRY(entry) (*(__u32 *)(entry) == 0) - -#define IHDR(inode, raw_inode) \ - ((struct ext4_xattr_ibody_header *) \ - ((void *)raw_inode + \ - EXT4_GOOD_OLD_INODE_SIZE + \ - EXT4_I(inode)->i_extra_isize)) -#define IFIRST(hdr) ((struct ext4_xattr_entry *)((hdr)+1)) - -#ifdef EXT4_XATTR_DEBUG -# define ea_idebug(inode, f...) do { \ - printk(KERN_DEBUG "inode %s:%lu: ", \ - inode->i_sb->s_id, inode->i_ino); \ - printk(f); \ - printk("\n"); \ - } while (0) -# define ea_bdebug(bh, f...) do { \ - char b[BDEVNAME_SIZE]; \ - printk(KERN_DEBUG "block %s:%lu: ", \ - bdevname(bh->b_bdev, b), \ - (unsigned long) bh->b_blocknr); \ - printk(f); \ - printk("\n"); \ - } while (0) -#else -# define ea_idebug(f...) -# define ea_bdebug(f...) -#endif - -static void ext4_xattr_cache_insert(struct buffer_head *); -static struct buffer_head *ext4_xattr_cache_find(struct inode *, - struct ext4_xattr_header *, - struct mb_cache_entry **); -static void ext4_xattr_rehash(struct ext4_xattr_header *, - struct ext4_xattr_entry *); - -static struct mb_cache *ext4_xattr_cache; - -static struct xattr_handler *ext4_xattr_handler_map[] = { - [EXT4_XATTR_INDEX_USER] = &ext4_xattr_user_handler, -#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL - [EXT4_XATTR_INDEX_POSIX_ACL_ACCESS] = &ext4_xattr_acl_access_handler, - [EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT] = &ext4_xattr_acl_default_handler, -#endif - [EXT4_XATTR_INDEX_TRUSTED] = &ext4_xattr_trusted_handler, -#ifdef CONFIG_EXT4DEV_FS_SECURITY - [EXT4_XATTR_INDEX_SECURITY] = &ext4_xattr_security_handler, -#endif -}; - -struct xattr_handler *ext4_xattr_handlers[] = { - &ext4_xattr_user_handler, - &ext4_xattr_trusted_handler, -#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL - &ext4_xattr_acl_access_handler, - &ext4_xattr_acl_default_handler, -#endif -#ifdef CONFIG_EXT4DEV_FS_SECURITY - &ext4_xattr_security_handler, -#endif - NULL -}; - -static inline struct xattr_handler * -ext4_xattr_handler(int name_index) -{ - struct xattr_handler *handler = NULL; - - if (name_index > 0 && name_index < ARRAY_SIZE(ext4_xattr_handler_map)) - handler = ext4_xattr_handler_map[name_index]; - return handler; -} - -/* - * Inode operation listxattr() - * - * dentry->d_inode->i_mutex: don't care - */ -ssize_t -ext4_listxattr(struct dentry *dentry, char *buffer, size_t size) -{ - return ext4_xattr_list(dentry->d_inode, buffer, size); -} - -static int -ext4_xattr_check_names(struct ext4_xattr_entry *entry, void *end) -{ - while (!IS_LAST_ENTRY(entry)) { - struct ext4_xattr_entry *next = EXT4_XATTR_NEXT(entry); - if ((void *)next >= end) - return -EIO; - entry = next; - } - return 0; -} - -static inline int -ext4_xattr_check_block(struct buffer_head *bh) -{ - int error; - - if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) || - BHDR(bh)->h_blocks != cpu_to_le32(1)) - return -EIO; - error = ext4_xattr_check_names(BFIRST(bh), bh->b_data + bh->b_size); - return error; -} - -static inline int -ext4_xattr_check_entry(struct ext4_xattr_entry *entry, size_t size) -{ - size_t value_size = le32_to_cpu(entry->e_value_size); - - if (entry->e_value_block != 0 || value_size > size || - le16_to_cpu(entry->e_value_offs) + value_size > size) - return -EIO; - return 0; -} - -static int -ext4_xattr_find_entry(struct ext4_xattr_entry **pentry, int name_index, - const char *name, size_t size, int sorted) -{ - struct ext4_xattr_entry *entry; - size_t name_len; - int cmp = 1; - - if (name == NULL) - return -EINVAL; - name_len = strlen(name); - entry = *pentry; - for (; !IS_LAST_ENTRY(entry); entry = EXT4_XATTR_NEXT(entry)) { - cmp = name_index - entry->e_name_index; - if (!cmp) - cmp = name_len - entry->e_name_len; - if (!cmp) - cmp = memcmp(name, entry->e_name, name_len); - if (cmp <= 0 && (sorted || cmp == 0)) - break; - } - *pentry = entry; - if (!cmp && ext4_xattr_check_entry(entry, size)) - return -EIO; - return cmp ? -ENODATA : 0; -} - -static int -ext4_xattr_block_get(struct inode *inode, int name_index, const char *name, - void *buffer, size_t buffer_size) -{ - struct buffer_head *bh = NULL; - struct ext4_xattr_entry *entry; - size_t size; - int error; - - ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld", - name_index, name, buffer, (long)buffer_size); - - error = -ENODATA; - if (!EXT4_I(inode)->i_file_acl) - goto cleanup; - ea_idebug(inode, "reading block %u", EXT4_I(inode)->i_file_acl); - bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl); - if (!bh) - goto cleanup; - ea_bdebug(bh, "b_count=%d, refcount=%d", - atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount)); - if (ext4_xattr_check_block(bh)) { -bad_block: ext4_error(inode->i_sb, __FUNCTION__, - "inode %lu: bad block %llu", inode->i_ino, - EXT4_I(inode)->i_file_acl); - error = -EIO; - goto cleanup; - } - ext4_xattr_cache_insert(bh); - entry = BFIRST(bh); - error = ext4_xattr_find_entry(&entry, name_index, name, bh->b_size, 1); - if (error == -EIO) - goto bad_block; - if (error) - goto cleanup; - size = le32_to_cpu(entry->e_value_size); - if (buffer) { - error = -ERANGE; - if (size > buffer_size) - goto cleanup; - memcpy(buffer, bh->b_data + le16_to_cpu(entry->e_value_offs), - size); - } - error = size; - -cleanup: - brelse(bh); - return error; -} - -static int -ext4_xattr_ibody_get(struct inode *inode, int name_index, const char *name, - void *buffer, size_t buffer_size) -{ - struct ext4_xattr_ibody_header *header; - struct ext4_xattr_entry *entry; - struct ext4_inode *raw_inode; - struct ext4_iloc iloc; - size_t size; - void *end; - int error; - - if (!(EXT4_I(inode)->i_state & EXT4_STATE_XATTR)) - return -ENODATA; - error = ext4_get_inode_loc(inode, &iloc); - if (error) - return error; - raw_inode = ext4_raw_inode(&iloc); - header = IHDR(inode, raw_inode); - entry = IFIRST(header); - end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size; - error = ext4_xattr_check_names(entry, end); - if (error) - goto cleanup; - error = ext4_xattr_find_entry(&entry, name_index, name, - end - (void *)entry, 0); - if (error) - goto cleanup; - size = le32_to_cpu(entry->e_value_size); - if (buffer) { - error = -ERANGE; - if (size > buffer_size) - goto cleanup; - memcpy(buffer, (void *)IFIRST(header) + - le16_to_cpu(entry->e_value_offs), size); - } - error = size; - -cleanup: - brelse(iloc.bh); - return error; -} - -/* - * ext4_xattr_get() - * - * Copy an extended attribute into the buffer - * provided, or compute the buffer size required. - * Buffer is NULL to compute the size of the buffer required. - * - * Returns a negative error number on failure, or the number of bytes - * used / required on success. - */ -int -ext4_xattr_get(struct inode *inode, int name_index, const char *name, - void *buffer, size_t buffer_size) -{ - int error; - - down_read(&EXT4_I(inode)->xattr_sem); - error = ext4_xattr_ibody_get(inode, name_index, name, buffer, - buffer_size); - if (error == -ENODATA) - error = ext4_xattr_block_get(inode, name_index, name, buffer, - buffer_size); - up_read(&EXT4_I(inode)->xattr_sem); - return error; -} - -static int -ext4_xattr_list_entries(struct inode *inode, struct ext4_xattr_entry *entry, - char *buffer, size_t buffer_size) -{ - size_t rest = buffer_size; - - for (; !IS_LAST_ENTRY(entry); entry = EXT4_XATTR_NEXT(entry)) { - struct xattr_handler *handler = - ext4_xattr_handler(entry->e_name_index); - - if (handler) { - size_t size = handler->list(inode, buffer, rest, - entry->e_name, - entry->e_name_len); - if (buffer) { - if (size > rest) - return -ERANGE; - buffer += size; - } - rest -= size; - } - } - return buffer_size - rest; -} - -static int -ext4_xattr_block_list(struct inode *inode, char *buffer, size_t buffer_size) -{ - struct buffer_head *bh = NULL; - int error; - - ea_idebug(inode, "buffer=%p, buffer_size=%ld", - buffer, (long)buffer_size); - - error = 0; - if (!EXT4_I(inode)->i_file_acl) - goto cleanup; - ea_idebug(inode, "reading block %u", EXT4_I(inode)->i_file_acl); - bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl); - error = -EIO; - if (!bh) - goto cleanup; - ea_bdebug(bh, "b_count=%d, refcount=%d", - atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount)); - if (ext4_xattr_check_block(bh)) { - ext4_error(inode->i_sb, __FUNCTION__, - "inode %lu: bad block %llu", inode->i_ino, - EXT4_I(inode)->i_file_acl); - error = -EIO; - goto cleanup; - } - ext4_xattr_cache_insert(bh); - error = ext4_xattr_list_entries(inode, BFIRST(bh), buffer, buffer_size); - -cleanup: - brelse(bh); - - return error; -} - -static int -ext4_xattr_ibody_list(struct inode *inode, char *buffer, size_t buffer_size) -{ - struct ext4_xattr_ibody_header *header; - struct ext4_inode *raw_inode; - struct ext4_iloc iloc; - void *end; - int error; - - if (!(EXT4_I(inode)->i_state & EXT4_STATE_XATTR)) - return 0; - error = ext4_get_inode_loc(inode, &iloc); - if (error) - return error; - raw_inode = ext4_raw_inode(&iloc); - header = IHDR(inode, raw_inode); - end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size; - error = ext4_xattr_check_names(IFIRST(header), end); - if (error) - goto cleanup; - error = ext4_xattr_list_entries(inode, IFIRST(header), - buffer, buffer_size); - -cleanup: - brelse(iloc.bh); - return error; -} - -/* - * ext4_xattr_list() - * - * Copy a list of attribute names into the buffer - * provided, or compute the buffer size required. - * Buffer is NULL to compute the size of the buffer required. - * - * Returns a negative error number on failure, or the number of bytes - * used / required on success. - */ -int -ext4_xattr_list(struct inode *inode, char *buffer, size_t buffer_size) -{ - int i_error, b_error; - - down_read(&EXT4_I(inode)->xattr_sem); - i_error = ext4_xattr_ibody_list(inode, buffer, buffer_size); - if (i_error < 0) { - b_error = 0; - } else { - if (buffer) { - buffer += i_error; - buffer_size -= i_error; - } - b_error = ext4_xattr_block_list(inode, buffer, buffer_size); - if (b_error < 0) - i_error = 0; - } - up_read(&EXT4_I(inode)->xattr_sem); - return i_error + b_error; -} - -/* - * If the EXT4_FEATURE_COMPAT_EXT_ATTR feature of this file system is - * not set, set it. - */ -static void ext4_xattr_update_super_block(handle_t *handle, - struct super_block *sb) -{ - if (EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_EXT_ATTR)) - return; - - lock_super(sb); - if (ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh) == 0) { - EXT4_SB(sb)->s_es->s_feature_compat |= - cpu_to_le32(EXT4_FEATURE_COMPAT_EXT_ATTR); - sb->s_dirt = 1; - ext4_journal_dirty_metadata(handle, EXT4_SB(sb)->s_sbh); - } - unlock_super(sb); -} - -/* - * Release the xattr block BH: If the reference count is > 1, decrement - * it; otherwise free the block. - */ -static void -ext4_xattr_release_block(handle_t *handle, struct inode *inode, - struct buffer_head *bh) -{ - struct mb_cache_entry *ce = NULL; - - ce = mb_cache_entry_get(ext4_xattr_cache, bh->b_bdev, bh->b_blocknr); - if (BHDR(bh)->h_refcount == cpu_to_le32(1)) { - ea_bdebug(bh, "refcount now=0; freeing"); - if (ce) - mb_cache_entry_free(ce); - ext4_free_blocks(handle, inode, bh->b_blocknr, 1); - get_bh(bh); - ext4_forget(handle, 1, inode, bh, bh->b_blocknr); - } else { - if (ext4_journal_get_write_access(handle, bh) == 0) { - lock_buffer(bh); - BHDR(bh)->h_refcount = cpu_to_le32( - le32_to_cpu(BHDR(bh)->h_refcount) - 1); - ext4_journal_dirty_metadata(handle, bh); - if (IS_SYNC(inode)) - handle->h_sync = 1; - DQUOT_FREE_BLOCK(inode, 1); - unlock_buffer(bh); - ea_bdebug(bh, "refcount now=%d; releasing", - le32_to_cpu(BHDR(bh)->h_refcount)); - } - if (ce) - mb_cache_entry_release(ce); - } -} - -struct ext4_xattr_info { - int name_index; - const char *name; - const void *value; - size_t value_len; -}; - -struct ext4_xattr_search { - struct ext4_xattr_entry *first; - void *base; - void *end; - struct ext4_xattr_entry *here; - int not_found; -}; - -static int -ext4_xattr_set_entry(struct ext4_xattr_info *i, struct ext4_xattr_search *s) -{ - struct ext4_xattr_entry *last; - size_t free, min_offs = s->end - s->base, name_len = strlen(i->name); - - /* Compute min_offs and last. */ - last = s->first; - for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) { - if (!last->e_value_block && last->e_value_size) { - size_t offs = le16_to_cpu(last->e_value_offs); - if (offs < min_offs) - min_offs = offs; - } - } - free = min_offs - ((void *)last - s->base) - sizeof(__u32); - if (!s->not_found) { - if (!s->here->e_value_block && s->here->e_value_size) { - size_t size = le32_to_cpu(s->here->e_value_size); - free += EXT4_XATTR_SIZE(size); - } - free += EXT4_XATTR_LEN(name_len); - } - if (i->value) { - if (free < EXT4_XATTR_SIZE(i->value_len) || - free < EXT4_XATTR_LEN(name_len) + - EXT4_XATTR_SIZE(i->value_len)) - return -ENOSPC; - } - - if (i->value && s->not_found) { - /* Insert the new name. */ - size_t size = EXT4_XATTR_LEN(name_len); - size_t rest = (void *)last - (void *)s->here + sizeof(__u32); - memmove((void *)s->here + size, s->here, rest); - memset(s->here, 0, size); - s->here->e_name_index = i->name_index; - s->here->e_name_len = name_len; - memcpy(s->here->e_name, i->name, name_len); - } else { - if (!s->here->e_value_block && s->here->e_value_size) { - void *first_val = s->base + min_offs; - size_t offs = le16_to_cpu(s->here->e_value_offs); - void *val = s->base + offs; - size_t size = EXT4_XATTR_SIZE( - le32_to_cpu(s->here->e_value_size)); - - if (i->value && size == EXT4_XATTR_SIZE(i->value_len)) { - /* The old and the new value have the same - size. Just replace. */ - s->here->e_value_size = - cpu_to_le32(i->value_len); - memset(val + size - EXT4_XATTR_PAD, 0, - EXT4_XATTR_PAD); /* Clear pad bytes. */ - memcpy(val, i->value, i->value_len); - return 0; - } - - /* Remove the old value. */ - memmove(first_val + size, first_val, val - first_val); - memset(first_val, 0, size); - s->here->e_value_size = 0; - s->here->e_value_offs = 0; - min_offs += size; - - /* Adjust all value offsets. */ - last = s->first; - while (!IS_LAST_ENTRY(last)) { - size_t o = le16_to_cpu(last->e_value_offs); - if (!last->e_value_block && - last->e_value_size && o < offs) - last->e_value_offs = - cpu_to_le16(o + size); - last = EXT4_XATTR_NEXT(last); - } - } - if (!i->value) { - /* Remove the old name. */ - size_t size = EXT4_XATTR_LEN(name_len); - last = ENTRY((void *)last - size); - memmove(s->here, (void *)s->here + size, - (void *)last - (void *)s->here + sizeof(__u32)); - memset(last, 0, size); - } - } - - if (i->value) { - /* Insert the new value. */ - s->here->e_value_size = cpu_to_le32(i->value_len); - if (i->value_len) { - size_t size = EXT4_XATTR_SIZE(i->value_len); - void *val = s->base + min_offs - size; - s->here->e_value_offs = cpu_to_le16(min_offs - size); - memset(val + size - EXT4_XATTR_PAD, 0, - EXT4_XATTR_PAD); /* Clear the pad bytes. */ - memcpy(val, i->value, i->value_len); - } - } - return 0; -} - -struct ext4_xattr_block_find { - struct ext4_xattr_search s; - struct buffer_head *bh; -}; - -static int -ext4_xattr_block_find(struct inode *inode, struct ext4_xattr_info *i, - struct ext4_xattr_block_find *bs) -{ - struct super_block *sb = inode->i_sb; - int error; - - ea_idebug(inode, "name=%d.%s, value=%p, value_len=%ld", - i->name_index, i->name, i->value, (long)i->value_len); - - if (EXT4_I(inode)->i_file_acl) { - /* The inode already has an extended attribute block. */ - bs->bh = sb_bread(sb, EXT4_I(inode)->i_file_acl); - error = -EIO; - if (!bs->bh) - goto cleanup; - ea_bdebug(bs->bh, "b_count=%d, refcount=%d", - atomic_read(&(bs->bh->b_count)), - le32_to_cpu(BHDR(bs->bh)->h_refcount)); - if (ext4_xattr_check_block(bs->bh)) { - ext4_error(sb, __FUNCTION__, - "inode %lu: bad block %llu", inode->i_ino, - EXT4_I(inode)->i_file_acl); - error = -EIO; - goto cleanup; - } - /* Find the named attribute. */ - bs->s.base = BHDR(bs->bh); - bs->s.first = BFIRST(bs->bh); - bs->s.end = bs->bh->b_data + bs->bh->b_size; - bs->s.here = bs->s.first; - error = ext4_xattr_find_entry(&bs->s.here, i->name_index, - i->name, bs->bh->b_size, 1); - if (error && error != -ENODATA) - goto cleanup; - bs->s.not_found = error; - } - error = 0; - -cleanup: - return error; -} - -static int -ext4_xattr_block_set(handle_t *handle, struct inode *inode, - struct ext4_xattr_info *i, - struct ext4_xattr_block_find *bs) -{ - struct super_block *sb = inode->i_sb; - struct buffer_head *new_bh = NULL; - struct ext4_xattr_search *s = &bs->s; - struct mb_cache_entry *ce = NULL; - int error; - -#define header(x) ((struct ext4_xattr_header *)(x)) - - if (i->value && i->value_len > sb->s_blocksize) - return -ENOSPC; - if (s->base) { - ce = mb_cache_entry_get(ext4_xattr_cache, bs->bh->b_bdev, - bs->bh->b_blocknr); - if (header(s->base)->h_refcount == cpu_to_le32(1)) { - if (ce) { - mb_cache_entry_free(ce); - ce = NULL; - } - ea_bdebug(bs->bh, "modifying in-place"); - error = ext4_journal_get_write_access(handle, bs->bh); - if (error) - goto cleanup; - lock_buffer(bs->bh); - error = ext4_xattr_set_entry(i, s); - if (!error) { - if (!IS_LAST_ENTRY(s->first)) - ext4_xattr_rehash(header(s->base), - s->here); - ext4_xattr_cache_insert(bs->bh); - } - unlock_buffer(bs->bh); - if (error == -EIO) - goto bad_block; - if (!error) - error = ext4_journal_dirty_metadata(handle, - bs->bh); - if (error) - goto cleanup; - goto inserted; - } else { - int offset = (char *)s->here - bs->bh->b_data; - - if (ce) { - mb_cache_entry_release(ce); - ce = NULL; - } - ea_bdebug(bs->bh, "cloning"); - s->base = kmalloc(bs->bh->b_size, GFP_KERNEL); - error = -ENOMEM; - if (s->base == NULL) - goto cleanup; - memcpy(s->base, BHDR(bs->bh), bs->bh->b_size); - s->first = ENTRY(header(s->base)+1); - header(s->base)->h_refcount = cpu_to_le32(1); - s->here = ENTRY(s->base + offset); - s->end = s->base + bs->bh->b_size; - } - } else { - /* Allocate a buffer where we construct the new block. */ - s->base = kmalloc(sb->s_blocksize, GFP_KERNEL); - /* assert(header == s->base) */ - error = -ENOMEM; - if (s->base == NULL) - goto cleanup; - memset(s->base, 0, sb->s_blocksize); - header(s->base)->h_magic = cpu_to_le32(EXT4_XATTR_MAGIC); - header(s->base)->h_blocks = cpu_to_le32(1); - header(s->base)->h_refcount = cpu_to_le32(1); - s->first = ENTRY(header(s->base)+1); - s->here = ENTRY(header(s->base)+1); - s->end = s->base + sb->s_blocksize; - } - - error = ext4_xattr_set_entry(i, s); - if (error == -EIO) - goto bad_block; - if (error) - goto cleanup; - if (!IS_LAST_ENTRY(s->first)) - ext4_xattr_rehash(header(s->base), s->here); - -inserted: - if (!IS_LAST_ENTRY(s->first)) { - new_bh = ext4_xattr_cache_find(inode, header(s->base), &ce); - if (new_bh) { - /* We found an identical block in the cache. */ - if (new_bh == bs->bh) - ea_bdebug(new_bh, "keeping"); - else { - /* The old block is released after updating - the inode. */ - error = -EDQUOT; - if (DQUOT_ALLOC_BLOCK(inode, 1)) - goto cleanup; - error = ext4_journal_get_write_access(handle, - new_bh); - if (error) - goto cleanup_dquot; - lock_buffer(new_bh); - BHDR(new_bh)->h_refcount = cpu_to_le32(1 + - le32_to_cpu(BHDR(new_bh)->h_refcount)); - ea_bdebug(new_bh, "reusing; refcount now=%d", - le32_to_cpu(BHDR(new_bh)->h_refcount)); - unlock_buffer(new_bh); - error = ext4_journal_dirty_metadata(handle, - new_bh); - if (error) - goto cleanup_dquot; - } - mb_cache_entry_release(ce); - ce = NULL; - } else if (bs->bh && s->base == bs->bh->b_data) { - /* We were modifying this block in-place. */ - ea_bdebug(bs->bh, "keeping this block"); - new_bh = bs->bh; - get_bh(new_bh); - } else { - /* We need to allocate a new block */ - ext4_fsblk_t goal = le32_to_cpu( - EXT4_SB(sb)->s_es->s_first_data_block) + - (ext4_fsblk_t)EXT4_I(inode)->i_block_group * - EXT4_BLOCKS_PER_GROUP(sb); - ext4_fsblk_t block = ext4_new_block(handle, inode, - goal, &error); - if (error) - goto cleanup; - ea_idebug(inode, "creating block %d", block); - - new_bh = sb_getblk(sb, block); - if (!new_bh) { -getblk_failed: - ext4_free_blocks(handle, inode, block, 1); - error = -EIO; - goto cleanup; - } - lock_buffer(new_bh); - error = ext4_journal_get_create_access(handle, new_bh); - if (error) { - unlock_buffer(new_bh); - goto getblk_failed; - } - memcpy(new_bh->b_data, s->base, new_bh->b_size); - set_buffer_uptodate(new_bh); - unlock_buffer(new_bh); - ext4_xattr_cache_insert(new_bh); - error = ext4_journal_dirty_metadata(handle, new_bh); - if (error) - goto cleanup; - } - } - - /* Update the inode. */ - EXT4_I(inode)->i_file_acl = new_bh ? new_bh->b_blocknr : 0; - - /* Drop the previous xattr block. */ - if (bs->bh && bs->bh != new_bh) - ext4_xattr_release_block(handle, inode, bs->bh); - error = 0; - -cleanup: - if (ce) - mb_cache_entry_release(ce); - brelse(new_bh); - if (!(bs->bh && s->base == bs->bh->b_data)) - kfree(s->base); - - return error; - -cleanup_dquot: - DQUOT_FREE_BLOCK(inode, 1); - goto cleanup; - -bad_block: - ext4_error(inode->i_sb, __FUNCTION__, - "inode %lu: bad block %llu", inode->i_ino, - EXT4_I(inode)->i_file_acl); - goto cleanup; - -#undef header -} - -struct ext4_xattr_ibody_find { - struct ext4_xattr_search s; - struct ext4_iloc iloc; -}; - -static int -ext4_xattr_ibody_find(struct inode *inode, struct ext4_xattr_info *i, - struct ext4_xattr_ibody_find *is) -{ - struct ext4_xattr_ibody_header *header; - struct ext4_inode *raw_inode; - int error; - - if (EXT4_I(inode)->i_extra_isize == 0) - return 0; - raw_inode = ext4_raw_inode(&is->iloc); - header = IHDR(inode, raw_inode); - is->s.base = is->s.first = IFIRST(header); - is->s.here = is->s.first; - is->s.end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size; - if (EXT4_I(inode)->i_state & EXT4_STATE_XATTR) { - error = ext4_xattr_check_names(IFIRST(header), is->s.end); - if (error) - return error; - /* Find the named attribute. */ - error = ext4_xattr_find_entry(&is->s.here, i->name_index, - i->name, is->s.end - - (void *)is->s.base, 0); - if (error && error != -ENODATA) - return error; - is->s.not_found = error; - } - return 0; -} - -static int -ext4_xattr_ibody_set(handle_t *handle, struct inode *inode, - struct ext4_xattr_info *i, - struct ext4_xattr_ibody_find *is) -{ - struct ext4_xattr_ibody_header *header; - struct ext4_xattr_search *s = &is->s; - int error; - - if (EXT4_I(inode)->i_extra_isize == 0) - return -ENOSPC; - error = ext4_xattr_set_entry(i, s); - if (error) - return error; - header = IHDR(inode, ext4_raw_inode(&is->iloc)); - if (!IS_LAST_ENTRY(s->first)) { - header->h_magic = cpu_to_le32(EXT4_XATTR_MAGIC); - EXT4_I(inode)->i_state |= EXT4_STATE_XATTR; - } else { - header->h_magic = cpu_to_le32(0); - EXT4_I(inode)->i_state &= ~EXT4_STATE_XATTR; - } - return 0; -} - -/* - * ext4_xattr_set_handle() - * - * Create, replace or remove an extended attribute for this inode. Buffer - * is NULL to remove an existing extended attribute, and non-NULL to - * either replace an existing extended attribute, or create a new extended - * attribute. The flags XATTR_REPLACE and XATTR_CREATE - * specify that an extended attribute must exist and must not exist - * previous to the call, respectively. - * - * Returns 0, or a negative error number on failure. - */ -int -ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, - const char *name, const void *value, size_t value_len, - int flags) -{ - struct ext4_xattr_info i = { - .name_index = name_index, - .name = name, - .value = value, - .value_len = value_len, - - }; - struct ext4_xattr_ibody_find is = { - .s = { .not_found = -ENODATA, }, - }; - struct ext4_xattr_block_find bs = { - .s = { .not_found = -ENODATA, }, - }; - int error; - - if (!name) - return -EINVAL; - if (strlen(name) > 255) - return -ERANGE; - down_write(&EXT4_I(inode)->xattr_sem); - error = ext4_get_inode_loc(inode, &is.iloc); - if (error) - goto cleanup; - - if (EXT4_I(inode)->i_state & EXT4_STATE_NEW) { - struct ext4_inode *raw_inode = ext4_raw_inode(&is.iloc); - memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size); - EXT4_I(inode)->i_state &= ~EXT4_STATE_NEW; - } - - error = ext4_xattr_ibody_find(inode, &i, &is); - if (error) - goto cleanup; - if (is.s.not_found) - error = ext4_xattr_block_find(inode, &i, &bs); - if (error) - goto cleanup; - if (is.s.not_found && bs.s.not_found) { - error = -ENODATA; - if (flags & XATTR_REPLACE) - goto cleanup; - error = 0; - if (!value) - goto cleanup; - } else { - error = -EEXIST; - if (flags & XATTR_CREATE) - goto cleanup; - } - error = ext4_journal_get_write_access(handle, is.iloc.bh); - if (error) - goto cleanup; - if (!value) { - if (!is.s.not_found) - error = ext4_xattr_ibody_set(handle, inode, &i, &is); - else if (!bs.s.not_found) - error = ext4_xattr_block_set(handle, inode, &i, &bs); - } else { - error = ext4_xattr_ibody_set(handle, inode, &i, &is); - if (!error && !bs.s.not_found) { - i.value = NULL; - error = ext4_xattr_block_set(handle, inode, &i, &bs); - } else if (error == -ENOSPC) { - error = ext4_xattr_block_set(handle, inode, &i, &bs); - if (error) - goto cleanup; - if (!is.s.not_found) { - i.value = NULL; - error = ext4_xattr_ibody_set(handle, inode, &i, - &is); - } - } - } - if (!error) { - ext4_xattr_update_super_block(handle, inode->i_sb); - inode->i_ctime = CURRENT_TIME_SEC; - error = ext4_mark_iloc_dirty(handle, inode, &is.iloc); - /* - * The bh is consumed by ext4_mark_iloc_dirty, even with - * error != 0. - */ - is.iloc.bh = NULL; - if (IS_SYNC(inode)) - handle->h_sync = 1; - } - -cleanup: - brelse(is.iloc.bh); - brelse(bs.bh); - up_write(&EXT4_I(inode)->xattr_sem); - return error; -} - -/* - * ext4_xattr_set() - * - * Like ext4_xattr_set_handle, but start from an inode. This extended - * attribute modification is a filesystem transaction by itself. - * - * Returns 0, or a negative error number on failure. - */ -int -ext4_xattr_set(struct inode *inode, int name_index, const char *name, - const void *value, size_t value_len, int flags) -{ - handle_t *handle; - int error, retries = 0; - -retry: - handle = ext4_journal_start(inode, EXT4_DATA_TRANS_BLOCKS(inode->i_sb)); - if (IS_ERR(handle)) { - error = PTR_ERR(handle); - } else { - int error2; - - error = ext4_xattr_set_handle(handle, inode, name_index, name, - value, value_len, flags); - error2 = ext4_journal_stop(handle); - if (error == -ENOSPC && - ext4_should_retry_alloc(inode->i_sb, &retries)) - goto retry; - if (error == 0) - error = error2; - } - - return error; -} - -/* - * ext4_xattr_delete_inode() - * - * Free extended attribute resources associated with this inode. This - * is called immediately before an inode is freed. We have exclusive - * access to the inode. - */ -void -ext4_xattr_delete_inode(handle_t *handle, struct inode *inode) -{ - struct buffer_head *bh = NULL; - - if (!EXT4_I(inode)->i_file_acl) - goto cleanup; - bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl); - if (!bh) { - ext4_error(inode->i_sb, __FUNCTION__, - "inode %lu: block %llu read error", inode->i_ino, - EXT4_I(inode)->i_file_acl); - goto cleanup; - } - if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) || - BHDR(bh)->h_blocks != cpu_to_le32(1)) { - ext4_error(inode->i_sb, __FUNCTION__, - "inode %lu: bad block %llu", inode->i_ino, - EXT4_I(inode)->i_file_acl); - goto cleanup; - } - ext4_xattr_release_block(handle, inode, bh); - EXT4_I(inode)->i_file_acl = 0; - -cleanup: - brelse(bh); -} - -/* - * ext4_xattr_put_super() - * - * This is called when a file system is unmounted. - */ -void -ext4_xattr_put_super(struct super_block *sb) -{ - mb_cache_shrink(sb->s_bdev); -} - -/* - * ext4_xattr_cache_insert() - * - * Create a new entry in the extended attribute cache, and insert - * it unless such an entry is already in the cache. - * - * Returns 0, or a negative error number on failure. - */ -static void -ext4_xattr_cache_insert(struct buffer_head *bh) -{ - __u32 hash = le32_to_cpu(BHDR(bh)->h_hash); - struct mb_cache_entry *ce; - int error; - - ce = mb_cache_entry_alloc(ext4_xattr_cache); - if (!ce) { - ea_bdebug(bh, "out of memory"); - return; - } - error = mb_cache_entry_insert(ce, bh->b_bdev, bh->b_blocknr, &hash); - if (error) { - mb_cache_entry_free(ce); - if (error == -EBUSY) { - ea_bdebug(bh, "already in cache"); - error = 0; - } - } else { - ea_bdebug(bh, "inserting [%x]", (int)hash); - mb_cache_entry_release(ce); - } -} - -/* - * ext4_xattr_cmp() - * - * Compare two extended attribute blocks for equality. - * - * Returns 0 if the blocks are equal, 1 if they differ, and - * a negative error number on errors. - */ -static int -ext4_xattr_cmp(struct ext4_xattr_header *header1, - struct ext4_xattr_header *header2) -{ - struct ext4_xattr_entry *entry1, *entry2; - - entry1 = ENTRY(header1+1); - entry2 = ENTRY(header2+1); - while (!IS_LAST_ENTRY(entry1)) { - if (IS_LAST_ENTRY(entry2)) - return 1; - if (entry1->e_hash != entry2->e_hash || - entry1->e_name_index != entry2->e_name_index || - entry1->e_name_len != entry2->e_name_len || - entry1->e_value_size != entry2->e_value_size || - memcmp(entry1->e_name, entry2->e_name, entry1->e_name_len)) - return 1; - if (entry1->e_value_block != 0 || entry2->e_value_block != 0) - return -EIO; - if (memcmp((char *)header1 + le16_to_cpu(entry1->e_value_offs), - (char *)header2 + le16_to_cpu(entry2->e_value_offs), - le32_to_cpu(entry1->e_value_size))) - return 1; - - entry1 = EXT4_XATTR_NEXT(entry1); - entry2 = EXT4_XATTR_NEXT(entry2); - } - if (!IS_LAST_ENTRY(entry2)) - return 1; - return 0; -} - -/* - * ext4_xattr_cache_find() - * - * Find an identical extended attribute block. - * - * Returns a pointer to the block found, or NULL if such a block was - * not found or an error occurred. - */ -static struct buffer_head * -ext4_xattr_cache_find(struct inode *inode, struct ext4_xattr_header *header, - struct mb_cache_entry **pce) -{ - __u32 hash = le32_to_cpu(header->h_hash); - struct mb_cache_entry *ce; - - if (!header->h_hash) - return NULL; /* never share */ - ea_idebug(inode, "looking for cached blocks [%x]", (int)hash); -again: - ce = mb_cache_entry_find_first(ext4_xattr_cache, 0, - inode->i_sb->s_bdev, hash); - while (ce) { - struct buffer_head *bh; - - if (IS_ERR(ce)) { - if (PTR_ERR(ce) == -EAGAIN) - goto again; - break; - } - bh = sb_bread(inode->i_sb, ce->e_block); - if (!bh) { - ext4_error(inode->i_sb, __FUNCTION__, - "inode %lu: block %lu read error", - inode->i_ino, (unsigned long) ce->e_block); - } else if (le32_to_cpu(BHDR(bh)->h_refcount) >= - EXT4_XATTR_REFCOUNT_MAX) { - ea_idebug(inode, "block %lu refcount %d>=%d", - (unsigned long) ce->e_block, - le32_to_cpu(BHDR(bh)->h_refcount), - EXT4_XATTR_REFCOUNT_MAX); - } else if (ext4_xattr_cmp(header, BHDR(bh)) == 0) { - *pce = ce; - return bh; - } - brelse(bh); - ce = mb_cache_entry_find_next(ce, 0, inode->i_sb->s_bdev, hash); - } - return NULL; -} - -#define NAME_HASH_SHIFT 5 -#define VALUE_HASH_SHIFT 16 - -/* - * ext4_xattr_hash_entry() - * - * Compute the hash of an extended attribute. - */ -static inline void ext4_xattr_hash_entry(struct ext4_xattr_header *header, - struct ext4_xattr_entry *entry) -{ - __u32 hash = 0; - char *name = entry->e_name; - int n; - - for (n=0; n < entry->e_name_len; n++) { - hash = (hash << NAME_HASH_SHIFT) ^ - (hash >> (8*sizeof(hash) - NAME_HASH_SHIFT)) ^ - *name++; - } - - if (entry->e_value_block == 0 && entry->e_value_size != 0) { - __le32 *value = (__le32 *)((char *)header + - le16_to_cpu(entry->e_value_offs)); - for (n = (le32_to_cpu(entry->e_value_size) + - EXT4_XATTR_ROUND) >> EXT4_XATTR_PAD_BITS; n; n--) { - hash = (hash << VALUE_HASH_SHIFT) ^ - (hash >> (8*sizeof(hash) - VALUE_HASH_SHIFT)) ^ - le32_to_cpu(*value++); - } - } - entry->e_hash = cpu_to_le32(hash); -} - -#undef NAME_HASH_SHIFT -#undef VALUE_HASH_SHIFT - -#define BLOCK_HASH_SHIFT 16 - -/* - * ext4_xattr_rehash() - * - * Re-compute the extended attribute hash value after an entry has changed. - */ -static void ext4_xattr_rehash(struct ext4_xattr_header *header, - struct ext4_xattr_entry *entry) -{ - struct ext4_xattr_entry *here; - __u32 hash = 0; - - ext4_xattr_hash_entry(header, entry); - here = ENTRY(header+1); - while (!IS_LAST_ENTRY(here)) { - if (!here->e_hash) { - /* Block is not shared if an entry's hash value == 0 */ - hash = 0; - break; - } - hash = (hash << BLOCK_HASH_SHIFT) ^ - (hash >> (8*sizeof(hash) - BLOCK_HASH_SHIFT)) ^ - le32_to_cpu(here->e_hash); - here = EXT4_XATTR_NEXT(here); - } - header->h_hash = cpu_to_le32(hash); -} - -#undef BLOCK_HASH_SHIFT - -int __init -init_ext4_xattr(void) -{ - ext4_xattr_cache = mb_cache_create("ext4_xattr", NULL, - sizeof(struct mb_cache_entry) + - sizeof(((struct mb_cache_entry *) 0)->e_indexes[0]), 1, 6); - if (!ext4_xattr_cache) - return -ENOMEM; - return 0; -} - -void -exit_ext4_xattr(void) -{ - if (ext4_xattr_cache) - mb_cache_destroy(ext4_xattr_cache); - ext4_xattr_cache = NULL; -} diff --git a/trunk/fs/ext4/xattr.h b/trunk/fs/ext4/xattr.h deleted file mode 100644 index 79432b35398f..000000000000 --- a/trunk/fs/ext4/xattr.h +++ /dev/null @@ -1,145 +0,0 @@ -/* - File: fs/ext4/xattr.h - - On-disk format of extended attributes for the ext4 filesystem. - - (C) 2001 Andreas Gruenbacher, -*/ - -#include - -/* Magic value in attribute blocks */ -#define EXT4_XATTR_MAGIC 0xEA020000 - -/* Maximum number of references to one attribute block */ -#define EXT4_XATTR_REFCOUNT_MAX 1024 - -/* Name indexes */ -#define EXT4_XATTR_INDEX_USER 1 -#define EXT4_XATTR_INDEX_POSIX_ACL_ACCESS 2 -#define EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT 3 -#define EXT4_XATTR_INDEX_TRUSTED 4 -#define EXT4_XATTR_INDEX_LUSTRE 5 -#define EXT4_XATTR_INDEX_SECURITY 6 - -struct ext4_xattr_header { - __le32 h_magic; /* magic number for identification */ - __le32 h_refcount; /* reference count */ - __le32 h_blocks; /* number of disk blocks used */ - __le32 h_hash; /* hash value of all attributes */ - __u32 h_reserved[4]; /* zero right now */ -}; - -struct ext4_xattr_ibody_header { - __le32 h_magic; /* magic number for identification */ -}; - -struct ext4_xattr_entry { - __u8 e_name_len; /* length of name */ - __u8 e_name_index; /* attribute name index */ - __le16 e_value_offs; /* offset in disk block of value */ - __le32 e_value_block; /* disk block attribute is stored on (n/i) */ - __le32 e_value_size; /* size of attribute value */ - __le32 e_hash; /* hash value of name and value */ - char e_name[0]; /* attribute name */ -}; - -#define EXT4_XATTR_PAD_BITS 2 -#define EXT4_XATTR_PAD (1<e_name_len)) ) -#define EXT4_XATTR_SIZE(size) \ - (((size) + EXT4_XATTR_ROUND) & ~EXT4_XATTR_ROUND) - -# ifdef CONFIG_EXT4DEV_FS_XATTR - -extern struct xattr_handler ext4_xattr_user_handler; -extern struct xattr_handler ext4_xattr_trusted_handler; -extern struct xattr_handler ext4_xattr_acl_access_handler; -extern struct xattr_handler ext4_xattr_acl_default_handler; -extern struct xattr_handler ext4_xattr_security_handler; - -extern ssize_t ext4_listxattr(struct dentry *, char *, size_t); - -extern int ext4_xattr_get(struct inode *, int, const char *, void *, size_t); -extern int ext4_xattr_list(struct inode *, char *, size_t); -extern int ext4_xattr_set(struct inode *, int, const char *, const void *, size_t, int); -extern int ext4_xattr_set_handle(handle_t *, struct inode *, int, const char *, const void *, size_t, int); - -extern void ext4_xattr_delete_inode(handle_t *, struct inode *); -extern void ext4_xattr_put_super(struct super_block *); - -extern int init_ext4_xattr(void); -extern void exit_ext4_xattr(void); - -extern struct xattr_handler *ext4_xattr_handlers[]; - -# else /* CONFIG_EXT4DEV_FS_XATTR */ - -static inline int -ext4_xattr_get(struct inode *inode, int name_index, const char *name, - void *buffer, size_t size, int flags) -{ - return -EOPNOTSUPP; -} - -static inline int -ext4_xattr_list(struct inode *inode, void *buffer, size_t size) -{ - return -EOPNOTSUPP; -} - -static inline int -ext4_xattr_set(struct inode *inode, int name_index, const char *name, - const void *value, size_t size, int flags) -{ - return -EOPNOTSUPP; -} - -static inline int -ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, - const char *name, const void *value, size_t size, int flags) -{ - return -EOPNOTSUPP; -} - -static inline void -ext4_xattr_delete_inode(handle_t *handle, struct inode *inode) -{ -} - -static inline void -ext4_xattr_put_super(struct super_block *sb) -{ -} - -static inline int -init_ext4_xattr(void) -{ - return 0; -} - -static inline void -exit_ext4_xattr(void) -{ -} - -#define ext4_xattr_handlers NULL - -# endif /* CONFIG_EXT4DEV_FS_XATTR */ - -#ifdef CONFIG_EXT4DEV_FS_SECURITY -extern int ext4_init_security(handle_t *handle, struct inode *inode, - struct inode *dir); -#else -static inline int ext4_init_security(handle_t *handle, struct inode *inode, - struct inode *dir) -{ - return 0; -} -#endif diff --git a/trunk/fs/ext4/xattr_security.c b/trunk/fs/ext4/xattr_security.c deleted file mode 100644 index b6a6861951f9..000000000000 --- a/trunk/fs/ext4/xattr_security.c +++ /dev/null @@ -1,77 +0,0 @@ -/* - * linux/fs/ext4/xattr_security.c - * Handler for storing security labels as extended attributes. - */ - -#include -#include -#include -#include -#include -#include -#include -#include "xattr.h" - -static size_t -ext4_xattr_security_list(struct inode *inode, char *list, size_t list_size, - const char *name, size_t name_len) -{ - const size_t prefix_len = sizeof(XATTR_SECURITY_PREFIX)-1; - const size_t total_len = prefix_len + name_len + 1; - - - if (list && total_len <= list_size) { - memcpy(list, XATTR_SECURITY_PREFIX, prefix_len); - memcpy(list+prefix_len, name, name_len); - list[prefix_len + name_len] = '\0'; - } - return total_len; -} - -static int -ext4_xattr_security_get(struct inode *inode, const char *name, - void *buffer, size_t size) -{ - if (strcmp(name, "") == 0) - return -EINVAL; - return ext4_xattr_get(inode, EXT4_XATTR_INDEX_SECURITY, name, - buffer, size); -} - -static int -ext4_xattr_security_set(struct inode *inode, const char *name, - const void *value, size_t size, int flags) -{ - if (strcmp(name, "") == 0) - return -EINVAL; - return ext4_xattr_set(inode, EXT4_XATTR_INDEX_SECURITY, name, - value, size, flags); -} - -int -ext4_init_security(handle_t *handle, struct inode *inode, struct inode *dir) -{ - int err; - size_t len; - void *value; - char *name; - - err = security_inode_init_security(inode, dir, &name, &value, &len); - if (err) { - if (err == -EOPNOTSUPP) - return 0; - return err; - } - err = ext4_xattr_set_handle(handle, inode, EXT4_XATTR_INDEX_SECURITY, - name, value, len, 0); - kfree(name); - kfree(value); - return err; -} - -struct xattr_handler ext4_xattr_security_handler = { - .prefix = XATTR_SECURITY_PREFIX, - .list = ext4_xattr_security_list, - .get = ext4_xattr_security_get, - .set = ext4_xattr_security_set, -}; diff --git a/trunk/fs/ext4/xattr_trusted.c b/trunk/fs/ext4/xattr_trusted.c deleted file mode 100644 index b76f2dbc82da..000000000000 --- a/trunk/fs/ext4/xattr_trusted.c +++ /dev/null @@ -1,62 +0,0 @@ -/* - * linux/fs/ext4/xattr_trusted.c - * Handler for trusted extended attributes. - * - * Copyright (C) 2003 by Andreas Gruenbacher, - */ - -#include -#include -#include -#include -#include -#include -#include -#include "xattr.h" - -#define XATTR_TRUSTED_PREFIX "trusted." - -static size_t -ext4_xattr_trusted_list(struct inode *inode, char *list, size_t list_size, - const char *name, size_t name_len) -{ - const size_t prefix_len = sizeof(XATTR_TRUSTED_PREFIX)-1; - const size_t total_len = prefix_len + name_len + 1; - - if (!capable(CAP_SYS_ADMIN)) - return 0; - - if (list && total_len <= list_size) { - memcpy(list, XATTR_TRUSTED_PREFIX, prefix_len); - memcpy(list+prefix_len, name, name_len); - list[prefix_len + name_len] = '\0'; - } - return total_len; -} - -static int -ext4_xattr_trusted_get(struct inode *inode, const char *name, - void *buffer, size_t size) -{ - if (strcmp(name, "") == 0) - return -EINVAL; - return ext4_xattr_get(inode, EXT4_XATTR_INDEX_TRUSTED, name, - buffer, size); -} - -static int -ext4_xattr_trusted_set(struct inode *inode, const char *name, - const void *value, size_t size, int flags) -{ - if (strcmp(name, "") == 0) - return -EINVAL; - return ext4_xattr_set(inode, EXT4_XATTR_INDEX_TRUSTED, name, - value, size, flags); -} - -struct xattr_handler ext4_xattr_trusted_handler = { - .prefix = XATTR_TRUSTED_PREFIX, - .list = ext4_xattr_trusted_list, - .get = ext4_xattr_trusted_get, - .set = ext4_xattr_trusted_set, -}; diff --git a/trunk/fs/ext4/xattr_user.c b/trunk/fs/ext4/xattr_user.c deleted file mode 100644 index c53cded0761a..000000000000 --- a/trunk/fs/ext4/xattr_user.c +++ /dev/null @@ -1,64 +0,0 @@ -/* - * linux/fs/ext4/xattr_user.c - * Handler for extended user attributes. - * - * Copyright (C) 2001 by Andreas Gruenbacher, - */ - -#include -#include -#include -#include -#include -#include -#include "xattr.h" - -#define XATTR_USER_PREFIX "user." - -static size_t -ext4_xattr_user_list(struct inode *inode, char *list, size_t list_size, - const char *name, size_t name_len) -{ - const size_t prefix_len = sizeof(XATTR_USER_PREFIX)-1; - const size_t total_len = prefix_len + name_len + 1; - - if (!test_opt(inode->i_sb, XATTR_USER)) - return 0; - - if (list && total_len <= list_size) { - memcpy(list, XATTR_USER_PREFIX, prefix_len); - memcpy(list+prefix_len, name, name_len); - list[prefix_len + name_len] = '\0'; - } - return total_len; -} - -static int -ext4_xattr_user_get(struct inode *inode, const char *name, - void *buffer, size_t size) -{ - if (strcmp(name, "") == 0) - return -EINVAL; - if (!test_opt(inode->i_sb, XATTR_USER)) - return -EOPNOTSUPP; - return ext4_xattr_get(inode, EXT4_XATTR_INDEX_USER, name, buffer, size); -} - -static int -ext4_xattr_user_set(struct inode *inode, const char *name, - const void *value, size_t size, int flags) -{ - if (strcmp(name, "") == 0) - return -EINVAL; - if (!test_opt(inode->i_sb, XATTR_USER)) - return -EOPNOTSUPP; - return ext4_xattr_set(inode, EXT4_XATTR_INDEX_USER, name, - value, size, flags); -} - -struct xattr_handler ext4_xattr_user_handler = { - .prefix = XATTR_USER_PREFIX, - .list = ext4_xattr_user_list, - .get = ext4_xattr_user_get, - .set = ext4_xattr_user_set, -}; diff --git a/trunk/fs/fat/inode.c b/trunk/fs/fat/inode.c index 78945b53b0f8..4613cb202170 100644 --- a/trunk/fs/fat/inode.c +++ b/trunk/fs/fat/inode.c @@ -1472,7 +1472,7 @@ int fat_flush_inodes(struct super_block *sb, struct inode *i1, struct inode *i2) ret = writeback_inode(i1); if (!ret && i2) ret = writeback_inode(i2); - if (!ret) { + if (!ret && sb) { struct address_space *mapping = sb->s_bdev->bd_inode->i_mapping; ret = filemap_flush(mapping); } diff --git a/trunk/fs/gfs2/locking/dlm/mount.c b/trunk/fs/gfs2/locking/dlm/mount.c index cdd1694e889b..1f94dd35a943 100644 --- a/trunk/fs/gfs2/locking/dlm/mount.c +++ b/trunk/fs/gfs2/locking/dlm/mount.c @@ -45,7 +45,7 @@ static struct gdlm_ls *init_gdlm(lm_callback_t cb, struct gfs2_sbd *sdp, strncpy(buf, table_name, 256); buf[255] = '\0'; - p = strchr(buf, ':'); + p = strstr(buf, ":"); if (!p) { log_info("invalid table_name \"%s\"", table_name); kfree(ls); diff --git a/trunk/fs/gfs2/log.c b/trunk/fs/gfs2/log.c index 72eec6542d6a..554fe5bd1b72 100644 --- a/trunk/fs/gfs2/log.c +++ b/trunk/fs/gfs2/log.c @@ -569,15 +569,16 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl) else if (sdp->sd_log_tail != current_tail(sdp) && !sdp->sd_log_idle) log_write_header(sdp, 0, PULL); lops_after_commit(sdp, ai); - - gfs2_log_lock(sdp); sdp->sd_log_head = sdp->sd_log_flush_head; + sdp->sd_log_blks_free -= sdp->sd_log_num_hdrs; + sdp->sd_log_blks_reserved = 0; sdp->sd_log_commited_buf = 0; sdp->sd_log_num_hdrs = 0; sdp->sd_log_commited_revoke = 0; + gfs2_log_lock(sdp); if (!list_empty(&ai->ai_ail1_list)) { list_add(&ai->ai_list, &sdp->sd_ail1_list); ai = NULL; diff --git a/trunk/fs/gfs2/lops.c b/trunk/fs/gfs2/lops.c index ab6d1115f95d..881e337b6a70 100644 --- a/trunk/fs/gfs2/lops.c +++ b/trunk/fs/gfs2/lops.c @@ -492,7 +492,7 @@ static int gfs2_check_magic(struct buffer_head *bh) ptr = kaddr + bh_offset(bh); if (*ptr == cpu_to_be32(GFS2_MAGIC)) rv = 1; - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(page, KM_USER0); return rv; } @@ -626,7 +626,7 @@ static void databuf_lo_before_commit(struct gfs2_sbd *sdp) memcpy(bh->b_data, kaddr + bh_offset(bd2->bd_bh), sdp->sd_sb.sb_bsize); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(page, KM_USER0); *(__be32 *)bh->b_data = 0; } else { bh = gfs2_log_fake_buf(sdp, bd2->bd_bh); diff --git a/trunk/fs/gfs2/ops_address.c b/trunk/fs/gfs2/ops_address.c index e0599fed99ce..4fb743f4e4a4 100644 --- a/trunk/fs/gfs2/ops_address.c +++ b/trunk/fs/gfs2/ops_address.c @@ -162,7 +162,7 @@ static int zero_readpage(struct page *page) kaddr = kmap_atomic(page, KM_USER0); memset(kaddr, 0, PAGE_CACHE_SIZE); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(page, KM_USER0); SetPageUptodate(page); @@ -195,7 +195,7 @@ static int stuffed_readpage(struct gfs2_inode *ip, struct page *page) memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), ip->i_di.di_size); memset(kaddr + ip->i_di.di_size, 0, PAGE_CACHE_SIZE - ip->i_di.di_size); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(page, KM_USER0); brelse(dibh); @@ -370,22 +370,19 @@ static int gfs2_prepare_write(struct file *file, struct page *page, loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + from; loff_t end = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; struct gfs2_alloc *al; - unsigned int write_len = to - from; - gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_ATIME|GL_AOP, &ip->i_gh); error = gfs2_glock_nq_m_atime(1, &ip->i_gh); if (error) goto out_uninit; - gfs2_write_calc_reserv(ip, write_len, &data_blocks, &ind_blocks); + gfs2_write_calc_reserv(ip, to - from, &data_blocks, &ind_blocks); - error = gfs2_write_alloc_required(ip, pos, write_len, &alloc_required); + error = gfs2_write_alloc_required(ip, pos, from - to, &alloc_required); if (error) goto out_unlock; - ip->i_alloc.al_requested = 0; if (alloc_required) { al = gfs2_alloc_get(ip); @@ -485,7 +482,7 @@ static int gfs2_commit_write(struct file *file, struct page *page, kaddr = kmap_atomic(page, KM_USER0); memcpy(dibh->b_data + sizeof(struct gfs2_dinode) + from, kaddr + from, to - from); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(page, KM_USER0); SetPageUptodate(page); diff --git a/trunk/fs/gfs2/rgrp.h b/trunk/fs/gfs2/rgrp.h index b01e0cfc99b5..9eedfd12bfff 100644 --- a/trunk/fs/gfs2/rgrp.h +++ b/trunk/fs/gfs2/rgrp.h @@ -32,7 +32,7 @@ void gfs2_rgrp_repolish_clones(struct gfs2_rgrpd *rgd); struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip); static inline void gfs2_alloc_put(struct gfs2_inode *ip) { - return; /* So we can see where ip->i_alloc is used */ + return; /* Se we can see where ip->i_alloc is used */ } int gfs2_inplace_reserve_i(struct gfs2_inode *ip, diff --git a/trunk/fs/hugetlbfs/inode.c b/trunk/fs/hugetlbfs/inode.c index 4ee3f006b861..5e03b2f67b93 100644 --- a/trunk/fs/hugetlbfs/inode.c +++ b/trunk/fs/hugetlbfs/inode.c @@ -293,7 +293,7 @@ hugetlb_vmtruncate_list(struct prio_tree_root *root, unsigned long h_pgoff) if (h_vm_pgoff >= h_pgoff) v_offset = 0; - __unmap_hugepage_range(vma, + unmap_hugepage_range(vma, vma->vm_start + v_offset, vma->vm_end); } } diff --git a/trunk/fs/ioprio.c b/trunk/fs/ioprio.c index 89e8da112a75..6dc6721d9e82 100644 --- a/trunk/fs/ioprio.c +++ b/trunk/fs/ioprio.c @@ -150,6 +150,11 @@ int ioprio_best(unsigned short aprio, unsigned short bprio) unsigned short aclass = IOPRIO_PRIO_CLASS(aprio); unsigned short bclass = IOPRIO_PRIO_CLASS(bprio); + if (!ioprio_valid(aprio)) + return bprio; + if (!ioprio_valid(bprio)) + return aprio; + if (aclass == IOPRIO_CLASS_NONE) aclass = IOPRIO_CLASS_BE; if (bclass == IOPRIO_CLASS_NONE) diff --git a/trunk/fs/jbd/journal.c b/trunk/fs/jbd/journal.c index b85c686b60db..c518dd8fe60a 100644 --- a/trunk/fs/jbd/journal.c +++ b/trunk/fs/jbd/journal.c @@ -725,7 +725,6 @@ journal_t * journal_init_dev(struct block_device *bdev, __FUNCTION__); kfree(journal); journal = NULL; - goto out; } journal->j_dev = bdev; journal->j_fs_dev = fs_dev; @@ -736,7 +735,7 @@ journal_t * journal_init_dev(struct block_device *bdev, J_ASSERT(bh != NULL); journal->j_sb_buffer = bh; journal->j_superblock = (journal_superblock_t *)bh->b_data; -out: + return journal; } diff --git a/trunk/fs/jbd2/Makefile b/trunk/fs/jbd2/Makefile deleted file mode 100644 index 802a3413872a..000000000000 --- a/trunk/fs/jbd2/Makefile +++ /dev/null @@ -1,7 +0,0 @@ -# -# Makefile for the linux journaling routines. -# - -obj-$(CONFIG_JBD2) += jbd2.o - -jbd2-objs := transaction.o commit.o recovery.o checkpoint.o revoke.o journal.o diff --git a/trunk/fs/jbd2/checkpoint.c b/trunk/fs/jbd2/checkpoint.c deleted file mode 100644 index 68039fa9a566..000000000000 --- a/trunk/fs/jbd2/checkpoint.c +++ /dev/null @@ -1,697 +0,0 @@ -/* - * linux/fs/checkpoint.c - * - * Written by Stephen C. Tweedie , 1999 - * - * Copyright 1999 Red Hat Software --- All Rights Reserved - * - * This file is part of the Linux kernel and is made available under - * the terms of the GNU General Public License, version 2, or at your - * option, any later version, incorporated herein by reference. - * - * Checkpoint routines for the generic filesystem journaling code. - * Part of the ext2fs journaling system. - * - * Checkpointing is the process of ensuring that a section of the log is - * committed fully to disk, so that that portion of the log can be - * reused. - */ - -#include -#include -#include -#include -#include - -/* - * Unlink a buffer from a transaction checkpoint list. - * - * Called with j_list_lock held. - */ -static inline void __buffer_unlink_first(struct journal_head *jh) -{ - transaction_t *transaction = jh->b_cp_transaction; - - jh->b_cpnext->b_cpprev = jh->b_cpprev; - jh->b_cpprev->b_cpnext = jh->b_cpnext; - if (transaction->t_checkpoint_list == jh) { - transaction->t_checkpoint_list = jh->b_cpnext; - if (transaction->t_checkpoint_list == jh) - transaction->t_checkpoint_list = NULL; - } -} - -/* - * Unlink a buffer from a transaction checkpoint(io) list. - * - * Called with j_list_lock held. - */ -static inline void __buffer_unlink(struct journal_head *jh) -{ - transaction_t *transaction = jh->b_cp_transaction; - - __buffer_unlink_first(jh); - if (transaction->t_checkpoint_io_list == jh) { - transaction->t_checkpoint_io_list = jh->b_cpnext; - if (transaction->t_checkpoint_io_list == jh) - transaction->t_checkpoint_io_list = NULL; - } -} - -/* - * Move a buffer from the checkpoint list to the checkpoint io list - * - * Called with j_list_lock held - */ -static inline void __buffer_relink_io(struct journal_head *jh) -{ - transaction_t *transaction = jh->b_cp_transaction; - - __buffer_unlink_first(jh); - - if (!transaction->t_checkpoint_io_list) { - jh->b_cpnext = jh->b_cpprev = jh; - } else { - jh->b_cpnext = transaction->t_checkpoint_io_list; - jh->b_cpprev = transaction->t_checkpoint_io_list->b_cpprev; - jh->b_cpprev->b_cpnext = jh; - jh->b_cpnext->b_cpprev = jh; - } - transaction->t_checkpoint_io_list = jh; -} - -/* - * Try to release a checkpointed buffer from its transaction. - * Returns 1 if we released it and 2 if we also released the - * whole transaction. - * - * Requires j_list_lock - * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it - */ -static int __try_to_free_cp_buf(struct journal_head *jh) -{ - int ret = 0; - struct buffer_head *bh = jh2bh(jh); - - if (jh->b_jlist == BJ_None && !buffer_locked(bh) && !buffer_dirty(bh)) { - JBUFFER_TRACE(jh, "remove from checkpoint list"); - ret = __jbd2_journal_remove_checkpoint(jh) + 1; - jbd_unlock_bh_state(bh); - jbd2_journal_remove_journal_head(bh); - BUFFER_TRACE(bh, "release"); - __brelse(bh); - } else { - jbd_unlock_bh_state(bh); - } - return ret; -} - -/* - * __jbd2_log_wait_for_space: wait until there is space in the journal. - * - * Called under j-state_lock *only*. It will be unlocked if we have to wait - * for a checkpoint to free up some space in the log. - */ -void __jbd2_log_wait_for_space(journal_t *journal) -{ - int nblocks; - assert_spin_locked(&journal->j_state_lock); - - nblocks = jbd_space_needed(journal); - while (__jbd2_log_space_left(journal) < nblocks) { - if (journal->j_flags & JBD2_ABORT) - return; - spin_unlock(&journal->j_state_lock); - mutex_lock(&journal->j_checkpoint_mutex); - - /* - * Test again, another process may have checkpointed while we - * were waiting for the checkpoint lock - */ - spin_lock(&journal->j_state_lock); - nblocks = jbd_space_needed(journal); - if (__jbd2_log_space_left(journal) < nblocks) { - spin_unlock(&journal->j_state_lock); - jbd2_log_do_checkpoint(journal); - spin_lock(&journal->j_state_lock); - } - mutex_unlock(&journal->j_checkpoint_mutex); - } -} - -/* - * We were unable to perform jbd_trylock_bh_state() inside j_list_lock. - * The caller must restart a list walk. Wait for someone else to run - * jbd_unlock_bh_state(). - */ -static void jbd_sync_bh(journal_t *journal, struct buffer_head *bh) - __releases(journal->j_list_lock) -{ - get_bh(bh); - spin_unlock(&journal->j_list_lock); - jbd_lock_bh_state(bh); - jbd_unlock_bh_state(bh); - put_bh(bh); -} - -/* - * Clean up transaction's list of buffers submitted for io. - * We wait for any pending IO to complete and remove any clean - * buffers. Note that we take the buffers in the opposite ordering - * from the one in which they were submitted for IO. - * - * Called with j_list_lock held. - */ -static void __wait_cp_io(journal_t *journal, transaction_t *transaction) -{ - struct journal_head *jh; - struct buffer_head *bh; - tid_t this_tid; - int released = 0; - - this_tid = transaction->t_tid; -restart: - /* Did somebody clean up the transaction in the meanwhile? */ - if (journal->j_checkpoint_transactions != transaction || - transaction->t_tid != this_tid) - return; - while (!released && transaction->t_checkpoint_io_list) { - jh = transaction->t_checkpoint_io_list; - bh = jh2bh(jh); - if (!jbd_trylock_bh_state(bh)) { - jbd_sync_bh(journal, bh); - spin_lock(&journal->j_list_lock); - goto restart; - } - if (buffer_locked(bh)) { - atomic_inc(&bh->b_count); - spin_unlock(&journal->j_list_lock); - jbd_unlock_bh_state(bh); - wait_on_buffer(bh); - /* the journal_head may have gone by now */ - BUFFER_TRACE(bh, "brelse"); - __brelse(bh); - spin_lock(&journal->j_list_lock); - goto restart; - } - /* - * Now in whatever state the buffer currently is, we know that - * it has been written out and so we can drop it from the list - */ - released = __jbd2_journal_remove_checkpoint(jh); - jbd_unlock_bh_state(bh); - jbd2_journal_remove_journal_head(bh); - __brelse(bh); - } -} - -#define NR_BATCH 64 - -static void -__flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count) -{ - int i; - - ll_rw_block(SWRITE, *batch_count, bhs); - for (i = 0; i < *batch_count; i++) { - struct buffer_head *bh = bhs[i]; - clear_buffer_jwrite(bh); - BUFFER_TRACE(bh, "brelse"); - __brelse(bh); - } - *batch_count = 0; -} - -/* - * Try to flush one buffer from the checkpoint list to disk. - * - * Return 1 if something happened which requires us to abort the current - * scan of the checkpoint list. - * - * Called with j_list_lock held and drops it if 1 is returned - * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it - */ -static int __process_buffer(journal_t *journal, struct journal_head *jh, - struct buffer_head **bhs, int *batch_count) -{ - struct buffer_head *bh = jh2bh(jh); - int ret = 0; - - if (buffer_locked(bh)) { - atomic_inc(&bh->b_count); - spin_unlock(&journal->j_list_lock); - jbd_unlock_bh_state(bh); - wait_on_buffer(bh); - /* the journal_head may have gone by now */ - BUFFER_TRACE(bh, "brelse"); - __brelse(bh); - ret = 1; - } else if (jh->b_transaction != NULL) { - transaction_t *t = jh->b_transaction; - tid_t tid = t->t_tid; - - spin_unlock(&journal->j_list_lock); - jbd_unlock_bh_state(bh); - jbd2_log_start_commit(journal, tid); - jbd2_log_wait_commit(journal, tid); - ret = 1; - } else if (!buffer_dirty(bh)) { - J_ASSERT_JH(jh, !buffer_jbddirty(bh)); - BUFFER_TRACE(bh, "remove from checkpoint"); - __jbd2_journal_remove_checkpoint(jh); - spin_unlock(&journal->j_list_lock); - jbd_unlock_bh_state(bh); - jbd2_journal_remove_journal_head(bh); - __brelse(bh); - ret = 1; - } else { - /* - * Important: we are about to write the buffer, and - * possibly block, while still holding the journal lock. - * We cannot afford to let the transaction logic start - * messing around with this buffer before we write it to - * disk, as that would break recoverability. - */ - BUFFER_TRACE(bh, "queue"); - get_bh(bh); - J_ASSERT_BH(bh, !buffer_jwrite(bh)); - set_buffer_jwrite(bh); - bhs[*batch_count] = bh; - __buffer_relink_io(jh); - jbd_unlock_bh_state(bh); - (*batch_count)++; - if (*batch_count == NR_BATCH) { - spin_unlock(&journal->j_list_lock); - __flush_batch(journal, bhs, batch_count); - ret = 1; - } - } - return ret; -} - -/* - * Perform an actual checkpoint. We take the first transaction on the - * list of transactions to be checkpointed and send all its buffers - * to disk. We submit larger chunks of data at once. - * - * The journal should be locked before calling this function. - */ -int jbd2_log_do_checkpoint(journal_t *journal) -{ - transaction_t *transaction; - tid_t this_tid; - int result; - - jbd_debug(1, "Start checkpoint\n"); - - /* - * First thing: if there are any transactions in the log which - * don't need checkpointing, just eliminate them from the - * journal straight away. - */ - result = jbd2_cleanup_journal_tail(journal); - jbd_debug(1, "cleanup_journal_tail returned %d\n", result); - if (result <= 0) - return result; - - /* - * OK, we need to start writing disk blocks. Take one transaction - * and write it. - */ - spin_lock(&journal->j_list_lock); - if (!journal->j_checkpoint_transactions) - goto out; - transaction = journal->j_checkpoint_transactions; - this_tid = transaction->t_tid; -restart: - /* - * If someone cleaned up this transaction while we slept, we're - * done (maybe it's a new transaction, but it fell at the same - * address). - */ - if (journal->j_checkpoint_transactions == transaction && - transaction->t_tid == this_tid) { - int batch_count = 0; - struct buffer_head *bhs[NR_BATCH]; - struct journal_head *jh; - int retry = 0; - - while (!retry && transaction->t_checkpoint_list) { - struct buffer_head *bh; - - jh = transaction->t_checkpoint_list; - bh = jh2bh(jh); - if (!jbd_trylock_bh_state(bh)) { - jbd_sync_bh(journal, bh); - retry = 1; - break; - } - retry = __process_buffer(journal, jh, bhs,&batch_count); - if (!retry && lock_need_resched(&journal->j_list_lock)){ - spin_unlock(&journal->j_list_lock); - retry = 1; - break; - } - } - - if (batch_count) { - if (!retry) { - spin_unlock(&journal->j_list_lock); - retry = 1; - } - __flush_batch(journal, bhs, &batch_count); - } - - if (retry) { - spin_lock(&journal->j_list_lock); - goto restart; - } - /* - * Now we have cleaned up the first transaction's checkpoint - * list. Let's clean up the second one - */ - __wait_cp_io(journal, transaction); - } -out: - spin_unlock(&journal->j_list_lock); - result = jbd2_cleanup_journal_tail(journal); - if (result < 0) - return result; - return 0; -} - -/* - * Check the list of checkpoint transactions for the journal to see if - * we have already got rid of any since the last update of the log tail - * in the journal superblock. If so, we can instantly roll the - * superblock forward to remove those transactions from the log. - * - * Return <0 on error, 0 on success, 1 if there was nothing to clean up. - * - * Called with the journal lock held. - * - * This is the only part of the journaling code which really needs to be - * aware of transaction aborts. Checkpointing involves writing to the - * main filesystem area rather than to the journal, so it can proceed - * even in abort state, but we must not update the journal superblock if - * we have an abort error outstanding. - */ - -int jbd2_cleanup_journal_tail(journal_t *journal) -{ - transaction_t * transaction; - tid_t first_tid; - unsigned long blocknr, freed; - - /* OK, work out the oldest transaction remaining in the log, and - * the log block it starts at. - * - * If the log is now empty, we need to work out which is the - * next transaction ID we will write, and where it will - * start. */ - - spin_lock(&journal->j_state_lock); - spin_lock(&journal->j_list_lock); - transaction = journal->j_checkpoint_transactions; - if (transaction) { - first_tid = transaction->t_tid; - blocknr = transaction->t_log_start; - } else if ((transaction = journal->j_committing_transaction) != NULL) { - first_tid = transaction->t_tid; - blocknr = transaction->t_log_start; - } else if ((transaction = journal->j_running_transaction) != NULL) { - first_tid = transaction->t_tid; - blocknr = journal->j_head; - } else { - first_tid = journal->j_transaction_sequence; - blocknr = journal->j_head; - } - spin_unlock(&journal->j_list_lock); - J_ASSERT(blocknr != 0); - - /* If the oldest pinned transaction is at the tail of the log - already then there's not much we can do right now. */ - if (journal->j_tail_sequence == first_tid) { - spin_unlock(&journal->j_state_lock); - return 1; - } - - /* OK, update the superblock to recover the freed space. - * Physical blocks come first: have we wrapped beyond the end of - * the log? */ - freed = blocknr - journal->j_tail; - if (blocknr < journal->j_tail) - freed = freed + journal->j_last - journal->j_first; - - jbd_debug(1, - "Cleaning journal tail from %d to %d (offset %lu), " - "freeing %lu\n", - journal->j_tail_sequence, first_tid, blocknr, freed); - - journal->j_free += freed; - journal->j_tail_sequence = first_tid; - journal->j_tail = blocknr; - spin_unlock(&journal->j_state_lock); - if (!(journal->j_flags & JBD2_ABORT)) - jbd2_journal_update_superblock(journal, 1); - return 0; -} - - -/* Checkpoint list management */ - -/* - * journal_clean_one_cp_list - * - * Find all the written-back checkpoint buffers in the given list and release them. - * - * Called with the journal locked. - * Called with j_list_lock held. - * Returns number of bufers reaped (for debug) - */ - -static int journal_clean_one_cp_list(struct journal_head *jh, int *released) -{ - struct journal_head *last_jh; - struct journal_head *next_jh = jh; - int ret, freed = 0; - - *released = 0; - if (!jh) - return 0; - - last_jh = jh->b_cpprev; - do { - jh = next_jh; - next_jh = jh->b_cpnext; - /* Use trylock because of the ranking */ - if (jbd_trylock_bh_state(jh2bh(jh))) { - ret = __try_to_free_cp_buf(jh); - if (ret) { - freed++; - if (ret == 2) { - *released = 1; - return freed; - } - } - } - /* - * This function only frees up some memory - * if possible so we dont have an obligation - * to finish processing. Bail out if preemption - * requested: - */ - if (need_resched()) - return freed; - } while (jh != last_jh); - - return freed; -} - -/* - * journal_clean_checkpoint_list - * - * Find all the written-back checkpoint buffers in the journal and release them. - * - * Called with the journal locked. - * Called with j_list_lock held. - * Returns number of buffers reaped (for debug) - */ - -int __jbd2_journal_clean_checkpoint_list(journal_t *journal) -{ - transaction_t *transaction, *last_transaction, *next_transaction; - int ret = 0; - int released; - - transaction = journal->j_checkpoint_transactions; - if (!transaction) - goto out; - - last_transaction = transaction->t_cpprev; - next_transaction = transaction; - do { - transaction = next_transaction; - next_transaction = transaction->t_cpnext; - ret += journal_clean_one_cp_list(transaction-> - t_checkpoint_list, &released); - /* - * This function only frees up some memory if possible so we - * dont have an obligation to finish processing. Bail out if - * preemption requested: - */ - if (need_resched()) - goto out; - if (released) - continue; - /* - * It is essential that we are as careful as in the case of - * t_checkpoint_list with removing the buffer from the list as - * we can possibly see not yet submitted buffers on io_list - */ - ret += journal_clean_one_cp_list(transaction-> - t_checkpoint_io_list, &released); - if (need_resched()) - goto out; - } while (transaction != last_transaction); -out: - return ret; -} - -/* - * journal_remove_checkpoint: called after a buffer has been committed - * to disk (either by being write-back flushed to disk, or being - * committed to the log). - * - * We cannot safely clean a transaction out of the log until all of the - * buffer updates committed in that transaction have safely been stored - * elsewhere on disk. To achieve this, all of the buffers in a - * transaction need to be maintained on the transaction's checkpoint - * lists until they have been rewritten, at which point this function is - * called to remove the buffer from the existing transaction's - * checkpoint lists. - * - * The function returns 1 if it frees the transaction, 0 otherwise. - * - * This function is called with the journal locked. - * This function is called with j_list_lock held. - * This function is called with jbd_lock_bh_state(jh2bh(jh)) - */ - -int __jbd2_journal_remove_checkpoint(struct journal_head *jh) -{ - transaction_t *transaction; - journal_t *journal; - int ret = 0; - - JBUFFER_TRACE(jh, "entry"); - - if ((transaction = jh->b_cp_transaction) == NULL) { - JBUFFER_TRACE(jh, "not on transaction"); - goto out; - } - journal = transaction->t_journal; - - __buffer_unlink(jh); - jh->b_cp_transaction = NULL; - - if (transaction->t_checkpoint_list != NULL || - transaction->t_checkpoint_io_list != NULL) - goto out; - JBUFFER_TRACE(jh, "transaction has no more buffers"); - - /* - * There is one special case to worry about: if we have just pulled the - * buffer off a committing transaction's forget list, then even if the - * checkpoint list is empty, the transaction obviously cannot be - * dropped! - * - * The locking here around j_committing_transaction is a bit sleazy. - * See the comment at the end of jbd2_journal_commit_transaction(). - */ - if (transaction == journal->j_committing_transaction) { - JBUFFER_TRACE(jh, "belongs to committing transaction"); - goto out; - } - - /* OK, that was the last buffer for the transaction: we can now - safely remove this transaction from the log */ - - __jbd2_journal_drop_transaction(journal, transaction); - - /* Just in case anybody was waiting for more transactions to be - checkpointed... */ - wake_up(&journal->j_wait_logspace); - ret = 1; -out: - JBUFFER_TRACE(jh, "exit"); - return ret; -} - -/* - * journal_insert_checkpoint: put a committed buffer onto a checkpoint - * list so that we know when it is safe to clean the transaction out of - * the log. - * - * Called with the journal locked. - * Called with j_list_lock held. - */ -void __jbd2_journal_insert_checkpoint(struct journal_head *jh, - transaction_t *transaction) -{ - JBUFFER_TRACE(jh, "entry"); - J_ASSERT_JH(jh, buffer_dirty(jh2bh(jh)) || buffer_jbddirty(jh2bh(jh))); - J_ASSERT_JH(jh, jh->b_cp_transaction == NULL); - - jh->b_cp_transaction = transaction; - - if (!transaction->t_checkpoint_list) { - jh->b_cpnext = jh->b_cpprev = jh; - } else { - jh->b_cpnext = transaction->t_checkpoint_list; - jh->b_cpprev = transaction->t_checkpoint_list->b_cpprev; - jh->b_cpprev->b_cpnext = jh; - jh->b_cpnext->b_cpprev = jh; - } - transaction->t_checkpoint_list = jh; -} - -/* - * We've finished with this transaction structure: adios... - * - * The transaction must have no links except for the checkpoint by this - * point. - * - * Called with the journal locked. - * Called with j_list_lock held. - */ - -void __jbd2_journal_drop_transaction(journal_t *journal, transaction_t *transaction) -{ - assert_spin_locked(&journal->j_list_lock); - if (transaction->t_cpnext) { - transaction->t_cpnext->t_cpprev = transaction->t_cpprev; - transaction->t_cpprev->t_cpnext = transaction->t_cpnext; - if (journal->j_checkpoint_transactions == transaction) - journal->j_checkpoint_transactions = - transaction->t_cpnext; - if (journal->j_checkpoint_transactions == transaction) - journal->j_checkpoint_transactions = NULL; - } - - J_ASSERT(transaction->t_state == T_FINISHED); - J_ASSERT(transaction->t_buffers == NULL); - J_ASSERT(transaction->t_sync_datalist == NULL); - J_ASSERT(transaction->t_forget == NULL); - J_ASSERT(transaction->t_iobuf_list == NULL); - J_ASSERT(transaction->t_shadow_list == NULL); - J_ASSERT(transaction->t_log_list == NULL); - J_ASSERT(transaction->t_checkpoint_list == NULL); - J_ASSERT(transaction->t_checkpoint_io_list == NULL); - J_ASSERT(transaction->t_updates == 0); - J_ASSERT(journal->j_committing_transaction != transaction); - J_ASSERT(journal->j_running_transaction != transaction); - - jbd_debug(1, "Dropping transaction %d, all done\n", transaction->t_tid); - kfree(transaction); -} diff --git a/trunk/fs/jbd2/commit.c b/trunk/fs/jbd2/commit.c deleted file mode 100644 index 70b2ae1ef281..000000000000 --- a/trunk/fs/jbd2/commit.c +++ /dev/null @@ -1,920 +0,0 @@ -/* - * linux/fs/jbd2/commit.c - * - * Written by Stephen C. Tweedie , 1998 - * - * Copyright 1998 Red Hat corp --- All Rights Reserved - * - * This file is part of the Linux kernel and is made available under - * the terms of the GNU General Public License, version 2, or at your - * option, any later version, incorporated herein by reference. - * - * Journal commit routines for the generic filesystem journaling code; - * part of the ext2fs journaling system. - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -/* - * Default IO end handler for temporary BJ_IO buffer_heads. - */ -static void journal_end_buffer_io_sync(struct buffer_head *bh, int uptodate) -{ - BUFFER_TRACE(bh, ""); - if (uptodate) - set_buffer_uptodate(bh); - else - clear_buffer_uptodate(bh); - unlock_buffer(bh); -} - -/* - * When an ext3-ordered file is truncated, it is possible that many pages are - * not sucessfully freed, because they are attached to a committing transaction. - * After the transaction commits, these pages are left on the LRU, with no - * ->mapping, and with attached buffers. These pages are trivially reclaimable - * by the VM, but their apparent absence upsets the VM accounting, and it makes - * the numbers in /proc/meminfo look odd. - * - * So here, we have a buffer which has just come off the forget list. Look to - * see if we can strip all buffers from the backing page. - * - * Called under lock_journal(), and possibly under journal_datalist_lock. The - * caller provided us with a ref against the buffer, and we drop that here. - */ -static void release_buffer_page(struct buffer_head *bh) -{ - struct page *page; - - if (buffer_dirty(bh)) - goto nope; - if (atomic_read(&bh->b_count) != 1) - goto nope; - page = bh->b_page; - if (!page) - goto nope; - if (page->mapping) - goto nope; - - /* OK, it's a truncated page */ - if (TestSetPageLocked(page)) - goto nope; - - page_cache_get(page); - __brelse(bh); - try_to_free_buffers(page); - unlock_page(page); - page_cache_release(page); - return; - -nope: - __brelse(bh); -} - -/* - * Try to acquire jbd_lock_bh_state() against the buffer, when j_list_lock is - * held. For ranking reasons we must trylock. If we lose, schedule away and - * return 0. j_list_lock is dropped in this case. - */ -static int inverted_lock(journal_t *journal, struct buffer_head *bh) -{ - if (!jbd_trylock_bh_state(bh)) { - spin_unlock(&journal->j_list_lock); - schedule(); - return 0; - } - return 1; -} - -/* Done it all: now write the commit record. We should have - * cleaned up our previous buffers by now, so if we are in abort - * mode we can now just skip the rest of the journal write - * entirely. - * - * Returns 1 if the journal needs to be aborted or 0 on success - */ -static int journal_write_commit_record(journal_t *journal, - transaction_t *commit_transaction) -{ - struct journal_head *descriptor; - struct buffer_head *bh; - int i, ret; - int barrier_done = 0; - - if (is_journal_aborted(journal)) - return 0; - - descriptor = jbd2_journal_get_descriptor_buffer(journal); - if (!descriptor) - return 1; - - bh = jh2bh(descriptor); - - /* AKPM: buglet - add `i' to tmp! */ - for (i = 0; i < bh->b_size; i += 512) { - journal_header_t *tmp = (journal_header_t*)bh->b_data; - tmp->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER); - tmp->h_blocktype = cpu_to_be32(JBD2_COMMIT_BLOCK); - tmp->h_sequence = cpu_to_be32(commit_transaction->t_tid); - } - - JBUFFER_TRACE(descriptor, "write commit block"); - set_buffer_dirty(bh); - if (journal->j_flags & JBD2_BARRIER) { - set_buffer_ordered(bh); - barrier_done = 1; - } - ret = sync_dirty_buffer(bh); - /* is it possible for another commit to fail at roughly - * the same time as this one? If so, we don't want to - * trust the barrier flag in the super, but instead want - * to remember if we sent a barrier request - */ - if (ret == -EOPNOTSUPP && barrier_done) { - char b[BDEVNAME_SIZE]; - - printk(KERN_WARNING - "JBD: barrier-based sync failed on %s - " - "disabling barriers\n", - bdevname(journal->j_dev, b)); - spin_lock(&journal->j_state_lock); - journal->j_flags &= ~JBD2_BARRIER; - spin_unlock(&journal->j_state_lock); - - /* And try again, without the barrier */ - clear_buffer_ordered(bh); - set_buffer_uptodate(bh); - set_buffer_dirty(bh); - ret = sync_dirty_buffer(bh); - } - put_bh(bh); /* One for getblk() */ - jbd2_journal_put_journal_head(descriptor); - - return (ret == -EIO); -} - -static void journal_do_submit_data(struct buffer_head **wbuf, int bufs) -{ - int i; - - for (i = 0; i < bufs; i++) { - wbuf[i]->b_end_io = end_buffer_write_sync; - /* We use-up our safety reference in submit_bh() */ - submit_bh(WRITE, wbuf[i]); - } -} - -/* - * Submit all the data buffers to disk - */ -static void journal_submit_data_buffers(journal_t *journal, - transaction_t *commit_transaction) -{ - struct journal_head *jh; - struct buffer_head *bh; - int locked; - int bufs = 0; - struct buffer_head **wbuf = journal->j_wbuf; - - /* - * Whenever we unlock the journal and sleep, things can get added - * onto ->t_sync_datalist, so we have to keep looping back to - * write_out_data until we *know* that the list is empty. - * - * Cleanup any flushed data buffers from the data list. Even in - * abort mode, we want to flush this out as soon as possible. - */ -write_out_data: - cond_resched(); - spin_lock(&journal->j_list_lock); - - while (commit_transaction->t_sync_datalist) { - jh = commit_transaction->t_sync_datalist; - bh = jh2bh(jh); - locked = 0; - - /* Get reference just to make sure buffer does not disappear - * when we are forced to drop various locks */ - get_bh(bh); - /* If the buffer is dirty, we need to submit IO and hence - * we need the buffer lock. We try to lock the buffer without - * blocking. If we fail, we need to drop j_list_lock and do - * blocking lock_buffer(). - */ - if (buffer_dirty(bh)) { - if (test_set_buffer_locked(bh)) { - BUFFER_TRACE(bh, "needs blocking lock"); - spin_unlock(&journal->j_list_lock); - /* Write out all data to prevent deadlocks */ - journal_do_submit_data(wbuf, bufs); - bufs = 0; - lock_buffer(bh); - spin_lock(&journal->j_list_lock); - } - locked = 1; - } - /* We have to get bh_state lock. Again out of order, sigh. */ - if (!inverted_lock(journal, bh)) { - jbd_lock_bh_state(bh); - spin_lock(&journal->j_list_lock); - } - /* Someone already cleaned up the buffer? */ - if (!buffer_jbd(bh) - || jh->b_transaction != commit_transaction - || jh->b_jlist != BJ_SyncData) { - jbd_unlock_bh_state(bh); - if (locked) - unlock_buffer(bh); - BUFFER_TRACE(bh, "already cleaned up"); - put_bh(bh); - continue; - } - if (locked && test_clear_buffer_dirty(bh)) { - BUFFER_TRACE(bh, "needs writeout, adding to array"); - wbuf[bufs++] = bh; - __jbd2_journal_file_buffer(jh, commit_transaction, - BJ_Locked); - jbd_unlock_bh_state(bh); - if (bufs == journal->j_wbufsize) { - spin_unlock(&journal->j_list_lock); - journal_do_submit_data(wbuf, bufs); - bufs = 0; - goto write_out_data; - } - } - else { - BUFFER_TRACE(bh, "writeout complete: unfile"); - __jbd2_journal_unfile_buffer(jh); - jbd_unlock_bh_state(bh); - if (locked) - unlock_buffer(bh); - jbd2_journal_remove_journal_head(bh); - /* Once for our safety reference, once for - * jbd2_journal_remove_journal_head() */ - put_bh(bh); - put_bh(bh); - } - - if (lock_need_resched(&journal->j_list_lock)) { - spin_unlock(&journal->j_list_lock); - goto write_out_data; - } - } - spin_unlock(&journal->j_list_lock); - journal_do_submit_data(wbuf, bufs); -} - -static inline void write_tag_block(int tag_bytes, journal_block_tag_t *tag, - unsigned long long block) -{ - tag->t_blocknr = cpu_to_be32(block & (u32)~0); - if (tag_bytes > JBD_TAG_SIZE32) - tag->t_blocknr_high = cpu_to_be32((block >> 31) >> 1); -} - -/* - * jbd2_journal_commit_transaction - * - * The primary function for committing a transaction to the log. This - * function is called by the journal thread to begin a complete commit. - */ -void jbd2_journal_commit_transaction(journal_t *journal) -{ - transaction_t *commit_transaction; - struct journal_head *jh, *new_jh, *descriptor; - struct buffer_head **wbuf = journal->j_wbuf; - int bufs; - int flags; - int err; - unsigned long long blocknr; - char *tagp = NULL; - journal_header_t *header; - journal_block_tag_t *tag = NULL; - int space_left = 0; - int first_tag = 0; - int tag_flag; - int i; - int tag_bytes = journal_tag_bytes(journal); - - /* - * First job: lock down the current transaction and wait for - * all outstanding updates to complete. - */ - -#ifdef COMMIT_STATS - spin_lock(&journal->j_list_lock); - summarise_journal_usage(journal); - spin_unlock(&journal->j_list_lock); -#endif - - /* Do we need to erase the effects of a prior jbd2_journal_flush? */ - if (journal->j_flags & JBD2_FLUSHED) { - jbd_debug(3, "super block updated\n"); - jbd2_journal_update_superblock(journal, 1); - } else { - jbd_debug(3, "superblock not updated\n"); - } - - J_ASSERT(journal->j_running_transaction != NULL); - J_ASSERT(journal->j_committing_transaction == NULL); - - commit_transaction = journal->j_running_transaction; - J_ASSERT(commit_transaction->t_state == T_RUNNING); - - jbd_debug(1, "JBD: starting commit of transaction %d\n", - commit_transaction->t_tid); - - spin_lock(&journal->j_state_lock); - commit_transaction->t_state = T_LOCKED; - - spin_lock(&commit_transaction->t_handle_lock); - while (commit_transaction->t_updates) { - DEFINE_WAIT(wait); - - prepare_to_wait(&journal->j_wait_updates, &wait, - TASK_UNINTERRUPTIBLE); - if (commit_transaction->t_updates) { - spin_unlock(&commit_transaction->t_handle_lock); - spin_unlock(&journal->j_state_lock); - schedule(); - spin_lock(&journal->j_state_lock); - spin_lock(&commit_transaction->t_handle_lock); - } - finish_wait(&journal->j_wait_updates, &wait); - } - spin_unlock(&commit_transaction->t_handle_lock); - - J_ASSERT (commit_transaction->t_outstanding_credits <= - journal->j_max_transaction_buffers); - - /* - * First thing we are allowed to do is to discard any remaining - * BJ_Reserved buffers. Note, it is _not_ permissible to assume - * that there are no such buffers: if a large filesystem - * operation like a truncate needs to split itself over multiple - * transactions, then it may try to do a jbd2_journal_restart() while - * there are still BJ_Reserved buffers outstanding. These must - * be released cleanly from the current transaction. - * - * In this case, the filesystem must still reserve write access - * again before modifying the buffer in the new transaction, but - * we do not require it to remember exactly which old buffers it - * has reserved. This is consistent with the existing behaviour - * that multiple jbd2_journal_get_write_access() calls to the same - * buffer are perfectly permissable. - */ - while (commit_transaction->t_reserved_list) { - jh = commit_transaction->t_reserved_list; - JBUFFER_TRACE(jh, "reserved, unused: refile"); - /* - * A jbd2_journal_get_undo_access()+jbd2_journal_release_buffer() may - * leave undo-committed data. - */ - if (jh->b_committed_data) { - struct buffer_head *bh = jh2bh(jh); - - jbd_lock_bh_state(bh); - jbd2_slab_free(jh->b_committed_data, bh->b_size); - jh->b_committed_data = NULL; - jbd_unlock_bh_state(bh); - } - jbd2_journal_refile_buffer(journal, jh); - } - - /* - * Now try to drop any written-back buffers from the journal's - * checkpoint lists. We do this *before* commit because it potentially - * frees some memory - */ - spin_lock(&journal->j_list_lock); - __jbd2_journal_clean_checkpoint_list(journal); - spin_unlock(&journal->j_list_lock); - - jbd_debug (3, "JBD: commit phase 1\n"); - - /* - * Switch to a new revoke table. - */ - jbd2_journal_switch_revoke_table(journal); - - commit_transaction->t_state = T_FLUSH; - journal->j_committing_transaction = commit_transaction; - journal->j_running_transaction = NULL; - commit_transaction->t_log_start = journal->j_head; - wake_up(&journal->j_wait_transaction_locked); - spin_unlock(&journal->j_state_lock); - - jbd_debug (3, "JBD: commit phase 2\n"); - - /* - * First, drop modified flag: all accesses to the buffers - * will be tracked for a new trasaction only -bzzz - */ - spin_lock(&journal->j_list_lock); - if (commit_transaction->t_buffers) { - new_jh = jh = commit_transaction->t_buffers->b_tnext; - do { - J_ASSERT_JH(new_jh, new_jh->b_modified == 1 || - new_jh->b_modified == 0); - new_jh->b_modified = 0; - new_jh = new_jh->b_tnext; - } while (new_jh != jh); - } - spin_unlock(&journal->j_list_lock); - - /* - * Now start flushing things to disk, in the order they appear - * on the transaction lists. Data blocks go first. - */ - err = 0; - journal_submit_data_buffers(journal, commit_transaction); - - /* - * Wait for all previously submitted IO to complete. - */ - spin_lock(&journal->j_list_lock); - while (commit_transaction->t_locked_list) { - struct buffer_head *bh; - - jh = commit_transaction->t_locked_list->b_tprev; - bh = jh2bh(jh); - get_bh(bh); - if (buffer_locked(bh)) { - spin_unlock(&journal->j_list_lock); - wait_on_buffer(bh); - if (unlikely(!buffer_uptodate(bh))) - err = -EIO; - spin_lock(&journal->j_list_lock); - } - if (!inverted_lock(journal, bh)) { - put_bh(bh); - spin_lock(&journal->j_list_lock); - continue; - } - if (buffer_jbd(bh) && jh->b_jlist == BJ_Locked) { - __jbd2_journal_unfile_buffer(jh); - jbd_unlock_bh_state(bh); - jbd2_journal_remove_journal_head(bh); - put_bh(bh); - } else { - jbd_unlock_bh_state(bh); - } - put_bh(bh); - cond_resched_lock(&journal->j_list_lock); - } - spin_unlock(&journal->j_list_lock); - - if (err) - __jbd2_journal_abort_hard(journal); - - jbd2_journal_write_revoke_records(journal, commit_transaction); - - jbd_debug(3, "JBD: commit phase 2\n"); - - /* - * If we found any dirty or locked buffers, then we should have - * looped back up to the write_out_data label. If there weren't - * any then journal_clean_data_list should have wiped the list - * clean by now, so check that it is in fact empty. - */ - J_ASSERT (commit_transaction->t_sync_datalist == NULL); - - jbd_debug (3, "JBD: commit phase 3\n"); - - /* - * Way to go: we have now written out all of the data for a - * transaction! Now comes the tricky part: we need to write out - * metadata. Loop over the transaction's entire buffer list: - */ - commit_transaction->t_state = T_COMMIT; - - descriptor = NULL; - bufs = 0; - while (commit_transaction->t_buffers) { - - /* Find the next buffer to be journaled... */ - - jh = commit_transaction->t_buffers; - - /* If we're in abort mode, we just un-journal the buffer and - release it for background writing. */ - - if (is_journal_aborted(journal)) { - JBUFFER_TRACE(jh, "journal is aborting: refile"); - jbd2_journal_refile_buffer(journal, jh); - /* If that was the last one, we need to clean up - * any descriptor buffers which may have been - * already allocated, even if we are now - * aborting. */ - if (!commit_transaction->t_buffers) - goto start_journal_io; - continue; - } - - /* Make sure we have a descriptor block in which to - record the metadata buffer. */ - - if (!descriptor) { - struct buffer_head *bh; - - J_ASSERT (bufs == 0); - - jbd_debug(4, "JBD: get descriptor\n"); - - descriptor = jbd2_journal_get_descriptor_buffer(journal); - if (!descriptor) { - __jbd2_journal_abort_hard(journal); - continue; - } - - bh = jh2bh(descriptor); - jbd_debug(4, "JBD: got buffer %llu (%p)\n", - (unsigned long long)bh->b_blocknr, bh->b_data); - header = (journal_header_t *)&bh->b_data[0]; - header->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER); - header->h_blocktype = cpu_to_be32(JBD2_DESCRIPTOR_BLOCK); - header->h_sequence = cpu_to_be32(commit_transaction->t_tid); - - tagp = &bh->b_data[sizeof(journal_header_t)]; - space_left = bh->b_size - sizeof(journal_header_t); - first_tag = 1; - set_buffer_jwrite(bh); - set_buffer_dirty(bh); - wbuf[bufs++] = bh; - - /* Record it so that we can wait for IO - completion later */ - BUFFER_TRACE(bh, "ph3: file as descriptor"); - jbd2_journal_file_buffer(descriptor, commit_transaction, - BJ_LogCtl); - } - - /* Where is the buffer to be written? */ - - err = jbd2_journal_next_log_block(journal, &blocknr); - /* If the block mapping failed, just abandon the buffer - and repeat this loop: we'll fall into the - refile-on-abort condition above. */ - if (err) { - __jbd2_journal_abort_hard(journal); - continue; - } - - /* - * start_this_handle() uses t_outstanding_credits to determine - * the free space in the log, but this counter is changed - * by jbd2_journal_next_log_block() also. - */ - commit_transaction->t_outstanding_credits--; - - /* Bump b_count to prevent truncate from stumbling over - the shadowed buffer! @@@ This can go if we ever get - rid of the BJ_IO/BJ_Shadow pairing of buffers. */ - atomic_inc(&jh2bh(jh)->b_count); - - /* Make a temporary IO buffer with which to write it out - (this will requeue both the metadata buffer and the - temporary IO buffer). new_bh goes on BJ_IO*/ - - set_bit(BH_JWrite, &jh2bh(jh)->b_state); - /* - * akpm: jbd2_journal_write_metadata_buffer() sets - * new_bh->b_transaction to commit_transaction. - * We need to clean this up before we release new_bh - * (which is of type BJ_IO) - */ - JBUFFER_TRACE(jh, "ph3: write metadata"); - flags = jbd2_journal_write_metadata_buffer(commit_transaction, - jh, &new_jh, blocknr); - set_bit(BH_JWrite, &jh2bh(new_jh)->b_state); - wbuf[bufs++] = jh2bh(new_jh); - - /* Record the new block's tag in the current descriptor - buffer */ - - tag_flag = 0; - if (flags & 1) - tag_flag |= JBD2_FLAG_ESCAPE; - if (!first_tag) - tag_flag |= JBD2_FLAG_SAME_UUID; - - tag = (journal_block_tag_t *) tagp; - write_tag_block(tag_bytes, tag, jh2bh(jh)->b_blocknr); - tag->t_flags = cpu_to_be32(tag_flag); - tagp += tag_bytes; - space_left -= tag_bytes; - - if (first_tag) { - memcpy (tagp, journal->j_uuid, 16); - tagp += 16; - space_left -= 16; - first_tag = 0; - } - - /* If there's no more to do, or if the descriptor is full, - let the IO rip! */ - - if (bufs == journal->j_wbufsize || - commit_transaction->t_buffers == NULL || - space_left < tag_bytes + 16) { - - jbd_debug(4, "JBD: Submit %d IOs\n", bufs); - - /* Write an end-of-descriptor marker before - submitting the IOs. "tag" still points to - the last tag we set up. */ - - tag->t_flags |= cpu_to_be32(JBD2_FLAG_LAST_TAG); - -start_journal_io: - for (i = 0; i < bufs; i++) { - struct buffer_head *bh = wbuf[i]; - lock_buffer(bh); - clear_buffer_dirty(bh); - set_buffer_uptodate(bh); - bh->b_end_io = journal_end_buffer_io_sync; - submit_bh(WRITE, bh); - } - cond_resched(); - - /* Force a new descriptor to be generated next - time round the loop. */ - descriptor = NULL; - bufs = 0; - } - } - - /* Lo and behold: we have just managed to send a transaction to - the log. Before we can commit it, wait for the IO so far to - complete. Control buffers being written are on the - transaction's t_log_list queue, and metadata buffers are on - the t_iobuf_list queue. - - Wait for the buffers in reverse order. That way we are - less likely to be woken up until all IOs have completed, and - so we incur less scheduling load. - */ - - jbd_debug(3, "JBD: commit phase 4\n"); - - /* - * akpm: these are BJ_IO, and j_list_lock is not needed. - * See __journal_try_to_free_buffer. - */ -wait_for_iobuf: - while (commit_transaction->t_iobuf_list != NULL) { - struct buffer_head *bh; - - jh = commit_transaction->t_iobuf_list->b_tprev; - bh = jh2bh(jh); - if (buffer_locked(bh)) { - wait_on_buffer(bh); - goto wait_for_iobuf; - } - if (cond_resched()) - goto wait_for_iobuf; - - if (unlikely(!buffer_uptodate(bh))) - err = -EIO; - - clear_buffer_jwrite(bh); - - JBUFFER_TRACE(jh, "ph4: unfile after journal write"); - jbd2_journal_unfile_buffer(journal, jh); - - /* - * ->t_iobuf_list should contain only dummy buffer_heads - * which were created by jbd2_journal_write_metadata_buffer(). - */ - BUFFER_TRACE(bh, "dumping temporary bh"); - jbd2_journal_put_journal_head(jh); - __brelse(bh); - J_ASSERT_BH(bh, atomic_read(&bh->b_count) == 0); - free_buffer_head(bh); - - /* We also have to unlock and free the corresponding - shadowed buffer */ - jh = commit_transaction->t_shadow_list->b_tprev; - bh = jh2bh(jh); - clear_bit(BH_JWrite, &bh->b_state); - J_ASSERT_BH(bh, buffer_jbddirty(bh)); - - /* The metadata is now released for reuse, but we need - to remember it against this transaction so that when - we finally commit, we can do any checkpointing - required. */ - JBUFFER_TRACE(jh, "file as BJ_Forget"); - jbd2_journal_file_buffer(jh, commit_transaction, BJ_Forget); - /* Wake up any transactions which were waiting for this - IO to complete */ - wake_up_bit(&bh->b_state, BH_Unshadow); - JBUFFER_TRACE(jh, "brelse shadowed buffer"); - __brelse(bh); - } - - J_ASSERT (commit_transaction->t_shadow_list == NULL); - - jbd_debug(3, "JBD: commit phase 5\n"); - - /* Here we wait for the revoke record and descriptor record buffers */ - wait_for_ctlbuf: - while (commit_transaction->t_log_list != NULL) { - struct buffer_head *bh; - - jh = commit_transaction->t_log_list->b_tprev; - bh = jh2bh(jh); - if (buffer_locked(bh)) { - wait_on_buffer(bh); - goto wait_for_ctlbuf; - } - if (cond_resched()) - goto wait_for_ctlbuf; - - if (unlikely(!buffer_uptodate(bh))) - err = -EIO; - - BUFFER_TRACE(bh, "ph5: control buffer writeout done: unfile"); - clear_buffer_jwrite(bh); - jbd2_journal_unfile_buffer(journal, jh); - jbd2_journal_put_journal_head(jh); - __brelse(bh); /* One for getblk */ - /* AKPM: bforget here */ - } - - jbd_debug(3, "JBD: commit phase 6\n"); - - if (journal_write_commit_record(journal, commit_transaction)) - err = -EIO; - - if (err) - __jbd2_journal_abort_hard(journal); - - /* End of a transaction! Finally, we can do checkpoint - processing: any buffers committed as a result of this - transaction can be removed from any checkpoint list it was on - before. */ - - jbd_debug(3, "JBD: commit phase 7\n"); - - J_ASSERT(commit_transaction->t_sync_datalist == NULL); - J_ASSERT(commit_transaction->t_buffers == NULL); - J_ASSERT(commit_transaction->t_checkpoint_list == NULL); - J_ASSERT(commit_transaction->t_iobuf_list == NULL); - J_ASSERT(commit_transaction->t_shadow_list == NULL); - J_ASSERT(commit_transaction->t_log_list == NULL); - -restart_loop: - /* - * As there are other places (journal_unmap_buffer()) adding buffers - * to this list we have to be careful and hold the j_list_lock. - */ - spin_lock(&journal->j_list_lock); - while (commit_transaction->t_forget) { - transaction_t *cp_transaction; - struct buffer_head *bh; - - jh = commit_transaction->t_forget; - spin_unlock(&journal->j_list_lock); - bh = jh2bh(jh); - jbd_lock_bh_state(bh); - J_ASSERT_JH(jh, jh->b_transaction == commit_transaction || - jh->b_transaction == journal->j_running_transaction); - - /* - * If there is undo-protected committed data against - * this buffer, then we can remove it now. If it is a - * buffer needing such protection, the old frozen_data - * field now points to a committed version of the - * buffer, so rotate that field to the new committed - * data. - * - * Otherwise, we can just throw away the frozen data now. - */ - if (jh->b_committed_data) { - jbd2_slab_free(jh->b_committed_data, bh->b_size); - jh->b_committed_data = NULL; - if (jh->b_frozen_data) { - jh->b_committed_data = jh->b_frozen_data; - jh->b_frozen_data = NULL; - } - } else if (jh->b_frozen_data) { - jbd2_slab_free(jh->b_frozen_data, bh->b_size); - jh->b_frozen_data = NULL; - } - - spin_lock(&journal->j_list_lock); - cp_transaction = jh->b_cp_transaction; - if (cp_transaction) { - JBUFFER_TRACE(jh, "remove from old cp transaction"); - __jbd2_journal_remove_checkpoint(jh); - } - - /* Only re-checkpoint the buffer_head if it is marked - * dirty. If the buffer was added to the BJ_Forget list - * by jbd2_journal_forget, it may no longer be dirty and - * there's no point in keeping a checkpoint record for - * it. */ - - /* A buffer which has been freed while still being - * journaled by a previous transaction may end up still - * being dirty here, but we want to avoid writing back - * that buffer in the future now that the last use has - * been committed. That's not only a performance gain, - * it also stops aliasing problems if the buffer is left - * behind for writeback and gets reallocated for another - * use in a different page. */ - if (buffer_freed(bh)) { - clear_buffer_freed(bh); - clear_buffer_jbddirty(bh); - } - - if (buffer_jbddirty(bh)) { - JBUFFER_TRACE(jh, "add to new checkpointing trans"); - __jbd2_journal_insert_checkpoint(jh, commit_transaction); - JBUFFER_TRACE(jh, "refile for checkpoint writeback"); - __jbd2_journal_refile_buffer(jh); - jbd_unlock_bh_state(bh); - } else { - J_ASSERT_BH(bh, !buffer_dirty(bh)); - /* The buffer on BJ_Forget list and not jbddirty means - * it has been freed by this transaction and hence it - * could not have been reallocated until this - * transaction has committed. *BUT* it could be - * reallocated once we have written all the data to - * disk and before we process the buffer on BJ_Forget - * list. */ - JBUFFER_TRACE(jh, "refile or unfile freed buffer"); - __jbd2_journal_refile_buffer(jh); - if (!jh->b_transaction) { - jbd_unlock_bh_state(bh); - /* needs a brelse */ - jbd2_journal_remove_journal_head(bh); - release_buffer_page(bh); - } else - jbd_unlock_bh_state(bh); - } - cond_resched_lock(&journal->j_list_lock); - } - spin_unlock(&journal->j_list_lock); - /* - * This is a bit sleazy. We borrow j_list_lock to protect - * journal->j_committing_transaction in __jbd2_journal_remove_checkpoint. - * Really, __jbd2_journal_remove_checkpoint should be using j_state_lock but - * it's a bit hassle to hold that across __jbd2_journal_remove_checkpoint - */ - spin_lock(&journal->j_state_lock); - spin_lock(&journal->j_list_lock); - /* - * Now recheck if some buffers did not get attached to the transaction - * while the lock was dropped... - */ - if (commit_transaction->t_forget) { - spin_unlock(&journal->j_list_lock); - spin_unlock(&journal->j_state_lock); - goto restart_loop; - } - - /* Done with this transaction! */ - - jbd_debug(3, "JBD: commit phase 8\n"); - - J_ASSERT(commit_transaction->t_state == T_COMMIT); - - commit_transaction->t_state = T_FINISHED; - J_ASSERT(commit_transaction == journal->j_committing_transaction); - journal->j_commit_sequence = commit_transaction->t_tid; - journal->j_committing_transaction = NULL; - spin_unlock(&journal->j_state_lock); - - if (commit_transaction->t_checkpoint_list == NULL) { - __jbd2_journal_drop_transaction(journal, commit_transaction); - } else { - if (journal->j_checkpoint_transactions == NULL) { - journal->j_checkpoint_transactions = commit_transaction; - commit_transaction->t_cpnext = commit_transaction; - commit_transaction->t_cpprev = commit_transaction; - } else { - commit_transaction->t_cpnext = - journal->j_checkpoint_transactions; - commit_transaction->t_cpprev = - commit_transaction->t_cpnext->t_cpprev; - commit_transaction->t_cpnext->t_cpprev = - commit_transaction; - commit_transaction->t_cpprev->t_cpnext = - commit_transaction; - } - } - spin_unlock(&journal->j_list_lock); - - jbd_debug(1, "JBD: commit %d complete, head %d\n", - journal->j_commit_sequence, journal->j_tail_sequence); - - wake_up(&journal->j_wait_done_commit); -} diff --git a/trunk/fs/jbd2/journal.c b/trunk/fs/jbd2/journal.c deleted file mode 100644 index 10db92ced014..000000000000 --- a/trunk/fs/jbd2/journal.c +++ /dev/null @@ -1,2083 +0,0 @@ -/* - * linux/fs/jbd2/journal.c - * - * Written by Stephen C. Tweedie , 1998 - * - * Copyright 1998 Red Hat corp --- All Rights Reserved - * - * This file is part of the Linux kernel and is made available under - * the terms of the GNU General Public License, version 2, or at your - * option, any later version, incorporated herein by reference. - * - * Generic filesystem journal-writing code; part of the ext2fs - * journaling system. - * - * This file manages journals: areas of disk reserved for logging - * transactional updates. This includes the kernel journaling thread - * which is responsible for scheduling updates to the log. - * - * We do not actually manage the physical storage of the journal in this - * file: that is left to a per-journal policy function, which allows us - * to store the journal within a filesystem-specified area for ext2 - * journaling (ext2 can use a reserved inode for storing the log). - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -EXPORT_SYMBOL(jbd2_journal_start); -EXPORT_SYMBOL(jbd2_journal_restart); -EXPORT_SYMBOL(jbd2_journal_extend); -EXPORT_SYMBOL(jbd2_journal_stop); -EXPORT_SYMBOL(jbd2_journal_lock_updates); -EXPORT_SYMBOL(jbd2_journal_unlock_updates); -EXPORT_SYMBOL(jbd2_journal_get_write_access); -EXPORT_SYMBOL(jbd2_journal_get_create_access); -EXPORT_SYMBOL(jbd2_journal_get_undo_access); -EXPORT_SYMBOL(jbd2_journal_dirty_data); -EXPORT_SYMBOL(jbd2_journal_dirty_metadata); -EXPORT_SYMBOL(jbd2_journal_release_buffer); -EXPORT_SYMBOL(jbd2_journal_forget); -#if 0 -EXPORT_SYMBOL(journal_sync_buffer); -#endif -EXPORT_SYMBOL(jbd2_journal_flush); -EXPORT_SYMBOL(jbd2_journal_revoke); - -EXPORT_SYMBOL(jbd2_journal_init_dev); -EXPORT_SYMBOL(jbd2_journal_init_inode); -EXPORT_SYMBOL(jbd2_journal_update_format); -EXPORT_SYMBOL(jbd2_journal_check_used_features); -EXPORT_SYMBOL(jbd2_journal_check_available_features); -EXPORT_SYMBOL(jbd2_journal_set_features); -EXPORT_SYMBOL(jbd2_journal_create); -EXPORT_SYMBOL(jbd2_journal_load); -EXPORT_SYMBOL(jbd2_journal_destroy); -EXPORT_SYMBOL(jbd2_journal_update_superblock); -EXPORT_SYMBOL(jbd2_journal_abort); -EXPORT_SYMBOL(jbd2_journal_errno); -EXPORT_SYMBOL(jbd2_journal_ack_err); -EXPORT_SYMBOL(jbd2_journal_clear_err); -EXPORT_SYMBOL(jbd2_log_wait_commit); -EXPORT_SYMBOL(jbd2_journal_start_commit); -EXPORT_SYMBOL(jbd2_journal_force_commit_nested); -EXPORT_SYMBOL(jbd2_journal_wipe); -EXPORT_SYMBOL(jbd2_journal_blocks_per_page); -EXPORT_SYMBOL(jbd2_journal_invalidatepage); -EXPORT_SYMBOL(jbd2_journal_try_to_free_buffers); -EXPORT_SYMBOL(jbd2_journal_force_commit); - -static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *); -static void __journal_abort_soft (journal_t *journal, int errno); -static int jbd2_journal_create_jbd_slab(size_t slab_size); - -/* - * Helper function used to manage commit timeouts - */ - -static void commit_timeout(unsigned long __data) -{ - struct task_struct * p = (struct task_struct *) __data; - - wake_up_process(p); -} - -/* - * kjournald2: The main thread function used to manage a logging device - * journal. - * - * This kernel thread is responsible for two things: - * - * 1) COMMIT: Every so often we need to commit the current state of the - * filesystem to disk. The journal thread is responsible for writing - * all of the metadata buffers to disk. - * - * 2) CHECKPOINT: We cannot reuse a used section of the log file until all - * of the data in that part of the log has been rewritten elsewhere on - * the disk. Flushing these old buffers to reclaim space in the log is - * known as checkpointing, and this thread is responsible for that job. - */ - -static int kjournald2(void *arg) -{ - journal_t *journal = arg; - transaction_t *transaction; - - /* - * Set up an interval timer which can be used to trigger a commit wakeup - * after the commit interval expires - */ - setup_timer(&journal->j_commit_timer, commit_timeout, - (unsigned long)current); - - /* Record that the journal thread is running */ - journal->j_task = current; - wake_up(&journal->j_wait_done_commit); - - printk(KERN_INFO "kjournald2 starting. Commit interval %ld seconds\n", - journal->j_commit_interval / HZ); - - /* - * And now, wait forever for commit wakeup events. - */ - spin_lock(&journal->j_state_lock); - -loop: - if (journal->j_flags & JBD2_UNMOUNT) - goto end_loop; - - jbd_debug(1, "commit_sequence=%d, commit_request=%d\n", - journal->j_commit_sequence, journal->j_commit_request); - - if (journal->j_commit_sequence != journal->j_commit_request) { - jbd_debug(1, "OK, requests differ\n"); - spin_unlock(&journal->j_state_lock); - del_timer_sync(&journal->j_commit_timer); - jbd2_journal_commit_transaction(journal); - spin_lock(&journal->j_state_lock); - goto loop; - } - - wake_up(&journal->j_wait_done_commit); - if (freezing(current)) { - /* - * The simpler the better. Flushing journal isn't a - * good idea, because that depends on threads that may - * be already stopped. - */ - jbd_debug(1, "Now suspending kjournald2\n"); - spin_unlock(&journal->j_state_lock); - refrigerator(); - spin_lock(&journal->j_state_lock); - } else { - /* - * We assume on resume that commits are already there, - * so we don't sleep - */ - DEFINE_WAIT(wait); - int should_sleep = 1; - - prepare_to_wait(&journal->j_wait_commit, &wait, - TASK_INTERRUPTIBLE); - if (journal->j_commit_sequence != journal->j_commit_request) - should_sleep = 0; - transaction = journal->j_running_transaction; - if (transaction && time_after_eq(jiffies, - transaction->t_expires)) - should_sleep = 0; - if (journal->j_flags & JBD2_UNMOUNT) - should_sleep = 0; - if (should_sleep) { - spin_unlock(&journal->j_state_lock); - schedule(); - spin_lock(&journal->j_state_lock); - } - finish_wait(&journal->j_wait_commit, &wait); - } - - jbd_debug(1, "kjournald2 wakes\n"); - - /* - * Were we woken up by a commit wakeup event? - */ - transaction = journal->j_running_transaction; - if (transaction && time_after_eq(jiffies, transaction->t_expires)) { - journal->j_commit_request = transaction->t_tid; - jbd_debug(1, "woke because of timeout\n"); - } - goto loop; - -end_loop: - spin_unlock(&journal->j_state_lock); - del_timer_sync(&journal->j_commit_timer); - journal->j_task = NULL; - wake_up(&journal->j_wait_done_commit); - jbd_debug(1, "Journal thread exiting.\n"); - return 0; -} - -static void jbd2_journal_start_thread(journal_t *journal) -{ - kthread_run(kjournald2, journal, "kjournald2"); - wait_event(journal->j_wait_done_commit, journal->j_task != 0); -} - -static void journal_kill_thread(journal_t *journal) -{ - spin_lock(&journal->j_state_lock); - journal->j_flags |= JBD2_UNMOUNT; - - while (journal->j_task) { - wake_up(&journal->j_wait_commit); - spin_unlock(&journal->j_state_lock); - wait_event(journal->j_wait_done_commit, journal->j_task == 0); - spin_lock(&journal->j_state_lock); - } - spin_unlock(&journal->j_state_lock); -} - -/* - * jbd2_journal_write_metadata_buffer: write a metadata buffer to the journal. - * - * Writes a metadata buffer to a given disk block. The actual IO is not - * performed but a new buffer_head is constructed which labels the data - * to be written with the correct destination disk block. - * - * Any magic-number escaping which needs to be done will cause a - * copy-out here. If the buffer happens to start with the - * JBD2_MAGIC_NUMBER, then we can't write it to the log directly: the - * magic number is only written to the log for descripter blocks. In - * this case, we copy the data and replace the first word with 0, and we - * return a result code which indicates that this buffer needs to be - * marked as an escaped buffer in the corresponding log descriptor - * block. The missing word can then be restored when the block is read - * during recovery. - * - * If the source buffer has already been modified by a new transaction - * since we took the last commit snapshot, we use the frozen copy of - * that data for IO. If we end up using the existing buffer_head's data - * for the write, then we *have* to lock the buffer to prevent anyone - * else from using and possibly modifying it while the IO is in - * progress. - * - * The function returns a pointer to the buffer_heads to be used for IO. - * - * We assume that the journal has already been locked in this function. - * - * Return value: - * <0: Error - * >=0: Finished OK - * - * On success: - * Bit 0 set == escape performed on the data - * Bit 1 set == buffer copy-out performed (kfree the data after IO) - */ - -int jbd2_journal_write_metadata_buffer(transaction_t *transaction, - struct journal_head *jh_in, - struct journal_head **jh_out, - unsigned long long blocknr) -{ - int need_copy_out = 0; - int done_copy_out = 0; - int do_escape = 0; - char *mapped_data; - struct buffer_head *new_bh; - struct journal_head *new_jh; - struct page *new_page; - unsigned int new_offset; - struct buffer_head *bh_in = jh2bh(jh_in); - - /* - * The buffer really shouldn't be locked: only the current committing - * transaction is allowed to write it, so nobody else is allowed - * to do any IO. - * - * akpm: except if we're journalling data, and write() output is - * also part of a shared mapping, and another thread has - * decided to launch a writepage() against this buffer. - */ - J_ASSERT_BH(bh_in, buffer_jbddirty(bh_in)); - - new_bh = alloc_buffer_head(GFP_NOFS|__GFP_NOFAIL); - - /* - * If a new transaction has already done a buffer copy-out, then - * we use that version of the data for the commit. - */ - jbd_lock_bh_state(bh_in); -repeat: - if (jh_in->b_frozen_data) { - done_copy_out = 1; - new_page = virt_to_page(jh_in->b_frozen_data); - new_offset = offset_in_page(jh_in->b_frozen_data); - } else { - new_page = jh2bh(jh_in)->b_page; - new_offset = offset_in_page(jh2bh(jh_in)->b_data); - } - - mapped_data = kmap_atomic(new_page, KM_USER0); - /* - * Check for escaping - */ - if (*((__be32 *)(mapped_data + new_offset)) == - cpu_to_be32(JBD2_MAGIC_NUMBER)) { - need_copy_out = 1; - do_escape = 1; - } - kunmap_atomic(mapped_data, KM_USER0); - - /* - * Do we need to do a data copy? - */ - if (need_copy_out && !done_copy_out) { - char *tmp; - - jbd_unlock_bh_state(bh_in); - tmp = jbd2_slab_alloc(bh_in->b_size, GFP_NOFS); - jbd_lock_bh_state(bh_in); - if (jh_in->b_frozen_data) { - jbd2_slab_free(tmp, bh_in->b_size); - goto repeat; - } - - jh_in->b_frozen_data = tmp; - mapped_data = kmap_atomic(new_page, KM_USER0); - memcpy(tmp, mapped_data + new_offset, jh2bh(jh_in)->b_size); - kunmap_atomic(mapped_data, KM_USER0); - - new_page = virt_to_page(tmp); - new_offset = offset_in_page(tmp); - done_copy_out = 1; - } - - /* - * Did we need to do an escaping? Now we've done all the - * copying, we can finally do so. - */ - if (do_escape) { - mapped_data = kmap_atomic(new_page, KM_USER0); - *((unsigned int *)(mapped_data + new_offset)) = 0; - kunmap_atomic(mapped_data, KM_USER0); - } - - /* keep subsequent assertions sane */ - new_bh->b_state = 0; - init_buffer(new_bh, NULL, NULL); - atomic_set(&new_bh->b_count, 1); - jbd_unlock_bh_state(bh_in); - - new_jh = jbd2_journal_add_journal_head(new_bh); /* This sleeps */ - - set_bh_page(new_bh, new_page, new_offset); - new_jh->b_transaction = NULL; - new_bh->b_size = jh2bh(jh_in)->b_size; - new_bh->b_bdev = transaction->t_journal->j_dev; - new_bh->b_blocknr = blocknr; - set_buffer_mapped(new_bh); - set_buffer_dirty(new_bh); - - *jh_out = new_jh; - - /* - * The to-be-written buffer needs to get moved to the io queue, - * and the original buffer whose contents we are shadowing or - * copying is moved to the transaction's shadow queue. - */ - JBUFFER_TRACE(jh_in, "file as BJ_Shadow"); - jbd2_journal_file_buffer(jh_in, transaction, BJ_Shadow); - JBUFFER_TRACE(new_jh, "file as BJ_IO"); - jbd2_journal_file_buffer(new_jh, transaction, BJ_IO); - - return do_escape | (done_copy_out << 1); -} - -/* - * Allocation code for the journal file. Manage the space left in the - * journal, so that we can begin checkpointing when appropriate. - */ - -/* - * __jbd2_log_space_left: Return the number of free blocks left in the journal. - * - * Called with the journal already locked. - * - * Called under j_state_lock - */ - -int __jbd2_log_space_left(journal_t *journal) -{ - int left = journal->j_free; - - assert_spin_locked(&journal->j_state_lock); - - /* - * Be pessimistic here about the number of those free blocks which - * might be required for log descriptor control blocks. - */ - -#define MIN_LOG_RESERVED_BLOCKS 32 /* Allow for rounding errors */ - - left -= MIN_LOG_RESERVED_BLOCKS; - - if (left <= 0) - return 0; - left -= (left >> 3); - return left; -} - -/* - * Called under j_state_lock. Returns true if a transaction was started. - */ -int __jbd2_log_start_commit(journal_t *journal, tid_t target) -{ - /* - * Are we already doing a recent enough commit? - */ - if (!tid_geq(journal->j_commit_request, target)) { - /* - * We want a new commit: OK, mark the request and wakup the - * commit thread. We do _not_ do the commit ourselves. - */ - - journal->j_commit_request = target; - jbd_debug(1, "JBD: requesting commit %d/%d\n", - journal->j_commit_request, - journal->j_commit_sequence); - wake_up(&journal->j_wait_commit); - return 1; - } - return 0; -} - -int jbd2_log_start_commit(journal_t *journal, tid_t tid) -{ - int ret; - - spin_lock(&journal->j_state_lock); - ret = __jbd2_log_start_commit(journal, tid); - spin_unlock(&journal->j_state_lock); - return ret; -} - -/* - * Force and wait upon a commit if the calling process is not within - * transaction. This is used for forcing out undo-protected data which contains - * bitmaps, when the fs is running out of space. - * - * We can only force the running transaction if we don't have an active handle; - * otherwise, we will deadlock. - * - * Returns true if a transaction was started. - */ -int jbd2_journal_force_commit_nested(journal_t *journal) -{ - transaction_t *transaction = NULL; - tid_t tid; - - spin_lock(&journal->j_state_lock); - if (journal->j_running_transaction && !current->journal_info) { - transaction = journal->j_running_transaction; - __jbd2_log_start_commit(journal, transaction->t_tid); - } else if (journal->j_committing_transaction) - transaction = journal->j_committing_transaction; - - if (!transaction) { - spin_unlock(&journal->j_state_lock); - return 0; /* Nothing to retry */ - } - - tid = transaction->t_tid; - spin_unlock(&journal->j_state_lock); - jbd2_log_wait_commit(journal, tid); - return 1; -} - -/* - * Start a commit of the current running transaction (if any). Returns true - * if a transaction was started, and fills its tid in at *ptid - */ -int jbd2_journal_start_commit(journal_t *journal, tid_t *ptid) -{ - int ret = 0; - - spin_lock(&journal->j_state_lock); - if (journal->j_running_transaction) { - tid_t tid = journal->j_running_transaction->t_tid; - - ret = __jbd2_log_start_commit(journal, tid); - if (ret && ptid) - *ptid = tid; - } else if (journal->j_committing_transaction && ptid) { - /* - * If ext3_write_super() recently started a commit, then we - * have to wait for completion of that transaction - */ - *ptid = journal->j_committing_transaction->t_tid; - ret = 1; - } - spin_unlock(&journal->j_state_lock); - return ret; -} - -/* - * Wait for a specified commit to complete. - * The caller may not hold the journal lock. - */ -int jbd2_log_wait_commit(journal_t *journal, tid_t tid) -{ - int err = 0; - -#ifdef CONFIG_JBD_DEBUG - spin_lock(&journal->j_state_lock); - if (!tid_geq(journal->j_commit_request, tid)) { - printk(KERN_EMERG - "%s: error: j_commit_request=%d, tid=%d\n", - __FUNCTION__, journal->j_commit_request, tid); - } - spin_unlock(&journal->j_state_lock); -#endif - spin_lock(&journal->j_state_lock); - while (tid_gt(tid, journal->j_commit_sequence)) { - jbd_debug(1, "JBD: want %d, j_commit_sequence=%d\n", - tid, journal->j_commit_sequence); - wake_up(&journal->j_wait_commit); - spin_unlock(&journal->j_state_lock); - wait_event(journal->j_wait_done_commit, - !tid_gt(tid, journal->j_commit_sequence)); - spin_lock(&journal->j_state_lock); - } - spin_unlock(&journal->j_state_lock); - - if (unlikely(is_journal_aborted(journal))) { - printk(KERN_EMERG "journal commit I/O error\n"); - err = -EIO; - } - return err; -} - -/* - * Log buffer allocation routines: - */ - -int jbd2_journal_next_log_block(journal_t *journal, unsigned long long *retp) -{ - unsigned long blocknr; - - spin_lock(&journal->j_state_lock); - J_ASSERT(journal->j_free > 1); - - blocknr = journal->j_head; - journal->j_head++; - journal->j_free--; - if (journal->j_head == journal->j_last) - journal->j_head = journal->j_first; - spin_unlock(&journal->j_state_lock); - return jbd2_journal_bmap(journal, blocknr, retp); -} - -/* - * Conversion of logical to physical block numbers for the journal - * - * On external journals the journal blocks are identity-mapped, so - * this is a no-op. If needed, we can use j_blk_offset - everything is - * ready. - */ -int jbd2_journal_bmap(journal_t *journal, unsigned long blocknr, - unsigned long long *retp) -{ - int err = 0; - unsigned long long ret; - - if (journal->j_inode) { - ret = bmap(journal->j_inode, blocknr); - if (ret) - *retp = ret; - else { - char b[BDEVNAME_SIZE]; - - printk(KERN_ALERT "%s: journal block not found " - "at offset %lu on %s\n", - __FUNCTION__, - blocknr, - bdevname(journal->j_dev, b)); - err = -EIO; - __journal_abort_soft(journal, err); - } - } else { - *retp = blocknr; /* +journal->j_blk_offset */ - } - return err; -} - -/* - * We play buffer_head aliasing tricks to write data/metadata blocks to - * the journal without copying their contents, but for journal - * descriptor blocks we do need to generate bona fide buffers. - * - * After the caller of jbd2_journal_get_descriptor_buffer() has finished modifying - * the buffer's contents they really should run flush_dcache_page(bh->b_page). - * But we don't bother doing that, so there will be coherency problems with - * mmaps of blockdevs which hold live JBD-controlled filesystems. - */ -struct journal_head *jbd2_journal_get_descriptor_buffer(journal_t *journal) -{ - struct buffer_head *bh; - unsigned long long blocknr; - int err; - - err = jbd2_journal_next_log_block(journal, &blocknr); - - if (err) - return NULL; - - bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize); - lock_buffer(bh); - memset(bh->b_data, 0, journal->j_blocksize); - set_buffer_uptodate(bh); - unlock_buffer(bh); - BUFFER_TRACE(bh, "return this buffer"); - return jbd2_journal_add_journal_head(bh); -} - -/* - * Management for journal control blocks: functions to create and - * destroy journal_t structures, and to initialise and read existing - * journal blocks from disk. */ - -/* First: create and setup a journal_t object in memory. We initialise - * very few fields yet: that has to wait until we have created the - * journal structures from from scratch, or loaded them from disk. */ - -static journal_t * journal_init_common (void) -{ - journal_t *journal; - int err; - - journal = jbd_kmalloc(sizeof(*journal), GFP_KERNEL); - if (!journal) - goto fail; - memset(journal, 0, sizeof(*journal)); - - init_waitqueue_head(&journal->j_wait_transaction_locked); - init_waitqueue_head(&journal->j_wait_logspace); - init_waitqueue_head(&journal->j_wait_done_commit); - init_waitqueue_head(&journal->j_wait_checkpoint); - init_waitqueue_head(&journal->j_wait_commit); - init_waitqueue_head(&journal->j_wait_updates); - mutex_init(&journal->j_barrier); - mutex_init(&journal->j_checkpoint_mutex); - spin_lock_init(&journal->j_revoke_lock); - spin_lock_init(&journal->j_list_lock); - spin_lock_init(&journal->j_state_lock); - - journal->j_commit_interval = (HZ * JBD_DEFAULT_MAX_COMMIT_AGE); - - /* The journal is marked for error until we succeed with recovery! */ - journal->j_flags = JBD2_ABORT; - - /* Set up a default-sized revoke table for the new mount. */ - err = jbd2_journal_init_revoke(journal, JOURNAL_REVOKE_DEFAULT_HASH); - if (err) { - kfree(journal); - goto fail; - } - return journal; -fail: - return NULL; -} - -/* jbd2_journal_init_dev and jbd2_journal_init_inode: - * - * Create a journal structure assigned some fixed set of disk blocks to - * the journal. We don't actually touch those disk blocks yet, but we - * need to set up all of the mapping information to tell the journaling - * system where the journal blocks are. - * - */ - -/** - * journal_t * jbd2_journal_init_dev() - creates an initialises a journal structure - * @bdev: Block device on which to create the journal - * @fs_dev: Device which hold journalled filesystem for this journal. - * @start: Block nr Start of journal. - * @len: Length of the journal in blocks. - * @blocksize: blocksize of journalling device - * @returns: a newly created journal_t * - * - * jbd2_journal_init_dev creates a journal which maps a fixed contiguous - * range of blocks on an arbitrary block device. - * - */ -journal_t * jbd2_journal_init_dev(struct block_device *bdev, - struct block_device *fs_dev, - unsigned long long start, int len, int blocksize) -{ - journal_t *journal = journal_init_common(); - struct buffer_head *bh; - int n; - - if (!journal) - return NULL; - - /* journal descriptor can store up to n blocks -bzzz */ - journal->j_blocksize = blocksize; - n = journal->j_blocksize / sizeof(journal_block_tag_t); - journal->j_wbufsize = n; - journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL); - if (!journal->j_wbuf) { - printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n", - __FUNCTION__); - kfree(journal); - journal = NULL; - } - journal->j_dev = bdev; - journal->j_fs_dev = fs_dev; - journal->j_blk_offset = start; - journal->j_maxlen = len; - - bh = __getblk(journal->j_dev, start, journal->j_blocksize); - J_ASSERT(bh != NULL); - journal->j_sb_buffer = bh; - journal->j_superblock = (journal_superblock_t *)bh->b_data; - - return journal; -} - -/** - * journal_t * jbd2_journal_init_inode () - creates a journal which maps to a inode. - * @inode: An inode to create the journal in - * - * jbd2_journal_init_inode creates a journal which maps an on-disk inode as - * the journal. The inode must exist already, must support bmap() and - * must have all data blocks preallocated. - */ -journal_t * jbd2_journal_init_inode (struct inode *inode) -{ - struct buffer_head *bh; - journal_t *journal = journal_init_common(); - int err; - int n; - unsigned long long blocknr; - - if (!journal) - return NULL; - - journal->j_dev = journal->j_fs_dev = inode->i_sb->s_bdev; - journal->j_inode = inode; - jbd_debug(1, - "journal %p: inode %s/%ld, size %Ld, bits %d, blksize %ld\n", - journal, inode->i_sb->s_id, inode->i_ino, - (long long) inode->i_size, - inode->i_sb->s_blocksize_bits, inode->i_sb->s_blocksize); - - journal->j_maxlen = inode->i_size >> inode->i_sb->s_blocksize_bits; - journal->j_blocksize = inode->i_sb->s_blocksize; - - /* journal descriptor can store up to n blocks -bzzz */ - n = journal->j_blocksize / sizeof(journal_block_tag_t); - journal->j_wbufsize = n; - journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL); - if (!journal->j_wbuf) { - printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n", - __FUNCTION__); - kfree(journal); - return NULL; - } - - err = jbd2_journal_bmap(journal, 0, &blocknr); - /* If that failed, give up */ - if (err) { - printk(KERN_ERR "%s: Cannnot locate journal superblock\n", - __FUNCTION__); - kfree(journal); - return NULL; - } - - bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize); - J_ASSERT(bh != NULL); - journal->j_sb_buffer = bh; - journal->j_superblock = (journal_superblock_t *)bh->b_data; - - return journal; -} - -/* - * If the journal init or create aborts, we need to mark the journal - * superblock as being NULL to prevent the journal destroy from writing - * back a bogus superblock. - */ -static void journal_fail_superblock (journal_t *journal) -{ - struct buffer_head *bh = journal->j_sb_buffer; - brelse(bh); - journal->j_sb_buffer = NULL; -} - -/* - * Given a journal_t structure, initialise the various fields for - * startup of a new journaling session. We use this both when creating - * a journal, and after recovering an old journal to reset it for - * subsequent use. - */ - -static int journal_reset(journal_t *journal) -{ - journal_superblock_t *sb = journal->j_superblock; - unsigned long long first, last; - - first = be32_to_cpu(sb->s_first); - last = be32_to_cpu(sb->s_maxlen); - - journal->j_first = first; - journal->j_last = last; - - journal->j_head = first; - journal->j_tail = first; - journal->j_free = last - first; - - journal->j_tail_sequence = journal->j_transaction_sequence; - journal->j_commit_sequence = journal->j_transaction_sequence - 1; - journal->j_commit_request = journal->j_commit_sequence; - - journal->j_max_transaction_buffers = journal->j_maxlen / 4; - - /* Add the dynamic fields and write it to disk. */ - jbd2_journal_update_superblock(journal, 1); - jbd2_journal_start_thread(journal); - return 0; -} - -/** - * int jbd2_journal_create() - Initialise the new journal file - * @journal: Journal to create. This structure must have been initialised - * - * Given a journal_t structure which tells us which disk blocks we can - * use, create a new journal superblock and initialise all of the - * journal fields from scratch. - **/ -int jbd2_journal_create(journal_t *journal) -{ - unsigned long long blocknr; - struct buffer_head *bh; - journal_superblock_t *sb; - int i, err; - - if (journal->j_maxlen < JBD2_MIN_JOURNAL_BLOCKS) { - printk (KERN_ERR "Journal length (%d blocks) too short.\n", - journal->j_maxlen); - journal_fail_superblock(journal); - return -EINVAL; - } - - if (journal->j_inode == NULL) { - /* - * We don't know what block to start at! - */ - printk(KERN_EMERG - "%s: creation of journal on external device!\n", - __FUNCTION__); - BUG(); - } - - /* Zero out the entire journal on disk. We cannot afford to - have any blocks on disk beginning with JBD2_MAGIC_NUMBER. */ - jbd_debug(1, "JBD: Zeroing out journal blocks...\n"); - for (i = 0; i < journal->j_maxlen; i++) { - err = jbd2_journal_bmap(journal, i, &blocknr); - if (err) - return err; - bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize); - lock_buffer(bh); - memset (bh->b_data, 0, journal->j_blocksize); - BUFFER_TRACE(bh, "marking dirty"); - mark_buffer_dirty(bh); - BUFFER_TRACE(bh, "marking uptodate"); - set_buffer_uptodate(bh); - unlock_buffer(bh); - __brelse(bh); - } - - sync_blockdev(journal->j_dev); - jbd_debug(1, "JBD: journal cleared.\n"); - - /* OK, fill in the initial static fields in the new superblock */ - sb = journal->j_superblock; - - sb->s_header.h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER); - sb->s_header.h_blocktype = cpu_to_be32(JBD2_SUPERBLOCK_V2); - - sb->s_blocksize = cpu_to_be32(journal->j_blocksize); - sb->s_maxlen = cpu_to_be32(journal->j_maxlen); - sb->s_first = cpu_to_be32(1); - - journal->j_transaction_sequence = 1; - - journal->j_flags &= ~JBD2_ABORT; - journal->j_format_version = 2; - - return journal_reset(journal); -} - -/** - * void jbd2_journal_update_superblock() - Update journal sb on disk. - * @journal: The journal to update. - * @wait: Set to '0' if you don't want to wait for IO completion. - * - * Update a journal's dynamic superblock fields and write it to disk, - * optionally waiting for the IO to complete. - */ -void jbd2_journal_update_superblock(journal_t *journal, int wait) -{ - journal_superblock_t *sb = journal->j_superblock; - struct buffer_head *bh = journal->j_sb_buffer; - - /* - * As a special case, if the on-disk copy is already marked as needing - * no recovery (s_start == 0) and there are no outstanding transactions - * in the filesystem, then we can safely defer the superblock update - * until the next commit by setting JBD2_FLUSHED. This avoids - * attempting a write to a potential-readonly device. - */ - if (sb->s_start == 0 && journal->j_tail_sequence == - journal->j_transaction_sequence) { - jbd_debug(1,"JBD: Skipping superblock update on recovered sb " - "(start %ld, seq %d, errno %d)\n", - journal->j_tail, journal->j_tail_sequence, - journal->j_errno); - goto out; - } - - spin_lock(&journal->j_state_lock); - jbd_debug(1,"JBD: updating superblock (start %ld, seq %d, errno %d)\n", - journal->j_tail, journal->j_tail_sequence, journal->j_errno); - - sb->s_sequence = cpu_to_be32(journal->j_tail_sequence); - sb->s_start = cpu_to_be32(journal->j_tail); - sb->s_errno = cpu_to_be32(journal->j_errno); - spin_unlock(&journal->j_state_lock); - - BUFFER_TRACE(bh, "marking dirty"); - mark_buffer_dirty(bh); - if (wait) - sync_dirty_buffer(bh); - else - ll_rw_block(SWRITE, 1, &bh); - -out: - /* If we have just flushed the log (by marking s_start==0), then - * any future commit will have to be careful to update the - * superblock again to re-record the true start of the log. */ - - spin_lock(&journal->j_state_lock); - if (sb->s_start) - journal->j_flags &= ~JBD2_FLUSHED; - else - journal->j_flags |= JBD2_FLUSHED; - spin_unlock(&journal->j_state_lock); -} - -/* - * Read the superblock for a given journal, performing initial - * validation of the format. - */ - -static int journal_get_superblock(journal_t *journal) -{ - struct buffer_head *bh; - journal_superblock_t *sb; - int err = -EIO; - - bh = journal->j_sb_buffer; - - J_ASSERT(bh != NULL); - if (!buffer_uptodate(bh)) { - ll_rw_block(READ, 1, &bh); - wait_on_buffer(bh); - if (!buffer_uptodate(bh)) { - printk (KERN_ERR - "JBD: IO error reading journal superblock\n"); - goto out; - } - } - - sb = journal->j_superblock; - - err = -EINVAL; - - if (sb->s_header.h_magic != cpu_to_be32(JBD2_MAGIC_NUMBER) || - sb->s_blocksize != cpu_to_be32(journal->j_blocksize)) { - printk(KERN_WARNING "JBD: no valid journal superblock found\n"); - goto out; - } - - switch(be32_to_cpu(sb->s_header.h_blocktype)) { - case JBD2_SUPERBLOCK_V1: - journal->j_format_version = 1; - break; - case JBD2_SUPERBLOCK_V2: - journal->j_format_version = 2; - break; - default: - printk(KERN_WARNING "JBD: unrecognised superblock format ID\n"); - goto out; - } - - if (be32_to_cpu(sb->s_maxlen) < journal->j_maxlen) - journal->j_maxlen = be32_to_cpu(sb->s_maxlen); - else if (be32_to_cpu(sb->s_maxlen) > journal->j_maxlen) { - printk (KERN_WARNING "JBD: journal file too short\n"); - goto out; - } - - return 0; - -out: - journal_fail_superblock(journal); - return err; -} - -/* - * Load the on-disk journal superblock and read the key fields into the - * journal_t. - */ - -static int load_superblock(journal_t *journal) -{ - int err; - journal_superblock_t *sb; - - err = journal_get_superblock(journal); - if (err) - return err; - - sb = journal->j_superblock; - - journal->j_tail_sequence = be32_to_cpu(sb->s_sequence); - journal->j_tail = be32_to_cpu(sb->s_start); - journal->j_first = be32_to_cpu(sb->s_first); - journal->j_last = be32_to_cpu(sb->s_maxlen); - journal->j_errno = be32_to_cpu(sb->s_errno); - - return 0; -} - - -/** - * int jbd2_journal_load() - Read journal from disk. - * @journal: Journal to act on. - * - * Given a journal_t structure which tells us which disk blocks contain - * a journal, read the journal from disk to initialise the in-memory - * structures. - */ -int jbd2_journal_load(journal_t *journal) -{ - int err; - journal_superblock_t *sb; - - err = load_superblock(journal); - if (err) - return err; - - sb = journal->j_superblock; - /* If this is a V2 superblock, then we have to check the - * features flags on it. */ - - if (journal->j_format_version >= 2) { - if ((sb->s_feature_ro_compat & - ~cpu_to_be32(JBD2_KNOWN_ROCOMPAT_FEATURES)) || - (sb->s_feature_incompat & - ~cpu_to_be32(JBD2_KNOWN_INCOMPAT_FEATURES))) { - printk (KERN_WARNING - "JBD: Unrecognised features on journal\n"); - return -EINVAL; - } - } - - /* - * Create a slab for this blocksize - */ - err = jbd2_journal_create_jbd_slab(be32_to_cpu(sb->s_blocksize)); - if (err) - return err; - - /* Let the recovery code check whether it needs to recover any - * data from the journal. */ - if (jbd2_journal_recover(journal)) - goto recovery_error; - - /* OK, we've finished with the dynamic journal bits: - * reinitialise the dynamic contents of the superblock in memory - * and reset them on disk. */ - if (journal_reset(journal)) - goto recovery_error; - - journal->j_flags &= ~JBD2_ABORT; - journal->j_flags |= JBD2_LOADED; - return 0; - -recovery_error: - printk (KERN_WARNING "JBD: recovery failed\n"); - return -EIO; -} - -/** - * void jbd2_journal_destroy() - Release a journal_t structure. - * @journal: Journal to act on. - * - * Release a journal_t structure once it is no longer in use by the - * journaled object. - */ -void jbd2_journal_destroy(journal_t *journal) -{ - /* Wait for the commit thread to wake up and die. */ - journal_kill_thread(journal); - - /* Force a final log commit */ - if (journal->j_running_transaction) - jbd2_journal_commit_transaction(journal); - - /* Force any old transactions to disk */ - - /* Totally anal locking here... */ - spin_lock(&journal->j_list_lock); - while (journal->j_checkpoint_transactions != NULL) { - spin_unlock(&journal->j_list_lock); - jbd2_log_do_checkpoint(journal); - spin_lock(&journal->j_list_lock); - } - - J_ASSERT(journal->j_running_transaction == NULL); - J_ASSERT(journal->j_committing_transaction == NULL); - J_ASSERT(journal->j_checkpoint_transactions == NULL); - spin_unlock(&journal->j_list_lock); - - /* We can now mark the journal as empty. */ - journal->j_tail = 0; - journal->j_tail_sequence = ++journal->j_transaction_sequence; - if (journal->j_sb_buffer) { - jbd2_journal_update_superblock(journal, 1); - brelse(journal->j_sb_buffer); - } - - if (journal->j_inode) - iput(journal->j_inode); - if (journal->j_revoke) - jbd2_journal_destroy_revoke(journal); - kfree(journal->j_wbuf); - kfree(journal); -} - - -/** - *int jbd2_journal_check_used_features () - Check if features specified are used. - * @journal: Journal to check. - * @compat: bitmask of compatible features - * @ro: bitmask of features that force read-only mount - * @incompat: bitmask of incompatible features - * - * Check whether the journal uses all of a given set of - * features. Return true (non-zero) if it does. - **/ - -int jbd2_journal_check_used_features (journal_t *journal, unsigned long compat, - unsigned long ro, unsigned long incompat) -{ - journal_superblock_t *sb; - - if (!compat && !ro && !incompat) - return 1; - if (journal->j_format_version == 1) - return 0; - - sb = journal->j_superblock; - - if (((be32_to_cpu(sb->s_feature_compat) & compat) == compat) && - ((be32_to_cpu(sb->s_feature_ro_compat) & ro) == ro) && - ((be32_to_cpu(sb->s_feature_incompat) & incompat) == incompat)) - return 1; - - return 0; -} - -/** - * int jbd2_journal_check_available_features() - Check feature set in journalling layer - * @journal: Journal to check. - * @compat: bitmask of compatible features - * @ro: bitmask of features that force read-only mount - * @incompat: bitmask of incompatible features - * - * Check whether the journaling code supports the use of - * all of a given set of features on this journal. Return true - * (non-zero) if it can. */ - -int jbd2_journal_check_available_features (journal_t *journal, unsigned long compat, - unsigned long ro, unsigned long incompat) -{ - journal_superblock_t *sb; - - if (!compat && !ro && !incompat) - return 1; - - sb = journal->j_superblock; - - /* We can support any known requested features iff the - * superblock is in version 2. Otherwise we fail to support any - * extended sb features. */ - - if (journal->j_format_version != 2) - return 0; - - if ((compat & JBD2_KNOWN_COMPAT_FEATURES) == compat && - (ro & JBD2_KNOWN_ROCOMPAT_FEATURES) == ro && - (incompat & JBD2_KNOWN_INCOMPAT_FEATURES) == incompat) - return 1; - - return 0; -} - -/** - * int jbd2_journal_set_features () - Mark a given journal feature in the superblock - * @journal: Journal to act on. - * @compat: bitmask of compatible features - * @ro: bitmask of features that force read-only mount - * @incompat: bitmask of incompatible features - * - * Mark a given journal feature as present on the - * superblock. Returns true if the requested features could be set. - * - */ - -int jbd2_journal_set_features (journal_t *journal, unsigned long compat, - unsigned long ro, unsigned long incompat) -{ - journal_superblock_t *sb; - - if (jbd2_journal_check_used_features(journal, compat, ro, incompat)) - return 1; - - if (!jbd2_journal_check_available_features(journal, compat, ro, incompat)) - return 0; - - jbd_debug(1, "Setting new features 0x%lx/0x%lx/0x%lx\n", - compat, ro, incompat); - - sb = journal->j_superblock; - - sb->s_feature_compat |= cpu_to_be32(compat); - sb->s_feature_ro_compat |= cpu_to_be32(ro); - sb->s_feature_incompat |= cpu_to_be32(incompat); - - return 1; -} - - -/** - * int jbd2_journal_update_format () - Update on-disk journal structure. - * @journal: Journal to act on. - * - * Given an initialised but unloaded journal struct, poke about in the - * on-disk structure to update it to the most recent supported version. - */ -int jbd2_journal_update_format (journal_t *journal) -{ - journal_superblock_t *sb; - int err; - - err = journal_get_superblock(journal); - if (err) - return err; - - sb = journal->j_superblock; - - switch (be32_to_cpu(sb->s_header.h_blocktype)) { - case JBD2_SUPERBLOCK_V2: - return 0; - case JBD2_SUPERBLOCK_V1: - return journal_convert_superblock_v1(journal, sb); - default: - break; - } - return -EINVAL; -} - -static int journal_convert_superblock_v1(journal_t *journal, - journal_superblock_t *sb) -{ - int offset, blocksize; - struct buffer_head *bh; - - printk(KERN_WARNING - "JBD: Converting superblock from version 1 to 2.\n"); - - /* Pre-initialise new fields to zero */ - offset = ((char *) &(sb->s_feature_compat)) - ((char *) sb); - blocksize = be32_to_cpu(sb->s_blocksize); - memset(&sb->s_feature_compat, 0, blocksize-offset); - - sb->s_nr_users = cpu_to_be32(1); - sb->s_header.h_blocktype = cpu_to_be32(JBD2_SUPERBLOCK_V2); - journal->j_format_version = 2; - - bh = journal->j_sb_buffer; - BUFFER_TRACE(bh, "marking dirty"); - mark_buffer_dirty(bh); - sync_dirty_buffer(bh); - return 0; -} - - -/** - * int jbd2_journal_flush () - Flush journal - * @journal: Journal to act on. - * - * Flush all data for a given journal to disk and empty the journal. - * Filesystems can use this when remounting readonly to ensure that - * recovery does not need to happen on remount. - */ - -int jbd2_journal_flush(journal_t *journal) -{ - int err = 0; - transaction_t *transaction = NULL; - unsigned long old_tail; - - spin_lock(&journal->j_state_lock); - - /* Force everything buffered to the log... */ - if (journal->j_running_transaction) { - transaction = journal->j_running_transaction; - __jbd2_log_start_commit(journal, transaction->t_tid); - } else if (journal->j_committing_transaction) - transaction = journal->j_committing_transaction; - - /* Wait for the log commit to complete... */ - if (transaction) { - tid_t tid = transaction->t_tid; - - spin_unlock(&journal->j_state_lock); - jbd2_log_wait_commit(journal, tid); - } else { - spin_unlock(&journal->j_state_lock); - } - - /* ...and flush everything in the log out to disk. */ - spin_lock(&journal->j_list_lock); - while (!err && journal->j_checkpoint_transactions != NULL) { - spin_unlock(&journal->j_list_lock); - err = jbd2_log_do_checkpoint(journal); - spin_lock(&journal->j_list_lock); - } - spin_unlock(&journal->j_list_lock); - jbd2_cleanup_journal_tail(journal); - - /* Finally, mark the journal as really needing no recovery. - * This sets s_start==0 in the underlying superblock, which is - * the magic code for a fully-recovered superblock. Any future - * commits of data to the journal will restore the current - * s_start value. */ - spin_lock(&journal->j_state_lock); - old_tail = journal->j_tail; - journal->j_tail = 0; - spin_unlock(&journal->j_state_lock); - jbd2_journal_update_superblock(journal, 1); - spin_lock(&journal->j_state_lock); - journal->j_tail = old_tail; - - J_ASSERT(!journal->j_running_transaction); - J_ASSERT(!journal->j_committing_transaction); - J_ASSERT(!journal->j_checkpoint_transactions); - J_ASSERT(journal->j_head == journal->j_tail); - J_ASSERT(journal->j_tail_sequence == journal->j_transaction_sequence); - spin_unlock(&journal->j_state_lock); - return err; -} - -/** - * int jbd2_journal_wipe() - Wipe journal contents - * @journal: Journal to act on. - * @write: flag (see below) - * - * Wipe out all of the contents of a journal, safely. This will produce - * a warning if the journal contains any valid recovery information. - * Must be called between journal_init_*() and jbd2_journal_load(). - * - * If 'write' is non-zero, then we wipe out the journal on disk; otherwise - * we merely suppress recovery. - */ - -int jbd2_journal_wipe(journal_t *journal, int write) -{ - journal_superblock_t *sb; - int err = 0; - - J_ASSERT (!(journal->j_flags & JBD2_LOADED)); - - err = load_superblock(journal); - if (err) - return err; - - sb = journal->j_superblock; - - if (!journal->j_tail) - goto no_recovery; - - printk (KERN_WARNING "JBD: %s recovery information on journal\n", - write ? "Clearing" : "Ignoring"); - - err = jbd2_journal_skip_recovery(journal); - if (write) - jbd2_journal_update_superblock(journal, 1); - - no_recovery: - return err; -} - -/* - * journal_dev_name: format a character string to describe on what - * device this journal is present. - */ - -static const char *journal_dev_name(journal_t *journal, char *buffer) -{ - struct block_device *bdev; - - if (journal->j_inode) - bdev = journal->j_inode->i_sb->s_bdev; - else - bdev = journal->j_dev; - - return bdevname(bdev, buffer); -} - -/* - * Journal abort has very specific semantics, which we describe - * for journal abort. - * - * Two internal function, which provide abort to te jbd layer - * itself are here. - */ - -/* - * Quick version for internal journal use (doesn't lock the journal). - * Aborts hard --- we mark the abort as occurred, but do _nothing_ else, - * and don't attempt to make any other journal updates. - */ -void __jbd2_journal_abort_hard(journal_t *journal) -{ - transaction_t *transaction; - char b[BDEVNAME_SIZE]; - - if (journal->j_flags & JBD2_ABORT) - return; - - printk(KERN_ERR "Aborting journal on device %s.\n", - journal_dev_name(journal, b)); - - spin_lock(&journal->j_state_lock); - journal->j_flags |= JBD2_ABORT; - transaction = journal->j_running_transaction; - if (transaction) - __jbd2_log_start_commit(journal, transaction->t_tid); - spin_unlock(&journal->j_state_lock); -} - -/* Soft abort: record the abort error status in the journal superblock, - * but don't do any other IO. */ -static void __journal_abort_soft (journal_t *journal, int errno) -{ - if (journal->j_flags & JBD2_ABORT) - return; - - if (!journal->j_errno) - journal->j_errno = errno; - - __jbd2_journal_abort_hard(journal); - - if (errno) - jbd2_journal_update_superblock(journal, 1); -} - -/** - * void jbd2_journal_abort () - Shutdown the journal immediately. - * @journal: the journal to shutdown. - * @errno: an error number to record in the journal indicating - * the reason for the shutdown. - * - * Perform a complete, immediate shutdown of the ENTIRE - * journal (not of a single transaction). This operation cannot be - * undone without closing and reopening the journal. - * - * The jbd2_journal_abort function is intended to support higher level error - * recovery mechanisms such as the ext2/ext3 remount-readonly error - * mode. - * - * Journal abort has very specific semantics. Any existing dirty, - * unjournaled buffers in the main filesystem will still be written to - * disk by bdflush, but the journaling mechanism will be suspended - * immediately and no further transaction commits will be honoured. - * - * Any dirty, journaled buffers will be written back to disk without - * hitting the journal. Atomicity cannot be guaranteed on an aborted - * filesystem, but we _do_ attempt to leave as much data as possible - * behind for fsck to use for cleanup. - * - * Any attempt to get a new transaction handle on a journal which is in - * ABORT state will just result in an -EROFS error return. A - * jbd2_journal_stop on an existing handle will return -EIO if we have - * entered abort state during the update. - * - * Recursive transactions are not disturbed by journal abort until the - * final jbd2_journal_stop, which will receive the -EIO error. - * - * Finally, the jbd2_journal_abort call allows the caller to supply an errno - * which will be recorded (if possible) in the journal superblock. This - * allows a client to record failure conditions in the middle of a - * transaction without having to complete the transaction to record the - * failure to disk. ext3_error, for example, now uses this - * functionality. - * - * Errors which originate from within the journaling layer will NOT - * supply an errno; a null errno implies that absolutely no further - * writes are done to the journal (unless there are any already in - * progress). - * - */ - -void jbd2_journal_abort(journal_t *journal, int errno) -{ - __journal_abort_soft(journal, errno); -} - -/** - * int jbd2_journal_errno () - returns the journal's error state. - * @journal: journal to examine. - * - * This is the errno numbet set with jbd2_journal_abort(), the last - * time the journal was mounted - if the journal was stopped - * without calling abort this will be 0. - * - * If the journal has been aborted on this mount time -EROFS will - * be returned. - */ -int jbd2_journal_errno(journal_t *journal) -{ - int err; - - spin_lock(&journal->j_state_lock); - if (journal->j_flags & JBD2_ABORT) - err = -EROFS; - else - err = journal->j_errno; - spin_unlock(&journal->j_state_lock); - return err; -} - -/** - * int jbd2_journal_clear_err () - clears the journal's error state - * @journal: journal to act on. - * - * An error must be cleared or Acked to take a FS out of readonly - * mode. - */ -int jbd2_journal_clear_err(journal_t *journal) -{ - int err = 0; - - spin_lock(&journal->j_state_lock); - if (journal->j_flags & JBD2_ABORT) - err = -EROFS; - else - journal->j_errno = 0; - spin_unlock(&journal->j_state_lock); - return err; -} - -/** - * void jbd2_journal_ack_err() - Ack journal err. - * @journal: journal to act on. - * - * An error must be cleared or Acked to take a FS out of readonly - * mode. - */ -void jbd2_journal_ack_err(journal_t *journal) -{ - spin_lock(&journal->j_state_lock); - if (journal->j_errno) - journal->j_flags |= JBD2_ACK_ERR; - spin_unlock(&journal->j_state_lock); -} - -int jbd2_journal_blocks_per_page(struct inode *inode) -{ - return 1 << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits); -} - -/* - * helper functions to deal with 32 or 64bit block numbers. - */ -size_t journal_tag_bytes(journal_t *journal) -{ - if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT)) - return JBD_TAG_SIZE64; - else - return JBD_TAG_SIZE32; -} - -/* - * Simple support for retrying memory allocations. Introduced to help to - * debug different VM deadlock avoidance strategies. - */ -void * __jbd2_kmalloc (const char *where, size_t size, gfp_t flags, int retry) -{ - return kmalloc(size, flags | (retry ? __GFP_NOFAIL : 0)); -} - -/* - * jbd slab management: create 1k, 2k, 4k, 8k slabs as needed - * and allocate frozen and commit buffers from these slabs. - * - * Reason for doing this is to avoid, SLAB_DEBUG - since it could - * cause bh to cross page boundary. - */ - -#define JBD_MAX_SLABS 5 -#define JBD_SLAB_INDEX(size) (size >> 11) - -static kmem_cache_t *jbd_slab[JBD_MAX_SLABS]; -static const char *jbd_slab_names[JBD_MAX_SLABS] = { - "jbd2_1k", "jbd2_2k", "jbd2_4k", NULL, "jbd2_8k" -}; - -static void jbd2_journal_destroy_jbd_slabs(void) -{ - int i; - - for (i = 0; i < JBD_MAX_SLABS; i++) { - if (jbd_slab[i]) - kmem_cache_destroy(jbd_slab[i]); - jbd_slab[i] = NULL; - } -} - -static int jbd2_journal_create_jbd_slab(size_t slab_size) -{ - int i = JBD_SLAB_INDEX(slab_size); - - BUG_ON(i >= JBD_MAX_SLABS); - - /* - * Check if we already have a slab created for this size - */ - if (jbd_slab[i]) - return 0; - - /* - * Create a slab and force alignment to be same as slabsize - - * this will make sure that allocations won't cross the page - * boundary. - */ - jbd_slab[i] = kmem_cache_create(jbd_slab_names[i], - slab_size, slab_size, 0, NULL, NULL); - if (!jbd_slab[i]) { - printk(KERN_EMERG "JBD: no memory for jbd_slab cache\n"); - return -ENOMEM; - } - return 0; -} - -void * jbd2_slab_alloc(size_t size, gfp_t flags) -{ - int idx; - - idx = JBD_SLAB_INDEX(size); - BUG_ON(jbd_slab[idx] == NULL); - return kmem_cache_alloc(jbd_slab[idx], flags | __GFP_NOFAIL); -} - -void jbd2_slab_free(void *ptr, size_t size) -{ - int idx; - - idx = JBD_SLAB_INDEX(size); - BUG_ON(jbd_slab[idx] == NULL); - kmem_cache_free(jbd_slab[idx], ptr); -} - -/* - * Journal_head storage management - */ -static kmem_cache_t *jbd2_journal_head_cache; -#ifdef CONFIG_JBD_DEBUG -static atomic_t nr_journal_heads = ATOMIC_INIT(0); -#endif - -static int journal_init_jbd2_journal_head_cache(void) -{ - int retval; - - J_ASSERT(jbd2_journal_head_cache == 0); - jbd2_journal_head_cache = kmem_cache_create("jbd2_journal_head", - sizeof(struct journal_head), - 0, /* offset */ - 0, /* flags */ - NULL, /* ctor */ - NULL); /* dtor */ - retval = 0; - if (jbd2_journal_head_cache == 0) { - retval = -ENOMEM; - printk(KERN_EMERG "JBD: no memory for journal_head cache\n"); - } - return retval; -} - -static void jbd2_journal_destroy_jbd2_journal_head_cache(void) -{ - J_ASSERT(jbd2_journal_head_cache != NULL); - kmem_cache_destroy(jbd2_journal_head_cache); - jbd2_journal_head_cache = NULL; -} - -/* - * journal_head splicing and dicing - */ -static struct journal_head *journal_alloc_journal_head(void) -{ - struct journal_head *ret; - static unsigned long last_warning; - -#ifdef CONFIG_JBD_DEBUG - atomic_inc(&nr_journal_heads); -#endif - ret = kmem_cache_alloc(jbd2_journal_head_cache, GFP_NOFS); - if (ret == 0) { - jbd_debug(1, "out of memory for journal_head\n"); - if (time_after(jiffies, last_warning + 5*HZ)) { - printk(KERN_NOTICE "ENOMEM in %s, retrying.\n", - __FUNCTION__); - last_warning = jiffies; - } - while (ret == 0) { - yield(); - ret = kmem_cache_alloc(jbd2_journal_head_cache, GFP_NOFS); - } - } - return ret; -} - -static void journal_free_journal_head(struct journal_head *jh) -{ -#ifdef CONFIG_JBD_DEBUG - atomic_dec(&nr_journal_heads); - memset(jh, JBD_POISON_FREE, sizeof(*jh)); -#endif - kmem_cache_free(jbd2_journal_head_cache, jh); -} - -/* - * A journal_head is attached to a buffer_head whenever JBD has an - * interest in the buffer. - * - * Whenever a buffer has an attached journal_head, its ->b_state:BH_JBD bit - * is set. This bit is tested in core kernel code where we need to take - * JBD-specific actions. Testing the zeroness of ->b_private is not reliable - * there. - * - * When a buffer has its BH_JBD bit set, its ->b_count is elevated by one. - * - * When a buffer has its BH_JBD bit set it is immune from being released by - * core kernel code, mainly via ->b_count. - * - * A journal_head may be detached from its buffer_head when the journal_head's - * b_transaction, b_cp_transaction and b_next_transaction pointers are NULL. - * Various places in JBD call jbd2_journal_remove_journal_head() to indicate that the - * journal_head can be dropped if needed. - * - * Various places in the kernel want to attach a journal_head to a buffer_head - * _before_ attaching the journal_head to a transaction. To protect the - * journal_head in this situation, jbd2_journal_add_journal_head elevates the - * journal_head's b_jcount refcount by one. The caller must call - * jbd2_journal_put_journal_head() to undo this. - * - * So the typical usage would be: - * - * (Attach a journal_head if needed. Increments b_jcount) - * struct journal_head *jh = jbd2_journal_add_journal_head(bh); - * ... - * jh->b_transaction = xxx; - * jbd2_journal_put_journal_head(jh); - * - * Now, the journal_head's b_jcount is zero, but it is safe from being released - * because it has a non-zero b_transaction. - */ - -/* - * Give a buffer_head a journal_head. - * - * Doesn't need the journal lock. - * May sleep. - */ -struct journal_head *jbd2_journal_add_journal_head(struct buffer_head *bh) -{ - struct journal_head *jh; - struct journal_head *new_jh = NULL; - -repeat: - if (!buffer_jbd(bh)) { - new_jh = journal_alloc_journal_head(); - memset(new_jh, 0, sizeof(*new_jh)); - } - - jbd_lock_bh_journal_head(bh); - if (buffer_jbd(bh)) { - jh = bh2jh(bh); - } else { - J_ASSERT_BH(bh, - (atomic_read(&bh->b_count) > 0) || - (bh->b_page && bh->b_page->mapping)); - - if (!new_jh) { - jbd_unlock_bh_journal_head(bh); - goto repeat; - } - - jh = new_jh; - new_jh = NULL; /* We consumed it */ - set_buffer_jbd(bh); - bh->b_private = jh; - jh->b_bh = bh; - get_bh(bh); - BUFFER_TRACE(bh, "added journal_head"); - } - jh->b_jcount++; - jbd_unlock_bh_journal_head(bh); - if (new_jh) - journal_free_journal_head(new_jh); - return bh->b_private; -} - -/* - * Grab a ref against this buffer_head's journal_head. If it ended up not - * having a journal_head, return NULL - */ -struct journal_head *jbd2_journal_grab_journal_head(struct buffer_head *bh) -{ - struct journal_head *jh = NULL; - - jbd_lock_bh_journal_head(bh); - if (buffer_jbd(bh)) { - jh = bh2jh(bh); - jh->b_jcount++; - } - jbd_unlock_bh_journal_head(bh); - return jh; -} - -static void __journal_remove_journal_head(struct buffer_head *bh) -{ - struct journal_head *jh = bh2jh(bh); - - J_ASSERT_JH(jh, jh->b_jcount >= 0); - - get_bh(bh); - if (jh->b_jcount == 0) { - if (jh->b_transaction == NULL && - jh->b_next_transaction == NULL && - jh->b_cp_transaction == NULL) { - J_ASSERT_JH(jh, jh->b_jlist == BJ_None); - J_ASSERT_BH(bh, buffer_jbd(bh)); - J_ASSERT_BH(bh, jh2bh(jh) == bh); - BUFFER_TRACE(bh, "remove journal_head"); - if (jh->b_frozen_data) { - printk(KERN_WARNING "%s: freeing " - "b_frozen_data\n", - __FUNCTION__); - jbd2_slab_free(jh->b_frozen_data, bh->b_size); - } - if (jh->b_committed_data) { - printk(KERN_WARNING "%s: freeing " - "b_committed_data\n", - __FUNCTION__); - jbd2_slab_free(jh->b_committed_data, bh->b_size); - } - bh->b_private = NULL; - jh->b_bh = NULL; /* debug, really */ - clear_buffer_jbd(bh); - __brelse(bh); - journal_free_journal_head(jh); - } else { - BUFFER_TRACE(bh, "journal_head was locked"); - } - } -} - -/* - * jbd2_journal_remove_journal_head(): if the buffer isn't attached to a transaction - * and has a zero b_jcount then remove and release its journal_head. If we did - * see that the buffer is not used by any transaction we also "logically" - * decrement ->b_count. - * - * We in fact take an additional increment on ->b_count as a convenience, - * because the caller usually wants to do additional things with the bh - * after calling here. - * The caller of jbd2_journal_remove_journal_head() *must* run __brelse(bh) at some - * time. Once the caller has run __brelse(), the buffer is eligible for - * reaping by try_to_free_buffers(). - */ -void jbd2_journal_remove_journal_head(struct buffer_head *bh) -{ - jbd_lock_bh_journal_head(bh); - __journal_remove_journal_head(bh); - jbd_unlock_bh_journal_head(bh); -} - -/* - * Drop a reference on the passed journal_head. If it fell to zero then try to - * release the journal_head from the buffer_head. - */ -void jbd2_journal_put_journal_head(struct journal_head *jh) -{ - struct buffer_head *bh = jh2bh(jh); - - jbd_lock_bh_journal_head(bh); - J_ASSERT_JH(jh, jh->b_jcount > 0); - --jh->b_jcount; - if (!jh->b_jcount && !jh->b_transaction) { - __journal_remove_journal_head(bh); - __brelse(bh); - } - jbd_unlock_bh_journal_head(bh); -} - -/* - * /proc tunables - */ -#if defined(CONFIG_JBD_DEBUG) -int jbd2_journal_enable_debug; -EXPORT_SYMBOL(jbd2_journal_enable_debug); -#endif - -#if defined(CONFIG_JBD_DEBUG) && defined(CONFIG_PROC_FS) - -static struct proc_dir_entry *proc_jbd_debug; - -static int read_jbd_debug(char *page, char **start, off_t off, - int count, int *eof, void *data) -{ - int ret; - - ret = sprintf(page + off, "%d\n", jbd2_journal_enable_debug); - *eof = 1; - return ret; -} - -static int write_jbd_debug(struct file *file, const char __user *buffer, - unsigned long count, void *data) -{ - char buf[32]; - - if (count > ARRAY_SIZE(buf) - 1) - count = ARRAY_SIZE(buf) - 1; - if (copy_from_user(buf, buffer, count)) - return -EFAULT; - buf[ARRAY_SIZE(buf) - 1] = '\0'; - jbd2_journal_enable_debug = simple_strtoul(buf, NULL, 10); - return count; -} - -#define JBD_PROC_NAME "sys/fs/jbd2-debug" - -static void __init create_jbd_proc_entry(void) -{ - proc_jbd_debug = create_proc_entry(JBD_PROC_NAME, 0644, NULL); - if (proc_jbd_debug) { - /* Why is this so hard? */ - proc_jbd_debug->read_proc = read_jbd_debug; - proc_jbd_debug->write_proc = write_jbd_debug; - } -} - -static void __exit jbd2_remove_jbd_proc_entry(void) -{ - if (proc_jbd_debug) - remove_proc_entry(JBD_PROC_NAME, NULL); -} - -#else - -#define create_jbd_proc_entry() do {} while (0) -#define jbd2_remove_jbd_proc_entry() do {} while (0) - -#endif - -kmem_cache_t *jbd2_handle_cache; - -static int __init journal_init_handle_cache(void) -{ - jbd2_handle_cache = kmem_cache_create("jbd2_journal_handle", - sizeof(handle_t), - 0, /* offset */ - 0, /* flags */ - NULL, /* ctor */ - NULL); /* dtor */ - if (jbd2_handle_cache == NULL) { - printk(KERN_EMERG "JBD: failed to create handle cache\n"); - return -ENOMEM; - } - return 0; -} - -static void jbd2_journal_destroy_handle_cache(void) -{ - if (jbd2_handle_cache) - kmem_cache_destroy(jbd2_handle_cache); -} - -/* - * Module startup and shutdown - */ - -static int __init journal_init_caches(void) -{ - int ret; - - ret = jbd2_journal_init_revoke_caches(); - if (ret == 0) - ret = journal_init_jbd2_journal_head_cache(); - if (ret == 0) - ret = journal_init_handle_cache(); - return ret; -} - -static void jbd2_journal_destroy_caches(void) -{ - jbd2_journal_destroy_revoke_caches(); - jbd2_journal_destroy_jbd2_journal_head_cache(); - jbd2_journal_destroy_handle_cache(); - jbd2_journal_destroy_jbd_slabs(); -} - -static int __init journal_init(void) -{ - int ret; - - BUILD_BUG_ON(sizeof(struct journal_superblock_s) != 1024); - - ret = journal_init_caches(); - if (ret != 0) - jbd2_journal_destroy_caches(); - create_jbd_proc_entry(); - return ret; -} - -static void __exit journal_exit(void) -{ -#ifdef CONFIG_JBD_DEBUG - int n = atomic_read(&nr_journal_heads); - if (n) - printk(KERN_EMERG "JBD: leaked %d journal_heads!\n", n); -#endif - jbd2_remove_jbd_proc_entry(); - jbd2_journal_destroy_caches(); -} - -MODULE_LICENSE("GPL"); -module_init(journal_init); -module_exit(journal_exit); - diff --git a/trunk/fs/jbd2/recovery.c b/trunk/fs/jbd2/recovery.c deleted file mode 100644 index 9f10acafaf70..000000000000 --- a/trunk/fs/jbd2/recovery.c +++ /dev/null @@ -1,609 +0,0 @@ -/* - * linux/fs/recovery.c - * - * Written by Stephen C. Tweedie , 1999 - * - * Copyright 1999-2000 Red Hat Software --- All Rights Reserved - * - * This file is part of the Linux kernel and is made available under - * the terms of the GNU General Public License, version 2, or at your - * option, any later version, incorporated herein by reference. - * - * Journal recovery routines for the generic filesystem journaling code; - * part of the ext2fs journaling system. - */ - -#ifndef __KERNEL__ -#include "jfs_user.h" -#else -#include -#include -#include -#include -#include -#endif - -/* - * Maintain information about the progress of the recovery job, so that - * the different passes can carry information between them. - */ -struct recovery_info -{ - tid_t start_transaction; - tid_t end_transaction; - - int nr_replays; - int nr_revokes; - int nr_revoke_hits; -}; - -enum passtype {PASS_SCAN, PASS_REVOKE, PASS_REPLAY}; -static int do_one_pass(journal_t *journal, - struct recovery_info *info, enum passtype pass); -static int scan_revoke_records(journal_t *, struct buffer_head *, - tid_t, struct recovery_info *); - -#ifdef __KERNEL__ - -/* Release readahead buffers after use */ -static void journal_brelse_array(struct buffer_head *b[], int n) -{ - while (--n >= 0) - brelse (b[n]); -} - - -/* - * When reading from the journal, we are going through the block device - * layer directly and so there is no readahead being done for us. We - * need to implement any readahead ourselves if we want it to happen at - * all. Recovery is basically one long sequential read, so make sure we - * do the IO in reasonably large chunks. - * - * This is not so critical that we need to be enormously clever about - * the readahead size, though. 128K is a purely arbitrary, good-enough - * fixed value. - */ - -#define MAXBUF 8 -static int do_readahead(journal_t *journal, unsigned int start) -{ - int err; - unsigned int max, nbufs, next; - unsigned long long blocknr; - struct buffer_head *bh; - - struct buffer_head * bufs[MAXBUF]; - - /* Do up to 128K of readahead */ - max = start + (128 * 1024 / journal->j_blocksize); - if (max > journal->j_maxlen) - max = journal->j_maxlen; - - /* Do the readahead itself. We'll submit MAXBUF buffer_heads at - * a time to the block device IO layer. */ - - nbufs = 0; - - for (next = start; next < max; next++) { - err = jbd2_journal_bmap(journal, next, &blocknr); - - if (err) { - printk (KERN_ERR "JBD: bad block at offset %u\n", - next); - goto failed; - } - - bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize); - if (!bh) { - err = -ENOMEM; - goto failed; - } - - if (!buffer_uptodate(bh) && !buffer_locked(bh)) { - bufs[nbufs++] = bh; - if (nbufs == MAXBUF) { - ll_rw_block(READ, nbufs, bufs); - journal_brelse_array(bufs, nbufs); - nbufs = 0; - } - } else - brelse(bh); - } - - if (nbufs) - ll_rw_block(READ, nbufs, bufs); - err = 0; - -failed: - if (nbufs) - journal_brelse_array(bufs, nbufs); - return err; -} - -#endif /* __KERNEL__ */ - - -/* - * Read a block from the journal - */ - -static int jread(struct buffer_head **bhp, journal_t *journal, - unsigned int offset) -{ - int err; - unsigned long long blocknr; - struct buffer_head *bh; - - *bhp = NULL; - - if (offset >= journal->j_maxlen) { - printk(KERN_ERR "JBD: corrupted journal superblock\n"); - return -EIO; - } - - err = jbd2_journal_bmap(journal, offset, &blocknr); - - if (err) { - printk (KERN_ERR "JBD: bad block at offset %u\n", - offset); - return err; - } - - bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize); - if (!bh) - return -ENOMEM; - - if (!buffer_uptodate(bh)) { - /* If this is a brand new buffer, start readahead. - Otherwise, we assume we are already reading it. */ - if (!buffer_req(bh)) - do_readahead(journal, offset); - wait_on_buffer(bh); - } - - if (!buffer_uptodate(bh)) { - printk (KERN_ERR "JBD: Failed to read block at offset %u\n", - offset); - brelse(bh); - return -EIO; - } - - *bhp = bh; - return 0; -} - - -/* - * Count the number of in-use tags in a journal descriptor block. - */ - -static int count_tags(journal_t *journal, struct buffer_head *bh) -{ - char * tagp; - journal_block_tag_t * tag; - int nr = 0, size = journal->j_blocksize; - int tag_bytes = journal_tag_bytes(journal); - - tagp = &bh->b_data[sizeof(journal_header_t)]; - - while ((tagp - bh->b_data + tag_bytes) <= size) { - tag = (journal_block_tag_t *) tagp; - - nr++; - tagp += tag_bytes; - if (!(tag->t_flags & cpu_to_be32(JBD2_FLAG_SAME_UUID))) - tagp += 16; - - if (tag->t_flags & cpu_to_be32(JBD2_FLAG_LAST_TAG)) - break; - } - - return nr; -} - - -/* Make sure we wrap around the log correctly! */ -#define wrap(journal, var) \ -do { \ - if (var >= (journal)->j_last) \ - var -= ((journal)->j_last - (journal)->j_first); \ -} while (0) - -/** - * jbd2_journal_recover - recovers a on-disk journal - * @journal: the journal to recover - * - * The primary function for recovering the log contents when mounting a - * journaled device. - * - * Recovery is done in three passes. In the first pass, we look for the - * end of the log. In the second, we assemble the list of revoke - * blocks. In the third and final pass, we replay any un-revoked blocks - * in the log. - */ -int jbd2_journal_recover(journal_t *journal) -{ - int err; - journal_superblock_t * sb; - - struct recovery_info info; - - memset(&info, 0, sizeof(info)); - sb = journal->j_superblock; - - /* - * The journal superblock's s_start field (the current log head) - * is always zero if, and only if, the journal was cleanly - * unmounted. - */ - - if (!sb->s_start) { - jbd_debug(1, "No recovery required, last transaction %d\n", - be32_to_cpu(sb->s_sequence)); - journal->j_transaction_sequence = be32_to_cpu(sb->s_sequence) + 1; - return 0; - } - - err = do_one_pass(journal, &info, PASS_SCAN); - if (!err) - err = do_one_pass(journal, &info, PASS_REVOKE); - if (!err) - err = do_one_pass(journal, &info, PASS_REPLAY); - - jbd_debug(0, "JBD: recovery, exit status %d, " - "recovered transactions %u to %u\n", - err, info.start_transaction, info.end_transaction); - jbd_debug(0, "JBD: Replayed %d and revoked %d/%d blocks\n", - info.nr_replays, info.nr_revoke_hits, info.nr_revokes); - - /* Restart the log at the next transaction ID, thus invalidating - * any existing commit records in the log. */ - journal->j_transaction_sequence = ++info.end_transaction; - - jbd2_journal_clear_revoke(journal); - sync_blockdev(journal->j_fs_dev); - return err; -} - -/** - * jbd2_journal_skip_recovery - Start journal and wipe exiting records - * @journal: journal to startup - * - * Locate any valid recovery information from the journal and set up the - * journal structures in memory to ignore it (presumably because the - * caller has evidence that it is out of date). - * This function does'nt appear to be exorted.. - * - * We perform one pass over the journal to allow us to tell the user how - * much recovery information is being erased, and to let us initialise - * the journal transaction sequence numbers to the next unused ID. - */ -int jbd2_journal_skip_recovery(journal_t *journal) -{ - int err; - journal_superblock_t * sb; - - struct recovery_info info; - - memset (&info, 0, sizeof(info)); - sb = journal->j_superblock; - - err = do_one_pass(journal, &info, PASS_SCAN); - - if (err) { - printk(KERN_ERR "JBD: error %d scanning journal\n", err); - ++journal->j_transaction_sequence; - } else { -#ifdef CONFIG_JBD_DEBUG - int dropped = info.end_transaction - be32_to_cpu(sb->s_sequence); -#endif - jbd_debug(0, - "JBD: ignoring %d transaction%s from the journal.\n", - dropped, (dropped == 1) ? "" : "s"); - journal->j_transaction_sequence = ++info.end_transaction; - } - - journal->j_tail = 0; - return err; -} - -static inline unsigned long long read_tag_block(int tag_bytes, journal_block_tag_t *tag) -{ - unsigned long long block = be32_to_cpu(tag->t_blocknr); - if (tag_bytes > JBD_TAG_SIZE32) - block |= (u64)be32_to_cpu(tag->t_blocknr_high) << 32; - return block; -} - -static int do_one_pass(journal_t *journal, - struct recovery_info *info, enum passtype pass) -{ - unsigned int first_commit_ID, next_commit_ID; - unsigned long next_log_block; - int err, success = 0; - journal_superblock_t * sb; - journal_header_t * tmp; - struct buffer_head * bh; - unsigned int sequence; - int blocktype; - int tag_bytes = journal_tag_bytes(journal); - - /* Precompute the maximum metadata descriptors in a descriptor block */ - int MAX_BLOCKS_PER_DESC; - MAX_BLOCKS_PER_DESC = ((journal->j_blocksize-sizeof(journal_header_t)) - / tag_bytes); - - /* - * First thing is to establish what we expect to find in the log - * (in terms of transaction IDs), and where (in terms of log - * block offsets): query the superblock. - */ - - sb = journal->j_superblock; - next_commit_ID = be32_to_cpu(sb->s_sequence); - next_log_block = be32_to_cpu(sb->s_start); - - first_commit_ID = next_commit_ID; - if (pass == PASS_SCAN) - info->start_transaction = first_commit_ID; - - jbd_debug(1, "Starting recovery pass %d\n", pass); - - /* - * Now we walk through the log, transaction by transaction, - * making sure that each transaction has a commit block in the - * expected place. Each complete transaction gets replayed back - * into the main filesystem. - */ - - while (1) { - int flags; - char * tagp; - journal_block_tag_t * tag; - struct buffer_head * obh; - struct buffer_head * nbh; - - cond_resched(); /* We're under lock_kernel() */ - - /* If we already know where to stop the log traversal, - * check right now that we haven't gone past the end of - * the log. */ - - if (pass != PASS_SCAN) - if (tid_geq(next_commit_ID, info->end_transaction)) - break; - - jbd_debug(2, "Scanning for sequence ID %u at %lu/%lu\n", - next_commit_ID, next_log_block, journal->j_last); - - /* Skip over each chunk of the transaction looking - * either the next descriptor block or the final commit - * record. */ - - jbd_debug(3, "JBD: checking block %ld\n", next_log_block); - err = jread(&bh, journal, next_log_block); - if (err) - goto failed; - - next_log_block++; - wrap(journal, next_log_block); - - /* What kind of buffer is it? - * - * If it is a descriptor block, check that it has the - * expected sequence number. Otherwise, we're all done - * here. */ - - tmp = (journal_header_t *)bh->b_data; - - if (tmp->h_magic != cpu_to_be32(JBD2_MAGIC_NUMBER)) { - brelse(bh); - break; - } - - blocktype = be32_to_cpu(tmp->h_blocktype); - sequence = be32_to_cpu(tmp->h_sequence); - jbd_debug(3, "Found magic %d, sequence %d\n", - blocktype, sequence); - - if (sequence != next_commit_ID) { - brelse(bh); - break; - } - - /* OK, we have a valid descriptor block which matches - * all of the sequence number checks. What are we going - * to do with it? That depends on the pass... */ - - switch(blocktype) { - case JBD2_DESCRIPTOR_BLOCK: - /* If it is a valid descriptor block, replay it - * in pass REPLAY; otherwise, just skip over the - * blocks it describes. */ - if (pass != PASS_REPLAY) { - next_log_block += count_tags(journal, bh); - wrap(journal, next_log_block); - brelse(bh); - continue; - } - - /* A descriptor block: we can now write all of - * the data blocks. Yay, useful work is finally - * getting done here! */ - - tagp = &bh->b_data[sizeof(journal_header_t)]; - while ((tagp - bh->b_data + tag_bytes) - <= journal->j_blocksize) { - unsigned long io_block; - - tag = (journal_block_tag_t *) tagp; - flags = be32_to_cpu(tag->t_flags); - - io_block = next_log_block++; - wrap(journal, next_log_block); - err = jread(&obh, journal, io_block); - if (err) { - /* Recover what we can, but - * report failure at the end. */ - success = err; - printk (KERN_ERR - "JBD: IO error %d recovering " - "block %ld in log\n", - err, io_block); - } else { - unsigned long long blocknr; - - J_ASSERT(obh != NULL); - blocknr = read_tag_block(tag_bytes, - tag); - - /* If the block has been - * revoked, then we're all done - * here. */ - if (jbd2_journal_test_revoke - (journal, blocknr, - next_commit_ID)) { - brelse(obh); - ++info->nr_revoke_hits; - goto skip_write; - } - - /* Find a buffer for the new - * data being restored */ - nbh = __getblk(journal->j_fs_dev, - blocknr, - journal->j_blocksize); - if (nbh == NULL) { - printk(KERN_ERR - "JBD: Out of memory " - "during recovery.\n"); - err = -ENOMEM; - brelse(bh); - brelse(obh); - goto failed; - } - - lock_buffer(nbh); - memcpy(nbh->b_data, obh->b_data, - journal->j_blocksize); - if (flags & JBD2_FLAG_ESCAPE) { - *((__be32 *)bh->b_data) = - cpu_to_be32(JBD2_MAGIC_NUMBER); - } - - BUFFER_TRACE(nbh, "marking dirty"); - set_buffer_uptodate(nbh); - mark_buffer_dirty(nbh); - BUFFER_TRACE(nbh, "marking uptodate"); - ++info->nr_replays; - /* ll_rw_block(WRITE, 1, &nbh); */ - unlock_buffer(nbh); - brelse(obh); - brelse(nbh); - } - - skip_write: - tagp += tag_bytes; - if (!(flags & JBD2_FLAG_SAME_UUID)) - tagp += 16; - - if (flags & JBD2_FLAG_LAST_TAG) - break; - } - - brelse(bh); - continue; - - case JBD2_COMMIT_BLOCK: - /* Found an expected commit block: not much to - * do other than move on to the next sequence - * number. */ - brelse(bh); - next_commit_ID++; - continue; - - case JBD2_REVOKE_BLOCK: - /* If we aren't in the REVOKE pass, then we can - * just skip over this block. */ - if (pass != PASS_REVOKE) { - brelse(bh); - continue; - } - - err = scan_revoke_records(journal, bh, - next_commit_ID, info); - brelse(bh); - if (err) - goto failed; - continue; - - default: - jbd_debug(3, "Unrecognised magic %d, end of scan.\n", - blocktype); - brelse(bh); - goto done; - } - } - - done: - /* - * We broke out of the log scan loop: either we came to the - * known end of the log or we found an unexpected block in the - * log. If the latter happened, then we know that the "current" - * transaction marks the end of the valid log. - */ - - if (pass == PASS_SCAN) - info->end_transaction = next_commit_ID; - else { - /* It's really bad news if different passes end up at - * different places (but possible due to IO errors). */ - if (info->end_transaction != next_commit_ID) { - printk (KERN_ERR "JBD: recovery pass %d ended at " - "transaction %u, expected %u\n", - pass, next_commit_ID, info->end_transaction); - if (!success) - success = -EIO; - } - } - - return success; - - failed: - return err; -} - - -/* Scan a revoke record, marking all blocks mentioned as revoked. */ - -static int scan_revoke_records(journal_t *journal, struct buffer_head *bh, - tid_t sequence, struct recovery_info *info) -{ - jbd2_journal_revoke_header_t *header; - int offset, max; - int record_len = 4; - - header = (jbd2_journal_revoke_header_t *) bh->b_data; - offset = sizeof(jbd2_journal_revoke_header_t); - max = be32_to_cpu(header->r_count); - - if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT)) - record_len = 8; - - while (offset + record_len <= max) { - unsigned long long blocknr; - int err; - - if (record_len == 4) - blocknr = be32_to_cpu(* ((__be32 *) (bh->b_data+offset))); - else - blocknr = be64_to_cpu(* ((__be64 *) (bh->b_data+offset))); - offset += record_len; - err = jbd2_journal_set_revoke(journal, blocknr, sequence); - if (err) - return err; - ++info->nr_revokes; - } - return 0; -} diff --git a/trunk/fs/jbd2/revoke.c b/trunk/fs/jbd2/revoke.c deleted file mode 100644 index 380d19917f37..000000000000 --- a/trunk/fs/jbd2/revoke.c +++ /dev/null @@ -1,712 +0,0 @@ -/* - * linux/fs/revoke.c - * - * Written by Stephen C. Tweedie , 2000 - * - * Copyright 2000 Red Hat corp --- All Rights Reserved - * - * This file is part of the Linux kernel and is made available under - * the terms of the GNU General Public License, version 2, or at your - * option, any later version, incorporated herein by reference. - * - * Journal revoke routines for the generic filesystem journaling code; - * part of the ext2fs journaling system. - * - * Revoke is the mechanism used to prevent old log records for deleted - * metadata from being replayed on top of newer data using the same - * blocks. The revoke mechanism is used in two separate places: - * - * + Commit: during commit we write the entire list of the current - * transaction's revoked blocks to the journal - * - * + Recovery: during recovery we record the transaction ID of all - * revoked blocks. If there are multiple revoke records in the log - * for a single block, only the last one counts, and if there is a log - * entry for a block beyond the last revoke, then that log entry still - * gets replayed. - * - * We can get interactions between revokes and new log data within a - * single transaction: - * - * Block is revoked and then journaled: - * The desired end result is the journaling of the new block, so we - * cancel the revoke before the transaction commits. - * - * Block is journaled and then revoked: - * The revoke must take precedence over the write of the block, so we - * need either to cancel the journal entry or to write the revoke - * later in the log than the log block. In this case, we choose the - * latter: journaling a block cancels any revoke record for that block - * in the current transaction, so any revoke for that block in the - * transaction must have happened after the block was journaled and so - * the revoke must take precedence. - * - * Block is revoked and then written as data: - * The data write is allowed to succeed, but the revoke is _not_ - * cancelled. We still need to prevent old log records from - * overwriting the new data. We don't even need to clear the revoke - * bit here. - * - * Revoke information on buffers is a tri-state value: - * - * RevokeValid clear: no cached revoke status, need to look it up - * RevokeValid set, Revoked clear: - * buffer has not been revoked, and cancel_revoke - * need do nothing. - * RevokeValid set, Revoked set: - * buffer has been revoked. - */ - -#ifndef __KERNEL__ -#include "jfs_user.h" -#else -#include -#include -#include -#include -#include -#include -#include -#include -#endif - -static kmem_cache_t *jbd2_revoke_record_cache; -static kmem_cache_t *jbd2_revoke_table_cache; - -/* Each revoke record represents one single revoked block. During - journal replay, this involves recording the transaction ID of the - last transaction to revoke this block. */ - -struct jbd2_revoke_record_s -{ - struct list_head hash; - tid_t sequence; /* Used for recovery only */ - unsigned long long blocknr; -}; - - -/* The revoke table is just a simple hash table of revoke records. */ -struct jbd2_revoke_table_s -{ - /* It is conceivable that we might want a larger hash table - * for recovery. Must be a power of two. */ - int hash_size; - int hash_shift; - struct list_head *hash_table; -}; - - -#ifdef __KERNEL__ -static void write_one_revoke_record(journal_t *, transaction_t *, - struct journal_head **, int *, - struct jbd2_revoke_record_s *); -static void flush_descriptor(journal_t *, struct journal_head *, int); -#endif - -/* Utility functions to maintain the revoke table */ - -/* Borrowed from buffer.c: this is a tried and tested block hash function */ -static inline int hash(journal_t *journal, unsigned long long block) -{ - struct jbd2_revoke_table_s *table = journal->j_revoke; - int hash_shift = table->hash_shift; - int hash = (int)block ^ (int)((block >> 31) >> 1); - - return ((hash << (hash_shift - 6)) ^ - (hash >> 13) ^ - (hash << (hash_shift - 12))) & (table->hash_size - 1); -} - -static int insert_revoke_hash(journal_t *journal, unsigned long long blocknr, - tid_t seq) -{ - struct list_head *hash_list; - struct jbd2_revoke_record_s *record; - -repeat: - record = kmem_cache_alloc(jbd2_revoke_record_cache, GFP_NOFS); - if (!record) - goto oom; - - record->sequence = seq; - record->blocknr = blocknr; - hash_list = &journal->j_revoke->hash_table[hash(journal, blocknr)]; - spin_lock(&journal->j_revoke_lock); - list_add(&record->hash, hash_list); - spin_unlock(&journal->j_revoke_lock); - return 0; - -oom: - if (!journal_oom_retry) - return -ENOMEM; - jbd_debug(1, "ENOMEM in %s, retrying\n", __FUNCTION__); - yield(); - goto repeat; -} - -/* Find a revoke record in the journal's hash table. */ - -static struct jbd2_revoke_record_s *find_revoke_record(journal_t *journal, - unsigned long long blocknr) -{ - struct list_head *hash_list; - struct jbd2_revoke_record_s *record; - - hash_list = &journal->j_revoke->hash_table[hash(journal, blocknr)]; - - spin_lock(&journal->j_revoke_lock); - record = (struct jbd2_revoke_record_s *) hash_list->next; - while (&(record->hash) != hash_list) { - if (record->blocknr == blocknr) { - spin_unlock(&journal->j_revoke_lock); - return record; - } - record = (struct jbd2_revoke_record_s *) record->hash.next; - } - spin_unlock(&journal->j_revoke_lock); - return NULL; -} - -int __init jbd2_journal_init_revoke_caches(void) -{ - jbd2_revoke_record_cache = kmem_cache_create("jbd2_revoke_record", - sizeof(struct jbd2_revoke_record_s), - 0, SLAB_HWCACHE_ALIGN, NULL, NULL); - if (jbd2_revoke_record_cache == 0) - return -ENOMEM; - - jbd2_revoke_table_cache = kmem_cache_create("jbd2_revoke_table", - sizeof(struct jbd2_revoke_table_s), - 0, 0, NULL, NULL); - if (jbd2_revoke_table_cache == 0) { - kmem_cache_destroy(jbd2_revoke_record_cache); - jbd2_revoke_record_cache = NULL; - return -ENOMEM; - } - return 0; -} - -void jbd2_journal_destroy_revoke_caches(void) -{ - kmem_cache_destroy(jbd2_revoke_record_cache); - jbd2_revoke_record_cache = NULL; - kmem_cache_destroy(jbd2_revoke_table_cache); - jbd2_revoke_table_cache = NULL; -} - -/* Initialise the revoke table for a given journal to a given size. */ - -int jbd2_journal_init_revoke(journal_t *journal, int hash_size) -{ - int shift, tmp; - - J_ASSERT (journal->j_revoke_table[0] == NULL); - - shift = 0; - tmp = hash_size; - while((tmp >>= 1UL) != 0UL) - shift++; - - journal->j_revoke_table[0] = kmem_cache_alloc(jbd2_revoke_table_cache, GFP_KERNEL); - if (!journal->j_revoke_table[0]) - return -ENOMEM; - journal->j_revoke = journal->j_revoke_table[0]; - - /* Check that the hash_size is a power of two */ - J_ASSERT ((hash_size & (hash_size-1)) == 0); - - journal->j_revoke->hash_size = hash_size; - - journal->j_revoke->hash_shift = shift; - - journal->j_revoke->hash_table = - kmalloc(hash_size * sizeof(struct list_head), GFP_KERNEL); - if (!journal->j_revoke->hash_table) { - kmem_cache_free(jbd2_revoke_table_cache, journal->j_revoke_table[0]); - journal->j_revoke = NULL; - return -ENOMEM; - } - - for (tmp = 0; tmp < hash_size; tmp++) - INIT_LIST_HEAD(&journal->j_revoke->hash_table[tmp]); - - journal->j_revoke_table[1] = kmem_cache_alloc(jbd2_revoke_table_cache, GFP_KERNEL); - if (!journal->j_revoke_table[1]) { - kfree(journal->j_revoke_table[0]->hash_table); - kmem_cache_free(jbd2_revoke_table_cache, journal->j_revoke_table[0]); - return -ENOMEM; - } - - journal->j_revoke = journal->j_revoke_table[1]; - - /* Check that the hash_size is a power of two */ - J_ASSERT ((hash_size & (hash_size-1)) == 0); - - journal->j_revoke->hash_size = hash_size; - - journal->j_revoke->hash_shift = shift; - - journal->j_revoke->hash_table = - kmalloc(hash_size * sizeof(struct list_head), GFP_KERNEL); - if (!journal->j_revoke->hash_table) { - kfree(journal->j_revoke_table[0]->hash_table); - kmem_cache_free(jbd2_revoke_table_cache, journal->j_revoke_table[0]); - kmem_cache_free(jbd2_revoke_table_cache, journal->j_revoke_table[1]); - journal->j_revoke = NULL; - return -ENOMEM; - } - - for (tmp = 0; tmp < hash_size; tmp++) - INIT_LIST_HEAD(&journal->j_revoke->hash_table[tmp]); - - spin_lock_init(&journal->j_revoke_lock); - - return 0; -} - -/* Destoy a journal's revoke table. The table must already be empty! */ - -void jbd2_journal_destroy_revoke(journal_t *journal) -{ - struct jbd2_revoke_table_s *table; - struct list_head *hash_list; - int i; - - table = journal->j_revoke_table[0]; - if (!table) - return; - - for (i=0; ihash_size; i++) { - hash_list = &table->hash_table[i]; - J_ASSERT (list_empty(hash_list)); - } - - kfree(table->hash_table); - kmem_cache_free(jbd2_revoke_table_cache, table); - journal->j_revoke = NULL; - - table = journal->j_revoke_table[1]; - if (!table) - return; - - for (i=0; ihash_size; i++) { - hash_list = &table->hash_table[i]; - J_ASSERT (list_empty(hash_list)); - } - - kfree(table->hash_table); - kmem_cache_free(jbd2_revoke_table_cache, table); - journal->j_revoke = NULL; -} - - -#ifdef __KERNEL__ - -/* - * jbd2_journal_revoke: revoke a given buffer_head from the journal. This - * prevents the block from being replayed during recovery if we take a - * crash after this current transaction commits. Any subsequent - * metadata writes of the buffer in this transaction cancel the - * revoke. - * - * Note that this call may block --- it is up to the caller to make - * sure that there are no further calls to journal_write_metadata - * before the revoke is complete. In ext3, this implies calling the - * revoke before clearing the block bitmap when we are deleting - * metadata. - * - * Revoke performs a jbd2_journal_forget on any buffer_head passed in as a - * parameter, but does _not_ forget the buffer_head if the bh was only - * found implicitly. - * - * bh_in may not be a journalled buffer - it may have come off - * the hash tables without an attached journal_head. - * - * If bh_in is non-zero, jbd2_journal_revoke() will decrement its b_count - * by one. - */ - -int jbd2_journal_revoke(handle_t *handle, unsigned long long blocknr, - struct buffer_head *bh_in) -{ - struct buffer_head *bh = NULL; - journal_t *journal; - struct block_device *bdev; - int err; - - might_sleep(); - if (bh_in) - BUFFER_TRACE(bh_in, "enter"); - - journal = handle->h_transaction->t_journal; - if (!jbd2_journal_set_features(journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)){ - J_ASSERT (!"Cannot set revoke feature!"); - return -EINVAL; - } - - bdev = journal->j_fs_dev; - bh = bh_in; - - if (!bh) { - bh = __find_get_block(bdev, blocknr, journal->j_blocksize); - if (bh) - BUFFER_TRACE(bh, "found on hash"); - } -#ifdef JBD_EXPENSIVE_CHECKING - else { - struct buffer_head *bh2; - - /* If there is a different buffer_head lying around in - * memory anywhere... */ - bh2 = __find_get_block(bdev, blocknr, journal->j_blocksize); - if (bh2) { - /* ... and it has RevokeValid status... */ - if (bh2 != bh && buffer_revokevalid(bh2)) - /* ...then it better be revoked too, - * since it's illegal to create a revoke - * record against a buffer_head which is - * not marked revoked --- that would - * risk missing a subsequent revoke - * cancel. */ - J_ASSERT_BH(bh2, buffer_revoked(bh2)); - put_bh(bh2); - } - } -#endif - - /* We really ought not ever to revoke twice in a row without - first having the revoke cancelled: it's illegal to free a - block twice without allocating it in between! */ - if (bh) { - if (!J_EXPECT_BH(bh, !buffer_revoked(bh), - "inconsistent data on disk")) { - if (!bh_in) - brelse(bh); - return -EIO; - } - set_buffer_revoked(bh); - set_buffer_revokevalid(bh); - if (bh_in) { - BUFFER_TRACE(bh_in, "call jbd2_journal_forget"); - jbd2_journal_forget(handle, bh_in); - } else { - BUFFER_TRACE(bh, "call brelse"); - __brelse(bh); - } - } - - jbd_debug(2, "insert revoke for block %llu, bh_in=%p\n",blocknr, bh_in); - err = insert_revoke_hash(journal, blocknr, - handle->h_transaction->t_tid); - BUFFER_TRACE(bh_in, "exit"); - return err; -} - -/* - * Cancel an outstanding revoke. For use only internally by the - * journaling code (called from jbd2_journal_get_write_access). - * - * We trust buffer_revoked() on the buffer if the buffer is already - * being journaled: if there is no revoke pending on the buffer, then we - * don't do anything here. - * - * This would break if it were possible for a buffer to be revoked and - * discarded, and then reallocated within the same transaction. In such - * a case we would have lost the revoked bit, but when we arrived here - * the second time we would still have a pending revoke to cancel. So, - * do not trust the Revoked bit on buffers unless RevokeValid is also - * set. - * - * The caller must have the journal locked. - */ -int jbd2_journal_cancel_revoke(handle_t *handle, struct journal_head *jh) -{ - struct jbd2_revoke_record_s *record; - journal_t *journal = handle->h_transaction->t_journal; - int need_cancel; - int did_revoke = 0; /* akpm: debug */ - struct buffer_head *bh = jh2bh(jh); - - jbd_debug(4, "journal_head %p, cancelling revoke\n", jh); - - /* Is the existing Revoke bit valid? If so, we trust it, and - * only perform the full cancel if the revoke bit is set. If - * not, we can't trust the revoke bit, and we need to do the - * full search for a revoke record. */ - if (test_set_buffer_revokevalid(bh)) { - need_cancel = test_clear_buffer_revoked(bh); - } else { - need_cancel = 1; - clear_buffer_revoked(bh); - } - - if (need_cancel) { - record = find_revoke_record(journal, bh->b_blocknr); - if (record) { - jbd_debug(4, "cancelled existing revoke on " - "blocknr %llu\n", (unsigned long long)bh->b_blocknr); - spin_lock(&journal->j_revoke_lock); - list_del(&record->hash); - spin_unlock(&journal->j_revoke_lock); - kmem_cache_free(jbd2_revoke_record_cache, record); - did_revoke = 1; - } - } - -#ifdef JBD_EXPENSIVE_CHECKING - /* There better not be one left behind by now! */ - record = find_revoke_record(journal, bh->b_blocknr); - J_ASSERT_JH(jh, record == NULL); -#endif - - /* Finally, have we just cleared revoke on an unhashed - * buffer_head? If so, we'd better make sure we clear the - * revoked status on any hashed alias too, otherwise the revoke - * state machine will get very upset later on. */ - if (need_cancel) { - struct buffer_head *bh2; - bh2 = __find_get_block(bh->b_bdev, bh->b_blocknr, bh->b_size); - if (bh2) { - if (bh2 != bh) - clear_buffer_revoked(bh2); - __brelse(bh2); - } - } - return did_revoke; -} - -/* journal_switch_revoke table select j_revoke for next transaction - * we do not want to suspend any processing until all revokes are - * written -bzzz - */ -void jbd2_journal_switch_revoke_table(journal_t *journal) -{ - int i; - - if (journal->j_revoke == journal->j_revoke_table[0]) - journal->j_revoke = journal->j_revoke_table[1]; - else - journal->j_revoke = journal->j_revoke_table[0]; - - for (i = 0; i < journal->j_revoke->hash_size; i++) - INIT_LIST_HEAD(&journal->j_revoke->hash_table[i]); -} - -/* - * Write revoke records to the journal for all entries in the current - * revoke hash, deleting the entries as we go. - * - * Called with the journal lock held. - */ - -void jbd2_journal_write_revoke_records(journal_t *journal, - transaction_t *transaction) -{ - struct journal_head *descriptor; - struct jbd2_revoke_record_s *record; - struct jbd2_revoke_table_s *revoke; - struct list_head *hash_list; - int i, offset, count; - - descriptor = NULL; - offset = 0; - count = 0; - - /* select revoke table for committing transaction */ - revoke = journal->j_revoke == journal->j_revoke_table[0] ? - journal->j_revoke_table[1] : journal->j_revoke_table[0]; - - for (i = 0; i < revoke->hash_size; i++) { - hash_list = &revoke->hash_table[i]; - - while (!list_empty(hash_list)) { - record = (struct jbd2_revoke_record_s *) - hash_list->next; - write_one_revoke_record(journal, transaction, - &descriptor, &offset, - record); - count++; - list_del(&record->hash); - kmem_cache_free(jbd2_revoke_record_cache, record); - } - } - if (descriptor) - flush_descriptor(journal, descriptor, offset); - jbd_debug(1, "Wrote %d revoke records\n", count); -} - -/* - * Write out one revoke record. We need to create a new descriptor - * block if the old one is full or if we have not already created one. - */ - -static void write_one_revoke_record(journal_t *journal, - transaction_t *transaction, - struct journal_head **descriptorp, - int *offsetp, - struct jbd2_revoke_record_s *record) -{ - struct journal_head *descriptor; - int offset; - journal_header_t *header; - - /* If we are already aborting, this all becomes a noop. We - still need to go round the loop in - jbd2_journal_write_revoke_records in order to free all of the - revoke records: only the IO to the journal is omitted. */ - if (is_journal_aborted(journal)) - return; - - descriptor = *descriptorp; - offset = *offsetp; - - /* Make sure we have a descriptor with space left for the record */ - if (descriptor) { - if (offset == journal->j_blocksize) { - flush_descriptor(journal, descriptor, offset); - descriptor = NULL; - } - } - - if (!descriptor) { - descriptor = jbd2_journal_get_descriptor_buffer(journal); - if (!descriptor) - return; - header = (journal_header_t *) &jh2bh(descriptor)->b_data[0]; - header->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER); - header->h_blocktype = cpu_to_be32(JBD2_REVOKE_BLOCK); - header->h_sequence = cpu_to_be32(transaction->t_tid); - - /* Record it so that we can wait for IO completion later */ - JBUFFER_TRACE(descriptor, "file as BJ_LogCtl"); - jbd2_journal_file_buffer(descriptor, transaction, BJ_LogCtl); - - offset = sizeof(jbd2_journal_revoke_header_t); - *descriptorp = descriptor; - } - - if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT)) { - * ((__be64 *)(&jh2bh(descriptor)->b_data[offset])) = - cpu_to_be64(record->blocknr); - offset += 8; - - } else { - * ((__be32 *)(&jh2bh(descriptor)->b_data[offset])) = - cpu_to_be32(record->blocknr); - offset += 4; - } - - *offsetp = offset; -} - -/* - * Flush a revoke descriptor out to the journal. If we are aborting, - * this is a noop; otherwise we are generating a buffer which needs to - * be waited for during commit, so it has to go onto the appropriate - * journal buffer list. - */ - -static void flush_descriptor(journal_t *journal, - struct journal_head *descriptor, - int offset) -{ - jbd2_journal_revoke_header_t *header; - struct buffer_head *bh = jh2bh(descriptor); - - if (is_journal_aborted(journal)) { - put_bh(bh); - return; - } - - header = (jbd2_journal_revoke_header_t *) jh2bh(descriptor)->b_data; - header->r_count = cpu_to_be32(offset); - set_buffer_jwrite(bh); - BUFFER_TRACE(bh, "write"); - set_buffer_dirty(bh); - ll_rw_block(SWRITE, 1, &bh); -} -#endif - -/* - * Revoke support for recovery. - * - * Recovery needs to be able to: - * - * record all revoke records, including the tid of the latest instance - * of each revoke in the journal - * - * check whether a given block in a given transaction should be replayed - * (ie. has not been revoked by a revoke record in that or a subsequent - * transaction) - * - * empty the revoke table after recovery. - */ - -/* - * First, setting revoke records. We create a new revoke record for - * every block ever revoked in the log as we scan it for recovery, and - * we update the existing records if we find multiple revokes for a - * single block. - */ - -int jbd2_journal_set_revoke(journal_t *journal, - unsigned long long blocknr, - tid_t sequence) -{ - struct jbd2_revoke_record_s *record; - - record = find_revoke_record(journal, blocknr); - if (record) { - /* If we have multiple occurrences, only record the - * latest sequence number in the hashed record */ - if (tid_gt(sequence, record->sequence)) - record->sequence = sequence; - return 0; - } - return insert_revoke_hash(journal, blocknr, sequence); -} - -/* - * Test revoke records. For a given block referenced in the log, has - * that block been revoked? A revoke record with a given transaction - * sequence number revokes all blocks in that transaction and earlier - * ones, but later transactions still need replayed. - */ - -int jbd2_journal_test_revoke(journal_t *journal, - unsigned long long blocknr, - tid_t sequence) -{ - struct jbd2_revoke_record_s *record; - - record = find_revoke_record(journal, blocknr); - if (!record) - return 0; - if (tid_gt(sequence, record->sequence)) - return 0; - return 1; -} - -/* - * Finally, once recovery is over, we need to clear the revoke table so - * that it can be reused by the running filesystem. - */ - -void jbd2_journal_clear_revoke(journal_t *journal) -{ - int i; - struct list_head *hash_list; - struct jbd2_revoke_record_s *record; - struct jbd2_revoke_table_s *revoke; - - revoke = journal->j_revoke; - - for (i = 0; i < revoke->hash_size; i++) { - hash_list = &revoke->hash_table[i]; - while (!list_empty(hash_list)) { - record = (struct jbd2_revoke_record_s*) hash_list->next; - list_del(&record->hash); - kmem_cache_free(jbd2_revoke_record_cache, record); - } - } -} diff --git a/trunk/fs/jbd2/transaction.c b/trunk/fs/jbd2/transaction.c deleted file mode 100644 index 149957bef907..000000000000 --- a/trunk/fs/jbd2/transaction.c +++ /dev/null @@ -1,2080 +0,0 @@ -/* - * linux/fs/transaction.c - * - * Written by Stephen C. Tweedie , 1998 - * - * Copyright 1998 Red Hat corp --- All Rights Reserved - * - * This file is part of the Linux kernel and is made available under - * the terms of the GNU General Public License, version 2, or at your - * option, any later version, incorporated herein by reference. - * - * Generic filesystem transaction handling code; part of the ext2fs - * journaling system. - * - * This file manages transactions (compound commits managed by the - * journaling code) and handles (individual atomic operations by the - * filesystem). - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* - * jbd2_get_transaction: obtain a new transaction_t object. - * - * Simply allocate and initialise a new transaction. Create it in - * RUNNING state and add it to the current journal (which should not - * have an existing running transaction: we only make a new transaction - * once we have started to commit the old one). - * - * Preconditions: - * The journal MUST be locked. We don't perform atomic mallocs on the - * new transaction and we can't block without protecting against other - * processes trying to touch the journal while it is in transition. - * - * Called under j_state_lock - */ - -static transaction_t * -jbd2_get_transaction(journal_t *journal, transaction_t *transaction) -{ - transaction->t_journal = journal; - transaction->t_state = T_RUNNING; - transaction->t_tid = journal->j_transaction_sequence++; - transaction->t_expires = jiffies + journal->j_commit_interval; - spin_lock_init(&transaction->t_handle_lock); - - /* Set up the commit timer for the new transaction. */ - journal->j_commit_timer.expires = transaction->t_expires; - add_timer(&journal->j_commit_timer); - - J_ASSERT(journal->j_running_transaction == NULL); - journal->j_running_transaction = transaction; - - return transaction; -} - -/* - * Handle management. - * - * A handle_t is an object which represents a single atomic update to a - * filesystem, and which tracks all of the modifications which form part - * of that one update. - */ - -/* - * start_this_handle: Given a handle, deal with any locking or stalling - * needed to make sure that there is enough journal space for the handle - * to begin. Attach the handle to a transaction and set up the - * transaction's buffer credits. - */ - -static int start_this_handle(journal_t *journal, handle_t *handle) -{ - transaction_t *transaction; - int needed; - int nblocks = handle->h_buffer_credits; - transaction_t *new_transaction = NULL; - int ret = 0; - - if (nblocks > journal->j_max_transaction_buffers) { - printk(KERN_ERR "JBD: %s wants too many credits (%d > %d)\n", - current->comm, nblocks, - journal->j_max_transaction_buffers); - ret = -ENOSPC; - goto out; - } - -alloc_transaction: - if (!journal->j_running_transaction) { - new_transaction = jbd_kmalloc(sizeof(*new_transaction), - GFP_NOFS); - if (!new_transaction) { - ret = -ENOMEM; - goto out; - } - memset(new_transaction, 0, sizeof(*new_transaction)); - } - - jbd_debug(3, "New handle %p going live.\n", handle); - -repeat: - - /* - * We need to hold j_state_lock until t_updates has been incremented, - * for proper journal barrier handling - */ - spin_lock(&journal->j_state_lock); -repeat_locked: - if (is_journal_aborted(journal) || - (journal->j_errno != 0 && !(journal->j_flags & JBD2_ACK_ERR))) { - spin_unlock(&journal->j_state_lock); - ret = -EROFS; - goto out; - } - - /* Wait on the journal's transaction barrier if necessary */ - if (journal->j_barrier_count) { - spin_unlock(&journal->j_state_lock); - wait_event(journal->j_wait_transaction_locked, - journal->j_barrier_count == 0); - goto repeat; - } - - if (!journal->j_running_transaction) { - if (!new_transaction) { - spin_unlock(&journal->j_state_lock); - goto alloc_transaction; - } - jbd2_get_transaction(journal, new_transaction); - new_transaction = NULL; - } - - transaction = journal->j_running_transaction; - - /* - * If the current transaction is locked down for commit, wait for the - * lock to be released. - */ - if (transaction->t_state == T_LOCKED) { - DEFINE_WAIT(wait); - - prepare_to_wait(&journal->j_wait_transaction_locked, - &wait, TASK_UNINTERRUPTIBLE); - spin_unlock(&journal->j_state_lock); - schedule(); - finish_wait(&journal->j_wait_transaction_locked, &wait); - goto repeat; - } - - /* - * If there is not enough space left in the log to write all potential - * buffers requested by this operation, we need to stall pending a log - * checkpoint to free some more log space. - */ - spin_lock(&transaction->t_handle_lock); - needed = transaction->t_outstanding_credits + nblocks; - - if (needed > journal->j_max_transaction_buffers) { - /* - * If the current transaction is already too large, then start - * to commit it: we can then go back and attach this handle to - * a new transaction. - */ - DEFINE_WAIT(wait); - - jbd_debug(2, "Handle %p starting new commit...\n", handle); - spin_unlock(&transaction->t_handle_lock); - prepare_to_wait(&journal->j_wait_transaction_locked, &wait, - TASK_UNINTERRUPTIBLE); - __jbd2_log_start_commit(journal, transaction->t_tid); - spin_unlock(&journal->j_state_lock); - schedule(); - finish_wait(&journal->j_wait_transaction_locked, &wait); - goto repeat; - } - - /* - * The commit code assumes that it can get enough log space - * without forcing a checkpoint. This is *critical* for - * correctness: a checkpoint of a buffer which is also - * associated with a committing transaction creates a deadlock, - * so commit simply cannot force through checkpoints. - * - * We must therefore ensure the necessary space in the journal - * *before* starting to dirty potentially checkpointed buffers - * in the new transaction. - * - * The worst part is, any transaction currently committing can - * reduce the free space arbitrarily. Be careful to account for - * those buffers when checkpointing. - */ - - /* - * @@@ AKPM: This seems rather over-defensive. We're giving commit - * a _lot_ of headroom: 1/4 of the journal plus the size of - * the committing transaction. Really, we only need to give it - * committing_transaction->t_outstanding_credits plus "enough" for - * the log control blocks. - * Also, this test is inconsitent with the matching one in - * jbd2_journal_extend(). - */ - if (__jbd2_log_space_left(journal) < jbd_space_needed(journal)) { - jbd_debug(2, "Handle %p waiting for checkpoint...\n", handle); - spin_unlock(&transaction->t_handle_lock); - __jbd2_log_wait_for_space(journal); - goto repeat_locked; - } - - /* OK, account for the buffers that this operation expects to - * use and add the handle to the running transaction. */ - - handle->h_transaction = transaction; - transaction->t_outstanding_credits += nblocks; - transaction->t_updates++; - transaction->t_handle_count++; - jbd_debug(4, "Handle %p given %d credits (total %d, free %d)\n", - handle, nblocks, transaction->t_outstanding_credits, - __jbd2_log_space_left(journal)); - spin_unlock(&transaction->t_handle_lock); - spin_unlock(&journal->j_state_lock); -out: - if (unlikely(new_transaction)) /* It's usually NULL */ - kfree(new_transaction); - return ret; -} - -/* Allocate a new handle. This should probably be in a slab... */ -static handle_t *new_handle(int nblocks) -{ - handle_t *handle = jbd_alloc_handle(GFP_NOFS); - if (!handle) - return NULL; - memset(handle, 0, sizeof(*handle)); - handle->h_buffer_credits = nblocks; - handle->h_ref = 1; - - return handle; -} - -/** - * handle_t *jbd2_journal_start() - Obtain a new handle. - * @journal: Journal to start transaction on. - * @nblocks: number of block buffer we might modify - * - * We make sure that the transaction can guarantee at least nblocks of - * modified buffers in the log. We block until the log can guarantee - * that much space. - * - * This function is visible to journal users (like ext3fs), so is not - * called with the journal already locked. - * - * Return a pointer to a newly allocated handle, or NULL on failure - */ -handle_t *jbd2_journal_start(journal_t *journal, int nblocks) -{ - handle_t *handle = journal_current_handle(); - int err; - - if (!journal) - return ERR_PTR(-EROFS); - - if (handle) { - J_ASSERT(handle->h_transaction->t_journal == journal); - handle->h_ref++; - return handle; - } - - handle = new_handle(nblocks); - if (!handle) - return ERR_PTR(-ENOMEM); - - current->journal_info = handle; - - err = start_this_handle(journal, handle); - if (err < 0) { - jbd_free_handle(handle); - current->journal_info = NULL; - handle = ERR_PTR(err); - } - return handle; -} - -/** - * int jbd2_journal_extend() - extend buffer credits. - * @handle: handle to 'extend' - * @nblocks: nr blocks to try to extend by. - * - * Some transactions, such as large extends and truncates, can be done - * atomically all at once or in several stages. The operation requests - * a credit for a number of buffer modications in advance, but can - * extend its credit if it needs more. - * - * jbd2_journal_extend tries to give the running handle more buffer credits. - * It does not guarantee that allocation - this is a best-effort only. - * The calling process MUST be able to deal cleanly with a failure to - * extend here. - * - * Return 0 on success, non-zero on failure. - * - * return code < 0 implies an error - * return code > 0 implies normal transaction-full status. - */ -int jbd2_journal_extend(handle_t *handle, int nblocks) -{ - transaction_t *transaction = handle->h_transaction; - journal_t *journal = transaction->t_journal; - int result; - int wanted; - - result = -EIO; - if (is_handle_aborted(handle)) - goto out; - - result = 1; - - spin_lock(&journal->j_state_lock); - - /* Don't extend a locked-down transaction! */ - if (handle->h_transaction->t_state != T_RUNNING) { - jbd_debug(3, "denied handle %p %d blocks: " - "transaction not running\n", handle, nblocks); - goto error_out; - } - - spin_lock(&transaction->t_handle_lock); - wanted = transaction->t_outstanding_credits + nblocks; - - if (wanted > journal->j_max_transaction_buffers) { - jbd_debug(3, "denied handle %p %d blocks: " - "transaction too large\n", handle, nblocks); - goto unlock; - } - - if (wanted > __jbd2_log_space_left(journal)) { - jbd_debug(3, "denied handle %p %d blocks: " - "insufficient log space\n", handle, nblocks); - goto unlock; - } - - handle->h_buffer_credits += nblocks; - transaction->t_outstanding_credits += nblocks; - result = 0; - - jbd_debug(3, "extended handle %p by %d\n", handle, nblocks); -unlock: - spin_unlock(&transaction->t_handle_lock); -error_out: - spin_unlock(&journal->j_state_lock); -out: - return result; -} - - -/** - * int jbd2_journal_restart() - restart a handle . - * @handle: handle to restart - * @nblocks: nr credits requested - * - * Restart a handle for a multi-transaction filesystem - * operation. - * - * If the jbd2_journal_extend() call above fails to grant new buffer credits - * to a running handle, a call to jbd2_journal_restart will commit the - * handle's transaction so far and reattach the handle to a new - * transaction capabable of guaranteeing the requested number of - * credits. - */ - -int jbd2_journal_restart(handle_t *handle, int nblocks) -{ - transaction_t *transaction = handle->h_transaction; - journal_t *journal = transaction->t_journal; - int ret; - - /* If we've had an abort of any type, don't even think about - * actually doing the restart! */ - if (is_handle_aborted(handle)) - return 0; - - /* - * First unlink the handle from its current transaction, and start the - * commit on that. - */ - J_ASSERT(transaction->t_updates > 0); - J_ASSERT(journal_current_handle() == handle); - - spin_lock(&journal->j_state_lock); - spin_lock(&transaction->t_handle_lock); - transaction->t_outstanding_credits -= handle->h_buffer_credits; - transaction->t_updates--; - - if (!transaction->t_updates) - wake_up(&journal->j_wait_updates); - spin_unlock(&transaction->t_handle_lock); - - jbd_debug(2, "restarting handle %p\n", handle); - __jbd2_log_start_commit(journal, transaction->t_tid); - spin_unlock(&journal->j_state_lock); - - handle->h_buffer_credits = nblocks; - ret = start_this_handle(journal, handle); - return ret; -} - - -/** - * void jbd2_journal_lock_updates () - establish a transaction barrier. - * @journal: Journal to establish a barrier on. - * - * This locks out any further updates from being started, and blocks - * until all existing updates have completed, returning only once the - * journal is in a quiescent state with no updates running. - * - * The journal lock should not be held on entry. - */ -void jbd2_journal_lock_updates(journal_t *journal) -{ - DEFINE_WAIT(wait); - - spin_lock(&journal->j_state_lock); - ++journal->j_barrier_count; - - /* Wait until there are no running updates */ - while (1) { - transaction_t *transaction = journal->j_running_transaction; - - if (!transaction) - break; - - spin_lock(&transaction->t_handle_lock); - if (!transaction->t_updates) { - spin_unlock(&transaction->t_handle_lock); - break; - } - prepare_to_wait(&journal->j_wait_updates, &wait, - TASK_UNINTERRUPTIBLE); - spin_unlock(&transaction->t_handle_lock); - spin_unlock(&journal->j_state_lock); - schedule(); - finish_wait(&journal->j_wait_updates, &wait); - spin_lock(&journal->j_state_lock); - } - spin_unlock(&journal->j_state_lock); - - /* - * We have now established a barrier against other normal updates, but - * we also need to barrier against other jbd2_journal_lock_updates() calls - * to make sure that we serialise special journal-locked operations - * too. - */ - mutex_lock(&journal->j_barrier); -} - -/** - * void jbd2_journal_unlock_updates (journal_t* journal) - release barrier - * @journal: Journal to release the barrier on. - * - * Release a transaction barrier obtained with jbd2_journal_lock_updates(). - * - * Should be called without the journal lock held. - */ -void jbd2_journal_unlock_updates (journal_t *journal) -{ - J_ASSERT(journal->j_barrier_count != 0); - - mutex_unlock(&journal->j_barrier); - spin_lock(&journal->j_state_lock); - --journal->j_barrier_count; - spin_unlock(&journal->j_state_lock); - wake_up(&journal->j_wait_transaction_locked); -} - -/* - * Report any unexpected dirty buffers which turn up. Normally those - * indicate an error, but they can occur if the user is running (say) - * tune2fs to modify the live filesystem, so we need the option of - * continuing as gracefully as possible. # - * - * The caller should already hold the journal lock and - * j_list_lock spinlock: most callers will need those anyway - * in order to probe the buffer's journaling state safely. - */ -static void jbd_unexpected_dirty_buffer(struct journal_head *jh) -{ - int jlist; - - /* If this buffer is one which might reasonably be dirty - * --- ie. data, or not part of this journal --- then - * we're OK to leave it alone, but otherwise we need to - * move the dirty bit to the journal's own internal - * JBDDirty bit. */ - jlist = jh->b_jlist; - - if (jlist == BJ_Metadata || jlist == BJ_Reserved || - jlist == BJ_Shadow || jlist == BJ_Forget) { - struct buffer_head *bh = jh2bh(jh); - - if (test_clear_buffer_dirty(bh)) - set_buffer_jbddirty(bh); - } -} - -/* - * If the buffer is already part of the current transaction, then there - * is nothing we need to do. If it is already part of a prior - * transaction which we are still committing to disk, then we need to - * make sure that we do not overwrite the old copy: we do copy-out to - * preserve the copy going to disk. We also account the buffer against - * the handle's metadata buffer credits (unless the buffer is already - * part of the transaction, that is). - * - */ -static int -do_get_write_access(handle_t *handle, struct journal_head *jh, - int force_copy) -{ - struct buffer_head *bh; - transaction_t *transaction; - journal_t *journal; - int error; - char *frozen_buffer = NULL; - int need_copy = 0; - - if (is_handle_aborted(handle)) - return -EROFS; - - transaction = handle->h_transaction; - journal = transaction->t_journal; - - jbd_debug(5, "buffer_head %p, force_copy %d\n", jh, force_copy); - - JBUFFER_TRACE(jh, "entry"); -repeat: - bh = jh2bh(jh); - - /* @@@ Need to check for errors here at some point. */ - - lock_buffer(bh); - jbd_lock_bh_state(bh); - - /* We now hold the buffer lock so it is safe to query the buffer - * state. Is the buffer dirty? - * - * If so, there are two possibilities. The buffer may be - * non-journaled, and undergoing a quite legitimate writeback. - * Otherwise, it is journaled, and we don't expect dirty buffers - * in that state (the buffers should be marked JBD_Dirty - * instead.) So either the IO is being done under our own - * control and this is a bug, or it's a third party IO such as - * dump(8) (which may leave the buffer scheduled for read --- - * ie. locked but not dirty) or tune2fs (which may actually have - * the buffer dirtied, ugh.) */ - - if (buffer_dirty(bh)) { - /* - * First question: is this buffer already part of the current - * transaction or the existing committing transaction? - */ - if (jh->b_transaction) { - J_ASSERT_JH(jh, - jh->b_transaction == transaction || - jh->b_transaction == - journal->j_committing_transaction); - if (jh->b_next_transaction) - J_ASSERT_JH(jh, jh->b_next_transaction == - transaction); - } - /* - * In any case we need to clean the dirty flag and we must - * do it under the buffer lock to be sure we don't race - * with running write-out. - */ - JBUFFER_TRACE(jh, "Unexpected dirty buffer"); - jbd_unexpected_dirty_buffer(jh); - } - - unlock_buffer(bh); - - error = -EROFS; - if (is_handle_aborted(handle)) { - jbd_unlock_bh_state(bh); - goto out; - } - error = 0; - - /* - * The buffer is already part of this transaction if b_transaction or - * b_next_transaction points to it - */ - if (jh->b_transaction == transaction || - jh->b_next_transaction == transaction) - goto done; - - /* - * If there is already a copy-out version of this buffer, then we don't - * need to make another one - */ - if (jh->b_frozen_data) { - JBUFFER_TRACE(jh, "has frozen data"); - J_ASSERT_JH(jh, jh->b_next_transaction == NULL); - jh->b_next_transaction = transaction; - goto done; - } - - /* Is there data here we need to preserve? */ - - if (jh->b_transaction && jh->b_transaction != transaction) { - JBUFFER_TRACE(jh, "owned by older transaction"); - J_ASSERT_JH(jh, jh->b_next_transaction == NULL); - J_ASSERT_JH(jh, jh->b_transaction == - journal->j_committing_transaction); - - /* There is one case we have to be very careful about. - * If the committing transaction is currently writing - * this buffer out to disk and has NOT made a copy-out, - * then we cannot modify the buffer contents at all - * right now. The essence of copy-out is that it is the - * extra copy, not the primary copy, which gets - * journaled. If the primary copy is already going to - * disk then we cannot do copy-out here. */ - - if (jh->b_jlist == BJ_Shadow) { - DEFINE_WAIT_BIT(wait, &bh->b_state, BH_Unshadow); - wait_queue_head_t *wqh; - - wqh = bit_waitqueue(&bh->b_state, BH_Unshadow); - - JBUFFER_TRACE(jh, "on shadow: sleep"); - jbd_unlock_bh_state(bh); - /* commit wakes up all shadow buffers after IO */ - for ( ; ; ) { - prepare_to_wait(wqh, &wait.wait, - TASK_UNINTERRUPTIBLE); - if (jh->b_jlist != BJ_Shadow) - break; - schedule(); - } - finish_wait(wqh, &wait.wait); - goto repeat; - } - - /* Only do the copy if the currently-owning transaction - * still needs it. If it is on the Forget list, the - * committing transaction is past that stage. The - * buffer had better remain locked during the kmalloc, - * but that should be true --- we hold the journal lock - * still and the buffer is already on the BUF_JOURNAL - * list so won't be flushed. - * - * Subtle point, though: if this is a get_undo_access, - * then we will be relying on the frozen_data to contain - * the new value of the committed_data record after the - * transaction, so we HAVE to force the frozen_data copy - * in that case. */ - - if (jh->b_jlist != BJ_Forget || force_copy) { - JBUFFER_TRACE(jh, "generate frozen data"); - if (!frozen_buffer) { - JBUFFER_TRACE(jh, "allocate memory for buffer"); - jbd_unlock_bh_state(bh); - frozen_buffer = - jbd2_slab_alloc(jh2bh(jh)->b_size, - GFP_NOFS); - if (!frozen_buffer) { - printk(KERN_EMERG - "%s: OOM for frozen_buffer\n", - __FUNCTION__); - JBUFFER_TRACE(jh, "oom!"); - error = -ENOMEM; - jbd_lock_bh_state(bh); - goto done; - } - goto repeat; - } - jh->b_frozen_data = frozen_buffer; - frozen_buffer = NULL; - need_copy = 1; - } - jh->b_next_transaction = transaction; - } - - - /* - * Finally, if the buffer is not journaled right now, we need to make - * sure it doesn't get written to disk before the caller actually - * commits the new data - */ - if (!jh->b_transaction) { - JBUFFER_TRACE(jh, "no transaction"); - J_ASSERT_JH(jh, !jh->b_next_transaction); - jh->b_transaction = transaction; - JBUFFER_TRACE(jh, "file as BJ_Reserved"); - spin_lock(&journal->j_list_lock); - __jbd2_journal_file_buffer(jh, transaction, BJ_Reserved); - spin_unlock(&journal->j_list_lock); - } - -done: - if (need_copy) { - struct page *page; - int offset; - char *source; - - J_EXPECT_JH(jh, buffer_uptodate(jh2bh(jh)), - "Possible IO failure.\n"); - page = jh2bh(jh)->b_page; - offset = ((unsigned long) jh2bh(jh)->b_data) & ~PAGE_MASK; - source = kmap_atomic(page, KM_USER0); - memcpy(jh->b_frozen_data, source+offset, jh2bh(jh)->b_size); - kunmap_atomic(source, KM_USER0); - } - jbd_unlock_bh_state(bh); - - /* - * If we are about to journal a buffer, then any revoke pending on it is - * no longer valid - */ - jbd2_journal_cancel_revoke(handle, jh); - -out: - if (unlikely(frozen_buffer)) /* It's usually NULL */ - jbd2_slab_free(frozen_buffer, bh->b_size); - - JBUFFER_TRACE(jh, "exit"); - return error; -} - -/** - * int jbd2_journal_get_write_access() - notify intent to modify a buffer for metadata (not data) update. - * @handle: transaction to add buffer modifications to - * @bh: bh to be used for metadata writes - * @credits: variable that will receive credits for the buffer - * - * Returns an error code or 0 on success. - * - * In full data journalling mode the buffer may be of type BJ_AsyncData, - * because we're write()ing a buffer which is also part of a shared mapping. - */ - -int jbd2_journal_get_write_access(handle_t *handle, struct buffer_head *bh) -{ - struct journal_head *jh = jbd2_journal_add_journal_head(bh); - int rc; - - /* We do not want to get caught playing with fields which the - * log thread also manipulates. Make sure that the buffer - * completes any outstanding IO before proceeding. */ - rc = do_get_write_access(handle, jh, 0); - jbd2_journal_put_journal_head(jh); - return rc; -} - - -/* - * When the user wants to journal a newly created buffer_head - * (ie. getblk() returned a new buffer and we are going to populate it - * manually rather than reading off disk), then we need to keep the - * buffer_head locked until it has been completely filled with new - * data. In this case, we should be able to make the assertion that - * the bh is not already part of an existing transaction. - * - * The buffer should already be locked by the caller by this point. - * There is no lock ranking violation: it was a newly created, - * unlocked buffer beforehand. */ - -/** - * int jbd2_journal_get_create_access () - notify intent to use newly created bh - * @handle: transaction to new buffer to - * @bh: new buffer. - * - * Call this if you create a new bh. - */ -int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh) -{ - transaction_t *transaction = handle->h_transaction; - journal_t *journal = transaction->t_journal; - struct journal_head *jh = jbd2_journal_add_journal_head(bh); - int err; - - jbd_debug(5, "journal_head %p\n", jh); - err = -EROFS; - if (is_handle_aborted(handle)) - goto out; - err = 0; - - JBUFFER_TRACE(jh, "entry"); - /* - * The buffer may already belong to this transaction due to pre-zeroing - * in the filesystem's new_block code. It may also be on the previous, - * committing transaction's lists, but it HAS to be in Forget state in - * that case: the transaction must have deleted the buffer for it to be - * reused here. - */ - jbd_lock_bh_state(bh); - spin_lock(&journal->j_list_lock); - J_ASSERT_JH(jh, (jh->b_transaction == transaction || - jh->b_transaction == NULL || - (jh->b_transaction == journal->j_committing_transaction && - jh->b_jlist == BJ_Forget))); - - J_ASSERT_JH(jh, jh->b_next_transaction == NULL); - J_ASSERT_JH(jh, buffer_locked(jh2bh(jh))); - - if (jh->b_transaction == NULL) { - jh->b_transaction = transaction; - JBUFFER_TRACE(jh, "file as BJ_Reserved"); - __jbd2_journal_file_buffer(jh, transaction, BJ_Reserved); - } else if (jh->b_transaction == journal->j_committing_transaction) { - JBUFFER_TRACE(jh, "set next transaction"); - jh->b_next_transaction = transaction; - } - spin_unlock(&journal->j_list_lock); - jbd_unlock_bh_state(bh); - - /* - * akpm: I added this. ext3_alloc_branch can pick up new indirect - * blocks which contain freed but then revoked metadata. We need - * to cancel the revoke in case we end up freeing it yet again - * and the reallocating as data - this would cause a second revoke, - * which hits an assertion error. - */ - JBUFFER_TRACE(jh, "cancelling revoke"); - jbd2_journal_cancel_revoke(handle, jh); - jbd2_journal_put_journal_head(jh); -out: - return err; -} - -/** - * int jbd2_journal_get_undo_access() - Notify intent to modify metadata with - * non-rewindable consequences - * @handle: transaction - * @bh: buffer to undo - * @credits: store the number of taken credits here (if not NULL) - * - * Sometimes there is a need to distinguish between metadata which has - * been committed to disk and that which has not. The ext3fs code uses - * this for freeing and allocating space, we have to make sure that we - * do not reuse freed space until the deallocation has been committed, - * since if we overwrote that space we would make the delete - * un-rewindable in case of a crash. - * - * To deal with that, jbd2_journal_get_undo_access requests write access to a - * buffer for parts of non-rewindable operations such as delete - * operations on the bitmaps. The journaling code must keep a copy of - * the buffer's contents prior to the undo_access call until such time - * as we know that the buffer has definitely been committed to disk. - * - * We never need to know which transaction the committed data is part - * of, buffers touched here are guaranteed to be dirtied later and so - * will be committed to a new transaction in due course, at which point - * we can discard the old committed data pointer. - * - * Returns error number or 0 on success. - */ -int jbd2_journal_get_undo_access(handle_t *handle, struct buffer_head *bh) -{ - int err; - struct journal_head *jh = jbd2_journal_add_journal_head(bh); - char *committed_data = NULL; - - JBUFFER_TRACE(jh, "entry"); - - /* - * Do this first --- it can drop the journal lock, so we want to - * make sure that obtaining the committed_data is done - * atomically wrt. completion of any outstanding commits. - */ - err = do_get_write_access(handle, jh, 1); - if (err) - goto out; - -repeat: - if (!jh->b_committed_data) { - committed_data = jbd2_slab_alloc(jh2bh(jh)->b_size, GFP_NOFS); - if (!committed_data) { - printk(KERN_EMERG "%s: No memory for committed data\n", - __FUNCTION__); - err = -ENOMEM; - goto out; - } - } - - jbd_lock_bh_state(bh); - if (!jh->b_committed_data) { - /* Copy out the current buffer contents into the - * preserved, committed copy. */ - JBUFFER_TRACE(jh, "generate b_committed data"); - if (!committed_data) { - jbd_unlock_bh_state(bh); - goto repeat; - } - - jh->b_committed_data = committed_data; - committed_data = NULL; - memcpy(jh->b_committed_data, bh->b_data, bh->b_size); - } - jbd_unlock_bh_state(bh); -out: - jbd2_journal_put_journal_head(jh); - if (unlikely(committed_data)) - jbd2_slab_free(committed_data, bh->b_size); - return err; -} - -/** - * int jbd2_journal_dirty_data() - mark a buffer as containing dirty data which - * needs to be flushed before we can commit the - * current transaction. - * @handle: transaction - * @bh: bufferhead to mark - * - * The buffer is placed on the transaction's data list and is marked as - * belonging to the transaction. - * - * Returns error number or 0 on success. - * - * jbd2_journal_dirty_data() can be called via page_launder->ext3_writepage - * by kswapd. - */ -int jbd2_journal_dirty_data(handle_t *handle, struct buffer_head *bh) -{ - journal_t *journal = handle->h_transaction->t_journal; - int need_brelse = 0; - struct journal_head *jh; - - if (is_handle_aborted(handle)) - return 0; - - jh = jbd2_journal_add_journal_head(bh); - JBUFFER_TRACE(jh, "entry"); - - /* - * The buffer could *already* be dirty. Writeout can start - * at any time. - */ - jbd_debug(4, "jh: %p, tid:%d\n", jh, handle->h_transaction->t_tid); - - /* - * What if the buffer is already part of a running transaction? - * - * There are two cases: - * 1) It is part of the current running transaction. Refile it, - * just in case we have allocated it as metadata, deallocated - * it, then reallocated it as data. - * 2) It is part of the previous, still-committing transaction. - * If all we want to do is to guarantee that the buffer will be - * written to disk before this new transaction commits, then - * being sure that the *previous* transaction has this same - * property is sufficient for us! Just leave it on its old - * transaction. - * - * In case (2), the buffer must not already exist as metadata - * --- that would violate write ordering (a transaction is free - * to write its data at any point, even before the previous - * committing transaction has committed). The caller must - * never, ever allow this to happen: there's nothing we can do - * about it in this layer. - */ - jbd_lock_bh_state(bh); - spin_lock(&journal->j_list_lock); - if (jh->b_transaction) { - JBUFFER_TRACE(jh, "has transaction"); - if (jh->b_transaction != handle->h_transaction) { - JBUFFER_TRACE(jh, "belongs to older transaction"); - J_ASSERT_JH(jh, jh->b_transaction == - journal->j_committing_transaction); - - /* @@@ IS THIS TRUE ? */ - /* - * Not any more. Scenario: someone does a write() - * in data=journal mode. The buffer's transaction has - * moved into commit. Then someone does another - * write() to the file. We do the frozen data copyout - * and set b_next_transaction to point to j_running_t. - * And while we're in that state, someone does a - * writepage() in an attempt to pageout the same area - * of the file via a shared mapping. At present that - * calls jbd2_journal_dirty_data(), and we get right here. - * It may be too late to journal the data. Simply - * falling through to the next test will suffice: the - * data will be dirty and wil be checkpointed. The - * ordering comments in the next comment block still - * apply. - */ - //J_ASSERT_JH(jh, jh->b_next_transaction == NULL); - - /* - * If we're journalling data, and this buffer was - * subject to a write(), it could be metadata, forget - * or shadow against the committing transaction. Now, - * someone has dirtied the same darn page via a mapping - * and it is being writepage()'d. - * We *could* just steal the page from commit, with some - * fancy locking there. Instead, we just skip it - - * don't tie the page's buffers to the new transaction - * at all. - * Implication: if we crash before the writepage() data - * is written into the filesystem, recovery will replay - * the write() data. - */ - if (jh->b_jlist != BJ_None && - jh->b_jlist != BJ_SyncData && - jh->b_jlist != BJ_Locked) { - JBUFFER_TRACE(jh, "Not stealing"); - goto no_journal; - } - - /* - * This buffer may be undergoing writeout in commit. We - * can't return from here and let the caller dirty it - * again because that can cause the write-out loop in - * commit to never terminate. - */ - if (buffer_dirty(bh)) { - get_bh(bh); - spin_unlock(&journal->j_list_lock); - jbd_unlock_bh_state(bh); - need_brelse = 1; - sync_dirty_buffer(bh); - jbd_lock_bh_state(bh); - spin_lock(&journal->j_list_lock); - /* The buffer may become locked again at any - time if it is redirtied */ - } - - /* journal_clean_data_list() may have got there first */ - if (jh->b_transaction != NULL) { - JBUFFER_TRACE(jh, "unfile from commit"); - __jbd2_journal_temp_unlink_buffer(jh); - /* It still points to the committing - * transaction; move it to this one so - * that the refile assert checks are - * happy. */ - jh->b_transaction = handle->h_transaction; - } - /* The buffer will be refiled below */ - - } - /* - * Special case --- the buffer might actually have been - * allocated and then immediately deallocated in the previous, - * committing transaction, so might still be left on that - * transaction's metadata lists. - */ - if (jh->b_jlist != BJ_SyncData && jh->b_jlist != BJ_Locked) { - JBUFFER_TRACE(jh, "not on correct data list: unfile"); - J_ASSERT_JH(jh, jh->b_jlist != BJ_Shadow); - __jbd2_journal_temp_unlink_buffer(jh); - jh->b_transaction = handle->h_transaction; - JBUFFER_TRACE(jh, "file as data"); - __jbd2_journal_file_buffer(jh, handle->h_transaction, - BJ_SyncData); - } - } else { - JBUFFER_TRACE(jh, "not on a transaction"); - __jbd2_journal_file_buffer(jh, handle->h_transaction, BJ_SyncData); - } -no_journal: - spin_unlock(&journal->j_list_lock); - jbd_unlock_bh_state(bh); - if (need_brelse) { - BUFFER_TRACE(bh, "brelse"); - __brelse(bh); - } - JBUFFER_TRACE(jh, "exit"); - jbd2_journal_put_journal_head(jh); - return 0; -} - -/** - * int jbd2_journal_dirty_metadata() - mark a buffer as containing dirty metadata - * @handle: transaction to add buffer to. - * @bh: buffer to mark - * - * mark dirty metadata which needs to be journaled as part of the current - * transaction. - * - * The buffer is placed on the transaction's metadata list and is marked - * as belonging to the transaction. - * - * Returns error number or 0 on success. - * - * Special care needs to be taken if the buffer already belongs to the - * current committing transaction (in which case we should have frozen - * data present for that commit). In that case, we don't relink the - * buffer: that only gets done when the old transaction finally - * completes its commit. - */ -int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh) -{ - transaction_t *transaction = handle->h_transaction; - journal_t *journal = transaction->t_journal; - struct journal_head *jh = bh2jh(bh); - - jbd_debug(5, "journal_head %p\n", jh); - JBUFFER_TRACE(jh, "entry"); - if (is_handle_aborted(handle)) - goto out; - - jbd_lock_bh_state(bh); - - if (jh->b_modified == 0) { - /* - * This buffer's got modified and becoming part - * of the transaction. This needs to be done - * once a transaction -bzzz - */ - jh->b_modified = 1; - J_ASSERT_JH(jh, handle->h_buffer_credits > 0); - handle->h_buffer_credits--; - } - - /* - * fastpath, to avoid expensive locking. If this buffer is already - * on the running transaction's metadata list there is nothing to do. - * Nobody can take it off again because there is a handle open. - * I _think_ we're OK here with SMP barriers - a mistaken decision will - * result in this test being false, so we go in and take the locks. - */ - if (jh->b_transaction == transaction && jh->b_jlist == BJ_Metadata) { - JBUFFER_TRACE(jh, "fastpath"); - J_ASSERT_JH(jh, jh->b_transaction == - journal->j_running_transaction); - goto out_unlock_bh; - } - - set_buffer_jbddirty(bh); - - /* - * Metadata already on the current transaction list doesn't - * need to be filed. Metadata on another transaction's list must - * be committing, and will be refiled once the commit completes: - * leave it alone for now. - */ - if (jh->b_transaction != transaction) { - JBUFFER_TRACE(jh, "already on other transaction"); - J_ASSERT_JH(jh, jh->b_transaction == - journal->j_committing_transaction); - J_ASSERT_JH(jh, jh->b_next_transaction == transaction); - /* And this case is illegal: we can't reuse another - * transaction's data buffer, ever. */ - goto out_unlock_bh; - } - - /* That test should have eliminated the following case: */ - J_ASSERT_JH(jh, jh->b_frozen_data == 0); - - JBUFFER_TRACE(jh, "file as BJ_Metadata"); - spin_lock(&journal->j_list_lock); - __jbd2_journal_file_buffer(jh, handle->h_transaction, BJ_Metadata); - spin_unlock(&journal->j_list_lock); -out_unlock_bh: - jbd_unlock_bh_state(bh); -out: - JBUFFER_TRACE(jh, "exit"); - return 0; -} - -/* - * jbd2_journal_release_buffer: undo a get_write_access without any buffer - * updates, if the update decided in the end that it didn't need access. - * - */ -void -jbd2_journal_release_buffer(handle_t *handle, struct buffer_head *bh) -{ - BUFFER_TRACE(bh, "entry"); -} - -/** - * void jbd2_journal_forget() - bforget() for potentially-journaled buffers. - * @handle: transaction handle - * @bh: bh to 'forget' - * - * We can only do the bforget if there are no commits pending against the - * buffer. If the buffer is dirty in the current running transaction we - * can safely unlink it. - * - * bh may not be a journalled buffer at all - it may be a non-JBD - * buffer which came off the hashtable. Check for this. - * - * Decrements bh->b_count by one. - * - * Allow this call even if the handle has aborted --- it may be part of - * the caller's cleanup after an abort. - */ -int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh) -{ - transaction_t *transaction = handle->h_transaction; - journal_t *journal = transaction->t_journal; - struct journal_head *jh; - int drop_reserve = 0; - int err = 0; - - BUFFER_TRACE(bh, "entry"); - - jbd_lock_bh_state(bh); - spin_lock(&journal->j_list_lock); - - if (!buffer_jbd(bh)) - goto not_jbd; - jh = bh2jh(bh); - - /* Critical error: attempting to delete a bitmap buffer, maybe? - * Don't do any jbd operations, and return an error. */ - if (!J_EXPECT_JH(jh, !jh->b_committed_data, - "inconsistent data on disk")) { - err = -EIO; - goto not_jbd; - } - - /* - * The buffer's going from the transaction, we must drop - * all references -bzzz - */ - jh->b_modified = 0; - - if (jh->b_transaction == handle->h_transaction) { - J_ASSERT_JH(jh, !jh->b_frozen_data); - - /* If we are forgetting a buffer which is already part - * of this transaction, then we can just drop it from - * the transaction immediately. */ - clear_buffer_dirty(bh); - clear_buffer_jbddirty(bh); - - JBUFFER_TRACE(jh, "belongs to current transaction: unfile"); - - drop_reserve = 1; - - /* - * We are no longer going to journal this buffer. - * However, the commit of this transaction is still - * important to the buffer: the delete that we are now - * processing might obsolete an old log entry, so by - * committing, we can satisfy the buffer's checkpoint. - * - * So, if we have a checkpoint on the buffer, we should - * now refile the buffer on our BJ_Forget list so that - * we know to remove the checkpoint after we commit. - */ - - if (jh->b_cp_transaction) { - __jbd2_journal_temp_unlink_buffer(jh); - __jbd2_journal_file_buffer(jh, transaction, BJ_Forget); - } else { - __jbd2_journal_unfile_buffer(jh); - jbd2_journal_remove_journal_head(bh); - __brelse(bh); - if (!buffer_jbd(bh)) { - spin_unlock(&journal->j_list_lock); - jbd_unlock_bh_state(bh); - __bforget(bh); - goto drop; - } - } - } else if (jh->b_transaction) { - J_ASSERT_JH(jh, (jh->b_transaction == - journal->j_committing_transaction)); - /* However, if the buffer is still owned by a prior - * (committing) transaction, we can't drop it yet... */ - JBUFFER_TRACE(jh, "belongs to older transaction"); - /* ... but we CAN drop it from the new transaction if we - * have also modified it since the original commit. */ - - if (jh->b_next_transaction) { - J_ASSERT(jh->b_next_transaction == transaction); - jh->b_next_transaction = NULL; - drop_reserve = 1; - } - } - -not_jbd: - spin_unlock(&journal->j_list_lock); - jbd_unlock_bh_state(bh); - __brelse(bh); -drop: - if (drop_reserve) { - /* no need to reserve log space for this block -bzzz */ - handle->h_buffer_credits++; - } - return err; -} - -/** - * int jbd2_journal_stop() - complete a transaction - * @handle: tranaction to complete. - * - * All done for a particular handle. - * - * There is not much action needed here. We just return any remaining - * buffer credits to the transaction and remove the handle. The only - * complication is that we need to start a commit operation if the - * filesystem is marked for synchronous update. - * - * jbd2_journal_stop itself will not usually return an error, but it may - * do so in unusual circumstances. In particular, expect it to - * return -EIO if a jbd2_journal_abort has been executed since the - * transaction began. - */ -int jbd2_journal_stop(handle_t *handle) -{ - transaction_t *transaction = handle->h_transaction; - journal_t *journal = transaction->t_journal; - int old_handle_count, err; - pid_t pid; - - J_ASSERT(transaction->t_updates > 0); - J_ASSERT(journal_current_handle() == handle); - - if (is_handle_aborted(handle)) - err = -EIO; - else - err = 0; - - if (--handle->h_ref > 0) { - jbd_debug(4, "h_ref %d -> %d\n", handle->h_ref + 1, - handle->h_ref); - return err; - } - - jbd_debug(4, "Handle %p going down\n", handle); - - /* - * Implement synchronous transaction batching. If the handle - * was synchronous, don't force a commit immediately. Let's - * yield and let another thread piggyback onto this transaction. - * Keep doing that while new threads continue to arrive. - * It doesn't cost much - we're about to run a commit and sleep - * on IO anyway. Speeds up many-threaded, many-dir operations - * by 30x or more... - * - * But don't do this if this process was the most recent one to - * perform a synchronous write. We do this to detect the case where a - * single process is doing a stream of sync writes. No point in waiting - * for joiners in that case. - */ - pid = current->pid; - if (handle->h_sync && journal->j_last_sync_writer != pid) { - journal->j_last_sync_writer = pid; - do { - old_handle_count = transaction->t_handle_count; - schedule_timeout_uninterruptible(1); - } while (old_handle_count != transaction->t_handle_count); - } - - current->journal_info = NULL; - spin_lock(&journal->j_state_lock); - spin_lock(&transaction->t_handle_lock); - transaction->t_outstanding_credits -= handle->h_buffer_credits; - transaction->t_updates--; - if (!transaction->t_updates) { - wake_up(&journal->j_wait_updates); - if (journal->j_barrier_count) - wake_up(&journal->j_wait_transaction_locked); - } - - /* - * If the handle is marked SYNC, we need to set another commit - * going! We also want to force a commit if the current - * transaction is occupying too much of the log, or if the - * transaction is too old now. - */ - if (handle->h_sync || - transaction->t_outstanding_credits > - journal->j_max_transaction_buffers || - time_after_eq(jiffies, transaction->t_expires)) { - /* Do this even for aborted journals: an abort still - * completes the commit thread, it just doesn't write - * anything to disk. */ - tid_t tid = transaction->t_tid; - - spin_unlock(&transaction->t_handle_lock); - jbd_debug(2, "transaction too old, requesting commit for " - "handle %p\n", handle); - /* This is non-blocking */ - __jbd2_log_start_commit(journal, transaction->t_tid); - spin_unlock(&journal->j_state_lock); - - /* - * Special case: JBD2_SYNC synchronous updates require us - * to wait for the commit to complete. - */ - if (handle->h_sync && !(current->flags & PF_MEMALLOC)) - err = jbd2_log_wait_commit(journal, tid); - } else { - spin_unlock(&transaction->t_handle_lock); - spin_unlock(&journal->j_state_lock); - } - - jbd_free_handle(handle); - return err; -} - -/**int jbd2_journal_force_commit() - force any uncommitted transactions - * @journal: journal to force - * - * For synchronous operations: force any uncommitted transactions - * to disk. May seem kludgy, but it reuses all the handle batching - * code in a very simple manner. - */ -int jbd2_journal_force_commit(journal_t *journal) -{ - handle_t *handle; - int ret; - - handle = jbd2_journal_start(journal, 1); - if (IS_ERR(handle)) { - ret = PTR_ERR(handle); - } else { - handle->h_sync = 1; - ret = jbd2_journal_stop(handle); - } - return ret; -} - -/* - * - * List management code snippets: various functions for manipulating the - * transaction buffer lists. - * - */ - -/* - * Append a buffer to a transaction list, given the transaction's list head - * pointer. - * - * j_list_lock is held. - * - * jbd_lock_bh_state(jh2bh(jh)) is held. - */ - -static inline void -__blist_add_buffer(struct journal_head **list, struct journal_head *jh) -{ - if (!*list) { - jh->b_tnext = jh->b_tprev = jh; - *list = jh; - } else { - /* Insert at the tail of the list to preserve order */ - struct journal_head *first = *list, *last = first->b_tprev; - jh->b_tprev = last; - jh->b_tnext = first; - last->b_tnext = first->b_tprev = jh; - } -} - -/* - * Remove a buffer from a transaction list, given the transaction's list - * head pointer. - * - * Called with j_list_lock held, and the journal may not be locked. - * - * jbd_lock_bh_state(jh2bh(jh)) is held. - */ - -static inline void -__blist_del_buffer(struct journal_head **list, struct journal_head *jh) -{ - if (*list == jh) { - *list = jh->b_tnext; - if (*list == jh) - *list = NULL; - } - jh->b_tprev->b_tnext = jh->b_tnext; - jh->b_tnext->b_tprev = jh->b_tprev; -} - -/* - * Remove a buffer from the appropriate transaction list. - * - * Note that this function can *change* the value of - * bh->b_transaction->t_sync_datalist, t_buffers, t_forget, - * t_iobuf_list, t_shadow_list, t_log_list or t_reserved_list. If the caller - * is holding onto a copy of one of thee pointers, it could go bad. - * Generally the caller needs to re-read the pointer from the transaction_t. - * - * Called under j_list_lock. The journal may not be locked. - */ -void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh) -{ - struct journal_head **list = NULL; - transaction_t *transaction; - struct buffer_head *bh = jh2bh(jh); - - J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh)); - transaction = jh->b_transaction; - if (transaction) - assert_spin_locked(&transaction->t_journal->j_list_lock); - - J_ASSERT_JH(jh, jh->b_jlist < BJ_Types); - if (jh->b_jlist != BJ_None) - J_ASSERT_JH(jh, transaction != 0); - - switch (jh->b_jlist) { - case BJ_None: - return; - case BJ_SyncData: - list = &transaction->t_sync_datalist; - break; - case BJ_Metadata: - transaction->t_nr_buffers--; - J_ASSERT_JH(jh, transaction->t_nr_buffers >= 0); - list = &transaction->t_buffers; - break; - case BJ_Forget: - list = &transaction->t_forget; - break; - case BJ_IO: - list = &transaction->t_iobuf_list; - break; - case BJ_Shadow: - list = &transaction->t_shadow_list; - break; - case BJ_LogCtl: - list = &transaction->t_log_list; - break; - case BJ_Reserved: - list = &transaction->t_reserved_list; - break; - case BJ_Locked: - list = &transaction->t_locked_list; - break; - } - - __blist_del_buffer(list, jh); - jh->b_jlist = BJ_None; - if (test_clear_buffer_jbddirty(bh)) - mark_buffer_dirty(bh); /* Expose it to the VM */ -} - -void __jbd2_journal_unfile_buffer(struct journal_head *jh) -{ - __jbd2_journal_temp_unlink_buffer(jh); - jh->b_transaction = NULL; -} - -void jbd2_journal_unfile_buffer(journal_t *journal, struct journal_head *jh) -{ - jbd_lock_bh_state(jh2bh(jh)); - spin_lock(&journal->j_list_lock); - __jbd2_journal_unfile_buffer(jh); - spin_unlock(&journal->j_list_lock); - jbd_unlock_bh_state(jh2bh(jh)); -} - -/* - * Called from jbd2_journal_try_to_free_buffers(). - * - * Called under jbd_lock_bh_state(bh) - */ -static void -__journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh) -{ - struct journal_head *jh; - - jh = bh2jh(bh); - - if (buffer_locked(bh) || buffer_dirty(bh)) - goto out; - - if (jh->b_next_transaction != 0) - goto out; - - spin_lock(&journal->j_list_lock); - if (jh->b_transaction != 0 && jh->b_cp_transaction == 0) { - if (jh->b_jlist == BJ_SyncData || jh->b_jlist == BJ_Locked) { - /* A written-back ordered data buffer */ - JBUFFER_TRACE(jh, "release data"); - __jbd2_journal_unfile_buffer(jh); - jbd2_journal_remove_journal_head(bh); - __brelse(bh); - } - } else if (jh->b_cp_transaction != 0 && jh->b_transaction == 0) { - /* written-back checkpointed metadata buffer */ - if (jh->b_jlist == BJ_None) { - JBUFFER_TRACE(jh, "remove from checkpoint list"); - __jbd2_journal_remove_checkpoint(jh); - jbd2_journal_remove_journal_head(bh); - __brelse(bh); - } - } - spin_unlock(&journal->j_list_lock); -out: - return; -} - - -/** - * int jbd2_journal_try_to_free_buffers() - try to free page buffers. - * @journal: journal for operation - * @page: to try and free - * @unused_gfp_mask: unused - * - * - * For all the buffers on this page, - * if they are fully written out ordered data, move them onto BUF_CLEAN - * so try_to_free_buffers() can reap them. - * - * This function returns non-zero if we wish try_to_free_buffers() - * to be called. We do this if the page is releasable by try_to_free_buffers(). - * We also do it if the page has locked or dirty buffers and the caller wants - * us to perform sync or async writeout. - * - * This complicates JBD locking somewhat. We aren't protected by the - * BKL here. We wish to remove the buffer from its committing or - * running transaction's ->t_datalist via __jbd2_journal_unfile_buffer. - * - * This may *change* the value of transaction_t->t_datalist, so anyone - * who looks at t_datalist needs to lock against this function. - * - * Even worse, someone may be doing a jbd2_journal_dirty_data on this - * buffer. So we need to lock against that. jbd2_journal_dirty_data() - * will come out of the lock with the buffer dirty, which makes it - * ineligible for release here. - * - * Who else is affected by this? hmm... Really the only contender - * is do_get_write_access() - it could be looking at the buffer while - * journal_try_to_free_buffer() is changing its state. But that - * cannot happen because we never reallocate freed data as metadata - * while the data is part of a transaction. Yes? - */ -int jbd2_journal_try_to_free_buffers(journal_t *journal, - struct page *page, gfp_t unused_gfp_mask) -{ - struct buffer_head *head; - struct buffer_head *bh; - int ret = 0; - - J_ASSERT(PageLocked(page)); - - head = page_buffers(page); - bh = head; - do { - struct journal_head *jh; - - /* - * We take our own ref against the journal_head here to avoid - * having to add tons of locking around each instance of - * jbd2_journal_remove_journal_head() and jbd2_journal_put_journal_head(). - */ - jh = jbd2_journal_grab_journal_head(bh); - if (!jh) - continue; - - jbd_lock_bh_state(bh); - __journal_try_to_free_buffer(journal, bh); - jbd2_journal_put_journal_head(jh); - jbd_unlock_bh_state(bh); - if (buffer_jbd(bh)) - goto busy; - } while ((bh = bh->b_this_page) != head); - ret = try_to_free_buffers(page); -busy: - return ret; -} - -/* - * This buffer is no longer needed. If it is on an older transaction's - * checkpoint list we need to record it on this transaction's forget list - * to pin this buffer (and hence its checkpointing transaction) down until - * this transaction commits. If the buffer isn't on a checkpoint list, we - * release it. - * Returns non-zero if JBD no longer has an interest in the buffer. - * - * Called under j_list_lock. - * - * Called under jbd_lock_bh_state(bh). - */ -static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction) -{ - int may_free = 1; - struct buffer_head *bh = jh2bh(jh); - - __jbd2_journal_unfile_buffer(jh); - - if (jh->b_cp_transaction) { - JBUFFER_TRACE(jh, "on running+cp transaction"); - __jbd2_journal_file_buffer(jh, transaction, BJ_Forget); - clear_buffer_jbddirty(bh); - may_free = 0; - } else { - JBUFFER_TRACE(jh, "on running transaction"); - jbd2_journal_remove_journal_head(bh); - __brelse(bh); - } - return may_free; -} - -/* - * jbd2_journal_invalidatepage - * - * This code is tricky. It has a number of cases to deal with. - * - * There are two invariants which this code relies on: - * - * i_size must be updated on disk before we start calling invalidatepage on the - * data. - * - * This is done in ext3 by defining an ext3_setattr method which - * updates i_size before truncate gets going. By maintaining this - * invariant, we can be sure that it is safe to throw away any buffers - * attached to the current transaction: once the transaction commits, - * we know that the data will not be needed. - * - * Note however that we can *not* throw away data belonging to the - * previous, committing transaction! - * - * Any disk blocks which *are* part of the previous, committing - * transaction (and which therefore cannot be discarded immediately) are - * not going to be reused in the new running transaction - * - * The bitmap committed_data images guarantee this: any block which is - * allocated in one transaction and removed in the next will be marked - * as in-use in the committed_data bitmap, so cannot be reused until - * the next transaction to delete the block commits. This means that - * leaving committing buffers dirty is quite safe: the disk blocks - * cannot be reallocated to a different file and so buffer aliasing is - * not possible. - * - * - * The above applies mainly to ordered data mode. In writeback mode we - * don't make guarantees about the order in which data hits disk --- in - * particular we don't guarantee that new dirty data is flushed before - * transaction commit --- so it is always safe just to discard data - * immediately in that mode. --sct - */ - -/* - * The journal_unmap_buffer helper function returns zero if the buffer - * concerned remains pinned as an anonymous buffer belonging to an older - * transaction. - * - * We're outside-transaction here. Either or both of j_running_transaction - * and j_committing_transaction may be NULL. - */ -static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) -{ - transaction_t *transaction; - struct journal_head *jh; - int may_free = 1; - int ret; - - BUFFER_TRACE(bh, "entry"); - - /* - * It is safe to proceed here without the j_list_lock because the - * buffers cannot be stolen by try_to_free_buffers as long as we are - * holding the page lock. --sct - */ - - if (!buffer_jbd(bh)) - goto zap_buffer_unlocked; - - spin_lock(&journal->j_state_lock); - jbd_lock_bh_state(bh); - spin_lock(&journal->j_list_lock); - - jh = jbd2_journal_grab_journal_head(bh); - if (!jh) - goto zap_buffer_no_jh; - - transaction = jh->b_transaction; - if (transaction == NULL) { - /* First case: not on any transaction. If it - * has no checkpoint link, then we can zap it: - * it's a writeback-mode buffer so we don't care - * if it hits disk safely. */ - if (!jh->b_cp_transaction) { - JBUFFER_TRACE(jh, "not on any transaction: zap"); - goto zap_buffer; - } - - if (!buffer_dirty(bh)) { - /* bdflush has written it. We can drop it now */ - goto zap_buffer; - } - - /* OK, it must be in the journal but still not - * written fully to disk: it's metadata or - * journaled data... */ - - if (journal->j_running_transaction) { - /* ... and once the current transaction has - * committed, the buffer won't be needed any - * longer. */ - JBUFFER_TRACE(jh, "checkpointed: add to BJ_Forget"); - ret = __dispose_buffer(jh, - journal->j_running_transaction); - jbd2_journal_put_journal_head(jh); - spin_unlock(&journal->j_list_lock); - jbd_unlock_bh_state(bh); - spin_unlock(&journal->j_state_lock); - return ret; - } else { - /* There is no currently-running transaction. So the - * orphan record which we wrote for this file must have - * passed into commit. We must attach this buffer to - * the committing transaction, if it exists. */ - if (journal->j_committing_transaction) { - JBUFFER_TRACE(jh, "give to committing trans"); - ret = __dispose_buffer(jh, - journal->j_committing_transaction); - jbd2_journal_put_journal_head(jh); - spin_unlock(&journal->j_list_lock); - jbd_unlock_bh_state(bh); - spin_unlock(&journal->j_state_lock); - return ret; - } else { - /* The orphan record's transaction has - * committed. We can cleanse this buffer */ - clear_buffer_jbddirty(bh); - goto zap_buffer; - } - } - } else if (transaction == journal->j_committing_transaction) { - if (jh->b_jlist == BJ_Locked) { - /* - * The buffer is on the committing transaction's locked - * list. We have the buffer locked, so I/O has - * completed. So we can nail the buffer now. - */ - may_free = __dispose_buffer(jh, transaction); - goto zap_buffer; - } - /* - * If it is committing, we simply cannot touch it. We - * can remove it's next_transaction pointer from the - * running transaction if that is set, but nothing - * else. */ - JBUFFER_TRACE(jh, "on committing transaction"); - set_buffer_freed(bh); - if (jh->b_next_transaction) { - J_ASSERT(jh->b_next_transaction == - journal->j_running_transaction); - jh->b_next_transaction = NULL; - } - jbd2_journal_put_journal_head(jh); - spin_unlock(&journal->j_list_lock); - jbd_unlock_bh_state(bh); - spin_unlock(&journal->j_state_lock); - return 0; - } else { - /* Good, the buffer belongs to the running transaction. - * We are writing our own transaction's data, not any - * previous one's, so it is safe to throw it away - * (remember that we expect the filesystem to have set - * i_size already for this truncate so recovery will not - * expose the disk blocks we are discarding here.) */ - J_ASSERT_JH(jh, transaction == journal->j_running_transaction); - may_free = __dispose_buffer(jh, transaction); - } - -zap_buffer: - jbd2_journal_put_journal_head(jh); -zap_buffer_no_jh: - spin_unlock(&journal->j_list_lock); - jbd_unlock_bh_state(bh); - spin_unlock(&journal->j_state_lock); -zap_buffer_unlocked: - clear_buffer_dirty(bh); - J_ASSERT_BH(bh, !buffer_jbddirty(bh)); - clear_buffer_mapped(bh); - clear_buffer_req(bh); - clear_buffer_new(bh); - bh->b_bdev = NULL; - return may_free; -} - -/** - * void jbd2_journal_invalidatepage() - * @journal: journal to use for flush... - * @page: page to flush - * @offset: length of page to invalidate. - * - * Reap page buffers containing data after offset in page. - * - */ -void jbd2_journal_invalidatepage(journal_t *journal, - struct page *page, - unsigned long offset) -{ - struct buffer_head *head, *bh, *next; - unsigned int curr_off = 0; - int may_free = 1; - - if (!PageLocked(page)) - BUG(); - if (!page_has_buffers(page)) - return; - - /* We will potentially be playing with lists other than just the - * data lists (especially for journaled data mode), so be - * cautious in our locking. */ - - head = bh = page_buffers(page); - do { - unsigned int next_off = curr_off + bh->b_size; - next = bh->b_this_page; - - if (offset <= curr_off) { - /* This block is wholly outside the truncation point */ - lock_buffer(bh); - may_free &= journal_unmap_buffer(journal, bh); - unlock_buffer(bh); - } - curr_off = next_off; - bh = next; - - } while (bh != head); - - if (!offset) { - if (may_free && try_to_free_buffers(page)) - J_ASSERT(!page_has_buffers(page)); - } -} - -/* - * File a buffer on the given transaction list. - */ -void __jbd2_journal_file_buffer(struct journal_head *jh, - transaction_t *transaction, int jlist) -{ - struct journal_head **list = NULL; - int was_dirty = 0; - struct buffer_head *bh = jh2bh(jh); - - J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh)); - assert_spin_locked(&transaction->t_journal->j_list_lock); - - J_ASSERT_JH(jh, jh->b_jlist < BJ_Types); - J_ASSERT_JH(jh, jh->b_transaction == transaction || - jh->b_transaction == 0); - - if (jh->b_transaction && jh->b_jlist == jlist) - return; - - /* The following list of buffer states needs to be consistent - * with __jbd_unexpected_dirty_buffer()'s handling of dirty - * state. */ - - if (jlist == BJ_Metadata || jlist == BJ_Reserved || - jlist == BJ_Shadow || jlist == BJ_Forget) { - if (test_clear_buffer_dirty(bh) || - test_clear_buffer_jbddirty(bh)) - was_dirty = 1; - } - - if (jh->b_transaction) - __jbd2_journal_temp_unlink_buffer(jh); - jh->b_transaction = transaction; - - switch (jlist) { - case BJ_None: - J_ASSERT_JH(jh, !jh->b_committed_data); - J_ASSERT_JH(jh, !jh->b_frozen_data); - return; - case BJ_SyncData: - list = &transaction->t_sync_datalist; - break; - case BJ_Metadata: - transaction->t_nr_buffers++; - list = &transaction->t_buffers; - break; - case BJ_Forget: - list = &transaction->t_forget; - break; - case BJ_IO: - list = &transaction->t_iobuf_list; - break; - case BJ_Shadow: - list = &transaction->t_shadow_list; - break; - case BJ_LogCtl: - list = &transaction->t_log_list; - break; - case BJ_Reserved: - list = &transaction->t_reserved_list; - break; - case BJ_Locked: - list = &transaction->t_locked_list; - break; - } - - __blist_add_buffer(list, jh); - jh->b_jlist = jlist; - - if (was_dirty) - set_buffer_jbddirty(bh); -} - -void jbd2_journal_file_buffer(struct journal_head *jh, - transaction_t *transaction, int jlist) -{ - jbd_lock_bh_state(jh2bh(jh)); - spin_lock(&transaction->t_journal->j_list_lock); - __jbd2_journal_file_buffer(jh, transaction, jlist); - spin_unlock(&transaction->t_journal->j_list_lock); - jbd_unlock_bh_state(jh2bh(jh)); -} - -/* - * Remove a buffer from its current buffer list in preparation for - * dropping it from its current transaction entirely. If the buffer has - * already started to be used by a subsequent transaction, refile the - * buffer on that transaction's metadata list. - * - * Called under journal->j_list_lock - * - * Called under jbd_lock_bh_state(jh2bh(jh)) - */ -void __jbd2_journal_refile_buffer(struct journal_head *jh) -{ - int was_dirty; - struct buffer_head *bh = jh2bh(jh); - - J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh)); - if (jh->b_transaction) - assert_spin_locked(&jh->b_transaction->t_journal->j_list_lock); - - /* If the buffer is now unused, just drop it. */ - if (jh->b_next_transaction == NULL) { - __jbd2_journal_unfile_buffer(jh); - return; - } - - /* - * It has been modified by a later transaction: add it to the new - * transaction's metadata list. - */ - - was_dirty = test_clear_buffer_jbddirty(bh); - __jbd2_journal_temp_unlink_buffer(jh); - jh->b_transaction = jh->b_next_transaction; - jh->b_next_transaction = NULL; - __jbd2_journal_file_buffer(jh, jh->b_transaction, - was_dirty ? BJ_Metadata : BJ_Reserved); - J_ASSERT_JH(jh, jh->b_transaction->t_state == T_RUNNING); - - if (was_dirty) - set_buffer_jbddirty(bh); -} - -/* - * For the unlocked version of this call, also make sure that any - * hanging journal_head is cleaned up if necessary. - * - * __jbd2_journal_refile_buffer is usually called as part of a single locked - * operation on a buffer_head, in which the caller is probably going to - * be hooking the journal_head onto other lists. In that case it is up - * to the caller to remove the journal_head if necessary. For the - * unlocked jbd2_journal_refile_buffer call, the caller isn't going to be - * doing anything else to the buffer so we need to do the cleanup - * ourselves to avoid a jh leak. - * - * *** The journal_head may be freed by this call! *** - */ -void jbd2_journal_refile_buffer(journal_t *journal, struct journal_head *jh) -{ - struct buffer_head *bh = jh2bh(jh); - - jbd_lock_bh_state(bh); - spin_lock(&journal->j_list_lock); - - __jbd2_journal_refile_buffer(jh); - jbd_unlock_bh_state(bh); - jbd2_journal_remove_journal_head(bh); - - spin_unlock(&journal->j_list_lock); - __brelse(bh); -} diff --git a/trunk/fs/jffs2/super.c b/trunk/fs/jffs2/super.c index bc4b8106a490..6de374513c01 100644 --- a/trunk/fs/jffs2/super.c +++ b/trunk/fs/jffs2/super.c @@ -334,10 +334,10 @@ static int __init init_jffs2_fs(void) which means just 'no padding', without the alignment thing. But GCC doesn't have that -- we have to just hope the structs are the right sizes, instead. */ - BUILD_BUG_ON(sizeof(struct jffs2_unknown_node) != 12); - BUILD_BUG_ON(sizeof(struct jffs2_raw_dirent) != 40); - BUILD_BUG_ON(sizeof(struct jffs2_raw_inode) != 68); - BUILD_BUG_ON(sizeof(struct jffs2_raw_summary) != 32); + BUG_ON(sizeof(struct jffs2_unknown_node) != 12); + BUG_ON(sizeof(struct jffs2_raw_dirent) != 40); + BUG_ON(sizeof(struct jffs2_raw_inode) != 68); + BUG_ON(sizeof(struct jffs2_raw_summary) != 32); printk(KERN_INFO "JFFS2 version 2.2." #ifdef CONFIG_JFFS2_FS_WRITEBUFFER diff --git a/trunk/fs/minix/inode.c b/trunk/fs/minix/inode.c index 1e36bae4d0eb..c11a4b9fb863 100644 --- a/trunk/fs/minix/inode.c +++ b/trunk/fs/minix/inode.c @@ -149,8 +149,12 @@ static int minix_fill_super(struct super_block *s, void *data, int silent) return -ENOMEM; s->s_fs_info = sbi; - BUILD_BUG_ON(32 != sizeof (struct minix_inode)); - BUILD_BUG_ON(64 != sizeof(struct minix2_inode)); + /* N.B. These should be compile-time tests. + Unfortunately that is impossible. */ + if (32 != sizeof (struct minix_inode)) + panic("bad V1 i-node size"); + if (64 != sizeof(struct minix2_inode)) + panic("bad V2 i-node size"); if (!sb_set_blocksize(s, BLOCK_SIZE)) goto out_bad_hblock; diff --git a/trunk/fs/ocfs2/super.c b/trunk/fs/ocfs2/super.c index 76b46ebbb10c..4c29cd7cc8e6 100644 --- a/trunk/fs/ocfs2/super.c +++ b/trunk/fs/ocfs2/super.c @@ -339,7 +339,7 @@ static unsigned long long ocfs2_max_file_offset(unsigned int blockshift) #if BITS_PER_LONG == 32 # if defined(CONFIG_LBD) - BUILD_BUG_ON(sizeof(sector_t) != 8); + BUG_ON(sizeof(sector_t) != 8); pagefactor = PAGE_CACHE_SIZE; bitshift = BITS_PER_LONG; # else diff --git a/trunk/fs/reiserfs/super.c b/trunk/fs/reiserfs/super.c index 9041802df832..c89aa2338191 100644 --- a/trunk/fs/reiserfs/super.c +++ b/trunk/fs/reiserfs/super.c @@ -430,30 +430,21 @@ int remove_save_link(struct inode *inode, int truncate) return journal_end(&th, inode->i_sb, JOURNAL_PER_BALANCE_CNT); } -static void reiserfs_kill_sb(struct super_block *s) -{ - if (REISERFS_SB(s)) { - if (REISERFS_SB(s)->xattr_root) { - d_invalidate(REISERFS_SB(s)->xattr_root); - dput(REISERFS_SB(s)->xattr_root); - REISERFS_SB(s)->xattr_root = NULL; - } - - if (REISERFS_SB(s)->priv_root) { - d_invalidate(REISERFS_SB(s)->priv_root); - dput(REISERFS_SB(s)->priv_root); - REISERFS_SB(s)->priv_root = NULL; - } - } - - kill_block_super(s); -} - static void reiserfs_put_super(struct super_block *s) { struct reiserfs_transaction_handle th; th.t_trans_id = 0; + if (REISERFS_SB(s)->xattr_root) { + d_invalidate(REISERFS_SB(s)->xattr_root); + dput(REISERFS_SB(s)->xattr_root); + } + + if (REISERFS_SB(s)->priv_root) { + d_invalidate(REISERFS_SB(s)->priv_root); + dput(REISERFS_SB(s)->priv_root); + } + /* change file system state to current state if it was mounted with read-write permissions */ if (!(s->s_flags & MS_RDONLY)) { if (!journal_begin(&th, s, 10)) { @@ -2165,7 +2156,7 @@ struct file_system_type reiserfs_fs_type = { .owner = THIS_MODULE, .name = "reiserfs", .get_sb = get_super_block, - .kill_sb = reiserfs_kill_sb, + .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; diff --git a/trunk/fs/splice.c b/trunk/fs/splice.c index a567010b62ac..13e92dd19fbb 100644 --- a/trunk/fs/splice.c +++ b/trunk/fs/splice.c @@ -607,7 +607,7 @@ static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf, ret = -ENOMEM; page = page_cache_alloc_cold(mapping); if (unlikely(!page)) - goto out_ret; + goto out_nomem; /* * This will also lock the page @@ -666,7 +666,7 @@ static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf, if (sd->pos + this_len > isize) vmtruncate(mapping->host, isize); - goto out_ret; + goto out; } if (buf->page != page) { @@ -698,7 +698,7 @@ static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf, out: page_cache_release(page); unlock_page(page); -out_ret: +out_nomem: return ret; } diff --git a/trunk/fs/super.c b/trunk/fs/super.c index 47e554c12e76..aec99ddbe53f 100644 --- a/trunk/fs/super.c +++ b/trunk/fs/super.c @@ -260,17 +260,17 @@ int fsync_super(struct super_block *sb) * that need destruction out of superblock, call generic_shutdown_super() * and release aforementioned objects. Note: dentries and inodes _are_ * taken care of and do not need specific handling. - * - * Upon calling this function, the filesystem may no longer alter or - * rearrange the set of dentries belonging to this super_block, nor may it - * change the attachments of dentries to inodes. */ void generic_shutdown_super(struct super_block *sb) { + struct dentry *root = sb->s_root; struct super_operations *sop = sb->s_op; - if (sb->s_root) { - shrink_dcache_for_umount(sb); + if (root) { + sb->s_root = NULL; + shrink_dcache_parent(root); + shrink_dcache_sb(sb); + dput(root); fsync_super(sb); lock_super(sb); sb->s_flags &= ~MS_ACTIVE; diff --git a/trunk/fs/sysv/super.c b/trunk/fs/sysv/super.c index dc9e7dc07fb7..350cba5d6803 100644 --- a/trunk/fs/sysv/super.c +++ b/trunk/fs/sysv/super.c @@ -358,11 +358,16 @@ static int sysv_fill_super(struct super_block *sb, void *data, int silent) unsigned long blocknr; int size = 0, i; - BUILD_BUG_ON(1024 != sizeof (struct xenix_super_block)); - BUILD_BUG_ON(512 != sizeof (struct sysv4_super_block)); - BUILD_BUG_ON(512 != sizeof (struct sysv2_super_block)); - BUILD_BUG_ON(500 != sizeof (struct coh_super_block)); - BUILD_BUG_ON(64 != sizeof (struct sysv_inode)); + if (1024 != sizeof (struct xenix_super_block)) + panic("Xenix FS: bad superblock size"); + if (512 != sizeof (struct sysv4_super_block)) + panic("SystemV FS: bad superblock size"); + if (512 != sizeof (struct sysv2_super_block)) + panic("SystemV FS: bad superblock size"); + if (500 != sizeof (struct coh_super_block)) + panic("Coherent FS: bad superblock size"); + if (64 != sizeof (struct sysv_inode)) + panic("sysv fs: bad inode size"); sbi = kzalloc(sizeof(struct sysv_sb_info), GFP_KERNEL); if (!sbi) diff --git a/trunk/include/acpi/pdc_intel.h b/trunk/include/acpi/pdc_intel.h index e72bfdd887f9..c5472be6f3a2 100644 --- a/trunk/include/acpi/pdc_intel.h +++ b/trunk/include/acpi/pdc_intel.h @@ -13,7 +13,6 @@ #define ACPI_PDC_SMP_C_SWCOORD (0x0040) #define ACPI_PDC_SMP_T_SWCOORD (0x0080) #define ACPI_PDC_C_C1_FFH (0x0100) -#define ACPI_PDC_C_C2C3_FFH (0x0200) #define ACPI_PDC_EST_CAPABILITY_SMP (ACPI_PDC_SMP_C1PT | \ ACPI_PDC_C_C1_HALT | \ @@ -24,10 +23,8 @@ ACPI_PDC_SMP_P_SWCOORD | \ ACPI_PDC_P_FFH) -#define ACPI_PDC_C_CAPABILITY_SMP (ACPI_PDC_SMP_C2C3 | \ - ACPI_PDC_SMP_C1PT | \ - ACPI_PDC_C_C1_HALT | \ - ACPI_PDC_C_C1_FFH | \ - ACPI_PDC_C_C2C3_FFH) +#define ACPI_PDC_C_CAPABILITY_SMP (ACPI_PDC_SMP_C2C3 | \ + ACPI_PDC_SMP_C1PT | \ + ACPI_PDC_C_C1_HALT) #endif /* __PDC_INTEL_H__ */ diff --git a/trunk/include/acpi/processor.h b/trunk/include/acpi/processor.h index 7798d2a9f793..9dd5b75961f8 100644 --- a/trunk/include/acpi/processor.h +++ b/trunk/include/acpi/processor.h @@ -29,9 +29,6 @@ #define DOMAIN_COORD_TYPE_SW_ANY 0xfd #define DOMAIN_COORD_TYPE_HW_ALL 0xfe -#define ACPI_CSTATE_SYSTEMIO (0) -#define ACPI_CSTATE_FFH (1) - /* Power Management */ struct acpi_processor_cx; @@ -61,8 +58,6 @@ struct acpi_processor_cx { u8 valid; u8 type; u32 address; - u8 space_id; - u8 index; u32 latency; u32 latency_ticks; u32 power; @@ -211,9 +206,6 @@ void arch_acpi_processor_init_pdc(struct acpi_processor *pr); #ifdef ARCH_HAS_POWER_INIT void acpi_processor_power_init_bm_check(struct acpi_processor_flags *flags, unsigned int cpu); -int acpi_processor_ffh_cstate_probe(unsigned int cpu, - struct acpi_processor_cx *cx, struct acpi_power_register *reg); -void acpi_processor_ffh_cstate_enter(struct acpi_processor_cx *cstate); #else static inline void acpi_processor_power_init_bm_check(struct acpi_processor_flags @@ -222,16 +214,6 @@ static inline void acpi_processor_power_init_bm_check(struct flags->bm_check = 1; return; } -static inline int acpi_processor_ffh_cstate_probe(unsigned int cpu, - struct acpi_processor_cx *cx, struct acpi_power_register *reg) -{ - return -1; -} -static inline void acpi_processor_ffh_cstate_enter( - struct acpi_processor_cx *cstate) -{ - return; -} #endif /* in processor_perflib.c */ diff --git a/trunk/include/asm-alpha/io.h b/trunk/include/asm-alpha/io.h index 5d15af24573b..f5ae98c25d1f 100644 --- a/trunk/include/asm-alpha/io.h +++ b/trunk/include/asm-alpha/io.h @@ -533,6 +533,19 @@ extern void outsl (unsigned long port, const void *src, unsigned long count); #define eth_io_copy_and_sum(skb,src,len,unused) \ memcpy_fromio((skb)->data,src,len) +static inline int +check_signature(const volatile void __iomem *io_addr, + const unsigned char *signature, int length) +{ + do { + if (readb(io_addr) != *signature) + return 0; + io_addr++; + signature++; + } while (--length); + return 1; +} + /* * The Alpha Jensen hardware for some rather strange reason puts * the RTC clock at 0x170 instead of 0x70. Probably due to some diff --git a/trunk/include/asm-arm/arch-versatile/hardware.h b/trunk/include/asm-arm/arch-versatile/hardware.h index edc06598d187..41c1bee342ad 100644 --- a/trunk/include/asm-arm/arch-versatile/hardware.h +++ b/trunk/include/asm-arm/arch-versatile/hardware.h @@ -28,8 +28,8 @@ /* * PCI space virtual addresses */ -#define VERSATILE_PCI_VIRT_BASE (void __iomem *)0xe8000000ul -#define VERSATILE_PCI_CFG_VIRT_BASE (void __iomem *)0xe9000000ul +#define VERSATILE_PCI_VIRT_BASE 0xe8000000 +#define VERSATILE_PCI_CFG_VIRT_BASE 0xe9000000 #if 0 #define VERSATILE_PCI_VIRT_MEM_BASE0 0xf4000000 diff --git a/trunk/include/asm-arm/io.h b/trunk/include/asm-arm/io.h index ae999fd5dc67..34aaaac4f617 100644 --- a/trunk/include/asm-arm/io.h +++ b/trunk/include/asm-arm/io.h @@ -193,6 +193,23 @@ extern void _memset_io(volatile void __iomem *, int, size_t); #define eth_io_copy_and_sum(s,c,l,b) \ eth_copy_and_sum((s),__mem_pci(c),(l),(b)) +static inline int +check_signature(void __iomem *io_addr, const unsigned char *signature, + int length) +{ + int retval = 0; + do { + if (readb(io_addr) != *signature) + goto out; + io_addr++; + signature++; + length--; + } while (length); + retval = 1; +out: + return retval; +} + #elif !defined(readb) #define readb(c) (__readwrite_bug("readb"),0) diff --git a/trunk/include/asm-arm/uaccess.h b/trunk/include/asm-arm/uaccess.h index 09ad0cab9014..87aba57a66c4 100644 --- a/trunk/include/asm-arm/uaccess.h +++ b/trunk/include/asm-arm/uaccess.h @@ -110,7 +110,7 @@ extern int __get_user_4(void *); #define get_user(x,p) \ ({ \ const register typeof(*(p)) __user *__p asm("r0") = (p);\ - register unsigned long __r2 asm("r2"); \ + register unsigned int __r2 asm("r2"); \ register int __e asm("r0"); \ switch (sizeof(*(__p))) { \ case 1: \ diff --git a/trunk/include/asm-avr32/irq_regs.h b/trunk/include/asm-avr32/irq_regs.h deleted file mode 100644 index 3dd9c0b70270..000000000000 --- a/trunk/include/asm-avr32/irq_regs.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/trunk/include/asm-frv/io.h b/trunk/include/asm-frv/io.h index 20e44fe00abf..7765f5528894 100644 --- a/trunk/include/asm-frv/io.h +++ b/trunk/include/asm-frv/io.h @@ -385,6 +385,27 @@ static inline void pci_iounmap(struct pci_dev *dev, void __iomem *p) */ #define xlate_dev_kmem_ptr(p) p +/* + * Check BIOS signature + */ +static inline int check_signature(volatile void __iomem *io_addr, + const unsigned char *signature, int length) +{ + int retval = 0; + + do { + if (readb(io_addr) != *signature) + goto out; + io_addr++; + signature++; + length--; + } while (length); + + retval = 1; +out: + return retval; +} + #endif /* __KERNEL__ */ #endif /* _ASM_IO_H */ diff --git a/trunk/include/asm-generic/bitops/sched.h b/trunk/include/asm-generic/bitops/sched.h index 815bb0148060..5ef93a4d009f 100644 --- a/trunk/include/asm-generic/bitops/sched.h +++ b/trunk/include/asm-generic/bitops/sched.h @@ -15,7 +15,7 @@ static inline int sched_find_first_bit(const unsigned long *b) #if BITS_PER_LONG == 64 if (unlikely(b[0])) return __ffs(b[0]); - if (likely(b[1])) + if (unlikely(b[1])) return __ffs(b[1]) + 64; return __ffs(b[2]) + 128; #elif BITS_PER_LONG == 32 diff --git a/trunk/include/asm-i386/io.h b/trunk/include/asm-i386/io.h index 68df0dc3ab8f..b3724fe93ff1 100644 --- a/trunk/include/asm-i386/io.h +++ b/trunk/include/asm-i386/io.h @@ -224,6 +224,33 @@ static inline void memcpy_toio(volatile void __iomem *dst, const void *src, int #define eth_io_copy_and_sum(a,b,c,d) eth_copy_and_sum((a),(void __force *)(b),(c),(d)) +/** + * check_signature - find BIOS signatures + * @io_addr: mmio address to check + * @signature: signature block + * @length: length of signature + * + * Perform a signature comparison with the mmio address io_addr. This + * address should have been obtained by ioremap. + * Returns 1 on a match. + */ + +static inline int check_signature(volatile void __iomem * io_addr, + const unsigned char *signature, int length) +{ + int retval = 0; + do { + if (readb(io_addr) != *signature) + goto out; + io_addr++; + signature++; + length--; + } while (length); + retval = 1; +out: + return retval; +} + /* * Cache management * diff --git a/trunk/include/asm-i386/msr.h b/trunk/include/asm-i386/msr.h index 8c31887e1cfa..62b76cd96957 100644 --- a/trunk/include/asm-i386/msr.h +++ b/trunk/include/asm-i386/msr.h @@ -95,8 +95,6 @@ static inline void wrmsrl (unsigned long msr, unsigned long long val) #define MSR_P6_PERFCTR0 0xc1 #define MSR_P6_PERFCTR1 0xc2 -#define MSR_FSB_FREQ 0xcd - #define MSR_IA32_BBL_CR_CTL 0x119 @@ -127,9 +125,6 @@ static inline void wrmsrl (unsigned long msr, unsigned long long val) #define MSR_IA32_PERF_STATUS 0x198 #define MSR_IA32_PERF_CTL 0x199 -#define MSR_IA32_MPERF 0xE7 -#define MSR_IA32_APERF 0xE8 - #define MSR_IA32_THERM_CONTROL 0x19a #define MSR_IA32_THERM_INTERRUPT 0x19b #define MSR_IA32_THERM_STATUS 0x19c diff --git a/trunk/include/asm-i386/processor.h b/trunk/include/asm-i386/processor.h index e0ddca94d50c..2277127696d2 100644 --- a/trunk/include/asm-i386/processor.h +++ b/trunk/include/asm-i386/processor.h @@ -306,8 +306,6 @@ static inline void __mwait(unsigned long eax, unsigned long ecx) : :"a" (eax), "c" (ecx)); } -extern void mwait_idle_with_hints(unsigned long eax, unsigned long ecx); - /* from system description table in BIOS. Mostly for MCA use, but others may find it useful. */ extern unsigned int machine_id; diff --git a/trunk/include/asm-i386/uaccess.h b/trunk/include/asm-i386/uaccess.h index eef5133b9ce2..54d905ebc63d 100644 --- a/trunk/include/asm-i386/uaccess.h +++ b/trunk/include/asm-i386/uaccess.h @@ -404,6 +404,20 @@ unsigned long __must_check __copy_from_user_ll_nocache_nozero(void *to, * anything, so this is accurate. */ +/** + * __copy_to_user: - Copy a block of data into user space, with less checking. + * @to: Destination address, in user space. + * @from: Source address, in kernel space. + * @n: Number of bytes to copy. + * + * Context: User context only. This function may sleep. + * + * Copy data from kernel space to user space. Caller must check + * the specified block with access_ok() before calling this function. + * + * Returns number of bytes that could not be copied. + * On success, this will be zero. + */ static __always_inline unsigned long __must_check __copy_to_user_inatomic(void __user *to, const void *from, unsigned long n) { @@ -425,27 +439,35 @@ __copy_to_user_inatomic(void __user *to, const void *from, unsigned long n) return __copy_to_user_ll(to, from, n); } +static __always_inline unsigned long __must_check +__copy_to_user(void __user *to, const void *from, unsigned long n) +{ + might_sleep(); + return __copy_to_user_inatomic(to, from, n); +} + /** - * __copy_to_user: - Copy a block of data into user space, with less checking. - * @to: Destination address, in user space. - * @from: Source address, in kernel space. + * __copy_from_user: - Copy a block of data from user space, with less checking. + * @to: Destination address, in kernel space. + * @from: Source address, in user space. * @n: Number of bytes to copy. * * Context: User context only. This function may sleep. * - * Copy data from kernel space to user space. Caller must check + * Copy data from user space to kernel space. Caller must check * the specified block with access_ok() before calling this function. * * Returns number of bytes that could not be copied. * On success, this will be zero. + * + * If some data could not be copied, this function will pad the copied + * data to the requested size using zero bytes. + * + * An alternate version - __copy_from_user_inatomic() - may be called from + * atomic context and will fail rather than sleep. In this case the + * uncopied bytes will *NOT* be padded with zeros. See fs/filemap.h + * for explanation of why this is needed. */ -static __always_inline unsigned long __must_check -__copy_to_user(void __user *to, const void *from, unsigned long n) -{ - might_sleep(); - return __copy_to_user_inatomic(to, from, n); -} - static __always_inline unsigned long __copy_from_user_inatomic(void *to, const void __user *from, unsigned long n) { @@ -471,29 +493,6 @@ __copy_from_user_inatomic(void *to, const void __user *from, unsigned long n) } return __copy_from_user_ll_nozero(to, from, n); } - -/** - * __copy_from_user: - Copy a block of data from user space, with less checking. - * @to: Destination address, in kernel space. - * @from: Source address, in user space. - * @n: Number of bytes to copy. - * - * Context: User context only. This function may sleep. - * - * Copy data from user space to kernel space. Caller must check - * the specified block with access_ok() before calling this function. - * - * Returns number of bytes that could not be copied. - * On success, this will be zero. - * - * If some data could not be copied, this function will pad the copied - * data to the requested size using zero bytes. - * - * An alternate version - __copy_from_user_inatomic() - may be called from - * atomic context and will fail rather than sleep. In this case the - * uncopied bytes will *NOT* be padded with zeros. See fs/filemap.h - * for explanation of why this is needed. - */ static __always_inline unsigned long __copy_from_user(void *to, const void __user *from, unsigned long n) { diff --git a/trunk/include/asm-i386/unistd.h b/trunk/include/asm-i386/unistd.h index beeeaf6b054a..3ca7ab963d7d 100644 --- a/trunk/include/asm-i386/unistd.h +++ b/trunk/include/asm-i386/unistd.h @@ -324,11 +324,10 @@ #define __NR_vmsplice 316 #define __NR_move_pages 317 #define __NR_getcpu 318 -#define __NR_epoll_pwait 319 #ifdef __KERNEL__ -#define NR_syscalls 320 +#define NR_syscalls 319 #include /* diff --git a/trunk/include/asm-i386/vic.h b/trunk/include/asm-i386/vic.h index 53100f353612..4abfcfb91eb8 100644 --- a/trunk/include/asm-i386/vic.h +++ b/trunk/include/asm-i386/vic.h @@ -58,4 +58,4 @@ static const int VIC_CPI_Registers[] = #define VIC_BOOT_INTERRUPT_MASK 0xfe -extern void smp_vic_timer_interrupt(void); +extern void smp_vic_timer_interrupt(struct pt_regs *regs); diff --git a/trunk/include/asm-i386/voyager.h b/trunk/include/asm-i386/voyager.h index 5b27838905b2..e74c54aa757f 100644 --- a/trunk/include/asm-i386/voyager.h +++ b/trunk/include/asm-i386/voyager.h @@ -118,33 +118,33 @@ typedef struct voyager_module { } voyager_module_t; typedef struct voyager_eeprom_hdr { - __u8 module_id[4]; - __u8 version_id; - __u8 config_id; - __u16 boundry_id; /* boundary scan id */ - __u16 ee_size; /* size of EEPROM */ - __u8 assembly[11]; /* assembly # */ - __u8 assembly_rev; /* assembly rev */ - __u8 tracer[4]; /* tracer number */ - __u16 assembly_cksum; /* asm checksum */ - __u16 power_consump; /* pwr requirements */ - __u16 num_asics; /* number of asics */ - __u16 bist_time; /* min. bist time */ - __u16 err_log_offset; /* error log offset */ - __u16 scan_path_offset;/* scan path offset */ - __u16 cct_offset; - __u16 log_length; /* length of err log */ - __u16 xsum_end; /* offset to end of + __u8 module_id[4] __attribute__((packed)); + __u8 version_id __attribute__((packed)); + __u8 config_id __attribute__((packed)); + __u16 boundry_id __attribute__((packed)); /* boundary scan id */ + __u16 ee_size __attribute__((packed)); /* size of EEPROM */ + __u8 assembly[11] __attribute__((packed)); /* assembly # */ + __u8 assembly_rev __attribute__((packed)); /* assembly rev */ + __u8 tracer[4] __attribute__((packed)); /* tracer number */ + __u16 assembly_cksum __attribute__((packed)); /* asm checksum */ + __u16 power_consump __attribute__((packed)); /* pwr requirements */ + __u16 num_asics __attribute__((packed)); /* number of asics */ + __u16 bist_time __attribute__((packed)); /* min. bist time */ + __u16 err_log_offset __attribute__((packed)); /* error log offset */ + __u16 scan_path_offset __attribute__((packed));/* scan path offset */ + __u16 cct_offset __attribute__((packed)); + __u16 log_length __attribute__((packed)); /* length of err log */ + __u16 xsum_end __attribute__((packed)); /* offset to end of checksum */ - __u8 reserved[4]; - __u8 sflag; /* starting sentinal */ - __u8 part_number[13]; /* prom part number */ - __u8 version[10]; /* version number */ - __u8 signature[8]; - __u16 eeprom_chksum; - __u32 data_stamp_offset; - __u8 eflag ; /* ending sentinal */ -} __attribute__((packed)) voyager_eprom_hdr_t; + __u8 reserved[4] __attribute__((packed)); + __u8 sflag __attribute__((packed)); /* starting sentinal */ + __u8 part_number[13] __attribute__((packed)); /* prom part number */ + __u8 version[10] __attribute__((packed)); /* version number */ + __u8 signature[8] __attribute__((packed)); + __u16 eeprom_chksum __attribute__((packed)); + __u32 data_stamp_offset __attribute__((packed)); + __u8 eflag __attribute__((packed)); /* ending sentinal */ +} voyager_eprom_hdr_t; @@ -155,30 +155,30 @@ typedef struct voyager_eeprom_hdr { * in the module EPROMs. We really only care about the IDs and * offsets */ typedef struct voyager_sp_table { - __u8 asic_id; - __u8 bypass_flag; - __u16 asic_data_offset; - __u16 config_data_offset; -} __attribute__((packed)) voyager_sp_table_t; + __u8 asic_id __attribute__((packed)); + __u8 bypass_flag __attribute__((packed)); + __u16 asic_data_offset __attribute__((packed)); + __u16 config_data_offset __attribute__((packed)); +} voyager_sp_table_t; typedef struct voyager_jtag_table { - __u8 icode[4]; - __u8 runbist[4]; - __u8 intest[4]; - __u8 samp_preld[4]; - __u8 ireg_len; -} __attribute__((packed)) voyager_jtt_t; + __u8 icode[4] __attribute__((packed)); + __u8 runbist[4] __attribute__((packed)); + __u8 intest[4] __attribute__((packed)); + __u8 samp_preld[4] __attribute__((packed)); + __u8 ireg_len __attribute__((packed)); +} voyager_jtt_t; typedef struct voyager_asic_data_table { - __u8 jtag_id[4]; - __u16 length_bsr; - __u16 length_bist_reg; - __u32 bist_clk; - __u16 subaddr_bits; - __u16 seed_bits; - __u16 sig_bits; - __u16 jtag_offset; -} __attribute__((packed)) voyager_at_t; + __u8 jtag_id[4] __attribute__((packed)); + __u16 length_bsr __attribute__((packed)); + __u16 length_bist_reg __attribute__((packed)); + __u32 bist_clk __attribute__((packed)); + __u16 subaddr_bits __attribute__((packed)); + __u16 seed_bits __attribute__((packed)); + __u16 sig_bits __attribute__((packed)); + __u16 jtag_offset __attribute__((packed)); +} voyager_at_t; /* Voyager Interrupt Controller (VIC) registers */ @@ -328,52 +328,52 @@ struct voyager_bios_info { #define NUMBER_OF_POS_REGS 8 typedef struct { - __u8 MC_Slot; - __u8 POS_Values[NUMBER_OF_POS_REGS]; -} __attribute__((packed)) MC_SlotInformation_t; + __u8 MC_Slot __attribute__((packed)); + __u8 POS_Values[NUMBER_OF_POS_REGS] __attribute__((packed)); +} MC_SlotInformation_t; struct QuadDescription { - __u8 Type; /* for type 0 (DYADIC or MONADIC) all fields + __u8 Type __attribute__((packed)); /* for type 0 (DYADIC or MONADIC) all fields * will be zero except for slot */ - __u8 StructureVersion; - __u32 CPI_BaseAddress; - __u32 LARC_BankSize; - __u32 LocalMemoryStateBits; - __u8 Slot; /* Processor slots 1 - 4 */ -} __attribute__((packed)); + __u8 StructureVersion __attribute__((packed)); + __u32 CPI_BaseAddress __attribute__((packed)); + __u32 LARC_BankSize __attribute__((packed)); + __u32 LocalMemoryStateBits __attribute__((packed)); + __u8 Slot __attribute__((packed)); /* Processor slots 1 - 4 */ +}; struct ProcBoardInfo { - __u8 Type; - __u8 StructureVersion; - __u8 NumberOfBoards; - struct QuadDescription QuadData[MAX_PROCESSOR_BOARDS]; -} __attribute__((packed)); + __u8 Type __attribute__((packed)); + __u8 StructureVersion __attribute__((packed)); + __u8 NumberOfBoards __attribute__((packed)); + struct QuadDescription QuadData[MAX_PROCESSOR_BOARDS] __attribute__((packed)); +}; struct CacheDescription { - __u8 Level; - __u32 TotalSize; - __u16 LineSize; - __u8 Associativity; - __u8 CacheType; - __u8 WriteType; - __u8 Number_CPUs_SharedBy; - __u8 Shared_CPUs_Hardware_IDs[MAX_SHARED_CPUS]; + __u8 Level __attribute__((packed)); + __u32 TotalSize __attribute__((packed)); + __u16 LineSize __attribute__((packed)); + __u8 Associativity __attribute__((packed)); + __u8 CacheType __attribute__((packed)); + __u8 WriteType __attribute__((packed)); + __u8 Number_CPUs_SharedBy __attribute__((packed)); + __u8 Shared_CPUs_Hardware_IDs[MAX_SHARED_CPUS] __attribute__((packed)); -} __attribute__((packed)); +}; struct CPU_Description { - __u8 CPU_HardwareId; - char *FRU_String; - __u8 NumberOfCacheLevels; - struct CacheDescription CacheLevelData[MAX_CACHE_LEVELS]; -} __attribute__((packed)); + __u8 CPU_HardwareId __attribute__((packed)); + char *FRU_String __attribute__((packed)); + __u8 NumberOfCacheLevels __attribute__((packed)); + struct CacheDescription CacheLevelData[MAX_CACHE_LEVELS] __attribute__((packed)); +}; struct CPU_Info { - __u8 Type; - __u8 StructureVersion; - __u8 NumberOf_CPUs; - struct CPU_Description CPU_Data[MAX_CPUS]; -} __attribute__((packed)); + __u8 Type __attribute__((packed)); + __u8 StructureVersion __attribute__((packed)); + __u8 NumberOf_CPUs __attribute__((packed)); + struct CPU_Description CPU_Data[MAX_CPUS] __attribute__((packed)); +}; /* diff --git a/trunk/include/asm-m32r/io.h b/trunk/include/asm-m32r/io.h index d06933bd6318..70ad1c949c2b 100644 --- a/trunk/include/asm-m32r/io.h +++ b/trunk/include/asm-m32r/io.h @@ -166,6 +166,38 @@ static inline void _writel(unsigned long l, unsigned long addr) #define flush_write_buffers() do { } while (0) /* M32R_FIXME */ +/** + * check_signature - find BIOS signatures + * @io_addr: mmio address to check + * @signature: signature block + * @length: length of signature + * + * Perform a signature comparison with the ISA mmio address io_addr. + * Returns 1 on a match. + * + * This function is deprecated. New drivers should use ioremap and + * check_signature. + */ + +static inline int check_signature(void __iomem *io_addr, + const unsigned char *signature, int length) +{ + int retval = 0; +#if 0 +printk("check_signature\n"); + do { + if (readb(io_addr) != *signature) + goto out; + io_addr++; + signature++; + length--; + } while (length); + retval = 1; +out: +#endif + return retval; +} + static inline void memset_io(volatile void __iomem *addr, unsigned char val, int count) { diff --git a/trunk/include/asm-m68k/sun3mmu.h b/trunk/include/asm-m68k/sun3mmu.h index d8f17a0d8c9f..6c8c17d047a1 100644 --- a/trunk/include/asm-m68k/sun3mmu.h +++ b/trunk/include/asm-m68k/sun3mmu.h @@ -4,7 +4,6 @@ #ifndef __SUN3_MMU_H__ #define __SUN3_MMU_H__ -#include #include #include @@ -161,7 +160,7 @@ static inline void sun3_put_context(unsigned char c) return; } -extern void __iomem *sun3_ioremap(unsigned long phys, unsigned long size, +extern void *sun3_ioremap(unsigned long phys, unsigned long size, unsigned long type); extern int sun3_map_test(unsigned long addr, char *val); diff --git a/trunk/include/asm-m68k/uaccess.h b/trunk/include/asm-m68k/uaccess.h index e4c9f080ff20..88b1f47400e1 100644 --- a/trunk/include/asm-m68k/uaccess.h +++ b/trunk/include/asm-m68k/uaccess.h @@ -76,7 +76,7 @@ asm volatile ("\n" \ break; \ case 8: \ { \ - const void __user *__pu_ptr = (ptr); \ + const void *__pu_ptr = (ptr); \ asm volatile ("\n" \ "1: moves.l %2,(%1)+\n" \ "2: moves.l %R2,(%1)\n" \ @@ -125,7 +125,7 @@ asm volatile ("\n" \ " .previous" \ : "+d" (res), "=&" #reg (__gu_val) \ : "m" (*(ptr)), "i" (err)); \ - (x) = (typeof(*(ptr)))(unsigned long)__gu_val; \ + (x) = (typeof(*(ptr)))(long)__gu_val; \ }) #define __get_user(x, ptr) \ @@ -221,16 +221,16 @@ __constant_copy_from_user(void *to, const void __user *from, unsigned long n) switch (n) { case 1: - __get_user_asm(res, *(u8 *)to, (u8 __user *)from, u8, b, d, 1); + __get_user_asm(res, *(u8 *)to, (u8 *)from, u8, b, d, 1); break; case 2: - __get_user_asm(res, *(u16 *)to, (u16 __user *)from, u16, w, d, 2); + __get_user_asm(res, *(u16 *)to, (u16 *)from, u16, w, d, 2); break; case 3: __constant_copy_from_user_asm(res, to, from, tmp, 3, w, b,); break; case 4: - __get_user_asm(res, *(u32 *)to, (u32 __user *)from, u32, l, r, 4); + __get_user_asm(res, *(u32 *)to, (u32 *)from, u32, l, r, 4); break; case 5: __constant_copy_from_user_asm(res, to, from, tmp, 5, l, b,); @@ -302,16 +302,16 @@ __constant_copy_to_user(void __user *to, const void *from, unsigned long n) switch (n) { case 1: - __put_user_asm(res, *(u8 *)from, (u8 __user *)to, b, d, 1); + __put_user_asm(res, *(u8 *)from, (u8 *)to, b, d, 1); break; case 2: - __put_user_asm(res, *(u16 *)from, (u16 __user *)to, w, d, 2); + __put_user_asm(res, *(u16 *)from, (u16 *)to, w, d, 2); break; case 3: __constant_copy_to_user_asm(res, to, from, tmp, 3, w, b,); break; case 4: - __put_user_asm(res, *(u32 *)from, (u32 __user *)to, l, r, 4); + __put_user_asm(res, *(u32 *)from, (u32 *)to, l, r, 4); break; case 5: __constant_copy_to_user_asm(res, to, from, tmp, 5, l, b,); diff --git a/trunk/include/asm-m68knommu/unistd.h b/trunk/include/asm-m68knommu/unistd.h index ebaf03197114..daafb5d43ef1 100644 --- a/trunk/include/asm-m68knommu/unistd.h +++ b/trunk/include/asm-m68knommu/unistd.h @@ -281,43 +281,14 @@ #define __NR_mq_notify 275 #define __NR_mq_getsetattr 276 #define __NR_waitid 277 -#define __NR_vserver 278 +#define __NR_sys_setaltroot 278 #define __NR_add_key 279 #define __NR_request_key 280 #define __NR_keyctl 281 -#define __NR_ioprio_set 282 -#define __NR_ioprio_get 283 -#define __NR_inotify_init 284 -#define __NR_inotify_add_watch 285 -#define __NR_inotify_rm_watch 286 -#define __NR_migrate_pages 287 -#define __NR_openat 288 -#define __NR_mkdirat 289 -#define __NR_mknodat 290 -#define __NR_fchownat 291 -#define __NR_futimesat 292 -#define __NR_fstatat64 293 -#define __NR_unlinkat 294 -#define __NR_renameat 295 -#define __NR_linkat 296 -#define __NR_symlinkat 297 -#define __NR_readlinkat 298 -#define __NR_fchmodat 299 -#define __NR_faccessat 300 -#define __NR_pselect6 301 -#define __NR_ppoll 302 -#define __NR_unshare 303 -#define __NR_set_robust_list 304 -#define __NR_get_robust_list 305 -#define __NR_splice 306 -#define __NR_sync_file_range 307 -#define __NR_tee 308 -#define __NR_vmsplice 309 -#define __NR_move_pages 310 - + #ifdef __KERNEL__ -#define NR_syscalls 311 +#define NR_syscalls 282 #include /* user-visible error numbers are in the range -1 - -MAX_ERRNO: see diff --git a/trunk/include/asm-mips/io.h b/trunk/include/asm-mips/io.h index c2d124badbe5..df624e1ee6e2 100644 --- a/trunk/include/asm-mips/io.h +++ b/trunk/include/asm-mips/io.h @@ -561,6 +561,32 @@ extern void pci_iounmap(struct pci_dev *dev, void __iomem *); */ #define eth_io_copy_and_sum(skb,src,len,unused) memcpy_fromio((skb)->data,(src),(len)) +/* + * check_signature - find BIOS signatures + * @io_addr: mmio address to check + * @signature: signature block + * @length: length of signature + * + * Perform a signature comparison with the mmio address io_addr. This + * address should have been obtained by ioremap. + * Returns 1 on a match. + */ +static inline int check_signature(char __iomem *io_addr, + const unsigned char *signature, int length) +{ + int retval = 0; + do { + if (readb(io_addr) != *signature) + goto out; + io_addr++; + signature++; + length--; + } while (length); + retval = 1; +out: + return retval; +} + /* * The caches on some architectures aren't dma-coherent and have need to * handle this in software. There are three types of operations that diff --git a/trunk/include/asm-mips/irq.h b/trunk/include/asm-mips/irq.h index 0ce2a80b689e..1a9804c65369 100644 --- a/trunk/include/asm-mips/irq.h +++ b/trunk/include/asm-mips/irq.h @@ -24,6 +24,8 @@ static inline int irq_canonicalize(int irq) #define irq_canonicalize(irq) (irq) /* Sane hardware, sane code ... */ #endif +struct pt_regs; + extern asmlinkage unsigned int do_IRQ(unsigned int irq); #ifdef CONFIG_MIPS_MT_SMTC diff --git a/trunk/include/asm-mips/stackframe.h b/trunk/include/asm-mips/stackframe.h index 1fae5dc58138..158a4cd12e46 100644 --- a/trunk/include/asm-mips/stackframe.h +++ b/trunk/include/asm-mips/stackframe.h @@ -59,43 +59,69 @@ .endm #ifdef CONFIG_SMP -#ifdef CONFIG_MIPS_MT_SMTC -#define PTEBASE_SHIFT 19 /* TCBIND */ -#else -#define PTEBASE_SHIFT 23 /* CONTEXT */ -#endif .macro get_saved_sp /* SMP variation */ +#ifdef CONFIG_32BIT #ifdef CONFIG_MIPS_MT_SMTC - mfc0 k0, CP0_TCBIND -#else - MFC0 k0, CP0_CONTEXT -#endif -#if defined(CONFIG_BUILD_ELF64) || (defined(CONFIG_64BIT) && __GNUC__ < 4) - lui k1, %highest(kernelsp) - daddiu k1, %higher(kernelsp) - dsll k1, 16 - daddiu k1, %hi(kernelsp) - dsll k1, 16 + .set mips32 + mfc0 k0, CP0_TCBIND; + .set mips0 + lui k1, %hi(kernelsp) + srl k0, k0, 19 + /* No need to shift down and up to clear bits 0-1 */ #else + mfc0 k0, CP0_CONTEXT lui k1, %hi(kernelsp) + srl k0, k0, 23 +#endif + addu k1, k0 + LONG_L k1, %lo(kernelsp)(k1) #endif - LONG_SRL k0, PTEBASE_SHIFT - LONG_ADDU k1, k0 +#ifdef CONFIG_64BIT +#ifdef CONFIG_MIPS_MT_SMTC + .set mips64 + mfc0 k0, CP0_TCBIND; + .set mips0 + lui k0, %highest(kernelsp) + dsrl k1, 19 + /* No need to shift down and up to clear bits 0-2 */ +#else + MFC0 k1, CP0_CONTEXT + lui k0, %highest(kernelsp) + dsrl k1, 23 + daddiu k0, %higher(kernelsp) + dsll k0, k0, 16 + daddiu k0, %hi(kernelsp) + dsll k0, k0, 16 +#endif /* CONFIG_MIPS_MT_SMTC */ + daddu k1, k1, k0 LONG_L k1, %lo(kernelsp)(k1) +#endif /* CONFIG_64BIT */ .endm .macro set_saved_sp stackp temp temp2 +#ifdef CONFIG_32BIT +#ifdef CONFIG_MIPS_MT_SMTC + mfc0 \temp, CP0_TCBIND + srl \temp, 19 +#else + mfc0 \temp, CP0_CONTEXT + srl \temp, 23 +#endif +#endif +#ifdef CONFIG_64BIT #ifdef CONFIG_MIPS_MT_SMTC mfc0 \temp, CP0_TCBIND + dsrl \temp, 19 #else MFC0 \temp, CP0_CONTEXT + dsrl \temp, 23 +#endif #endif - LONG_SRL \temp, PTEBASE_SHIFT LONG_S \stackp, kernelsp(\temp) .endm #else .macro get_saved_sp /* Uniprocessor variation */ -#if defined(CONFIG_BUILD_ELF64) || (defined(CONFIG_64BIT) && __GNUC__ < 4) +#ifdef CONFIG_64BIT lui k1, %highest(kernelsp) daddiu k1, %higher(kernelsp) dsll k1, k1, 16 diff --git a/trunk/include/asm-mips/termbits.h b/trunk/include/asm-mips/termbits.h index b62ec7c521cc..fa6d04dac56b 100644 --- a/trunk/include/asm-mips/termbits.h +++ b/trunk/include/asm-mips/termbits.h @@ -3,7 +3,7 @@ * License. See the file "COPYING" in the main directory of this archive * for more details. * - * Copyright (C) 1995, 96, 99, 2001, 06 Ralf Baechle + * Copyright (C) 1995, 1996, 1999, 2001 Ralf Baechle * Copyright (C) 1999 Silicon Graphics, Inc. * Copyright (C) 2001 MIPS Technologies, Inc. */ @@ -13,8 +13,14 @@ #include typedef unsigned char cc_t; -typedef unsigned int speed_t; -typedef unsigned int tcflag_t; +#if (_MIPS_SZLONG == 32) +typedef unsigned long speed_t; +typedef unsigned long tcflag_t; +#endif +#if (_MIPS_SZLONG == 64) +typedef __u32 speed_t; +typedef __u32 tcflag_t; +#endif /* * The ABI says nothing about NCC but seems to use NCCS as diff --git a/trunk/include/asm-powerpc/io.h b/trunk/include/asm-powerpc/io.h index 3baff8b0fd5a..cbbd8c648df1 100644 --- a/trunk/include/asm-powerpc/io.h +++ b/trunk/include/asm-powerpc/io.h @@ -404,6 +404,32 @@ static inline void __out_be64(volatile unsigned long __iomem *addr, unsigned lon #include +/** + * check_signature - find BIOS signatures + * @io_addr: mmio address to check + * @signature: signature block + * @length: length of signature + * + * Perform a signature comparison with the mmio address io_addr. This + * address should have been obtained by ioremap. + * Returns 1 on a match. + */ +static inline int check_signature(const volatile void __iomem * io_addr, + const unsigned char *signature, int length) +{ + int retval = 0; + do { + if (readb(io_addr) != *signature) + goto out; + io_addr++; + signature++; + length--; + } while (length); + retval = 1; +out: + return retval; +} + /* Nothing to do */ #define dma_cache_inv(_start,_size) do { } while (0) diff --git a/trunk/include/asm-ppc/io.h b/trunk/include/asm-ppc/io.h index a4c411b753ef..3d9a9e6f3321 100644 --- a/trunk/include/asm-ppc/io.h +++ b/trunk/include/asm-ppc/io.h @@ -439,6 +439,22 @@ extern inline void * phys_to_virt(unsigned long address) #define iobarrier_r() eieio() #define iobarrier_w() eieio() +static inline int check_signature(volatile void __iomem * io_addr, + const unsigned char *signature, int length) +{ + int retval = 0; + do { + if (readb(io_addr) != *signature) + goto out; + io_addr++; + signature++; + length--; + } while (length); + retval = 1; +out: + return retval; +} + /* * Here comes the ppc implementation of the IOMAP * interfaces. diff --git a/trunk/include/asm-s390/cio.h b/trunk/include/asm-s390/cio.h index 81287d86329d..da063cd5f0a0 100644 --- a/trunk/include/asm-s390/cio.h +++ b/trunk/include/asm-s390/cio.h @@ -275,12 +275,6 @@ struct ccw_dev_id { u16 devno; }; -static inline int ccw_dev_id_is_equal(struct ccw_dev_id *dev_id1, - struct ccw_dev_id *dev_id2) -{ - return !memcmp(dev_id1, dev_id2, sizeof(struct ccw_dev_id)); -} - extern int diag210(struct diag210 *addr); extern void wait_cons_dev(void); diff --git a/trunk/include/asm-s390/timer.h b/trunk/include/asm-s390/timer.h index 30e5cbe570f2..fcd6c256a2d1 100644 --- a/trunk/include/asm-s390/timer.h +++ b/trunk/include/asm-s390/timer.h @@ -26,7 +26,7 @@ struct vtimer_list { spinlock_t lock; unsigned long magic; - void (*function)(unsigned long); + void (*function)(unsigned long, struct pt_regs*); unsigned long data; }; diff --git a/trunk/include/asm-sh/cpu-sh4/ubc.h b/trunk/include/asm-sh/cpu-sh4/ubc.h index c86e17050935..3d0943167659 100644 --- a/trunk/include/asm-sh/cpu-sh4/ubc.h +++ b/trunk/include/asm-sh/cpu-sh4/ubc.h @@ -3,7 +3,6 @@ * * Copyright (C) 1999 Niibe Yutaka * Copyright (C) 2003 Paul Mundt - * Copyright (C) 2006 Lineo Solutions Inc. support SH4A UBC * * This file is subject to the terms and conditions of the GNU General Public * License. See the file "COPYING" in the main directory of this archive @@ -12,41 +11,6 @@ #ifndef __ASM_CPU_SH4_UBC_H #define __ASM_CPU_SH4_UBC_H -#if defined(CONFIG_CPU_SH4A) -#define UBC_CBR0 0xff200000 -#define UBC_CRR0 0xff200004 -#define UBC_CAR0 0xff200008 -#define UBC_CAMR0 0xff20000c -#define UBC_CBR1 0xff200020 -#define UBC_CRR1 0xff200024 -#define UBC_CAR1 0xff200028 -#define UBC_CAMR1 0xff20002c -#define UBC_CDR1 0xff200030 -#define UBC_CDMR1 0xff200034 -#define UBC_CETR1 0xff200038 -#define UBC_CCMFR 0xff200600 -#define UBC_CBCR 0xff200620 - -/* CBR */ -#define UBC_CBR_AIE (0x01<<30) -#define UBC_CBR_ID_INST (0x01<<4) -#define UBC_CBR_RW_READ (0x01<<1) -#define UBC_CBR_CE (0x01) - -#define UBC_CBR_AIV_MASK (0x00FF0000) -#define UBC_CBR_AIV_SHIFT (16) -#define UBC_CBR_AIV_SET(asid) (((asid)< - -extern atomic_t irq_err_count; - #endif /* __ASM_SH_HW_IRQ_H */ diff --git a/trunk/include/asm-sh/io.h b/trunk/include/asm-sh/io.h index a0e55b09e4fd..ed12d38e8c00 100644 --- a/trunk/include/asm-sh/io.h +++ b/trunk/include/asm-sh/io.h @@ -304,6 +304,22 @@ __ioremap_mode(unsigned long offset, unsigned long size, unsigned long flags) #define iounmap(addr) \ __iounmap((addr)) +static inline int check_signature(char __iomem *io_addr, + const unsigned char *signature, int length) +{ + int retval = 0; + do { + if (readb(io_addr) != *signature) + goto out; + io_addr++; + signature++; + length--; + } while (length); + retval = 1; +out: + return retval; +} + /* * The caches on some architectures aren't dma-coherent and have need to * handle this in software. There are three types of operations that diff --git a/trunk/include/asm-sh/irq.h b/trunk/include/asm-sh/irq.h index 28996f9c58cc..0e5f365aff70 100644 --- a/trunk/include/asm-sh/irq.h +++ b/trunk/include/asm-sh/irq.h @@ -697,15 +697,13 @@ extern int ipr_irq_demux(int irq); #define INTC2_INTPRI_OFFSET 0x00 -struct intc2_data { - unsigned short irq; - unsigned char ipr_offset, ipr_shift; - unsigned char msk_offset, msk_shift; - unsigned char priority; -}; - -void make_intc2_irq(struct intc2_data *); +void make_intc2_irq(unsigned int irq, + unsigned int ipr_offset, unsigned int ipr_shift, + unsigned int msk_offset, unsigned int msk_shift, + unsigned int priority); void init_IRQ_intc2(void); +void intc2_add_clear_irq(int irq, int (*fn)(int)); + #endif extern int shmse_irq_demux(int irq); diff --git a/trunk/include/asm-sh/irq_regs.h b/trunk/include/asm-sh/irq_regs.h deleted file mode 100644 index 3dd9c0b70270..000000000000 --- a/trunk/include/asm-sh/irq_regs.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/trunk/include/asm-sh/timer.h b/trunk/include/asm-sh/timer.h index 5df842bcf7b6..c7ab28095ba0 100644 --- a/trunk/include/asm-sh/timer.h +++ b/trunk/include/asm-sh/timer.h @@ -8,9 +8,8 @@ struct sys_timer_ops { int (*init)(void); int (*start)(void); int (*stop)(void); -#ifndef CONFIG_GENERIC_TIME unsigned long (*get_offset)(void); -#endif + unsigned long (*get_frequency)(void); }; struct sys_timer { @@ -25,17 +24,21 @@ struct sys_timer { extern struct sys_timer tmu_timer; extern struct sys_timer *sys_timer; -#ifndef CONFIG_GENERIC_TIME static inline unsigned long get_timer_offset(void) { return sys_timer->ops->get_offset(); } -#endif + +static inline unsigned long get_timer_frequency(void) +{ + return sys_timer->ops->get_frequency(); +} /* arch/sh/kernel/timers/timer.c */ struct sys_timer *get_sys_timer(void); /* arch/sh/kernel/time.c */ -void handle_timer_tick(void); +void handle_timer_tick(struct pt_regs *); #endif /* __ASM_SH_TIMER_H */ + diff --git a/trunk/include/asm-sh64/io.h b/trunk/include/asm-sh64/io.h index 14d8e7b4bf4b..252fedbb6621 100644 --- a/trunk/include/asm-sh64/io.h +++ b/trunk/include/asm-sh64/io.h @@ -178,6 +178,22 @@ extern void iounmap(void *addr); unsigned long onchip_remap(unsigned long addr, unsigned long size, const char* name); extern void onchip_unmap(unsigned long vaddr); +static __inline__ int check_signature(volatile void __iomem *io_addr, + const unsigned char *signature, int length) +{ + int retval = 0; + do { + if (readb(io_addr) != *signature) + goto out; + io_addr++; + signature++; + length--; + } while (length); + retval = 1; +out: + return retval; +} + /* * The caches on some architectures aren't dma-coherent and have need to * handle this in software. There are three types of operations that diff --git a/trunk/include/asm-sparc64/io.h b/trunk/include/asm-sparc64/io.h index 30b912d8e8bc..0056770e83ad 100644 --- a/trunk/include/asm-sparc64/io.h +++ b/trunk/include/asm-sparc64/io.h @@ -440,6 +440,21 @@ _memcpy_toio(volatile void __iomem *dst, const void *src, __kernel_size_t n) #define memcpy_toio(d,s,sz) _memcpy_toio(d,s,sz) +static inline int check_signature(void __iomem *io_addr, + const unsigned char *signature, + int length) +{ + int retval = 0; + do { + if (readb(io_addr) != *signature++) + goto out; + io_addr++; + } while (--length); + retval = 1; +out: + return retval; +} + #define mmiowb() #ifdef __KERNEL__ diff --git a/trunk/include/asm-x86_64/io.h b/trunk/include/asm-x86_64/io.h index 6ee9fadaaacb..70e91fe76344 100644 --- a/trunk/include/asm-x86_64/io.h +++ b/trunk/include/asm-x86_64/io.h @@ -254,6 +254,33 @@ void memset_io(volatile void __iomem *a, int b, size_t c); #define eth_io_copy_and_sum(a,b,c,d) eth_copy_and_sum((a),(void *)(b),(c),(d)) +/** + * check_signature - find BIOS signatures + * @io_addr: mmio address to check + * @signature: signature block + * @length: length of signature + * + * Perform a signature comparison with the mmio address io_addr. This + * address should have been obtained by ioremap. + * Returns 1 on a match. + */ + +static inline int check_signature(void __iomem *io_addr, + const unsigned char *signature, int length) +{ + int retval = 0; + do { + if (readb(io_addr) != *signature) + goto out; + io_addr++; + signature++; + length--; + } while (length); + retval = 1; +out: + return retval; +} + /* Nothing to do */ #define dma_cache_inv(_start,_size) do { } while (0) diff --git a/trunk/include/asm-x86_64/msr.h b/trunk/include/asm-x86_64/msr.h index 207fed998a0b..37e194169fac 100644 --- a/trunk/include/asm-x86_64/msr.h +++ b/trunk/include/asm-x86_64/msr.h @@ -189,7 +189,6 @@ static inline unsigned int cpuid_edx(unsigned int op) #define MSR_IA32_PERFCTR0 0xc1 #define MSR_IA32_PERFCTR1 0xc2 -#define MSR_FSB_FREQ 0xcd #define MSR_MTRRcap 0x0fe #define MSR_IA32_BBL_CR_CTL 0x119 @@ -308,9 +307,6 @@ static inline unsigned int cpuid_edx(unsigned int op) #define MSR_IA32_PERF_STATUS 0x198 #define MSR_IA32_PERF_CTL 0x199 -#define MSR_IA32_MPERF 0xE7 -#define MSR_IA32_APERF 0xE8 - #define MSR_IA32_THERM_CONTROL 0x19a #define MSR_IA32_THERM_INTERRUPT 0x19b #define MSR_IA32_THERM_STATUS 0x19c diff --git a/trunk/include/asm-x86_64/processor.h b/trunk/include/asm-x86_64/processor.h index cef17e0f828c..de9c3147ee4c 100644 --- a/trunk/include/asm-x86_64/processor.h +++ b/trunk/include/asm-x86_64/processor.h @@ -475,8 +475,6 @@ static inline void __mwait(unsigned long eax, unsigned long ecx) : :"a" (eax), "c" (ecx)); } -extern void mwait_idle_with_hints(unsigned long eax, unsigned long ecx); - #define stack_current() \ ({ \ struct thread_info *ti; \ diff --git a/trunk/include/linux/acpi.h b/trunk/include/linux/acpi.h index 2b0c955590fe..88b5dfd8ee12 100644 --- a/trunk/include/linux/acpi.h +++ b/trunk/include/linux/acpi.h @@ -494,9 +494,6 @@ void acpi_pci_unregister_driver(struct acpi_pci_driver *driver); extern int ec_read(u8 addr, u8 *val); extern int ec_write(u8 addr, u8 val); -extern int ec_transaction(u8 command, - const u8 *wdata, unsigned wdata_len, - u8 *rdata, unsigned rdata_len); #endif /*CONFIG_ACPI_EC*/ diff --git a/trunk/include/linux/bitmap.h b/trunk/include/linux/bitmap.h index 64b4641904fe..dcc5de7cc487 100644 --- a/trunk/include/linux/bitmap.h +++ b/trunk/include/linux/bitmap.h @@ -46,8 +46,7 @@ * bitmap_remap(dst, src, old, new, nbits) *dst = map(old, new)(src) * bitmap_bitremap(oldbit, old, new, nbits) newbit = map(old, new)(oldbit) * bitmap_scnprintf(buf, len, src, nbits) Print bitmap src to buf - * bitmap_parse(buf, buflen, dst, nbits) Parse bitmap dst from kernel buf - * bitmap_parse_user(ubuf, ulen, dst, nbits) Parse bitmap dst from user buf + * bitmap_parse(ubuf, ulen, dst, nbits) Parse bitmap dst from user buf * bitmap_scnlistprintf(buf, len, src, nbits) Print bitmap src as list to buf * bitmap_parselist(buf, dst, nbits) Parse bitmap dst from list * bitmap_find_free_region(bitmap, bits, order) Find and allocate bit region @@ -107,9 +106,7 @@ extern int __bitmap_weight(const unsigned long *bitmap, int bits); extern int bitmap_scnprintf(char *buf, unsigned int len, const unsigned long *src, int nbits); -extern int __bitmap_parse(const char *buf, unsigned int buflen, int is_user, - unsigned long *dst, int nbits); -extern int bitmap_parse_user(const char __user *ubuf, unsigned int ulen, +extern int bitmap_parse(const char __user *ubuf, unsigned int ulen, unsigned long *dst, int nbits); extern int bitmap_scnlistprintf(char *buf, unsigned int len, const unsigned long *src, int nbits); @@ -273,12 +270,6 @@ static inline void bitmap_shift_left(unsigned long *dst, __bitmap_shift_left(dst, src, n, nbits); } -static inline int bitmap_parse(const char *buf, unsigned int buflen, - unsigned long *maskp, int nmaskbits) -{ - return __bitmap_parse(buf, buflen, 0, maskp, nmaskbits); -} - #endif /* __ASSEMBLY__ */ #endif /* __LINUX_BITMAP_H */ diff --git a/trunk/include/linux/blkdev.h b/trunk/include/linux/blkdev.h index d370d2cfe138..26f7856ff812 100644 --- a/trunk/include/linux/blkdev.h +++ b/trunk/include/linux/blkdev.h @@ -157,7 +157,6 @@ enum rq_cmd_type_bits { REQ_TYPE_ATA_CMD, REQ_TYPE_ATA_TASK, REQ_TYPE_ATA_TASKFILE, - REQ_TYPE_ATA_PC, }; /* diff --git a/trunk/include/linux/carta_random32.h b/trunk/include/linux/carta_random32.h deleted file mode 100644 index f6f3bd9f20b5..000000000000 --- a/trunk/include/linux/carta_random32.h +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Fast, simple, yet decent quality random number generator based on - * a paper by David G. Carta ("Two Fast Implementations of the - * `Minimal Standard' Random Number Generator," Communications of the - * ACM, January, 1990). - * - * Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P. - * Contributed by Stephane Eranian - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - * 02111-1307 USA - */ -#ifndef _LINUX_CARTA_RANDOM32_H_ -#define _LINUX_CARTA_RANDOM32_H_ - -u64 carta_random32(u64 seed); - -#endif /* _LINUX_CARTA_RANDOM32_H_ */ diff --git a/trunk/include/linux/compat_ioctl.h b/trunk/include/linux/compat_ioctl.h index cfdb4f6a89d4..4e1663d7691e 100644 --- a/trunk/include/linux/compat_ioctl.h +++ b/trunk/include/linux/compat_ioctl.h @@ -61,23 +61,17 @@ COMPATIBLE_IOCTL(FIGETBSZ) * Some need translations, these do not. */ COMPATIBLE_IOCTL(HDIO_GET_IDENTITY) -COMPATIBLE_IOCTL(HDIO_DRIVE_TASK) +COMPATIBLE_IOCTL(HDIO_SET_DMA) +COMPATIBLE_IOCTL(HDIO_SET_UNMASKINTR) +COMPATIBLE_IOCTL(HDIO_SET_NOWERR) +COMPATIBLE_IOCTL(HDIO_SET_32BIT) +COMPATIBLE_IOCTL(HDIO_SET_MULTCOUNT) COMPATIBLE_IOCTL(HDIO_DRIVE_CMD) -ULONG_IOCTL(HDIO_SET_MULTCOUNT) -ULONG_IOCTL(HDIO_SET_UNMASKINTR) -ULONG_IOCTL(HDIO_SET_KEEPSETTINGS) -ULONG_IOCTL(HDIO_SET_32BIT) -ULONG_IOCTL(HDIO_SET_NOWERR) -ULONG_IOCTL(HDIO_SET_DMA) -ULONG_IOCTL(HDIO_SET_PIO_MODE) -ULONG_IOCTL(HDIO_SET_NICE) -ULONG_IOCTL(HDIO_SET_WCACHE) -ULONG_IOCTL(HDIO_SET_ACOUSTIC) -ULONG_IOCTL(HDIO_SET_BUSSTATE) -ULONG_IOCTL(HDIO_SET_ADDRESS) +COMPATIBLE_IOCTL(HDIO_DRIVE_TASK) +COMPATIBLE_IOCTL(HDIO_SET_PIO_MODE) +COMPATIBLE_IOCTL(HDIO_SET_NICE) +COMPATIBLE_IOCTL(HDIO_SET_KEEPSETTINGS) COMPATIBLE_IOCTL(HDIO_SCAN_HWIF) -/* 0x330 is reserved -- it used to be HDIO_GETGEO_BIG */ -COMPATIBLE_IOCTL(0x330) /* 0x02 -- Floppy ioctls */ COMPATIBLE_IOCTL(FDMSGON) COMPATIBLE_IOCTL(FDMSGOFF) diff --git a/trunk/include/linux/cpufreq.h b/trunk/include/linux/cpufreq.h index 7f008f6bfdc3..4ea39fee99c7 100644 --- a/trunk/include/linux/cpufreq.h +++ b/trunk/include/linux/cpufreq.h @@ -172,8 +172,6 @@ extern int __cpufreq_driver_target(struct cpufreq_policy *policy, unsigned int relation); -extern int cpufreq_driver_getavg(struct cpufreq_policy *policy); - int cpufreq_register_governor(struct cpufreq_governor *governor); void cpufreq_unregister_governor(struct cpufreq_governor *governor); @@ -206,7 +204,6 @@ struct cpufreq_driver { unsigned int (*get) (unsigned int cpu); /* optional */ - unsigned int (*getavg) (unsigned int cpu); int (*exit) (struct cpufreq_policy *policy); int (*suspend) (struct cpufreq_policy *policy, pm_message_t pmsg); int (*resume) (struct cpufreq_policy *policy); diff --git a/trunk/include/linux/cpumask.h b/trunk/include/linux/cpumask.h index d0e8c8b0e34d..b268a3c0c376 100644 --- a/trunk/include/linux/cpumask.h +++ b/trunk/include/linux/cpumask.h @@ -8,8 +8,8 @@ * See detailed comments in the file linux/bitmap.h describing the * data type on which these cpumasks are based. * - * For details of cpumask_scnprintf() and cpumask_parse_user(), - * see bitmap_scnprintf() and bitmap_parse_user() in lib/bitmap.c. + * For details of cpumask_scnprintf() and cpumask_parse(), + * see bitmap_scnprintf() and bitmap_parse() in lib/bitmap.c. * For details of cpulist_scnprintf() and cpulist_parse(), see * bitmap_scnlistprintf() and bitmap_parselist(), also in bitmap.c. * For details of cpu_remap(), see bitmap_bitremap in lib/bitmap.c @@ -49,7 +49,7 @@ * unsigned long *cpus_addr(mask) Array of unsigned long's in mask * * int cpumask_scnprintf(buf, len, mask) Format cpumask for printing - * int cpumask_parse_user(ubuf, ulen, mask) Parse ascii string as cpumask + * int cpumask_parse(ubuf, ulen, mask) Parse ascii string as cpumask * int cpulist_scnprintf(buf, len, mask) Format cpumask as list for printing * int cpulist_parse(buf, map) Parse ascii string as cpulist * int cpu_remap(oldbit, old, new) newbit = map(old, new)(oldbit) @@ -273,12 +273,12 @@ static inline int __cpumask_scnprintf(char *buf, int len, return bitmap_scnprintf(buf, len, srcp->bits, nbits); } -#define cpumask_parse_user(ubuf, ulen, dst) \ - __cpumask_parse_user((ubuf), (ulen), &(dst), NR_CPUS) -static inline int __cpumask_parse_user(const char __user *buf, int len, +#define cpumask_parse(ubuf, ulen, dst) \ + __cpumask_parse((ubuf), (ulen), &(dst), NR_CPUS) +static inline int __cpumask_parse(const char __user *buf, int len, cpumask_t *dstp, int nbits) { - return bitmap_parse_user(buf, len, dstp->bits, nbits); + return bitmap_parse(buf, len, dstp->bits, nbits); } #define cpulist_scnprintf(buf, len, src) \ diff --git a/trunk/include/linux/dcache.h b/trunk/include/linux/dcache.h index 63f64a9a5bf7..44605be59409 100644 --- a/trunk/include/linux/dcache.h +++ b/trunk/include/linux/dcache.h @@ -230,7 +230,6 @@ extern struct dentry * d_alloc_anon(struct inode *); extern struct dentry * d_splice_alias(struct inode *, struct dentry *); extern void shrink_dcache_sb(struct super_block *); extern void shrink_dcache_parent(struct dentry *); -extern void shrink_dcache_for_umount(struct super_block *); extern int d_invalidate(struct dentry *); /* only used at mount-time */ diff --git a/trunk/include/linux/elevator.h b/trunk/include/linux/elevator.h index 2fa9f1144228..b3370ef5164d 100644 --- a/trunk/include/linux/elevator.h +++ b/trunk/include/linux/elevator.h @@ -70,6 +70,7 @@ struct elevator_type { struct list_head list; struct elevator_ops ops; + struct elevator_type *elevator_type; struct elv_fs_entry *elevator_attrs; char elevator_name[ELV_NAME_MAX]; struct module *elevator_owner; diff --git a/trunk/include/linux/ext4_fs.h b/trunk/include/linux/ext4_fs.h deleted file mode 100644 index 498503ee613d..000000000000 --- a/trunk/include/linux/ext4_fs.h +++ /dev/null @@ -1,994 +0,0 @@ -/* - * linux/include/linux/ext4_fs.h - * - * Copyright (C) 1992, 1993, 1994, 1995 - * Remy Card (card@masi.ibp.fr) - * Laboratoire MASI - Institut Blaise Pascal - * Universite Pierre et Marie Curie (Paris VI) - * - * from - * - * linux/include/linux/minix_fs.h - * - * Copyright (C) 1991, 1992 Linus Torvalds - */ - -#ifndef _LINUX_EXT4_FS_H -#define _LINUX_EXT4_FS_H - -#include -#include -#include - -/* - * The second extended filesystem constants/structures - */ - -/* - * Define EXT4FS_DEBUG to produce debug messages - */ -#undef EXT4FS_DEBUG - -/* - * Define EXT4_RESERVATION to reserve data blocks for expanding files - */ -#define EXT4_DEFAULT_RESERVE_BLOCKS 8 -/*max window size: 1024(direct blocks) + 3([t,d]indirect blocks) */ -#define EXT4_MAX_RESERVE_BLOCKS 1027 -#define EXT4_RESERVE_WINDOW_NOT_ALLOCATED 0 -/* - * Always enable hashed directories - */ -#define CONFIG_EXT4_INDEX - -/* - * Debug code - */ -#ifdef EXT4FS_DEBUG -#define ext4_debug(f, a...) \ - do { \ - printk (KERN_DEBUG "EXT4-fs DEBUG (%s, %d): %s:", \ - __FILE__, __LINE__, __FUNCTION__); \ - printk (KERN_DEBUG f, ## a); \ - } while (0) -#else -#define ext4_debug(f, a...) do {} while (0) -#endif - -/* - * Special inodes numbers - */ -#define EXT4_BAD_INO 1 /* Bad blocks inode */ -#define EXT4_ROOT_INO 2 /* Root inode */ -#define EXT4_BOOT_LOADER_INO 5 /* Boot loader inode */ -#define EXT4_UNDEL_DIR_INO 6 /* Undelete directory inode */ -#define EXT4_RESIZE_INO 7 /* Reserved group descriptors inode */ -#define EXT4_JOURNAL_INO 8 /* Journal inode */ - -/* First non-reserved inode for old ext4 filesystems */ -#define EXT4_GOOD_OLD_FIRST_INO 11 - -/* - * Maximal count of links to a file - */ -#define EXT4_LINK_MAX 32000 - -/* - * Macro-instructions used to manage several block sizes - */ -#define EXT4_MIN_BLOCK_SIZE 1024 -#define EXT4_MAX_BLOCK_SIZE 4096 -#define EXT4_MIN_BLOCK_LOG_SIZE 10 -#ifdef __KERNEL__ -# define EXT4_BLOCK_SIZE(s) ((s)->s_blocksize) -#else -# define EXT4_BLOCK_SIZE(s) (EXT4_MIN_BLOCK_SIZE << (s)->s_log_block_size) -#endif -#define EXT4_ADDR_PER_BLOCK(s) (EXT4_BLOCK_SIZE(s) / sizeof (__u32)) -#ifdef __KERNEL__ -# define EXT4_BLOCK_SIZE_BITS(s) ((s)->s_blocksize_bits) -#else -# define EXT4_BLOCK_SIZE_BITS(s) ((s)->s_log_block_size + 10) -#endif -#ifdef __KERNEL__ -#define EXT4_ADDR_PER_BLOCK_BITS(s) (EXT4_SB(s)->s_addr_per_block_bits) -#define EXT4_INODE_SIZE(s) (EXT4_SB(s)->s_inode_size) -#define EXT4_FIRST_INO(s) (EXT4_SB(s)->s_first_ino) -#else -#define EXT4_INODE_SIZE(s) (((s)->s_rev_level == EXT4_GOOD_OLD_REV) ? \ - EXT4_GOOD_OLD_INODE_SIZE : \ - (s)->s_inode_size) -#define EXT4_FIRST_INO(s) (((s)->s_rev_level == EXT4_GOOD_OLD_REV) ? \ - EXT4_GOOD_OLD_FIRST_INO : \ - (s)->s_first_ino) -#endif - -/* - * Macro-instructions used to manage fragments - */ -#define EXT4_MIN_FRAG_SIZE 1024 -#define EXT4_MAX_FRAG_SIZE 4096 -#define EXT4_MIN_FRAG_LOG_SIZE 10 -#ifdef __KERNEL__ -# define EXT4_FRAG_SIZE(s) (EXT4_SB(s)->s_frag_size) -# define EXT4_FRAGS_PER_BLOCK(s) (EXT4_SB(s)->s_frags_per_block) -#else -# define EXT4_FRAG_SIZE(s) (EXT4_MIN_FRAG_SIZE << (s)->s_log_frag_size) -# define EXT4_FRAGS_PER_BLOCK(s) (EXT4_BLOCK_SIZE(s) / EXT4_FRAG_SIZE(s)) -#endif - -/* - * Structure of a blocks group descriptor - */ -struct ext4_group_desc -{ - __le32 bg_block_bitmap; /* Blocks bitmap block */ - __le32 bg_inode_bitmap; /* Inodes bitmap block */ - __le32 bg_inode_table; /* Inodes table block */ - __le16 bg_free_blocks_count; /* Free blocks count */ - __le16 bg_free_inodes_count; /* Free inodes count */ - __le16 bg_used_dirs_count; /* Directories count */ - __u16 bg_flags; - __u32 bg_reserved[3]; - __le32 bg_block_bitmap_hi; /* Blocks bitmap block MSB */ - __le32 bg_inode_bitmap_hi; /* Inodes bitmap block MSB */ - __le32 bg_inode_table_hi; /* Inodes table block MSB */ -}; - -#ifdef __KERNEL__ -#include -#include -#endif -/* - * Macro-instructions used to manage group descriptors - */ -#define EXT4_MIN_DESC_SIZE 32 -#define EXT4_MIN_DESC_SIZE_64BIT 64 -#define EXT4_MAX_DESC_SIZE EXT4_MIN_BLOCK_SIZE -#define EXT4_DESC_SIZE(s) (EXT4_SB(s)->s_desc_size) -#ifdef __KERNEL__ -# define EXT4_BLOCKS_PER_GROUP(s) (EXT4_SB(s)->s_blocks_per_group) -# define EXT4_DESC_PER_BLOCK(s) (EXT4_SB(s)->s_desc_per_block) -# define EXT4_INODES_PER_GROUP(s) (EXT4_SB(s)->s_inodes_per_group) -# define EXT4_DESC_PER_BLOCK_BITS(s) (EXT4_SB(s)->s_desc_per_block_bits) -#else -# define EXT4_BLOCKS_PER_GROUP(s) ((s)->s_blocks_per_group) -# define EXT4_DESC_PER_BLOCK(s) (EXT4_BLOCK_SIZE(s) / EXT4_DESC_SIZE(s)) -# define EXT4_INODES_PER_GROUP(s) ((s)->s_inodes_per_group) -#endif - -/* - * Constants relative to the data blocks - */ -#define EXT4_NDIR_BLOCKS 12 -#define EXT4_IND_BLOCK EXT4_NDIR_BLOCKS -#define EXT4_DIND_BLOCK (EXT4_IND_BLOCK + 1) -#define EXT4_TIND_BLOCK (EXT4_DIND_BLOCK + 1) -#define EXT4_N_BLOCKS (EXT4_TIND_BLOCK + 1) - -/* - * Inode flags - */ -#define EXT4_SECRM_FL 0x00000001 /* Secure deletion */ -#define EXT4_UNRM_FL 0x00000002 /* Undelete */ -#define EXT4_COMPR_FL 0x00000004 /* Compress file */ -#define EXT4_SYNC_FL 0x00000008 /* Synchronous updates */ -#define EXT4_IMMUTABLE_FL 0x00000010 /* Immutable file */ -#define EXT4_APPEND_FL 0x00000020 /* writes to file may only append */ -#define EXT4_NODUMP_FL 0x00000040 /* do not dump file */ -#define EXT4_NOATIME_FL 0x00000080 /* do not update atime */ -/* Reserved for compression usage... */ -#define EXT4_DIRTY_FL 0x00000100 -#define EXT4_COMPRBLK_FL 0x00000200 /* One or more compressed clusters */ -#define EXT4_NOCOMPR_FL 0x00000400 /* Don't compress */ -#define EXT4_ECOMPR_FL 0x00000800 /* Compression error */ -/* End compression flags --- maybe not all used */ -#define EXT4_INDEX_FL 0x00001000 /* hash-indexed directory */ -#define EXT4_IMAGIC_FL 0x00002000 /* AFS directory */ -#define EXT4_JOURNAL_DATA_FL 0x00004000 /* file data should be journaled */ -#define EXT4_NOTAIL_FL 0x00008000 /* file tail should not be merged */ -#define EXT4_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */ -#define EXT4_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ -#define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */ -#define EXT4_EXTENTS_FL 0x00080000 /* Inode uses extents */ - -#define EXT4_FL_USER_VISIBLE 0x000BDFFF /* User visible flags */ -#define EXT4_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */ - -/* - * Inode dynamic state flags - */ -#define EXT4_STATE_JDATA 0x00000001 /* journaled data exists */ -#define EXT4_STATE_NEW 0x00000002 /* inode is newly created */ -#define EXT4_STATE_XATTR 0x00000004 /* has in-inode xattrs */ - -/* Used to pass group descriptor data when online resize is done */ -struct ext4_new_group_input { - __u32 group; /* Group number for this data */ - __u64 block_bitmap; /* Absolute block number of block bitmap */ - __u64 inode_bitmap; /* Absolute block number of inode bitmap */ - __u64 inode_table; /* Absolute block number of inode table start */ - __u32 blocks_count; /* Total number of blocks in this group */ - __u16 reserved_blocks; /* Number of reserved blocks in this group */ - __u16 unused; -}; - -/* The struct ext4_new_group_input in kernel space, with free_blocks_count */ -struct ext4_new_group_data { - __u32 group; - __u64 block_bitmap; - __u64 inode_bitmap; - __u64 inode_table; - __u32 blocks_count; - __u16 reserved_blocks; - __u16 unused; - __u32 free_blocks_count; -}; - - -/* - * ioctl commands - */ -#define EXT4_IOC_GETFLAGS FS_IOC_GETFLAGS -#define EXT4_IOC_SETFLAGS FS_IOC_SETFLAGS -#define EXT4_IOC_GETVERSION _IOR('f', 3, long) -#define EXT4_IOC_SETVERSION _IOW('f', 4, long) -#define EXT4_IOC_GROUP_EXTEND _IOW('f', 7, unsigned long) -#define EXT4_IOC_GROUP_ADD _IOW('f', 8,struct ext4_new_group_input) -#define EXT4_IOC_GETVERSION_OLD FS_IOC_GETVERSION -#define EXT4_IOC_SETVERSION_OLD FS_IOC_SETVERSION -#ifdef CONFIG_JBD_DEBUG -#define EXT4_IOC_WAIT_FOR_READONLY _IOR('f', 99, long) -#endif -#define EXT4_IOC_GETRSVSZ _IOR('f', 5, long) -#define EXT4_IOC_SETRSVSZ _IOW('f', 6, long) - -/* - * ioctl commands in 32 bit emulation - */ -#define EXT4_IOC32_GETFLAGS FS_IOC32_GETFLAGS -#define EXT4_IOC32_SETFLAGS FS_IOC32_SETFLAGS -#define EXT4_IOC32_GETVERSION _IOR('f', 3, int) -#define EXT4_IOC32_SETVERSION _IOW('f', 4, int) -#define EXT4_IOC32_GETRSVSZ _IOR('f', 5, int) -#define EXT4_IOC32_SETRSVSZ _IOW('f', 6, int) -#define EXT4_IOC32_GROUP_EXTEND _IOW('f', 7, unsigned int) -#ifdef CONFIG_JBD_DEBUG -#define EXT4_IOC32_WAIT_FOR_READONLY _IOR('f', 99, int) -#endif -#define EXT4_IOC32_GETVERSION_OLD FS_IOC32_GETVERSION -#define EXT4_IOC32_SETVERSION_OLD FS_IOC32_SETVERSION - - -/* - * Mount options - */ -struct ext4_mount_options { - unsigned long s_mount_opt; - uid_t s_resuid; - gid_t s_resgid; - unsigned long s_commit_interval; -#ifdef CONFIG_QUOTA - int s_jquota_fmt; - char *s_qf_names[MAXQUOTAS]; -#endif -}; - -/* - * Structure of an inode on the disk - */ -struct ext4_inode { - __le16 i_mode; /* File mode */ - __le16 i_uid; /* Low 16 bits of Owner Uid */ - __le32 i_size; /* Size in bytes */ - __le32 i_atime; /* Access time */ - __le32 i_ctime; /* Creation time */ - __le32 i_mtime; /* Modification time */ - __le32 i_dtime; /* Deletion Time */ - __le16 i_gid; /* Low 16 bits of Group Id */ - __le16 i_links_count; /* Links count */ - __le32 i_blocks; /* Blocks count */ - __le32 i_flags; /* File flags */ - union { - struct { - __u32 l_i_reserved1; - } linux1; - struct { - __u32 h_i_translator; - } hurd1; - struct { - __u32 m_i_reserved1; - } masix1; - } osd1; /* OS dependent 1 */ - __le32 i_block[EXT4_N_BLOCKS];/* Pointers to blocks */ - __le32 i_generation; /* File version (for NFS) */ - __le32 i_file_acl; /* File ACL */ - __le32 i_dir_acl; /* Directory ACL */ - __le32 i_faddr; /* Fragment address */ - union { - struct { - __u8 l_i_frag; /* Fragment number */ - __u8 l_i_fsize; /* Fragment size */ - __le16 l_i_file_acl_high; - __le16 l_i_uid_high; /* these 2 fields */ - __le16 l_i_gid_high; /* were reserved2[0] */ - __u32 l_i_reserved2; - } linux2; - struct { - __u8 h_i_frag; /* Fragment number */ - __u8 h_i_fsize; /* Fragment size */ - __u16 h_i_mode_high; - __u16 h_i_uid_high; - __u16 h_i_gid_high; - __u32 h_i_author; - } hurd2; - struct { - __u8 m_i_frag; /* Fragment number */ - __u8 m_i_fsize; /* Fragment size */ - __le16 m_i_file_acl_high; - __u32 m_i_reserved2[2]; - } masix2; - } osd2; /* OS dependent 2 */ - __le16 i_extra_isize; - __le16 i_pad1; -}; - -#define i_size_high i_dir_acl - -#if defined(__KERNEL__) || defined(__linux__) -#define i_reserved1 osd1.linux1.l_i_reserved1 -#define i_frag osd2.linux2.l_i_frag -#define i_fsize osd2.linux2.l_i_fsize -#define i_file_acl_high osd2.linux2.l_i_file_acl_high -#define i_uid_low i_uid -#define i_gid_low i_gid -#define i_uid_high osd2.linux2.l_i_uid_high -#define i_gid_high osd2.linux2.l_i_gid_high -#define i_reserved2 osd2.linux2.l_i_reserved2 - -#elif defined(__GNU__) - -#define i_translator osd1.hurd1.h_i_translator -#define i_frag osd2.hurd2.h_i_frag; -#define i_fsize osd2.hurd2.h_i_fsize; -#define i_uid_high osd2.hurd2.h_i_uid_high -#define i_gid_high osd2.hurd2.h_i_gid_high -#define i_author osd2.hurd2.h_i_author - -#elif defined(__masix__) - -#define i_reserved1 osd1.masix1.m_i_reserved1 -#define i_frag osd2.masix2.m_i_frag -#define i_fsize osd2.masix2.m_i_fsize -#define i_file_acl_high osd2.masix2.m_i_file_acl_high -#define i_reserved2 osd2.masix2.m_i_reserved2 - -#endif /* defined(__KERNEL__) || defined(__linux__) */ - -/* - * File system states - */ -#define EXT4_VALID_FS 0x0001 /* Unmounted cleanly */ -#define EXT4_ERROR_FS 0x0002 /* Errors detected */ -#define EXT4_ORPHAN_FS 0x0004 /* Orphans being recovered */ - -/* - * Mount flags - */ -#define EXT4_MOUNT_CHECK 0x00001 /* Do mount-time checks */ -#define EXT4_MOUNT_OLDALLOC 0x00002 /* Don't use the new Orlov allocator */ -#define EXT4_MOUNT_GRPID 0x00004 /* Create files with directory's group */ -#define EXT4_MOUNT_DEBUG 0x00008 /* Some debugging messages */ -#define EXT4_MOUNT_ERRORS_CONT 0x00010 /* Continue on errors */ -#define EXT4_MOUNT_ERRORS_RO 0x00020 /* Remount fs ro on errors */ -#define EXT4_MOUNT_ERRORS_PANIC 0x00040 /* Panic on errors */ -#define EXT4_MOUNT_MINIX_DF 0x00080 /* Mimics the Minix statfs */ -#define EXT4_MOUNT_NOLOAD 0x00100 /* Don't use existing journal*/ -#define EXT4_MOUNT_ABORT 0x00200 /* Fatal error detected */ -#define EXT4_MOUNT_DATA_FLAGS 0x00C00 /* Mode for data writes: */ -#define EXT4_MOUNT_JOURNAL_DATA 0x00400 /* Write data to journal */ -#define EXT4_MOUNT_ORDERED_DATA 0x00800 /* Flush data before commit */ -#define EXT4_MOUNT_WRITEBACK_DATA 0x00C00 /* No data ordering */ -#define EXT4_MOUNT_UPDATE_JOURNAL 0x01000 /* Update the journal format */ -#define EXT4_MOUNT_NO_UID32 0x02000 /* Disable 32-bit UIDs */ -#define EXT4_MOUNT_XATTR_USER 0x04000 /* Extended user attributes */ -#define EXT4_MOUNT_POSIX_ACL 0x08000 /* POSIX Access Control Lists */ -#define EXT4_MOUNT_RESERVATION 0x10000 /* Preallocation */ -#define EXT4_MOUNT_BARRIER 0x20000 /* Use block barriers */ -#define EXT4_MOUNT_NOBH 0x40000 /* No bufferheads */ -#define EXT4_MOUNT_QUOTA 0x80000 /* Some quota option set */ -#define EXT4_MOUNT_USRQUOTA 0x100000 /* "old" user quota */ -#define EXT4_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */ -#define EXT4_MOUNT_EXTENTS 0x400000 /* Extents support */ - -/* Compatibility, for having both ext2_fs.h and ext4_fs.h included at once */ -#ifndef _LINUX_EXT2_FS_H -#define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt -#define set_opt(o, opt) o |= EXT4_MOUNT_##opt -#define test_opt(sb, opt) (EXT4_SB(sb)->s_mount_opt & \ - EXT4_MOUNT_##opt) -#else -#define EXT2_MOUNT_NOLOAD EXT4_MOUNT_NOLOAD -#define EXT2_MOUNT_ABORT EXT4_MOUNT_ABORT -#define EXT2_MOUNT_DATA_FLAGS EXT4_MOUNT_DATA_FLAGS -#endif - -#define ext4_set_bit ext2_set_bit -#define ext4_set_bit_atomic ext2_set_bit_atomic -#define ext4_clear_bit ext2_clear_bit -#define ext4_clear_bit_atomic ext2_clear_bit_atomic -#define ext4_test_bit ext2_test_bit -#define ext4_find_first_zero_bit ext2_find_first_zero_bit -#define ext4_find_next_zero_bit ext2_find_next_zero_bit - -/* - * Maximal mount counts between two filesystem checks - */ -#define EXT4_DFL_MAX_MNT_COUNT 20 /* Allow 20 mounts */ -#define EXT4_DFL_CHECKINTERVAL 0 /* Don't use interval check */ - -/* - * Behaviour when detecting errors - */ -#define EXT4_ERRORS_CONTINUE 1 /* Continue execution */ -#define EXT4_ERRORS_RO 2 /* Remount fs read-only */ -#define EXT4_ERRORS_PANIC 3 /* Panic */ -#define EXT4_ERRORS_DEFAULT EXT4_ERRORS_CONTINUE - -/* - * Structure of the super block - */ -struct ext4_super_block { -/*00*/ __le32 s_inodes_count; /* Inodes count */ - __le32 s_blocks_count; /* Blocks count */ - __le32 s_r_blocks_count; /* Reserved blocks count */ - __le32 s_free_blocks_count; /* Free blocks count */ -/*10*/ __le32 s_free_inodes_count; /* Free inodes count */ - __le32 s_first_data_block; /* First Data Block */ - __le32 s_log_block_size; /* Block size */ - __le32 s_log_frag_size; /* Fragment size */ -/*20*/ __le32 s_blocks_per_group; /* # Blocks per group */ - __le32 s_frags_per_group; /* # Fragments per group */ - __le32 s_inodes_per_group; /* # Inodes per group */ - __le32 s_mtime; /* Mount time */ -/*30*/ __le32 s_wtime; /* Write time */ - __le16 s_mnt_count; /* Mount count */ - __le16 s_max_mnt_count; /* Maximal mount count */ - __le16 s_magic; /* Magic signature */ - __le16 s_state; /* File system state */ - __le16 s_errors; /* Behaviour when detecting errors */ - __le16 s_minor_rev_level; /* minor revision level */ -/*40*/ __le32 s_lastcheck; /* time of last check */ - __le32 s_checkinterval; /* max. time between checks */ - __le32 s_creator_os; /* OS */ - __le32 s_rev_level; /* Revision level */ -/*50*/ __le16 s_def_resuid; /* Default uid for reserved blocks */ - __le16 s_def_resgid; /* Default gid for reserved blocks */ - /* - * These fields are for EXT4_DYNAMIC_REV superblocks only. - * - * Note: the difference between the compatible feature set and - * the incompatible feature set is that if there is a bit set - * in the incompatible feature set that the kernel doesn't - * know about, it should refuse to mount the filesystem. - * - * e2fsck's requirements are more strict; if it doesn't know - * about a feature in either the compatible or incompatible - * feature set, it must abort and not try to meddle with - * things it doesn't understand... - */ - __le32 s_first_ino; /* First non-reserved inode */ - __le16 s_inode_size; /* size of inode structure */ - __le16 s_block_group_nr; /* block group # of this superblock */ - __le32 s_feature_compat; /* compatible feature set */ -/*60*/ __le32 s_feature_incompat; /* incompatible feature set */ - __le32 s_feature_ro_compat; /* readonly-compatible feature set */ -/*68*/ __u8 s_uuid[16]; /* 128-bit uuid for volume */ -/*78*/ char s_volume_name[16]; /* volume name */ -/*88*/ char s_last_mounted[64]; /* directory where last mounted */ -/*C8*/ __le32 s_algorithm_usage_bitmap; /* For compression */ - /* - * Performance hints. Directory preallocation should only - * happen if the EXT4_FEATURE_COMPAT_DIR_PREALLOC flag is on. - */ - __u8 s_prealloc_blocks; /* Nr of blocks to try to preallocate*/ - __u8 s_prealloc_dir_blocks; /* Nr to preallocate for dirs */ - __le16 s_reserved_gdt_blocks; /* Per group desc for online growth */ - /* - * Journaling support valid if EXT4_FEATURE_COMPAT_HAS_JOURNAL set. - */ -/*D0*/ __u8 s_journal_uuid[16]; /* uuid of journal superblock */ -/*E0*/ __le32 s_journal_inum; /* inode number of journal file */ - __le32 s_journal_dev; /* device number of journal file */ - __le32 s_last_orphan; /* start of list of inodes to delete */ - __le32 s_hash_seed[4]; /* HTREE hash seed */ - __u8 s_def_hash_version; /* Default hash version to use */ - __u8 s_reserved_char_pad; - __le16 s_desc_size; /* size of group descriptor */ -/*100*/ __le32 s_default_mount_opts; - __le32 s_first_meta_bg; /* First metablock block group */ - __le32 s_mkfs_time; /* When the filesystem was created */ - __le32 s_jnl_blocks[17]; /* Backup of the journal inode */ - /* 64bit support valid if EXT4_FEATURE_COMPAT_64BIT */ -/*150*/ __le32 s_blocks_count_hi; /* Blocks count */ - __le32 s_r_blocks_count_hi; /* Reserved blocks count */ - __le32 s_free_blocks_count_hi; /* Free blocks count */ - __u32 s_reserved[169]; /* Padding to the end of the block */ -}; - -#ifdef __KERNEL__ -static inline struct ext4_sb_info * EXT4_SB(struct super_block *sb) -{ - return sb->s_fs_info; -} -static inline struct ext4_inode_info *EXT4_I(struct inode *inode) -{ - return container_of(inode, struct ext4_inode_info, vfs_inode); -} - -static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) -{ - return ino == EXT4_ROOT_INO || - ino == EXT4_JOURNAL_INO || - ino == EXT4_RESIZE_INO || - (ino >= EXT4_FIRST_INO(sb) && - ino <= le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count)); -} -#else -/* Assume that user mode programs are passing in an ext4fs superblock, not - * a kernel struct super_block. This will allow us to call the feature-test - * macros from user land. */ -#define EXT4_SB(sb) (sb) -#endif - -#define NEXT_ORPHAN(inode) EXT4_I(inode)->i_dtime - -/* - * Codes for operating systems - */ -#define EXT4_OS_LINUX 0 -#define EXT4_OS_HURD 1 -#define EXT4_OS_MASIX 2 -#define EXT4_OS_FREEBSD 3 -#define EXT4_OS_LITES 4 - -/* - * Revision levels - */ -#define EXT4_GOOD_OLD_REV 0 /* The good old (original) format */ -#define EXT4_DYNAMIC_REV 1 /* V2 format w/ dynamic inode sizes */ - -#define EXT4_CURRENT_REV EXT4_GOOD_OLD_REV -#define EXT4_MAX_SUPP_REV EXT4_DYNAMIC_REV - -#define EXT4_GOOD_OLD_INODE_SIZE 128 - -/* - * Feature set definitions - */ - -#define EXT4_HAS_COMPAT_FEATURE(sb,mask) \ - ( EXT4_SB(sb)->s_es->s_feature_compat & cpu_to_le32(mask) ) -#define EXT4_HAS_RO_COMPAT_FEATURE(sb,mask) \ - ( EXT4_SB(sb)->s_es->s_feature_ro_compat & cpu_to_le32(mask) ) -#define EXT4_HAS_INCOMPAT_FEATURE(sb,mask) \ - ( EXT4_SB(sb)->s_es->s_feature_incompat & cpu_to_le32(mask) ) -#define EXT4_SET_COMPAT_FEATURE(sb,mask) \ - EXT4_SB(sb)->s_es->s_feature_compat |= cpu_to_le32(mask) -#define EXT4_SET_RO_COMPAT_FEATURE(sb,mask) \ - EXT4_SB(sb)->s_es->s_feature_ro_compat |= cpu_to_le32(mask) -#define EXT4_SET_INCOMPAT_FEATURE(sb,mask) \ - EXT4_SB(sb)->s_es->s_feature_incompat |= cpu_to_le32(mask) -#define EXT4_CLEAR_COMPAT_FEATURE(sb,mask) \ - EXT4_SB(sb)->s_es->s_feature_compat &= ~cpu_to_le32(mask) -#define EXT4_CLEAR_RO_COMPAT_FEATURE(sb,mask) \ - EXT4_SB(sb)->s_es->s_feature_ro_compat &= ~cpu_to_le32(mask) -#define EXT4_CLEAR_INCOMPAT_FEATURE(sb,mask) \ - EXT4_SB(sb)->s_es->s_feature_incompat &= ~cpu_to_le32(mask) - -#define EXT4_FEATURE_COMPAT_DIR_PREALLOC 0x0001 -#define EXT4_FEATURE_COMPAT_IMAGIC_INODES 0x0002 -#define EXT4_FEATURE_COMPAT_HAS_JOURNAL 0x0004 -#define EXT4_FEATURE_COMPAT_EXT_ATTR 0x0008 -#define EXT4_FEATURE_COMPAT_RESIZE_INODE 0x0010 -#define EXT4_FEATURE_COMPAT_DIR_INDEX 0x0020 - -#define EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER 0x0001 -#define EXT4_FEATURE_RO_COMPAT_LARGE_FILE 0x0002 -#define EXT4_FEATURE_RO_COMPAT_BTREE_DIR 0x0004 - -#define EXT4_FEATURE_INCOMPAT_COMPRESSION 0x0001 -#define EXT4_FEATURE_INCOMPAT_FILETYPE 0x0002 -#define EXT4_FEATURE_INCOMPAT_RECOVER 0x0004 /* Needs recovery */ -#define EXT4_FEATURE_INCOMPAT_JOURNAL_DEV 0x0008 /* Journal device */ -#define EXT4_FEATURE_INCOMPAT_META_BG 0x0010 -#define EXT4_FEATURE_INCOMPAT_EXTENTS 0x0040 /* extents support */ -#define EXT4_FEATURE_INCOMPAT_64BIT 0x0080 - -#define EXT4_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR -#define EXT4_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \ - EXT4_FEATURE_INCOMPAT_RECOVER| \ - EXT4_FEATURE_INCOMPAT_META_BG| \ - EXT4_FEATURE_INCOMPAT_EXTENTS| \ - EXT4_FEATURE_INCOMPAT_64BIT) -#define EXT4_FEATURE_RO_COMPAT_SUPP (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \ - EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \ - EXT4_FEATURE_RO_COMPAT_BTREE_DIR) - -/* - * Default values for user and/or group using reserved blocks - */ -#define EXT4_DEF_RESUID 0 -#define EXT4_DEF_RESGID 0 - -/* - * Default mount options - */ -#define EXT4_DEFM_DEBUG 0x0001 -#define EXT4_DEFM_BSDGROUPS 0x0002 -#define EXT4_DEFM_XATTR_USER 0x0004 -#define EXT4_DEFM_ACL 0x0008 -#define EXT4_DEFM_UID16 0x0010 -#define EXT4_DEFM_JMODE 0x0060 -#define EXT4_DEFM_JMODE_DATA 0x0020 -#define EXT4_DEFM_JMODE_ORDERED 0x0040 -#define EXT4_DEFM_JMODE_WBACK 0x0060 - -/* - * Structure of a directory entry - */ -#define EXT4_NAME_LEN 255 - -struct ext4_dir_entry { - __le32 inode; /* Inode number */ - __le16 rec_len; /* Directory entry length */ - __le16 name_len; /* Name length */ - char name[EXT4_NAME_LEN]; /* File name */ -}; - -/* - * The new version of the directory entry. Since EXT4 structures are - * stored in intel byte order, and the name_len field could never be - * bigger than 255 chars, it's safe to reclaim the extra byte for the - * file_type field. - */ -struct ext4_dir_entry_2 { - __le32 inode; /* Inode number */ - __le16 rec_len; /* Directory entry length */ - __u8 name_len; /* Name length */ - __u8 file_type; - char name[EXT4_NAME_LEN]; /* File name */ -}; - -/* - * Ext4 directory file types. Only the low 3 bits are used. The - * other bits are reserved for now. - */ -#define EXT4_FT_UNKNOWN 0 -#define EXT4_FT_REG_FILE 1 -#define EXT4_FT_DIR 2 -#define EXT4_FT_CHRDEV 3 -#define EXT4_FT_BLKDEV 4 -#define EXT4_FT_FIFO 5 -#define EXT4_FT_SOCK 6 -#define EXT4_FT_SYMLINK 7 - -#define EXT4_FT_MAX 8 - -/* - * EXT4_DIR_PAD defines the directory entries boundaries - * - * NOTE: It must be a multiple of 4 - */ -#define EXT4_DIR_PAD 4 -#define EXT4_DIR_ROUND (EXT4_DIR_PAD - 1) -#define EXT4_DIR_REC_LEN(name_len) (((name_len) + 8 + EXT4_DIR_ROUND) & \ - ~EXT4_DIR_ROUND) -/* - * Hash Tree Directory indexing - * (c) Daniel Phillips, 2001 - */ - -#ifdef CONFIG_EXT4_INDEX - #define is_dx(dir) (EXT4_HAS_COMPAT_FEATURE(dir->i_sb, \ - EXT4_FEATURE_COMPAT_DIR_INDEX) && \ - (EXT4_I(dir)->i_flags & EXT4_INDEX_FL)) -#define EXT4_DIR_LINK_MAX(dir) (!is_dx(dir) && (dir)->i_nlink >= EXT4_LINK_MAX) -#define EXT4_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2 || (dir)->i_nlink == 1) -#else - #define is_dx(dir) 0 -#define EXT4_DIR_LINK_MAX(dir) ((dir)->i_nlink >= EXT4_LINK_MAX) -#define EXT4_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2) -#endif - -/* Legal values for the dx_root hash_version field: */ - -#define DX_HASH_LEGACY 0 -#define DX_HASH_HALF_MD4 1 -#define DX_HASH_TEA 2 - -#ifdef __KERNEL__ - -/* hash info structure used by the directory hash */ -struct dx_hash_info -{ - u32 hash; - u32 minor_hash; - int hash_version; - u32 *seed; -}; - -#define EXT4_HTREE_EOF 0x7fffffff - -/* - * Control parameters used by ext4_htree_next_block - */ -#define HASH_NB_ALWAYS 1 - - -/* - * Describe an inode's exact location on disk and in memory - */ -struct ext4_iloc -{ - struct buffer_head *bh; - unsigned long offset; - unsigned long block_group; -}; - -static inline struct ext4_inode *ext4_raw_inode(struct ext4_iloc *iloc) -{ - return (struct ext4_inode *) (iloc->bh->b_data + iloc->offset); -} - -/* - * This structure is stuffed into the struct file's private_data field - * for directories. It is where we put information so that we can do - * readdir operations in hash tree order. - */ -struct dir_private_info { - struct rb_root root; - struct rb_node *curr_node; - struct fname *extra_fname; - loff_t last_pos; - __u32 curr_hash; - __u32 curr_minor_hash; - __u32 next_hash; -}; - -/* calculate the first block number of the group */ -static inline ext4_fsblk_t -ext4_group_first_block_no(struct super_block *sb, unsigned long group_no) -{ - return group_no * (ext4_fsblk_t)EXT4_BLOCKS_PER_GROUP(sb) + - le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); -} - -/* - * Special error return code only used by dx_probe() and its callers. - */ -#define ERR_BAD_DX_DIR -75000 - -void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr, - unsigned long *blockgrpp, ext4_grpblk_t *offsetp); - -/* - * Function prototypes - */ - -/* - * Ok, these declarations are also in but none of the - * ext4 source programs needs to include it so they are duplicated here. - */ -# define NORET_TYPE /**/ -# define ATTRIB_NORET __attribute__((noreturn)) -# define NORET_AND noreturn, - -/* balloc.c */ -extern unsigned int ext4_block_group(struct super_block *sb, - ext4_fsblk_t blocknr); -extern ext4_grpblk_t ext4_block_group_offset(struct super_block *sb, - ext4_fsblk_t blocknr); -extern int ext4_bg_has_super(struct super_block *sb, int group); -extern unsigned long ext4_bg_num_gdb(struct super_block *sb, int group); -extern ext4_fsblk_t ext4_new_block (handle_t *handle, struct inode *inode, - ext4_fsblk_t goal, int *errp); -extern ext4_fsblk_t ext4_new_blocks (handle_t *handle, struct inode *inode, - ext4_fsblk_t goal, unsigned long *count, int *errp); -extern void ext4_free_blocks (handle_t *handle, struct inode *inode, - ext4_fsblk_t block, unsigned long count); -extern void ext4_free_blocks_sb (handle_t *handle, struct super_block *sb, - ext4_fsblk_t block, unsigned long count, - unsigned long *pdquot_freed_blocks); -extern ext4_fsblk_t ext4_count_free_blocks (struct super_block *); -extern void ext4_check_blocks_bitmap (struct super_block *); -extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb, - unsigned int block_group, - struct buffer_head ** bh); -extern int ext4_should_retry_alloc(struct super_block *sb, int *retries); -extern void ext4_init_block_alloc_info(struct inode *); -extern void ext4_rsv_window_add(struct super_block *sb, struct ext4_reserve_window_node *rsv); - -/* dir.c */ -extern int ext4_check_dir_entry(const char *, struct inode *, - struct ext4_dir_entry_2 *, - struct buffer_head *, unsigned long); -extern int ext4_htree_store_dirent(struct file *dir_file, __u32 hash, - __u32 minor_hash, - struct ext4_dir_entry_2 *dirent); -extern void ext4_htree_free_dir_info(struct dir_private_info *p); - -/* fsync.c */ -extern int ext4_sync_file (struct file *, struct dentry *, int); - -/* hash.c */ -extern int ext4fs_dirhash(const char *name, int len, struct - dx_hash_info *hinfo); - -/* ialloc.c */ -extern struct inode * ext4_new_inode (handle_t *, struct inode *, int); -extern void ext4_free_inode (handle_t *, struct inode *); -extern struct inode * ext4_orphan_get (struct super_block *, unsigned long); -extern unsigned long ext4_count_free_inodes (struct super_block *); -extern unsigned long ext4_count_dirs (struct super_block *); -extern void ext4_check_inodes_bitmap (struct super_block *); -extern unsigned long ext4_count_free (struct buffer_head *, unsigned); - - -/* inode.c */ -int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode, - struct buffer_head *bh, ext4_fsblk_t blocknr); -struct buffer_head * ext4_getblk (handle_t *, struct inode *, long, int, int *); -struct buffer_head * ext4_bread (handle_t *, struct inode *, int, int, int *); -int ext4_get_blocks_handle(handle_t *handle, struct inode *inode, - sector_t iblock, unsigned long maxblocks, struct buffer_head *bh_result, - int create, int extend_disksize); - -extern void ext4_read_inode (struct inode *); -extern int ext4_write_inode (struct inode *, int); -extern int ext4_setattr (struct dentry *, struct iattr *); -extern void ext4_delete_inode (struct inode *); -extern int ext4_sync_inode (handle_t *, struct inode *); -extern void ext4_discard_reservation (struct inode *); -extern void ext4_dirty_inode(struct inode *); -extern int ext4_change_inode_journal_flag(struct inode *, int); -extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *); -extern void ext4_truncate (struct inode *); -extern void ext4_set_inode_flags(struct inode *); -extern void ext4_set_aops(struct inode *inode); -extern int ext4_writepage_trans_blocks(struct inode *); -extern int ext4_block_truncate_page(handle_t *handle, struct page *page, - struct address_space *mapping, loff_t from); - -/* ioctl.c */ -extern int ext4_ioctl (struct inode *, struct file *, unsigned int, - unsigned long); -extern long ext4_compat_ioctl (struct file *, unsigned int, unsigned long); - -/* namei.c */ -extern int ext4_orphan_add(handle_t *, struct inode *); -extern int ext4_orphan_del(handle_t *, struct inode *); -extern int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash, - __u32 start_minor_hash, __u32 *next_hash); - -/* resize.c */ -extern int ext4_group_add(struct super_block *sb, - struct ext4_new_group_data *input); -extern int ext4_group_extend(struct super_block *sb, - struct ext4_super_block *es, - ext4_fsblk_t n_blocks_count); - -/* super.c */ -extern void ext4_error (struct super_block *, const char *, const char *, ...) - __attribute__ ((format (printf, 3, 4))); -extern void __ext4_std_error (struct super_block *, const char *, int); -extern void ext4_abort (struct super_block *, const char *, const char *, ...) - __attribute__ ((format (printf, 3, 4))); -extern void ext4_warning (struct super_block *, const char *, const char *, ...) - __attribute__ ((format (printf, 3, 4))); -extern void ext4_update_dynamic_rev (struct super_block *sb); -extern ext4_fsblk_t ext4_block_bitmap(struct super_block *sb, - struct ext4_group_desc *bg); -extern ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb, - struct ext4_group_desc *bg); -extern ext4_fsblk_t ext4_inode_table(struct super_block *sb, - struct ext4_group_desc *bg); -extern void ext4_block_bitmap_set(struct super_block *sb, - struct ext4_group_desc *bg, ext4_fsblk_t blk); -extern void ext4_inode_bitmap_set(struct super_block *sb, - struct ext4_group_desc *bg, ext4_fsblk_t blk); -extern void ext4_inode_table_set(struct super_block *sb, - struct ext4_group_desc *bg, ext4_fsblk_t blk); - -static inline ext4_fsblk_t ext4_blocks_count(struct ext4_super_block *es) -{ - return ((ext4_fsblk_t)le32_to_cpu(es->s_blocks_count_hi) << 32) | - le32_to_cpu(es->s_blocks_count); -} - -static inline ext4_fsblk_t ext4_r_blocks_count(struct ext4_super_block *es) -{ - return ((ext4_fsblk_t)le32_to_cpu(es->s_r_blocks_count_hi) << 32) | - le32_to_cpu(es->s_r_blocks_count); -} - -static inline ext4_fsblk_t ext4_free_blocks_count(struct ext4_super_block *es) -{ - return ((ext4_fsblk_t)le32_to_cpu(es->s_free_blocks_count_hi) << 32) | - le32_to_cpu(es->s_free_blocks_count); -} - -static inline void ext4_blocks_count_set(struct ext4_super_block *es, - ext4_fsblk_t blk) -{ - es->s_blocks_count = cpu_to_le32((u32)blk); - es->s_blocks_count_hi = cpu_to_le32(blk >> 32); -} - -static inline void ext4_free_blocks_count_set(struct ext4_super_block *es, - ext4_fsblk_t blk) -{ - es->s_free_blocks_count = cpu_to_le32((u32)blk); - es->s_free_blocks_count_hi = cpu_to_le32(blk >> 32); -} - -static inline void ext4_r_blocks_count_set(struct ext4_super_block *es, - ext4_fsblk_t blk) -{ - es->s_r_blocks_count = cpu_to_le32((u32)blk); - es->s_r_blocks_count_hi = cpu_to_le32(blk >> 32); -} - - - -#define ext4_std_error(sb, errno) \ -do { \ - if ((errno)) \ - __ext4_std_error((sb), __FUNCTION__, (errno)); \ -} while (0) - -/* - * Inodes and files operations - */ - -/* dir.c */ -extern const struct file_operations ext4_dir_operations; - -/* file.c */ -extern struct inode_operations ext4_file_inode_operations; -extern const struct file_operations ext4_file_operations; - -/* namei.c */ -extern struct inode_operations ext4_dir_inode_operations; -extern struct inode_operations ext4_special_inode_operations; - -/* symlink.c */ -extern struct inode_operations ext4_symlink_inode_operations; -extern struct inode_operations ext4_fast_symlink_inode_operations; - -/* extents.c */ -extern int ext4_ext_tree_init(handle_t *handle, struct inode *); -extern int ext4_ext_writepage_trans_blocks(struct inode *, int); -extern int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, - ext4_fsblk_t iblock, - unsigned long max_blocks, struct buffer_head *bh_result, - int create, int extend_disksize); -extern void ext4_ext_truncate(struct inode *, struct page *); -extern void ext4_ext_init(struct super_block *); -extern void ext4_ext_release(struct super_block *); -static inline int -ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block, - unsigned long max_blocks, struct buffer_head *bh, - int create, int extend_disksize) -{ - if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) - return ext4_ext_get_blocks(handle, inode, block, max_blocks, - bh, create, extend_disksize); - return ext4_get_blocks_handle(handle, inode, block, max_blocks, bh, - create, extend_disksize); -} - - -#endif /* __KERNEL__ */ - -#endif /* _LINUX_EXT4_FS_H */ diff --git a/trunk/include/linux/ext4_fs_extents.h b/trunk/include/linux/ext4_fs_extents.h deleted file mode 100644 index a41cc24568ca..000000000000 --- a/trunk/include/linux/ext4_fs_extents.h +++ /dev/null @@ -1,198 +0,0 @@ -/* - * Copyright (c) 2003-2006, Cluster File Systems, Inc, info@clusterfs.com - * Written by Alex Tomas - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public Licens - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- - */ - -#ifndef _LINUX_EXT4_EXTENTS -#define _LINUX_EXT4_EXTENTS - -#include - -/* - * With AGRESSIVE_TEST defined, the capacity of index/leaf blocks - * becomes very small, so index split, in-depth growing and - * other hard changes happen much more often. - * This is for debug purposes only. - */ -#define AGRESSIVE_TEST_ - -/* - * With EXTENTS_STATS defined, the number of blocks and extents - * are collected in the truncate path. They'll be shown at - * umount time. - */ -#define EXTENTS_STATS__ - -/* - * If CHECK_BINSEARCH is defined, then the results of the binary search - * will also be checked by linear search. - */ -#define CHECK_BINSEARCH__ - -/* - * If EXT_DEBUG is defined you can use the 'extdebug' mount option - * to get lots of info about what's going on. - */ -#define EXT_DEBUG__ -#ifdef EXT_DEBUG -#define ext_debug(a...) printk(a) -#else -#define ext_debug(a...) -#endif - -/* - * If EXT_STATS is defined then stats numbers are collected. - * These number will be displayed at umount time. - */ -#define EXT_STATS_ - - -/* - * ext4_inode has i_block array (60 bytes total). - * The first 12 bytes store ext4_extent_header; - * the remainder stores an array of ext4_extent. - */ - -/* - * This is the extent on-disk structure. - * It's used at the bottom of the tree. - */ -struct ext4_extent { - __le32 ee_block; /* first logical block extent covers */ - __le16 ee_len; /* number of blocks covered by extent */ - __le16 ee_start_hi; /* high 16 bits of physical block */ - __le32 ee_start; /* low 32 bits of physical block */ -}; - -/* - * This is index on-disk structure. - * It's used at all the levels except the bottom. - */ -struct ext4_extent_idx { - __le32 ei_block; /* index covers logical blocks from 'block' */ - __le32 ei_leaf; /* pointer to the physical block of the next * - * level. leaf or next index could be there */ - __le16 ei_leaf_hi; /* high 16 bits of physical block */ - __u16 ei_unused; -}; - -/* - * Each block (leaves and indexes), even inode-stored has header. - */ -struct ext4_extent_header { - __le16 eh_magic; /* probably will support different formats */ - __le16 eh_entries; /* number of valid entries */ - __le16 eh_max; /* capacity of store in entries */ - __le16 eh_depth; /* has tree real underlying blocks? */ - __le32 eh_generation; /* generation of the tree */ -}; - -#define EXT4_EXT_MAGIC cpu_to_le16(0xf30a) - -/* - * Array of ext4_ext_path contains path to some extent. - * Creation/lookup routines use it for traversal/splitting/etc. - * Truncate uses it to simulate recursive walking. - */ -struct ext4_ext_path { - ext4_fsblk_t p_block; - __u16 p_depth; - struct ext4_extent *p_ext; - struct ext4_extent_idx *p_idx; - struct ext4_extent_header *p_hdr; - struct buffer_head *p_bh; -}; - -/* - * structure for external API - */ - -#define EXT4_EXT_CACHE_NO 0 -#define EXT4_EXT_CACHE_GAP 1 -#define EXT4_EXT_CACHE_EXTENT 2 - -/* - * to be called by ext4_ext_walk_space() - * negative retcode - error - * positive retcode - signal for ext4_ext_walk_space(), see below - * callback must return valid extent (passed or newly created) - */ -typedef int (*ext_prepare_callback)(struct inode *, struct ext4_ext_path *, - struct ext4_ext_cache *, - void *); - -#define EXT_CONTINUE 0 -#define EXT_BREAK 1 -#define EXT_REPEAT 2 - - -#define EXT_MAX_BLOCK 0xffffffff - -#define EXT_MAX_LEN ((1UL << 15) - 1) - - -#define EXT_FIRST_EXTENT(__hdr__) \ - ((struct ext4_extent *) (((char *) (__hdr__)) + \ - sizeof(struct ext4_extent_header))) -#define EXT_FIRST_INDEX(__hdr__) \ - ((struct ext4_extent_idx *) (((char *) (__hdr__)) + \ - sizeof(struct ext4_extent_header))) -#define EXT_HAS_FREE_INDEX(__path__) \ - (le16_to_cpu((__path__)->p_hdr->eh_entries) \ - < le16_to_cpu((__path__)->p_hdr->eh_max)) -#define EXT_LAST_EXTENT(__hdr__) \ - (EXT_FIRST_EXTENT((__hdr__)) + le16_to_cpu((__hdr__)->eh_entries) - 1) -#define EXT_LAST_INDEX(__hdr__) \ - (EXT_FIRST_INDEX((__hdr__)) + le16_to_cpu((__hdr__)->eh_entries) - 1) -#define EXT_MAX_EXTENT(__hdr__) \ - (EXT_FIRST_EXTENT((__hdr__)) + le16_to_cpu((__hdr__)->eh_max) - 1) -#define EXT_MAX_INDEX(__hdr__) \ - (EXT_FIRST_INDEX((__hdr__)) + le16_to_cpu((__hdr__)->eh_max) - 1) - -static inline struct ext4_extent_header *ext_inode_hdr(struct inode *inode) -{ - return (struct ext4_extent_header *) EXT4_I(inode)->i_data; -} - -static inline struct ext4_extent_header *ext_block_hdr(struct buffer_head *bh) -{ - return (struct ext4_extent_header *) bh->b_data; -} - -static inline unsigned short ext_depth(struct inode *inode) -{ - return le16_to_cpu(ext_inode_hdr(inode)->eh_depth); -} - -static inline void ext4_ext_tree_changed(struct inode *inode) -{ - EXT4_I(inode)->i_ext_generation++; -} - -static inline void -ext4_ext_invalidate_cache(struct inode *inode) -{ - EXT4_I(inode)->i_cached_extent.ec_type = EXT4_EXT_CACHE_NO; -} - -extern int ext4_extent_tree_init(handle_t *, struct inode *); -extern int ext4_ext_calc_credits_for_insert(struct inode *, struct ext4_ext_path *); -extern int ext4_ext_insert_extent(handle_t *, struct inode *, struct ext4_ext_path *, struct ext4_extent *); -extern int ext4_ext_walk_space(struct inode *, unsigned long, unsigned long, ext_prepare_callback, void *); -extern struct ext4_ext_path * ext4_ext_find_extent(struct inode *, int, struct ext4_ext_path *); - -#endif /* _LINUX_EXT4_EXTENTS */ - diff --git a/trunk/include/linux/ext4_fs_i.h b/trunk/include/linux/ext4_fs_i.h deleted file mode 100644 index bb42379cb7fd..000000000000 --- a/trunk/include/linux/ext4_fs_i.h +++ /dev/null @@ -1,158 +0,0 @@ -/* - * linux/include/linux/ext4_fs_i.h - * - * Copyright (C) 1992, 1993, 1994, 1995 - * Remy Card (card@masi.ibp.fr) - * Laboratoire MASI - Institut Blaise Pascal - * Universite Pierre et Marie Curie (Paris VI) - * - * from - * - * linux/include/linux/minix_fs_i.h - * - * Copyright (C) 1991, 1992 Linus Torvalds - */ - -#ifndef _LINUX_EXT4_FS_I -#define _LINUX_EXT4_FS_I - -#include -#include -#include -#include - -/* data type for block offset of block group */ -typedef int ext4_grpblk_t; - -/* data type for filesystem-wide blocks number */ -typedef unsigned long long ext4_fsblk_t; - -struct ext4_reserve_window { - ext4_fsblk_t _rsv_start; /* First byte reserved */ - ext4_fsblk_t _rsv_end; /* Last byte reserved or 0 */ -}; - -struct ext4_reserve_window_node { - struct rb_node rsv_node; - __u32 rsv_goal_size; - __u32 rsv_alloc_hit; - struct ext4_reserve_window rsv_window; -}; - -struct ext4_block_alloc_info { - /* information about reservation window */ - struct ext4_reserve_window_node rsv_window_node; - /* - * was i_next_alloc_block in ext4_inode_info - * is the logical (file-relative) number of the - * most-recently-allocated block in this file. - * We use this for detecting linearly ascending allocation requests. - */ - __u32 last_alloc_logical_block; - /* - * Was i_next_alloc_goal in ext4_inode_info - * is the *physical* companion to i_next_alloc_block. - * it the the physical block number of the block which was most-recentl - * allocated to this file. This give us the goal (target) for the next - * allocation when we detect linearly ascending requests. - */ - ext4_fsblk_t last_alloc_physical_block; -}; - -#define rsv_start rsv_window._rsv_start -#define rsv_end rsv_window._rsv_end - -/* - * storage for cached extent - */ -struct ext4_ext_cache { - ext4_fsblk_t ec_start; - __u32 ec_block; - __u32 ec_len; /* must be 32bit to return holes */ - __u32 ec_type; -}; - -/* - * third extended file system inode data in memory - */ -struct ext4_inode_info { - __le32 i_data[15]; /* unconverted */ - __u32 i_flags; -#ifdef EXT4_FRAGMENTS - __u32 i_faddr; - __u8 i_frag_no; - __u8 i_frag_size; -#endif - ext4_fsblk_t i_file_acl; - __u32 i_dir_acl; - __u32 i_dtime; - - /* - * i_block_group is the number of the block group which contains - * this file's inode. Constant across the lifetime of the inode, - * it is ued for making block allocation decisions - we try to - * place a file's data blocks near its inode block, and new inodes - * near to their parent directory's inode. - */ - __u32 i_block_group; - __u32 i_state; /* Dynamic state flags for ext4 */ - - /* block reservation info */ - struct ext4_block_alloc_info *i_block_alloc_info; - - __u32 i_dir_start_lookup; -#ifdef CONFIG_EXT4DEV_FS_XATTR - /* - * Extended attributes can be read independently of the main file - * data. Taking i_mutex even when reading would cause contention - * between readers of EAs and writers of regular file data, so - * instead we synchronize on xattr_sem when reading or changing - * EAs. - */ - struct rw_semaphore xattr_sem; -#endif -#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL - struct posix_acl *i_acl; - struct posix_acl *i_default_acl; -#endif - - struct list_head i_orphan; /* unlinked but open inodes */ - - /* - * i_disksize keeps track of what the inode size is ON DISK, not - * in memory. During truncate, i_size is set to the new size by - * the VFS prior to calling ext4_truncate(), but the filesystem won't - * set i_disksize to 0 until the truncate is actually under way. - * - * The intent is that i_disksize always represents the blocks which - * are used by this file. This allows recovery to restart truncate - * on orphans if we crash during truncate. We actually write i_disksize - * into the on-disk inode when writing inodes out, instead of i_size. - * - * The only time when i_disksize and i_size may be different is when - * a truncate is in progress. The only things which change i_disksize - * are ext4_get_block (growth) and ext4_truncate (shrinkth). - */ - loff_t i_disksize; - - /* on-disk additional length */ - __u16 i_extra_isize; - - /* - * truncate_mutex is for serialising ext4_truncate() against - * ext4_getblock(). In the 2.4 ext2 design, great chunks of inode's - * data tree are chopped off during truncate. We can't do that in - * ext4 because whenever we perform intermediate commits during - * truncate, the inode and all the metadata blocks *must* be in a - * consistent state which allows truncation of the orphans to restart - * during recovery. Hence we must fix the get_block-vs-truncate race - * by other means, so we have truncate_mutex. - */ - struct mutex truncate_mutex; - struct inode vfs_inode; - - unsigned long i_ext_generation; - struct ext4_ext_cache i_cached_extent; -}; - -#endif /* _LINUX_EXT4_FS_I */ diff --git a/trunk/include/linux/ext4_fs_sb.h b/trunk/include/linux/ext4_fs_sb.h deleted file mode 100644 index 691a713139ce..000000000000 --- a/trunk/include/linux/ext4_fs_sb.h +++ /dev/null @@ -1,94 +0,0 @@ -/* - * linux/include/linux/ext4_fs_sb.h - * - * Copyright (C) 1992, 1993, 1994, 1995 - * Remy Card (card@masi.ibp.fr) - * Laboratoire MASI - Institut Blaise Pascal - * Universite Pierre et Marie Curie (Paris VI) - * - * from - * - * linux/include/linux/minix_fs_sb.h - * - * Copyright (C) 1991, 1992 Linus Torvalds - */ - -#ifndef _LINUX_EXT4_FS_SB -#define _LINUX_EXT4_FS_SB - -#ifdef __KERNEL__ -#include -#include -#include -#include -#endif -#include - -/* - * third extended-fs super-block data in memory - */ -struct ext4_sb_info { - unsigned long s_frag_size; /* Size of a fragment in bytes */ - unsigned long s_desc_size; /* Size of a group descriptor in bytes */ - unsigned long s_frags_per_block;/* Number of fragments per block */ - unsigned long s_inodes_per_block;/* Number of inodes per block */ - unsigned long s_frags_per_group;/* Number of fragments in a group */ - unsigned long s_blocks_per_group;/* Number of blocks in a group */ - unsigned long s_inodes_per_group;/* Number of inodes in a group */ - unsigned long s_itb_per_group; /* Number of inode table blocks per group */ - unsigned long s_gdb_count; /* Number of group descriptor blocks */ - unsigned long s_desc_per_block; /* Number of group descriptors per block */ - unsigned long s_groups_count; /* Number of groups in the fs */ - struct buffer_head * s_sbh; /* Buffer containing the super block */ - struct ext4_super_block * s_es; /* Pointer to the super block in the buffer */ - struct buffer_head ** s_group_desc; - unsigned long s_mount_opt; - uid_t s_resuid; - gid_t s_resgid; - unsigned short s_mount_state; - unsigned short s_pad; - int s_addr_per_block_bits; - int s_desc_per_block_bits; - int s_inode_size; - int s_first_ino; - spinlock_t s_next_gen_lock; - u32 s_next_generation; - u32 s_hash_seed[4]; - int s_def_hash_version; - struct percpu_counter s_freeblocks_counter; - struct percpu_counter s_freeinodes_counter; - struct percpu_counter s_dirs_counter; - struct blockgroup_lock s_blockgroup_lock; - - /* root of the per fs reservation window tree */ - spinlock_t s_rsv_window_lock; - struct rb_root s_rsv_window_root; - struct ext4_reserve_window_node s_rsv_window_head; - - /* Journaling */ - struct inode * s_journal_inode; - struct journal_s * s_journal; - struct list_head s_orphan; - unsigned long s_commit_interval; - struct block_device *journal_bdev; -#ifdef CONFIG_JBD_DEBUG - struct timer_list turn_ro_timer; /* For turning read-only (crash simulation) */ - wait_queue_head_t ro_wait_queue; /* For people waiting for the fs to go read-only */ -#endif -#ifdef CONFIG_QUOTA - char *s_qf_names[MAXQUOTAS]; /* Names of quota files with journalled quota */ - int s_jquota_fmt; /* Format of quota to use */ -#endif - -#ifdef EXTENTS_STATS - /* ext4 extents stats */ - unsigned long s_ext_min; - unsigned long s_ext_max; - unsigned long s_depth_max; - spinlock_t s_ext_stats_lock; - unsigned long s_ext_blocks; - unsigned long s_ext_extents; -#endif -}; - -#endif /* _LINUX_EXT4_FS_SB */ diff --git a/trunk/include/linux/ext4_jbd2.h b/trunk/include/linux/ext4_jbd2.h deleted file mode 100644 index 72dd631912e4..000000000000 --- a/trunk/include/linux/ext4_jbd2.h +++ /dev/null @@ -1,273 +0,0 @@ -/* - * linux/include/linux/ext4_jbd2.h - * - * Written by Stephen C. Tweedie , 1999 - * - * Copyright 1998--1999 Red Hat corp --- All Rights Reserved - * - * This file is part of the Linux kernel and is made available under - * the terms of the GNU General Public License, version 2, or at your - * option, any later version, incorporated herein by reference. - * - * Ext4-specific journaling extensions. - */ - -#ifndef _LINUX_EXT4_JBD_H -#define _LINUX_EXT4_JBD_H - -#include -#include -#include - -#define EXT4_JOURNAL(inode) (EXT4_SB((inode)->i_sb)->s_journal) - -/* Define the number of blocks we need to account to a transaction to - * modify one block of data. - * - * We may have to touch one inode, one bitmap buffer, up to three - * indirection blocks, the group and superblock summaries, and the data - * block to complete the transaction. - * - * For extents-enabled fs we may have to allocate and modify up to - * 5 levels of tree + root which are stored in the inode. */ - -#define EXT4_SINGLEDATA_TRANS_BLOCKS(sb) \ - (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS) \ - || test_opt(sb, EXTENTS) ? 27U : 8U) - -/* Extended attribute operations touch at most two data buffers, - * two bitmap buffers, and two group summaries, in addition to the inode - * and the superblock, which are already accounted for. */ - -#define EXT4_XATTR_TRANS_BLOCKS 6U - -/* Define the minimum size for a transaction which modifies data. This - * needs to take into account the fact that we may end up modifying two - * quota files too (one for the group, one for the user quota). The - * superblock only gets updated once, of course, so don't bother - * counting that again for the quota updates. */ - -#define EXT4_DATA_TRANS_BLOCKS(sb) (EXT4_SINGLEDATA_TRANS_BLOCKS(sb) + \ - EXT4_XATTR_TRANS_BLOCKS - 2 + \ - 2*EXT4_QUOTA_TRANS_BLOCKS(sb)) - -/* Delete operations potentially hit one directory's namespace plus an - * entire inode, plus arbitrary amounts of bitmap/indirection data. Be - * generous. We can grow the delete transaction later if necessary. */ - -#define EXT4_DELETE_TRANS_BLOCKS(sb) (2 * EXT4_DATA_TRANS_BLOCKS(sb) + 64) - -/* Define an arbitrary limit for the amount of data we will anticipate - * writing to any given transaction. For unbounded transactions such as - * write(2) and truncate(2) we can write more than this, but we always - * start off at the maximum transaction size and grow the transaction - * optimistically as we go. */ - -#define EXT4_MAX_TRANS_DATA 64U - -/* We break up a large truncate or write transaction once the handle's - * buffer credits gets this low, we need either to extend the - * transaction or to start a new one. Reserve enough space here for - * inode, bitmap, superblock, group and indirection updates for at least - * one block, plus two quota updates. Quota allocations are not - * needed. */ - -#define EXT4_RESERVE_TRANS_BLOCKS 12U - -#define EXT4_INDEX_EXTRA_TRANS_BLOCKS 8 - -#ifdef CONFIG_QUOTA -/* Amount of blocks needed for quota update - we know that the structure was - * allocated so we need to update only inode+data */ -#define EXT4_QUOTA_TRANS_BLOCKS(sb) (test_opt(sb, QUOTA) ? 2 : 0) -/* Amount of blocks needed for quota insert/delete - we do some block writes - * but inode, sb and group updates are done only once */ -#define EXT4_QUOTA_INIT_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_INIT_ALLOC*\ - (EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)+3+DQUOT_INIT_REWRITE) : 0) -#define EXT4_QUOTA_DEL_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_DEL_ALLOC*\ - (EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)+3+DQUOT_DEL_REWRITE) : 0) -#else -#define EXT4_QUOTA_TRANS_BLOCKS(sb) 0 -#define EXT4_QUOTA_INIT_BLOCKS(sb) 0 -#define EXT4_QUOTA_DEL_BLOCKS(sb) 0 -#endif - -int -ext4_mark_iloc_dirty(handle_t *handle, - struct inode *inode, - struct ext4_iloc *iloc); - -/* - * On success, We end up with an outstanding reference count against - * iloc->bh. This _must_ be cleaned up later. - */ - -int ext4_reserve_inode_write(handle_t *handle, struct inode *inode, - struct ext4_iloc *iloc); - -int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode); - -/* - * Wrapper functions with which ext4 calls into JBD. The intent here is - * to allow these to be turned into appropriate stubs so ext4 can control - * ext2 filesystems, so ext2+ext4 systems only nee one fs. This work hasn't - * been done yet. - */ - -void ext4_journal_abort_handle(const char *caller, const char *err_fn, - struct buffer_head *bh, handle_t *handle, int err); - -static inline int -__ext4_journal_get_undo_access(const char *where, handle_t *handle, - struct buffer_head *bh) -{ - int err = jbd2_journal_get_undo_access(handle, bh); - if (err) - ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); - return err; -} - -static inline int -__ext4_journal_get_write_access(const char *where, handle_t *handle, - struct buffer_head *bh) -{ - int err = jbd2_journal_get_write_access(handle, bh); - if (err) - ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); - return err; -} - -static inline void -ext4_journal_release_buffer(handle_t *handle, struct buffer_head *bh) -{ - jbd2_journal_release_buffer(handle, bh); -} - -static inline int -__ext4_journal_forget(const char *where, handle_t *handle, struct buffer_head *bh) -{ - int err = jbd2_journal_forget(handle, bh); - if (err) - ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); - return err; -} - -static inline int -__ext4_journal_revoke(const char *where, handle_t *handle, - ext4_fsblk_t blocknr, struct buffer_head *bh) -{ - int err = jbd2_journal_revoke(handle, blocknr, bh); - if (err) - ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); - return err; -} - -static inline int -__ext4_journal_get_create_access(const char *where, - handle_t *handle, struct buffer_head *bh) -{ - int err = jbd2_journal_get_create_access(handle, bh); - if (err) - ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); - return err; -} - -static inline int -__ext4_journal_dirty_metadata(const char *where, - handle_t *handle, struct buffer_head *bh) -{ - int err = jbd2_journal_dirty_metadata(handle, bh); - if (err) - ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); - return err; -} - - -#define ext4_journal_get_undo_access(handle, bh) \ - __ext4_journal_get_undo_access(__FUNCTION__, (handle), (bh)) -#define ext4_journal_get_write_access(handle, bh) \ - __ext4_journal_get_write_access(__FUNCTION__, (handle), (bh)) -#define ext4_journal_revoke(handle, blocknr, bh) \ - __ext4_journal_revoke(__FUNCTION__, (handle), (blocknr), (bh)) -#define ext4_journal_get_create_access(handle, bh) \ - __ext4_journal_get_create_access(__FUNCTION__, (handle), (bh)) -#define ext4_journal_dirty_metadata(handle, bh) \ - __ext4_journal_dirty_metadata(__FUNCTION__, (handle), (bh)) -#define ext4_journal_forget(handle, bh) \ - __ext4_journal_forget(__FUNCTION__, (handle), (bh)) - -int ext4_journal_dirty_data(handle_t *handle, struct buffer_head *bh); - -handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks); -int __ext4_journal_stop(const char *where, handle_t *handle); - -static inline handle_t *ext4_journal_start(struct inode *inode, int nblocks) -{ - return ext4_journal_start_sb(inode->i_sb, nblocks); -} - -#define ext4_journal_stop(handle) \ - __ext4_journal_stop(__FUNCTION__, (handle)) - -static inline handle_t *ext4_journal_current_handle(void) -{ - return journal_current_handle(); -} - -static inline int ext4_journal_extend(handle_t *handle, int nblocks) -{ - return jbd2_journal_extend(handle, nblocks); -} - -static inline int ext4_journal_restart(handle_t *handle, int nblocks) -{ - return jbd2_journal_restart(handle, nblocks); -} - -static inline int ext4_journal_blocks_per_page(struct inode *inode) -{ - return jbd2_journal_blocks_per_page(inode); -} - -static inline int ext4_journal_force_commit(journal_t *journal) -{ - return jbd2_journal_force_commit(journal); -} - -/* super.c */ -int ext4_force_commit(struct super_block *sb); - -static inline int ext4_should_journal_data(struct inode *inode) -{ - if (!S_ISREG(inode->i_mode)) - return 1; - if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) - return 1; - if (EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL) - return 1; - return 0; -} - -static inline int ext4_should_order_data(struct inode *inode) -{ - if (!S_ISREG(inode->i_mode)) - return 0; - if (EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL) - return 0; - if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) - return 1; - return 0; -} - -static inline int ext4_should_writeback_data(struct inode *inode) -{ - if (!S_ISREG(inode->i_mode)) - return 0; - if (EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL) - return 0; - if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA) - return 1; - return 0; -} - -#endif /* _LINUX_EXT4_JBD_H */ diff --git a/trunk/include/linux/hugetlb.h b/trunk/include/linux/hugetlb.h index 5081d27bfa27..c25a38d8f600 100644 --- a/trunk/include/linux/hugetlb.h +++ b/trunk/include/linux/hugetlb.h @@ -17,7 +17,6 @@ int hugetlb_sysctl_handler(struct ctl_table *, int, struct file *, void __user * int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *); int follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *, struct page **, struct vm_area_struct **, unsigned long *, int *, int); void unmap_hugepage_range(struct vm_area_struct *, unsigned long, unsigned long); -void __unmap_hugepage_range(struct vm_area_struct *, unsigned long, unsigned long); int hugetlb_prefault(struct address_space *, struct vm_area_struct *); int hugetlb_report_meminfo(char *); int hugetlb_report_node_meminfo(int, char *); diff --git a/trunk/include/linux/io.h b/trunk/include/linux/io.h index 81877ea39309..2ad96c3f0e4e 100644 --- a/trunk/include/linux/io.h +++ b/trunk/include/linux/io.h @@ -28,31 +28,4 @@ void __iowrite64_copy(void __iomem *to, const void *from, size_t count); int ioremap_page_range(unsigned long addr, unsigned long end, unsigned long phys_addr, pgprot_t prot); -/** - * check_signature - find BIOS signatures - * @io_addr: mmio address to check - * @signature: signature block - * @length: length of signature - * - * Perform a signature comparison with the mmio address io_addr. This - * address should have been obtained by ioremap. - * Returns 1 on a match. - */ - -static inline int check_signature(const volatile void __iomem *io_addr, - const unsigned char *signature, int length) -{ - int retval = 0; - do { - if (readb(io_addr) != *signature) - goto out; - io_addr++; - signature++; - length--; - } while (length); - retval = 1; -out: - return retval; -} - #endif /* _LINUX_IO_H */ diff --git a/trunk/include/linux/jbd2.h b/trunk/include/linux/jbd2.h deleted file mode 100644 index ddb128795781..000000000000 --- a/trunk/include/linux/jbd2.h +++ /dev/null @@ -1,1107 +0,0 @@ -/* - * linux/include/linux/jbd2.h - * - * Written by Stephen C. Tweedie - * - * Copyright 1998-2000 Red Hat, Inc --- All Rights Reserved - * - * This file is part of the Linux kernel and is made available under - * the terms of the GNU General Public License, version 2, or at your - * option, any later version, incorporated herein by reference. - * - * Definitions for transaction data structures for the buffer cache - * filesystem journaling support. - */ - -#ifndef _LINUX_JBD_H -#define _LINUX_JBD_H - -/* Allow this file to be included directly into e2fsprogs */ -#ifndef __KERNEL__ -#include "jfs_compat.h" -#define JBD2_DEBUG -#define jfs_debug jbd_debug -#else - -#include -#include -#include -#include -#include -#include -#include - -#include -#endif - -#define journal_oom_retry 1 - -/* - * Define JBD_PARANIOD_IOFAIL to cause a kernel BUG() if ext3 finds - * certain classes of error which can occur due to failed IOs. Under - * normal use we want ext3 to continue after such errors, because - * hardware _can_ fail, but for debugging purposes when running tests on - * known-good hardware we may want to trap these errors. - */ -#undef JBD_PARANOID_IOFAIL - -/* - * The default maximum commit age, in seconds. - */ -#define JBD_DEFAULT_MAX_COMMIT_AGE 5 - -#ifdef CONFIG_JBD_DEBUG -/* - * Define JBD_EXPENSIVE_CHECKING to enable more expensive internal - * consistency checks. By default we don't do this unless - * CONFIG_JBD_DEBUG is on. - */ -#define JBD_EXPENSIVE_CHECKING -extern int jbd2_journal_enable_debug; - -#define jbd_debug(n, f, a...) \ - do { \ - if ((n) <= jbd2_journal_enable_debug) { \ - printk (KERN_DEBUG "(%s, %d): %s: ", \ - __FILE__, __LINE__, __FUNCTION__); \ - printk (f, ## a); \ - } \ - } while (0) -#else -#define jbd_debug(f, a...) /**/ -#endif - -extern void * __jbd2_kmalloc (const char *where, size_t size, gfp_t flags, int retry); -extern void * jbd2_slab_alloc(size_t size, gfp_t flags); -extern void jbd2_slab_free(void *ptr, size_t size); - -#define jbd_kmalloc(size, flags) \ - __jbd2_kmalloc(__FUNCTION__, (size), (flags), journal_oom_retry) -#define jbd_rep_kmalloc(size, flags) \ - __jbd2_kmalloc(__FUNCTION__, (size), (flags), 1) - -#define JBD2_MIN_JOURNAL_BLOCKS 1024 - -#ifdef __KERNEL__ - -/** - * typedef handle_t - The handle_t type represents a single atomic update being performed by some process. - * - * All filesystem modifications made by the process go - * through this handle. Recursive operations (such as quota operations) - * are gathered into a single update. - * - * The buffer credits field is used to account for journaled buffers - * being modified by the running process. To ensure that there is - * enough log space for all outstanding operations, we need to limit the - * number of outstanding buffers possible at any time. When the - * operation completes, any buffer credits not used are credited back to - * the transaction, so that at all times we know how many buffers the - * outstanding updates on a transaction might possibly touch. - * - * This is an opaque datatype. - **/ -typedef struct handle_s handle_t; /* Atomic operation type */ - - -/** - * typedef journal_t - The journal_t maintains all of the journaling state information for a single filesystem. - * - * journal_t is linked to from the fs superblock structure. - * - * We use the journal_t to keep track of all outstanding transaction - * activity on the filesystem, and to manage the state of the log - * writing process. - * - * This is an opaque datatype. - **/ -typedef struct journal_s journal_t; /* Journal control structure */ -#endif - -/* - * Internal structures used by the logging mechanism: - */ - -#define JBD2_MAGIC_NUMBER 0xc03b3998U /* The first 4 bytes of /dev/random! */ - -/* - * On-disk structures - */ - -/* - * Descriptor block types: - */ - -#define JBD2_DESCRIPTOR_BLOCK 1 -#define JBD2_COMMIT_BLOCK 2 -#define JBD2_SUPERBLOCK_V1 3 -#define JBD2_SUPERBLOCK_V2 4 -#define JBD2_REVOKE_BLOCK 5 - -/* - * Standard header for all descriptor blocks: - */ -typedef struct journal_header_s -{ - __be32 h_magic; - __be32 h_blocktype; - __be32 h_sequence; -} journal_header_t; - - -/* - * The block tag: used to describe a single buffer in the journal. - * t_blocknr_high is only used if INCOMPAT_64BIT is set, so this - * raw struct shouldn't be used for pointer math or sizeof() - use - * journal_tag_bytes(journal) instead to compute this. - */ -typedef struct journal_block_tag_s -{ - __be32 t_blocknr; /* The on-disk block number */ - __be32 t_flags; /* See below */ - __be32 t_blocknr_high; /* most-significant high 32bits. */ -} journal_block_tag_t; - -#define JBD_TAG_SIZE32 (offsetof(journal_block_tag_t, t_blocknr_high)) -#define JBD_TAG_SIZE64 (sizeof(journal_block_tag_t)) - -/* - * The revoke descriptor: used on disk to describe a series of blocks to - * be revoked from the log - */ -typedef struct jbd2_journal_revoke_header_s -{ - journal_header_t r_header; - __be32 r_count; /* Count of bytes used in the block */ -} jbd2_journal_revoke_header_t; - - -/* Definitions for the journal tag flags word: */ -#define JBD2_FLAG_ESCAPE 1 /* on-disk block is escaped */ -#define JBD2_FLAG_SAME_UUID 2 /* block has same uuid as previous */ -#define JBD2_FLAG_DELETED 4 /* block deleted by this transaction */ -#define JBD2_FLAG_LAST_TAG 8 /* last tag in this descriptor block */ - - -/* - * The journal superblock. All fields are in big-endian byte order. - */ -typedef struct journal_superblock_s -{ -/* 0x0000 */ - journal_header_t s_header; - -/* 0x000C */ - /* Static information describing the journal */ - __be32 s_blocksize; /* journal device blocksize */ - __be32 s_maxlen; /* total blocks in journal file */ - __be32 s_first; /* first block of log information */ - -/* 0x0018 */ - /* Dynamic information describing the current state of the log */ - __be32 s_sequence; /* first commit ID expected in log */ - __be32 s_start; /* blocknr of start of log */ - -/* 0x0020 */ - /* Error value, as set by jbd2_journal_abort(). */ - __be32 s_errno; - -/* 0x0024 */ - /* Remaining fields are only valid in a version-2 superblock */ - __be32 s_feature_compat; /* compatible feature set */ - __be32 s_feature_incompat; /* incompatible feature set */ - __be32 s_feature_ro_compat; /* readonly-compatible feature set */ -/* 0x0030 */ - __u8 s_uuid[16]; /* 128-bit uuid for journal */ - -/* 0x0040 */ - __be32 s_nr_users; /* Nr of filesystems sharing log */ - - __be32 s_dynsuper; /* Blocknr of dynamic superblock copy*/ - -/* 0x0048 */ - __be32 s_max_transaction; /* Limit of journal blocks per trans.*/ - __be32 s_max_trans_data; /* Limit of data blocks per trans. */ - -/* 0x0050 */ - __u32 s_padding[44]; - -/* 0x0100 */ - __u8 s_users[16*48]; /* ids of all fs'es sharing the log */ -/* 0x0400 */ -} journal_superblock_t; - -#define JBD2_HAS_COMPAT_FEATURE(j,mask) \ - ((j)->j_format_version >= 2 && \ - ((j)->j_superblock->s_feature_compat & cpu_to_be32((mask)))) -#define JBD2_HAS_RO_COMPAT_FEATURE(j,mask) \ - ((j)->j_format_version >= 2 && \ - ((j)->j_superblock->s_feature_ro_compat & cpu_to_be32((mask)))) -#define JBD2_HAS_INCOMPAT_FEATURE(j,mask) \ - ((j)->j_format_version >= 2 && \ - ((j)->j_superblock->s_feature_incompat & cpu_to_be32((mask)))) - -#define JBD2_FEATURE_INCOMPAT_REVOKE 0x00000001 -#define JBD2_FEATURE_INCOMPAT_64BIT 0x00000002 - -/* Features known to this kernel version: */ -#define JBD2_KNOWN_COMPAT_FEATURES 0 -#define JBD2_KNOWN_ROCOMPAT_FEATURES 0 -#define JBD2_KNOWN_INCOMPAT_FEATURES (JBD2_FEATURE_INCOMPAT_REVOKE | \ - JBD2_FEATURE_INCOMPAT_64BIT) - -#ifdef __KERNEL__ - -#include -#include - -#define JBD_ASSERTIONS -#ifdef JBD_ASSERTIONS -#define J_ASSERT(assert) \ -do { \ - if (!(assert)) { \ - printk (KERN_EMERG \ - "Assertion failure in %s() at %s:%d: \"%s\"\n", \ - __FUNCTION__, __FILE__, __LINE__, # assert); \ - BUG(); \ - } \ -} while (0) - -#if defined(CONFIG_BUFFER_DEBUG) -void buffer_assertion_failure(struct buffer_head *bh); -#define J_ASSERT_BH(bh, expr) \ - do { \ - if (!(expr)) \ - buffer_assertion_failure(bh); \ - J_ASSERT(expr); \ - } while (0) -#define J_ASSERT_JH(jh, expr) J_ASSERT_BH(jh2bh(jh), expr) -#else -#define J_ASSERT_BH(bh, expr) J_ASSERT(expr) -#define J_ASSERT_JH(jh, expr) J_ASSERT(expr) -#endif - -#else -#define J_ASSERT(assert) do { } while (0) -#endif /* JBD_ASSERTIONS */ - -#if defined(JBD_PARANOID_IOFAIL) -#define J_EXPECT(expr, why...) J_ASSERT(expr) -#define J_EXPECT_BH(bh, expr, why...) J_ASSERT_BH(bh, expr) -#define J_EXPECT_JH(jh, expr, why...) J_ASSERT_JH(jh, expr) -#else -#define __journal_expect(expr, why...) \ - ({ \ - int val = (expr); \ - if (!val) { \ - printk(KERN_ERR \ - "EXT3-fs unexpected failure: %s;\n",# expr); \ - printk(KERN_ERR why "\n"); \ - } \ - val; \ - }) -#define J_EXPECT(expr, why...) __journal_expect(expr, ## why) -#define J_EXPECT_BH(bh, expr, why...) __journal_expect(expr, ## why) -#define J_EXPECT_JH(jh, expr, why...) __journal_expect(expr, ## why) -#endif - -enum jbd_state_bits { - BH_JBD /* Has an attached ext3 journal_head */ - = BH_PrivateStart, - BH_JWrite, /* Being written to log (@@@ DEBUGGING) */ - BH_Freed, /* Has been freed (truncated) */ - BH_Revoked, /* Has been revoked from the log */ - BH_RevokeValid, /* Revoked flag is valid */ - BH_JBDDirty, /* Is dirty but journaled */ - BH_State, /* Pins most journal_head state */ - BH_JournalHead, /* Pins bh->b_private and jh->b_bh */ - BH_Unshadow, /* Dummy bit, for BJ_Shadow wakeup filtering */ -}; - -BUFFER_FNS(JBD, jbd) -BUFFER_FNS(JWrite, jwrite) -BUFFER_FNS(JBDDirty, jbddirty) -TAS_BUFFER_FNS(JBDDirty, jbddirty) -BUFFER_FNS(Revoked, revoked) -TAS_BUFFER_FNS(Revoked, revoked) -BUFFER_FNS(RevokeValid, revokevalid) -TAS_BUFFER_FNS(RevokeValid, revokevalid) -BUFFER_FNS(Freed, freed) - -static inline struct buffer_head *jh2bh(struct journal_head *jh) -{ - return jh->b_bh; -} - -static inline struct journal_head *bh2jh(struct buffer_head *bh) -{ - return bh->b_private; -} - -static inline void jbd_lock_bh_state(struct buffer_head *bh) -{ - bit_spin_lock(BH_State, &bh->b_state); -} - -static inline int jbd_trylock_bh_state(struct buffer_head *bh) -{ - return bit_spin_trylock(BH_State, &bh->b_state); -} - -static inline int jbd_is_locked_bh_state(struct buffer_head *bh) -{ - return bit_spin_is_locked(BH_State, &bh->b_state); -} - -static inline void jbd_unlock_bh_state(struct buffer_head *bh) -{ - bit_spin_unlock(BH_State, &bh->b_state); -} - -static inline void jbd_lock_bh_journal_head(struct buffer_head *bh) -{ - bit_spin_lock(BH_JournalHead, &bh->b_state); -} - -static inline void jbd_unlock_bh_journal_head(struct buffer_head *bh) -{ - bit_spin_unlock(BH_JournalHead, &bh->b_state); -} - -struct jbd2_revoke_table_s; - -/** - * struct handle_s - The handle_s type is the concrete type associated with - * handle_t. - * @h_transaction: Which compound transaction is this update a part of? - * @h_buffer_credits: Number of remaining buffers we are allowed to dirty. - * @h_ref: Reference count on this handle - * @h_err: Field for caller's use to track errors through large fs operations - * @h_sync: flag for sync-on-close - * @h_jdata: flag to force data journaling - * @h_aborted: flag indicating fatal error on handle - **/ - -/* Docbook can't yet cope with the bit fields, but will leave the documentation - * in so it can be fixed later. - */ - -struct handle_s -{ - /* Which compound transaction is this update a part of? */ - transaction_t *h_transaction; - - /* Number of remaining buffers we are allowed to dirty: */ - int h_buffer_credits; - - /* Reference count on this handle */ - int h_ref; - - /* Field for caller's use to track errors through large fs */ - /* operations */ - int h_err; - - /* Flags [no locking] */ - unsigned int h_sync: 1; /* sync-on-close */ - unsigned int h_jdata: 1; /* force data journaling */ - unsigned int h_aborted: 1; /* fatal error on handle */ -}; - - -/* The transaction_t type is the guts of the journaling mechanism. It - * tracks a compound transaction through its various states: - * - * RUNNING: accepting new updates - * LOCKED: Updates still running but we don't accept new ones - * RUNDOWN: Updates are tidying up but have finished requesting - * new buffers to modify (state not used for now) - * FLUSH: All updates complete, but we are still writing to disk - * COMMIT: All data on disk, writing commit record - * FINISHED: We still have to keep the transaction for checkpointing. - * - * The transaction keeps track of all of the buffers modified by a - * running transaction, and all of the buffers committed but not yet - * flushed to home for finished transactions. - */ - -/* - * Lock ranking: - * - * j_list_lock - * ->jbd_lock_bh_journal_head() (This is "innermost") - * - * j_state_lock - * ->jbd_lock_bh_state() - * - * jbd_lock_bh_state() - * ->j_list_lock - * - * j_state_lock - * ->t_handle_lock - * - * j_state_lock - * ->j_list_lock (journal_unmap_buffer) - * - */ - -struct transaction_s -{ - /* Pointer to the journal for this transaction. [no locking] */ - journal_t *t_journal; - - /* Sequence number for this transaction [no locking] */ - tid_t t_tid; - - /* - * Transaction's current state - * [no locking - only kjournald2 alters this] - * FIXME: needs barriers - * KLUDGE: [use j_state_lock] - */ - enum { - T_RUNNING, - T_LOCKED, - T_RUNDOWN, - T_FLUSH, - T_COMMIT, - T_FINISHED - } t_state; - - /* - * Where in the log does this transaction's commit start? [no locking] - */ - unsigned long t_log_start; - - /* Number of buffers on the t_buffers list [j_list_lock] */ - int t_nr_buffers; - - /* - * Doubly-linked circular list of all buffers reserved but not yet - * modified by this transaction [j_list_lock] - */ - struct journal_head *t_reserved_list; - - /* - * Doubly-linked circular list of all buffers under writeout during - * commit [j_list_lock] - */ - struct journal_head *t_locked_list; - - /* - * Doubly-linked circular list of all metadata buffers owned by this - * transaction [j_list_lock] - */ - struct journal_head *t_buffers; - - /* - * Doubly-linked circular list of all data buffers still to be - * flushed before this transaction can be committed [j_list_lock] - */ - struct journal_head *t_sync_datalist; - - /* - * Doubly-linked circular list of all forget buffers (superseded - * buffers which we can un-checkpoint once this transaction commits) - * [j_list_lock] - */ - struct journal_head *t_forget; - - /* - * Doubly-linked circular list of all buffers still to be flushed before - * this transaction can be checkpointed. [j_list_lock] - */ - struct journal_head *t_checkpoint_list; - - /* - * Doubly-linked circular list of all buffers submitted for IO while - * checkpointing. [j_list_lock] - */ - struct journal_head *t_checkpoint_io_list; - - /* - * Doubly-linked circular list of temporary buffers currently undergoing - * IO in the log [j_list_lock] - */ - struct journal_head *t_iobuf_list; - - /* - * Doubly-linked circular list of metadata buffers being shadowed by log - * IO. The IO buffers on the iobuf list and the shadow buffers on this - * list match each other one for one at all times. [j_list_lock] - */ - struct journal_head *t_shadow_list; - - /* - * Doubly-linked circular list of control buffers being written to the - * log. [j_list_lock] - */ - struct journal_head *t_log_list; - - /* - * Protects info related to handles - */ - spinlock_t t_handle_lock; - - /* - * Number of outstanding updates running on this transaction - * [t_handle_lock] - */ - int t_updates; - - /* - * Number of buffers reserved for use by all handles in this transaction - * handle but not yet modified. [t_handle_lock] - */ - int t_outstanding_credits; - - /* - * Forward and backward links for the circular list of all transactions - * awaiting checkpoint. [j_list_lock] - */ - transaction_t *t_cpnext, *t_cpprev; - - /* - * When will the transaction expire (become due for commit), in jiffies? - * [no locking] - */ - unsigned long t_expires; - - /* - * How many handles used this transaction? [t_handle_lock] - */ - int t_handle_count; - -}; - -/** - * struct journal_s - The journal_s type is the concrete type associated with - * journal_t. - * @j_flags: General journaling state flags - * @j_errno: Is there an outstanding uncleared error on the journal (from a - * prior abort)? - * @j_sb_buffer: First part of superblock buffer - * @j_superblock: Second part of superblock buffer - * @j_format_version: Version of the superblock format - * @j_state_lock: Protect the various scalars in the journal - * @j_barrier_count: Number of processes waiting to create a barrier lock - * @j_barrier: The barrier lock itself - * @j_running_transaction: The current running transaction.. - * @j_committing_transaction: the transaction we are pushing to disk - * @j_checkpoint_transactions: a linked circular list of all transactions - * waiting for checkpointing - * @j_wait_transaction_locked: Wait queue for waiting for a locked transaction - * to start committing, or for a barrier lock to be released - * @j_wait_logspace: Wait queue for waiting for checkpointing to complete - * @j_wait_done_commit: Wait queue for waiting for commit to complete - * @j_wait_checkpoint: Wait queue to trigger checkpointing - * @j_wait_commit: Wait queue to trigger commit - * @j_wait_updates: Wait queue to wait for updates to complete - * @j_checkpoint_mutex: Mutex for locking against concurrent checkpoints - * @j_head: Journal head - identifies the first unused block in the journal - * @j_tail: Journal tail - identifies the oldest still-used block in the - * journal. - * @j_free: Journal free - how many free blocks are there in the journal? - * @j_first: The block number of the first usable block - * @j_last: The block number one beyond the last usable block - * @j_dev: Device where we store the journal - * @j_blocksize: blocksize for the location where we store the journal. - * @j_blk_offset: starting block offset for into the device where we store the - * journal - * @j_fs_dev: Device which holds the client fs. For internal journal this will - * be equal to j_dev - * @j_maxlen: Total maximum capacity of the journal region on disk. - * @j_list_lock: Protects the buffer lists and internal buffer state. - * @j_inode: Optional inode where we store the journal. If present, all journal - * block numbers are mapped into this inode via bmap(). - * @j_tail_sequence: Sequence number of the oldest transaction in the log - * @j_transaction_sequence: Sequence number of the next transaction to grant - * @j_commit_sequence: Sequence number of the most recently committed - * transaction - * @j_commit_request: Sequence number of the most recent transaction wanting - * commit - * @j_uuid: Uuid of client object. - * @j_task: Pointer to the current commit thread for this journal - * @j_max_transaction_buffers: Maximum number of metadata buffers to allow in a - * single compound commit transaction - * @j_commit_interval: What is the maximum transaction lifetime before we begin - * a commit? - * @j_commit_timer: The timer used to wakeup the commit thread - * @j_revoke_lock: Protect the revoke table - * @j_revoke: The revoke table - maintains the list of revoked blocks in the - * current transaction. - * @j_revoke_table: alternate revoke tables for j_revoke - * @j_wbuf: array of buffer_heads for jbd2_journal_commit_transaction - * @j_wbufsize: maximum number of buffer_heads allowed in j_wbuf, the - * number that will fit in j_blocksize - * @j_last_sync_writer: most recent pid which did a synchronous write - * @j_private: An opaque pointer to fs-private information. - */ - -struct journal_s -{ - /* General journaling state flags [j_state_lock] */ - unsigned long j_flags; - - /* - * Is there an outstanding uncleared error on the journal (from a prior - * abort)? [j_state_lock] - */ - int j_errno; - - /* The superblock buffer */ - struct buffer_head *j_sb_buffer; - journal_superblock_t *j_superblock; - - /* Version of the superblock format */ - int j_format_version; - - /* - * Protect the various scalars in the journal - */ - spinlock_t j_state_lock; - - /* - * Number of processes waiting to create a barrier lock [j_state_lock] - */ - int j_barrier_count; - - /* The barrier lock itself */ - struct mutex j_barrier; - - /* - * Transactions: The current running transaction... - * [j_state_lock] [caller holding open handle] - */ - transaction_t *j_running_transaction; - - /* - * the transaction we are pushing to disk - * [j_state_lock] [caller holding open handle] - */ - transaction_t *j_committing_transaction; - - /* - * ... and a linked circular list of all transactions waiting for - * checkpointing. [j_list_lock] - */ - transaction_t *j_checkpoint_transactions; - - /* - * Wait queue for waiting for a locked transaction to start committing, - * or for a barrier lock to be released - */ - wait_queue_head_t j_wait_transaction_locked; - - /* Wait queue for waiting for checkpointing to complete */ - wait_queue_head_t j_wait_logspace; - - /* Wait queue for waiting for commit to complete */ - wait_queue_head_t j_wait_done_commit; - - /* Wait queue to trigger checkpointing */ - wait_queue_head_t j_wait_checkpoint; - - /* Wait queue to trigger commit */ - wait_queue_head_t j_wait_commit; - - /* Wait queue to wait for updates to complete */ - wait_queue_head_t j_wait_updates; - - /* Semaphore for locking against concurrent checkpoints */ - struct mutex j_checkpoint_mutex; - - /* - * Journal head: identifies the first unused block in the journal. - * [j_state_lock] - */ - unsigned long j_head; - - /* - * Journal tail: identifies the oldest still-used block in the journal. - * [j_state_lock] - */ - unsigned long j_tail; - - /* - * Journal free: how many free blocks are there in the journal? - * [j_state_lock] - */ - unsigned long j_free; - - /* - * Journal start and end: the block numbers of the first usable block - * and one beyond the last usable block in the journal. [j_state_lock] - */ - unsigned long j_first; - unsigned long j_last; - - /* - * Device, blocksize and starting block offset for the location where we - * store the journal. - */ - struct block_device *j_dev; - int j_blocksize; - unsigned long long j_blk_offset; - - /* - * Device which holds the client fs. For internal journal this will be - * equal to j_dev. - */ - struct block_device *j_fs_dev; - - /* Total maximum capacity of the journal region on disk. */ - unsigned int j_maxlen; - - /* - * Protects the buffer lists and internal buffer state. - */ - spinlock_t j_list_lock; - - /* Optional inode where we store the journal. If present, all */ - /* journal block numbers are mapped into this inode via */ - /* bmap(). */ - struct inode *j_inode; - - /* - * Sequence number of the oldest transaction in the log [j_state_lock] - */ - tid_t j_tail_sequence; - - /* - * Sequence number of the next transaction to grant [j_state_lock] - */ - tid_t j_transaction_sequence; - - /* - * Sequence number of the most recently committed transaction - * [j_state_lock]. - */ - tid_t j_commit_sequence; - - /* - * Sequence number of the most recent transaction wanting commit - * [j_state_lock] - */ - tid_t j_commit_request; - - /* - * Journal uuid: identifies the object (filesystem, LVM volume etc) - * backed by this journal. This will eventually be replaced by an array - * of uuids, allowing us to index multiple devices within a single - * journal and to perform atomic updates across them. - */ - __u8 j_uuid[16]; - - /* Pointer to the current commit thread for this journal */ - struct task_struct *j_task; - - /* - * Maximum number of metadata buffers to allow in a single compound - * commit transaction - */ - int j_max_transaction_buffers; - - /* - * What is the maximum transaction lifetime before we begin a commit? - */ - unsigned long j_commit_interval; - - /* The timer used to wakeup the commit thread: */ - struct timer_list j_commit_timer; - - /* - * The revoke table: maintains the list of revoked blocks in the - * current transaction. [j_revoke_lock] - */ - spinlock_t j_revoke_lock; - struct jbd2_revoke_table_s *j_revoke; - struct jbd2_revoke_table_s *j_revoke_table[2]; - - /* - * array of bhs for jbd2_journal_commit_transaction - */ - struct buffer_head **j_wbuf; - int j_wbufsize; - - pid_t j_last_sync_writer; - - /* - * An opaque pointer to fs-private information. ext3 puts its - * superblock pointer here - */ - void *j_private; -}; - -/* - * Journal flag definitions - */ -#define JBD2_UNMOUNT 0x001 /* Journal thread is being destroyed */ -#define JBD2_ABORT 0x002 /* Journaling has been aborted for errors. */ -#define JBD2_ACK_ERR 0x004 /* The errno in the sb has been acked */ -#define JBD2_FLUSHED 0x008 /* The journal superblock has been flushed */ -#define JBD2_LOADED 0x010 /* The journal superblock has been loaded */ -#define JBD2_BARRIER 0x020 /* Use IDE barriers */ - -/* - * Function declarations for the journaling transaction and buffer - * management - */ - -/* Filing buffers */ -extern void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh); -extern void jbd2_journal_unfile_buffer(journal_t *, struct journal_head *); -extern void __jbd2_journal_unfile_buffer(struct journal_head *); -extern void __jbd2_journal_refile_buffer(struct journal_head *); -extern void jbd2_journal_refile_buffer(journal_t *, struct journal_head *); -extern void __jbd2_journal_file_buffer(struct journal_head *, transaction_t *, int); -extern void __journal_free_buffer(struct journal_head *bh); -extern void jbd2_journal_file_buffer(struct journal_head *, transaction_t *, int); -extern void __journal_clean_data_list(transaction_t *transaction); - -/* Log buffer allocation */ -extern struct journal_head * jbd2_journal_get_descriptor_buffer(journal_t *); -int jbd2_journal_next_log_block(journal_t *, unsigned long long *); - -/* Commit management */ -extern void jbd2_journal_commit_transaction(journal_t *); - -/* Checkpoint list management */ -int __jbd2_journal_clean_checkpoint_list(journal_t *journal); -int __jbd2_journal_remove_checkpoint(struct journal_head *); -void __jbd2_journal_insert_checkpoint(struct journal_head *, transaction_t *); - -/* Buffer IO */ -extern int -jbd2_journal_write_metadata_buffer(transaction_t *transaction, - struct journal_head *jh_in, - struct journal_head **jh_out, - unsigned long long blocknr); - -/* Transaction locking */ -extern void __wait_on_journal (journal_t *); - -/* - * Journal locking. - * - * We need to lock the journal during transaction state changes so that nobody - * ever tries to take a handle on the running transaction while we are in the - * middle of moving it to the commit phase. j_state_lock does this. - * - * Note that the locking is completely interrupt unsafe. We never touch - * journal structures from interrupts. - */ - -static inline handle_t *journal_current_handle(void) -{ - return current->journal_info; -} - -/* The journaling code user interface: - * - * Create and destroy handles - * Register buffer modifications against the current transaction. - */ - -extern handle_t *jbd2_journal_start(journal_t *, int nblocks); -extern int jbd2_journal_restart (handle_t *, int nblocks); -extern int jbd2_journal_extend (handle_t *, int nblocks); -extern int jbd2_journal_get_write_access(handle_t *, struct buffer_head *); -extern int jbd2_journal_get_create_access (handle_t *, struct buffer_head *); -extern int jbd2_journal_get_undo_access(handle_t *, struct buffer_head *); -extern int jbd2_journal_dirty_data (handle_t *, struct buffer_head *); -extern int jbd2_journal_dirty_metadata (handle_t *, struct buffer_head *); -extern void jbd2_journal_release_buffer (handle_t *, struct buffer_head *); -extern int jbd2_journal_forget (handle_t *, struct buffer_head *); -extern void journal_sync_buffer (struct buffer_head *); -extern void jbd2_journal_invalidatepage(journal_t *, - struct page *, unsigned long); -extern int jbd2_journal_try_to_free_buffers(journal_t *, struct page *, gfp_t); -extern int jbd2_journal_stop(handle_t *); -extern int jbd2_journal_flush (journal_t *); -extern void jbd2_journal_lock_updates (journal_t *); -extern void jbd2_journal_unlock_updates (journal_t *); - -extern journal_t * jbd2_journal_init_dev(struct block_device *bdev, - struct block_device *fs_dev, - unsigned long long start, int len, int bsize); -extern journal_t * jbd2_journal_init_inode (struct inode *); -extern int jbd2_journal_update_format (journal_t *); -extern int jbd2_journal_check_used_features - (journal_t *, unsigned long, unsigned long, unsigned long); -extern int jbd2_journal_check_available_features - (journal_t *, unsigned long, unsigned long, unsigned long); -extern int jbd2_journal_set_features - (journal_t *, unsigned long, unsigned long, unsigned long); -extern int jbd2_journal_create (journal_t *); -extern int jbd2_journal_load (journal_t *journal); -extern void jbd2_journal_destroy (journal_t *); -extern int jbd2_journal_recover (journal_t *journal); -extern int jbd2_journal_wipe (journal_t *, int); -extern int jbd2_journal_skip_recovery (journal_t *); -extern void jbd2_journal_update_superblock (journal_t *, int); -extern void __jbd2_journal_abort_hard (journal_t *); -extern void jbd2_journal_abort (journal_t *, int); -extern int jbd2_journal_errno (journal_t *); -extern void jbd2_journal_ack_err (journal_t *); -extern int jbd2_journal_clear_err (journal_t *); -extern int jbd2_journal_bmap(journal_t *, unsigned long, unsigned long long *); -extern int jbd2_journal_force_commit(journal_t *); - -/* - * journal_head management - */ -struct journal_head *jbd2_journal_add_journal_head(struct buffer_head *bh); -struct journal_head *jbd2_journal_grab_journal_head(struct buffer_head *bh); -void jbd2_journal_remove_journal_head(struct buffer_head *bh); -void jbd2_journal_put_journal_head(struct journal_head *jh); - -/* - * handle management - */ -extern kmem_cache_t *jbd2_handle_cache; - -static inline handle_t *jbd_alloc_handle(gfp_t gfp_flags) -{ - return kmem_cache_alloc(jbd2_handle_cache, gfp_flags); -} - -static inline void jbd_free_handle(handle_t *handle) -{ - kmem_cache_free(jbd2_handle_cache, handle); -} - -/* Primary revoke support */ -#define JOURNAL_REVOKE_DEFAULT_HASH 256 -extern int jbd2_journal_init_revoke(journal_t *, int); -extern void jbd2_journal_destroy_revoke_caches(void); -extern int jbd2_journal_init_revoke_caches(void); - -extern void jbd2_journal_destroy_revoke(journal_t *); -extern int jbd2_journal_revoke (handle_t *, unsigned long long, struct buffer_head *); -extern int jbd2_journal_cancel_revoke(handle_t *, struct journal_head *); -extern void jbd2_journal_write_revoke_records(journal_t *, transaction_t *); - -/* Recovery revoke support */ -extern int jbd2_journal_set_revoke(journal_t *, unsigned long long, tid_t); -extern int jbd2_journal_test_revoke(journal_t *, unsigned long long, tid_t); -extern void jbd2_journal_clear_revoke(journal_t *); -extern void jbd2_journal_switch_revoke_table(journal_t *journal); - -/* - * The log thread user interface: - * - * Request space in the current transaction, and force transaction commit - * transitions on demand. - */ - -int __jbd2_log_space_left(journal_t *); /* Called with journal locked */ -int jbd2_log_start_commit(journal_t *journal, tid_t tid); -int __jbd2_log_start_commit(journal_t *journal, tid_t tid); -int jbd2_journal_start_commit(journal_t *journal, tid_t *tid); -int jbd2_journal_force_commit_nested(journal_t *journal); -int jbd2_log_wait_commit(journal_t *journal, tid_t tid); -int jbd2_log_do_checkpoint(journal_t *journal); - -void __jbd2_log_wait_for_space(journal_t *journal); -extern void __jbd2_journal_drop_transaction(journal_t *, transaction_t *); -extern int jbd2_cleanup_journal_tail(journal_t *); - -/* Debugging code only: */ - -#define jbd_ENOSYS() \ -do { \ - printk (KERN_ERR "JBD unimplemented function %s\n", __FUNCTION__); \ - current->state = TASK_UNINTERRUPTIBLE; \ - schedule(); \ -} while (1) - -/* - * is_journal_abort - * - * Simple test wrapper function to test the JBD2_ABORT state flag. This - * bit, when set, indicates that we have had a fatal error somewhere, - * either inside the journaling layer or indicated to us by the client - * (eg. ext3), and that we and should not commit any further - * transactions. - */ - -static inline int is_journal_aborted(journal_t *journal) -{ - return journal->j_flags & JBD2_ABORT; -} - -static inline int is_handle_aborted(handle_t *handle) -{ - if (handle->h_aborted) - return 1; - return is_journal_aborted(handle->h_transaction->t_journal); -} - -static inline void jbd2_journal_abort_handle(handle_t *handle) -{ - handle->h_aborted = 1; -} - -#endif /* __KERNEL__ */ - -/* Comparison functions for transaction IDs: perform comparisons using - * modulo arithmetic so that they work over sequence number wraps. */ - -static inline int tid_gt(tid_t x, tid_t y) -{ - int difference = (x - y); - return (difference > 0); -} - -static inline int tid_geq(tid_t x, tid_t y) -{ - int difference = (x - y); - return (difference >= 0); -} - -extern int jbd2_journal_blocks_per_page(struct inode *inode); -extern size_t journal_tag_bytes(journal_t *journal); - -/* - * Return the minimum number of blocks which must be free in the journal - * before a new transaction may be started. Must be called under j_state_lock. - */ -static inline int jbd_space_needed(journal_t *journal) -{ - int nblocks = journal->j_max_transaction_buffers; - if (journal->j_committing_transaction) - nblocks += journal->j_committing_transaction-> - t_outstanding_credits; - return nblocks; -} - -/* - * Definitions which augment the buffer_head layer - */ - -/* journaling buffer types */ -#define BJ_None 0 /* Not journaled */ -#define BJ_SyncData 1 /* Normal data: flush before commit */ -#define BJ_Metadata 2 /* Normal journaled metadata */ -#define BJ_Forget 3 /* Buffer superseded by this transaction */ -#define BJ_IO 4 /* Buffer is for temporary IO use */ -#define BJ_Shadow 5 /* Buffer contents being shadowed to the log */ -#define BJ_LogCtl 6 /* Buffer contains log descriptors */ -#define BJ_Reserved 7 /* Buffer is reserved for access by journal */ -#define BJ_Locked 8 /* Locked for I/O during commit */ -#define BJ_Types 9 - -extern int jbd_blocks_per_page(struct inode *inode); - -#ifdef __KERNEL__ - -#define buffer_trace_init(bh) do {} while (0) -#define print_buffer_fields(bh) do {} while (0) -#define print_buffer_trace(bh) do {} while (0) -#define BUFFER_TRACE(bh, info) do {} while (0) -#define BUFFER_TRACE2(bh, bh2, info) do {} while (0) -#define JBUFFER_TRACE(jh, info) do {} while (0) - -#endif /* __KERNEL__ */ - -#endif /* _LINUX_JBD_H */ diff --git a/trunk/include/linux/magic.h b/trunk/include/linux/magic.h index 156c40fc664e..22036dd2ba36 100644 --- a/trunk/include/linux/magic.h +++ b/trunk/include/linux/magic.h @@ -8,7 +8,6 @@ #define EFS_SUPER_MAGIC 0x414A53 #define EXT2_SUPER_MAGIC 0xEF53 #define EXT3_SUPER_MAGIC 0xEF53 -#define EXT4_SUPER_MAGIC 0xEF53 #define HPFS_SUPER_MAGIC 0xf995e849 #define ISOFS_SUPER_MAGIC 0x9660 #define JFFS2_SUPER_MAGIC 0x72b6 diff --git a/trunk/include/linux/mm.h b/trunk/include/linux/mm.h index 5a6068ff5556..26146623be2f 100644 --- a/trunk/include/linux/mm.h +++ b/trunk/include/linux/mm.h @@ -1103,7 +1103,12 @@ static inline void vm_stat_account(struct mm_struct *mm, #ifndef CONFIG_DEBUG_PAGEALLOC static inline void -kernel_map_pages(struct page *page, int numpages, int enable) {} +kernel_map_pages(struct page *page, int numpages, int enable) +{ + if (!PageHighMem(page) && !enable) + debug_check_no_locks_freed(page_address(page), + numpages * PAGE_SIZE); +} #endif extern struct vm_area_struct *get_gate_vma(struct task_struct *tsk); diff --git a/trunk/include/linux/module.h b/trunk/include/linux/module.h index d1d00ce8f4ed..4b2d8091a410 100644 --- a/trunk/include/linux/module.h +++ b/trunk/include/linux/module.h @@ -317,6 +317,9 @@ struct module /* Am I unsafe to unload? */ int unsafe; + /* Am I GPL-compatible */ + int license_gplok; + unsigned int taints; /* same bits as kernel:tainted */ #ifdef CONFIG_MODULE_UNLOAD diff --git a/trunk/include/linux/nbd.h b/trunk/include/linux/nbd.h index d6b6dc09ad97..e712e7d47cc2 100644 --- a/trunk/include/linux/nbd.h +++ b/trunk/include/linux/nbd.h @@ -15,8 +15,6 @@ #ifndef LINUX_NBD_H #define LINUX_NBD_H -#include - #define NBD_SET_SOCK _IO( 0xab, 0 ) #define NBD_SET_BLKSIZE _IO( 0xab, 1 ) #define NBD_SET_SIZE _IO( 0xab, 2 ) diff --git a/trunk/include/linux/nodemask.h b/trunk/include/linux/nodemask.h index b1063e9cdb1b..5dce5c21822c 100644 --- a/trunk/include/linux/nodemask.h +++ b/trunk/include/linux/nodemask.h @@ -8,8 +8,8 @@ * See detailed comments in the file linux/bitmap.h describing the * data type on which these nodemasks are based. * - * For details of nodemask_scnprintf() and nodemask_parse_user(), - * see bitmap_scnprintf() and bitmap_parse_user() in lib/bitmap.c. + * For details of nodemask_scnprintf() and nodemask_parse(), + * see bitmap_scnprintf() and bitmap_parse() in lib/bitmap.c. * For details of nodelist_scnprintf() and nodelist_parse(), see * bitmap_scnlistprintf() and bitmap_parselist(), also in bitmap.c. * For details of node_remap(), see bitmap_bitremap in lib/bitmap.c. @@ -51,7 +51,7 @@ * unsigned long *nodes_addr(mask) Array of unsigned long's in mask * * int nodemask_scnprintf(buf, len, mask) Format nodemask for printing - * int nodemask_parse_user(ubuf, ulen, mask) Parse ascii string as nodemask + * int nodemask_parse(ubuf, ulen, mask) Parse ascii string as nodemask * int nodelist_scnprintf(buf, len, mask) Format nodemask as list for printing * int nodelist_parse(buf, map) Parse ascii string as nodelist * int node_remap(oldbit, old, new) newbit = map(old, new)(oldbit) @@ -288,12 +288,12 @@ static inline int __nodemask_scnprintf(char *buf, int len, return bitmap_scnprintf(buf, len, srcp->bits, nbits); } -#define nodemask_parse_user(ubuf, ulen, dst) \ - __nodemask_parse_user((ubuf), (ulen), &(dst), MAX_NUMNODES) -static inline int __nodemask_parse_user(const char __user *buf, int len, +#define nodemask_parse(ubuf, ulen, dst) \ + __nodemask_parse((ubuf), (ulen), &(dst), MAX_NUMNODES) +static inline int __nodemask_parse(const char __user *buf, int len, nodemask_t *dstp, int nbits) { - return bitmap_parse_user(buf, len, dstp->bits, nbits); + return bitmap_parse(buf, len, dstp->bits, nbits); } #define nodelist_scnprintf(buf, len, src) \ diff --git a/trunk/include/linux/security.h b/trunk/include/linux/security.h index b200b9856f32..9b5fea81f55e 100644 --- a/trunk/include/linux/security.h +++ b/trunk/include/linux/security.h @@ -882,8 +882,7 @@ struct request_sock; * Check permission when a flow selects a xfrm_policy for processing * XFRMs on a packet. The hook is called when selecting either a * per-socket policy or a generic xfrm policy. - * Return 0 if permission is granted, -ESRCH otherwise, or -errno - * on other errors. + * Return 0 if permission is granted. * @xfrm_state_pol_flow_match: * @x contains the state to match. * @xp contains the policy to check for a match. @@ -892,7 +891,6 @@ struct request_sock; * @xfrm_flow_state_match: * @fl contains the flow key to match. * @xfrm points to the xfrm_state to match. - * @xp points to the xfrm_policy to match. * Return 1 if there is a match. * @xfrm_decode_session: * @skb points to skb to decode. @@ -1390,8 +1388,7 @@ struct security_operations { int (*xfrm_policy_lookup)(struct xfrm_policy *xp, u32 fl_secid, u8 dir); int (*xfrm_state_pol_flow_match)(struct xfrm_state *x, struct xfrm_policy *xp, struct flowi *fl); - int (*xfrm_flow_state_match)(struct flowi *fl, struct xfrm_state *xfrm, - struct xfrm_policy *xp); + int (*xfrm_flow_state_match)(struct flowi *fl, struct xfrm_state *xfrm); int (*xfrm_decode_session)(struct sk_buff *skb, u32 *secid, int ckall); #endif /* CONFIG_SECURITY_NETWORK_XFRM */ @@ -3123,6 +3120,11 @@ static inline int security_xfrm_policy_alloc(struct xfrm_policy *xp, struct xfrm return security_ops->xfrm_policy_alloc_security(xp, sec_ctx, NULL); } +static inline int security_xfrm_sock_policy_alloc(struct xfrm_policy *xp, struct sock *sk) +{ + return security_ops->xfrm_policy_alloc_security(xp, NULL, sk); +} + static inline int security_xfrm_policy_clone(struct xfrm_policy *old, struct xfrm_policy *new) { return security_ops->xfrm_policy_clone_security(old, new); @@ -3173,10 +3175,9 @@ static inline int security_xfrm_state_pol_flow_match(struct xfrm_state *x, return security_ops->xfrm_state_pol_flow_match(x, xp, fl); } -static inline int security_xfrm_flow_state_match(struct flowi *fl, - struct xfrm_state *xfrm, struct xfrm_policy *xp) +static inline int security_xfrm_flow_state_match(struct flowi *fl, struct xfrm_state *xfrm) { - return security_ops->xfrm_flow_state_match(fl, xfrm, xp); + return security_ops->xfrm_flow_state_match(fl, xfrm); } static inline int security_xfrm_decode_session(struct sk_buff *skb, u32 *secid) @@ -3196,6 +3197,11 @@ static inline int security_xfrm_policy_alloc(struct xfrm_policy *xp, struct xfrm return 0; } +static inline int security_xfrm_sock_policy_alloc(struct xfrm_policy *xp, struct sock *sk) +{ + return 0; +} + static inline int security_xfrm_policy_clone(struct xfrm_policy *old, struct xfrm_policy *new) { return 0; @@ -3243,7 +3249,7 @@ static inline int security_xfrm_state_pol_flow_match(struct xfrm_state *x, } static inline int security_xfrm_flow_state_match(struct flowi *fl, - struct xfrm_state *xfrm, struct xfrm_policy *xp) + struct xfrm_state *xfrm) { return 1; } diff --git a/trunk/include/linux/syscalls.h b/trunk/include/linux/syscalls.h index 1912c6cbef55..b0ace3fd7eb9 100644 --- a/trunk/include/linux/syscalls.h +++ b/trunk/include/linux/syscalls.h @@ -431,10 +431,6 @@ asmlinkage long sys_epoll_ctl(int epfd, int op, int fd, struct epoll_event __user *event); asmlinkage long sys_epoll_wait(int epfd, struct epoll_event __user *events, int maxevents, int timeout); -asmlinkage long sys_epoll_pwait(int epfd, struct epoll_event __user *events, - int maxevents, int timeout, - const sigset_t __user *sigmask, - size_t sigsetsize); asmlinkage long sys_gethostname(char __user *name, int len); asmlinkage long sys_sethostname(char __user *name, int len); asmlinkage long sys_setdomainname(char __user *name, int len); diff --git a/trunk/include/linux/videodev2.h b/trunk/include/linux/videodev2.h index df5c4654360d..c5fdf6259548 100644 --- a/trunk/include/linux/videodev2.h +++ b/trunk/include/linux/videodev2.h @@ -243,7 +243,7 @@ struct v4l2_pix_format #define V4L2_PIX_FMT_YUV420 v4l2_fourcc('Y','U','1','2') /* 12 YUV 4:2:0 */ #define V4L2_PIX_FMT_YYUV v4l2_fourcc('Y','Y','U','V') /* 16 YUV 4:2:2 */ #define V4L2_PIX_FMT_HI240 v4l2_fourcc('H','I','2','4') /* 8 8-bit color */ -#define V4L2_PIX_FMT_HM12 v4l2_fourcc('H','M','1','2') /* 8 YUV 4:2:0 16x16 macroblocks */ +#define V4L2_PIX_FMT_HM12 v4l2_fourcc('H','M','1','2') /* 8 YUV 4:1:1 16x16 macroblocks */ /* see http://www.siliconimaging.com/RGB%20Bayer.htm */ #define V4L2_PIX_FMT_SBGGR8 v4l2_fourcc('B','A','8','1') /* 8 BGBG.. GRGR.. */ diff --git a/trunk/include/net/flow.h b/trunk/include/net/flow.h index 3b44d72b27d3..ddf5f3ca1720 100644 --- a/trunk/include/net/flow.h +++ b/trunk/include/net/flow.h @@ -97,7 +97,7 @@ struct flowi { #define FLOW_DIR_FWD 2 struct sock; -typedef int (*flow_resolve_t)(struct flowi *key, u16 family, u8 dir, +typedef void (*flow_resolve_t)(struct flowi *key, u16 family, u8 dir, void **objp, atomic_t **obj_refp); extern void *flow_cache_lookup(struct flowi *key, u16 family, u8 dir, diff --git a/trunk/include/net/inet_timewait_sock.h b/trunk/include/net/inet_timewait_sock.h index 5f48748fe017..6d14c22a00c5 100644 --- a/trunk/include/net/inet_timewait_sock.h +++ b/trunk/include/net/inet_timewait_sock.h @@ -196,7 +196,6 @@ static inline void inet_twsk_put(struct inet_timewait_sock *tw) { if (atomic_dec_and_test(&tw->tw_refcnt)) { struct module *owner = tw->tw_prot->owner; - twsk_destructor((struct sock *)tw); #ifdef SOCK_REFCNT_DEBUG printk(KERN_DEBUG "%s timewait_sock %p released\n", tw->tw_prot->name, tw); diff --git a/trunk/include/net/netlabel.h b/trunk/include/net/netlabel.h index 12c214b9eadf..c63a58058e21 100644 --- a/trunk/include/net/netlabel.h +++ b/trunk/include/net/netlabel.h @@ -34,7 +34,6 @@ #include #include #include -#include /* * NetLabel - A management interface for maintaining network packet label @@ -107,7 +106,6 @@ int netlbl_domhsh_remove(const char *domain, struct netlbl_audit *audit_info); /* LSM security attributes */ struct netlbl_lsm_cache { - atomic_t refcount; void (*free) (const void *data); void *data; }; @@ -119,7 +117,7 @@ struct netlbl_lsm_secattr { unsigned char *mls_cat; size_t mls_cat_len; - struct netlbl_lsm_cache *cache; + struct netlbl_lsm_cache cache; }; /* @@ -127,43 +125,6 @@ struct netlbl_lsm_secattr { */ -/** - * netlbl_secattr_cache_alloc - Allocate and initialize a secattr cache - * @flags: the memory allocation flags - * - * Description: - * Allocate and initialize a netlbl_lsm_cache structure. Returns a pointer - * on success, NULL on failure. - * - */ -static inline struct netlbl_lsm_cache *netlbl_secattr_cache_alloc(gfp_t flags) -{ - struct netlbl_lsm_cache *cache; - - cache = kzalloc(sizeof(*cache), flags); - if (cache) - atomic_set(&cache->refcount, 1); - return cache; -} - -/** - * netlbl_secattr_cache_free - Frees a netlbl_lsm_cache struct - * @cache: the struct to free - * - * Description: - * Frees @secattr including all of the internal buffers. - * - */ -static inline void netlbl_secattr_cache_free(struct netlbl_lsm_cache *cache) -{ - if (!atomic_dec_and_test(&cache->refcount)) - return; - - if (cache->free) - cache->free(cache->data); - kfree(cache); -} - /** * netlbl_secattr_init - Initialize a netlbl_lsm_secattr struct * @secattr: the struct to initialize @@ -182,16 +143,20 @@ static inline int netlbl_secattr_init(struct netlbl_lsm_secattr *secattr) /** * netlbl_secattr_destroy - Clears a netlbl_lsm_secattr struct * @secattr: the struct to clear + * @clear_cache: cache clear flag * * Description: * Destroys the @secattr struct, including freeing all of the internal buffers. - * The struct must be reset with a call to netlbl_secattr_init() before reuse. + * If @clear_cache is true then free the cache fields, otherwise leave them + * intact. The struct must be reset with a call to netlbl_secattr_init() + * before reuse. * */ -static inline void netlbl_secattr_destroy(struct netlbl_lsm_secattr *secattr) +static inline void netlbl_secattr_destroy(struct netlbl_lsm_secattr *secattr, + u32 clear_cache) { - if (secattr->cache) - netlbl_secattr_cache_free(secattr->cache); + if (clear_cache && secattr->cache.data != NULL && secattr->cache.free) + secattr->cache.free(secattr->cache.data); kfree(secattr->domain); kfree(secattr->mls_cat); } @@ -213,14 +178,17 @@ static inline struct netlbl_lsm_secattr *netlbl_secattr_alloc(int flags) /** * netlbl_secattr_free - Frees a netlbl_lsm_secattr struct * @secattr: the struct to free + * @clear_cache: cache clear flag * * Description: - * Frees @secattr including all of the internal buffers. + * Frees @secattr including all of the internal buffers. If @clear_cache is + * true then free the cache fields, otherwise leave them intact. * */ -static inline void netlbl_secattr_free(struct netlbl_lsm_secattr *secattr) +static inline void netlbl_secattr_free(struct netlbl_lsm_secattr *secattr, + u32 clear_cache) { - netlbl_secattr_destroy(secattr); + netlbl_secattr_destroy(secattr, clear_cache); kfree(secattr); } diff --git a/trunk/include/net/sctp/sctp.h b/trunk/include/net/sctp/sctp.h index 764e3af5be93..ee68a3124076 100644 --- a/trunk/include/net/sctp/sctp.h +++ b/trunk/include/net/sctp/sctp.h @@ -139,7 +139,6 @@ int sctp_inet_listen(struct socket *sock, int backlog); void sctp_write_space(struct sock *sk); unsigned int sctp_poll(struct file *file, struct socket *sock, poll_table *wait); -void sctp_sock_rfree(struct sk_buff *skb); /* * sctp/primitive.c @@ -445,19 +444,6 @@ static inline struct list_head *sctp_list_dequeue(struct list_head *list) return result; } -/* SCTP version of skb_set_owner_r. We need this one because - * of the way we have to do receive buffer accounting on bundled - * chunks. - */ -static inline void sctp_skb_set_owner_r(struct sk_buff *skb, struct sock *sk) -{ - struct sctp_ulpevent *event = sctp_skb2event(skb); - - skb->sk = sk; - skb->destructor = sctp_sock_rfree; - atomic_add(event->rmem_len, &sk->sk_rmem_alloc); -} - /* Tests if the list has one and only one entry. */ static inline int sctp_list_single_entry(struct list_head *head) { diff --git a/trunk/include/net/sctp/ulpevent.h b/trunk/include/net/sctp/ulpevent.h index 1a4ddc1ec7d2..6c40cfc4832d 100644 --- a/trunk/include/net/sctp/ulpevent.h +++ b/trunk/include/net/sctp/ulpevent.h @@ -63,7 +63,6 @@ struct sctp_ulpevent { __u32 cumtsn; int msg_flags; int iif; - unsigned int rmem_len; }; /* Retrieve the skb this event sits inside of. */ diff --git a/trunk/include/net/timewait_sock.h b/trunk/include/net/timewait_sock.h index be293d795e38..2544281e1d5e 100644 --- a/trunk/include/net/timewait_sock.h +++ b/trunk/include/net/timewait_sock.h @@ -19,7 +19,6 @@ struct timewait_sock_ops { unsigned int twsk_obj_size; int (*twsk_unique)(struct sock *sk, struct sock *sktw, void *twp); - void (*twsk_destructor)(struct sock *sk); }; static inline int twsk_unique(struct sock *sk, struct sock *sktw, void *twp) @@ -29,10 +28,4 @@ static inline int twsk_unique(struct sock *sk, struct sock *sktw, void *twp) return 0; } -static inline void twsk_destructor(struct sock *sk) -{ - if (sk->sk_prot->twsk_prot->twsk_destructor != NULL) - sk->sk_prot->twsk_prot->twsk_destructor(sk); -} - #endif /* _TIMEWAIT_SOCK_H */ diff --git a/trunk/include/net/xfrm.h b/trunk/include/net/xfrm.h index 737fdb2ee8a4..1e2a4ddec96e 100644 --- a/trunk/include/net/xfrm.h +++ b/trunk/include/net/xfrm.h @@ -995,8 +995,7 @@ struct xfrm_state * xfrm_find_acq(u8 mode, u32 reqid, u8 proto, int create, unsigned short family); extern void xfrm_policy_flush(u8 type); extern int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol); -extern int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *xdst, - struct flowi *fl, int family, int strict); +extern int xfrm_bundle_ok(struct xfrm_dst *xdst, struct flowi *fl, int family, int strict); extern void xfrm_init_pmtu(struct dst_entry *dst); extern wait_queue_head_t km_waitq; diff --git a/trunk/kernel/irq/proc.c b/trunk/kernel/irq/proc.c index 9a352667007c..607c7809ad01 100644 --- a/trunk/kernel/irq/proc.c +++ b/trunk/kernel/irq/proc.c @@ -57,7 +57,7 @@ static int irq_affinity_write_proc(struct file *file, const char __user *buffer, if (!irq_desc[irq].chip->set_affinity || no_irq_affinity) return -EIO; - err = cpumask_parse_user(buffer, count, new_value); + err = cpumask_parse(buffer, count, new_value); if (err) return err; diff --git a/trunk/kernel/lockdep.c b/trunk/kernel/lockdep.c index 805a322a5655..4c0553461000 100644 --- a/trunk/kernel/lockdep.c +++ b/trunk/kernel/lockdep.c @@ -1114,6 +1114,8 @@ static int count_matching_names(struct lock_class *new_class) return count + 1; } +extern void __error_too_big_MAX_LOCKDEP_SUBCLASSES(void); + /* * Register a lock's class in the hash-table, if the class is not present * yet. Otherwise we look it up. We cache the result in the lock object @@ -1151,7 +1153,8 @@ look_up_lock_class(struct lockdep_map *lock, unsigned int subclass) * (or spin_lock_init()) call - which acts as the key. For static * locks we use the lock object itself as the key. */ - BUILD_BUG_ON(sizeof(struct lock_class_key) > sizeof(struct lock_class)); + if (sizeof(struct lock_class_key) > sizeof(struct lock_class)) + __error_too_big_MAX_LOCKDEP_SUBCLASSES(); key = lock->key->subkeys + subclass; diff --git a/trunk/kernel/module.c b/trunk/kernel/module.c index 67009bd56c52..7f60e782de1e 100644 --- a/trunk/kernel/module.c +++ b/trunk/kernel/module.c @@ -87,12 +87,6 @@ static inline int strong_try_module_get(struct module *mod) return try_module_get(mod); } -static inline void add_taint_module(struct module *mod, unsigned flag) -{ - add_taint(flag); - mod->taints |= flag; -} - /* A thread that wants to hold a reference to a module only while it * is running can call ths to safely exit. * nfsd and lockd use this. @@ -853,10 +847,12 @@ static int check_version(Elf_Shdr *sechdrs, return 0; } /* Not in module's version table. OK, but that taints the kernel. */ - if (!(tainted & TAINT_FORCED_MODULE)) + if (!(tainted & TAINT_FORCED_MODULE)) { printk("%s: no version for \"%s\" found: kernel tainted.\n", mod->name, symname); - add_taint_module(mod, TAINT_FORCED_MODULE); + add_taint(TAINT_FORCED_MODULE); + mod->taints |= TAINT_FORCED_MODULE; + } return 1; } @@ -914,8 +910,7 @@ static unsigned long resolve_symbol(Elf_Shdr *sechdrs, unsigned long ret; const unsigned long *crc; - ret = __find_symbol(name, &owner, &crc, - !(mod->taints & TAINT_PROPRIETARY_MODULE)); + ret = __find_symbol(name, &owner, &crc, mod->license_gplok); if (ret) { /* use_module can fail due to OOM, or module unloading */ if (!check_version(sechdrs, versindex, name, mod, crc) || @@ -1340,11 +1335,12 @@ static void set_license(struct module *mod, const char *license) if (!license) license = "unspecified"; - if (!license_is_gpl_compatible(license)) { - if (!(tainted & TAINT_PROPRIETARY_MODULE)) - printk(KERN_WARNING "%s: module license '%s' taints" - "kernel.\n", mod->name, license); - add_taint_module(mod, TAINT_PROPRIETARY_MODULE); + mod->license_gplok = license_is_gpl_compatible(license); + if (!mod->license_gplok && !(tainted & TAINT_PROPRIETARY_MODULE)) { + printk(KERN_WARNING "%s: module license '%s' taints kernel.\n", + mod->name, license); + add_taint(TAINT_PROPRIETARY_MODULE); + mod->taints |= TAINT_PROPRIETARY_MODULE; } } @@ -1623,7 +1619,8 @@ static struct module *load_module(void __user *umod, modmagic = get_modinfo(sechdrs, infoindex, "vermagic"); /* This is allowed: modprobe --force will invalidate it. */ if (!modmagic) { - add_taint_module(mod, TAINT_FORCED_MODULE); + add_taint(TAINT_FORCED_MODULE); + mod->taints |= TAINT_FORCED_MODULE; printk(KERN_WARNING "%s: no version magic, tainting kernel.\n", mod->name); } else if (!same_magic(modmagic, vermagic)) { @@ -1717,10 +1714,14 @@ static struct module *load_module(void __user *umod, /* Set up license info based on the info section */ set_license(mod, get_modinfo(sechdrs, infoindex, "license")); - if (strcmp(mod->name, "ndiswrapper") == 0) - add_taint_module(mod, TAINT_PROPRIETARY_MODULE); - if (strcmp(mod->name, "driverloader") == 0) - add_taint_module(mod, TAINT_PROPRIETARY_MODULE); + if (strcmp(mod->name, "ndiswrapper") == 0) { + add_taint(TAINT_PROPRIETARY_MODULE); + mod->taints |= TAINT_PROPRIETARY_MODULE; + } + if (strcmp(mod->name, "driverloader") == 0) { + add_taint(TAINT_PROPRIETARY_MODULE); + mod->taints |= TAINT_PROPRIETARY_MODULE; + } /* Set up MODINFO_ATTR fields */ setup_modinfo(mod, sechdrs, infoindex); @@ -1765,7 +1766,8 @@ static struct module *load_module(void __user *umod, (mod->num_unused_gpl_syms && !unusedgplcrcindex)) { printk(KERN_WARNING "%s: No versions for exported symbols." " Tainting kernel.\n", mod->name); - add_taint_module(mod, TAINT_FORCED_MODULE); + add_taint(TAINT_FORCED_MODULE); + mod->taints |= TAINT_FORCED_MODULE; } #endif @@ -2130,33 +2132,9 @@ static void m_stop(struct seq_file *m, void *p) mutex_unlock(&module_mutex); } -static char *taint_flags(unsigned int taints, char *buf) -{ - int bx = 0; - - if (taints) { - buf[bx++] = '('; - if (taints & TAINT_PROPRIETARY_MODULE) - buf[bx++] = 'P'; - if (taints & TAINT_FORCED_MODULE) - buf[bx++] = 'F'; - /* - * TAINT_FORCED_RMMOD: could be added. - * TAINT_UNSAFE_SMP, TAINT_MACHINE_CHECK, TAINT_BAD_PAGE don't - * apply to modules. - */ - buf[bx++] = ')'; - } - buf[bx] = '\0'; - - return buf; -} - static int m_show(struct seq_file *m, void *p) { struct module *mod = list_entry(p, struct module, list); - char buf[8]; - seq_printf(m, "%s %lu", mod->name, mod->init_size + mod->core_size); print_unload_info(m, mod); @@ -2169,10 +2147,6 @@ static int m_show(struct seq_file *m, void *p) /* Used by oprofile and other similar tools. */ seq_printf(m, " 0x%p", mod->module_core); - /* Taints info */ - if (mod->taints) - seq_printf(m, " %s", taint_flags(mod->taints, buf)); - seq_printf(m, "\n"); return 0; } @@ -2261,6 +2235,28 @@ struct module *module_text_address(unsigned long addr) return mod; } +static char *taint_flags(unsigned int taints, char *buf) +{ + *buf = '\0'; + if (taints) { + int bx; + + buf[0] = '('; + bx = 1; + if (taints & TAINT_PROPRIETARY_MODULE) + buf[bx++] = 'P'; + if (taints & TAINT_FORCED_MODULE) + buf[bx++] = 'F'; + /* + * TAINT_FORCED_RMMOD: could be added. + * TAINT_UNSAFE_SMP, TAINT_MACHINE_CHECK, TAINT_BAD_PAGE don't + * apply to modules. + */ + buf[bx] = ')'; + } + return buf; +} + /* Don't grab lock, we're oopsing. */ void print_modules(void) { diff --git a/trunk/kernel/power/disk.c b/trunk/kernel/power/disk.c index d3a158a60312..d72234942798 100644 --- a/trunk/kernel/power/disk.c +++ b/trunk/kernel/power/disk.c @@ -18,7 +18,6 @@ #include #include #include -#include #include #include "power.h" @@ -120,10 +119,8 @@ int pm_suspend_disk(void) if (error) return error; - suspend_console(); error = device_suspend(PMSG_FREEZE); if (error) { - resume_console(); printk("Some devices failed to suspend\n"); unprepare_processes(); return error; @@ -136,7 +133,6 @@ int pm_suspend_disk(void) if (in_suspend) { device_resume(); - resume_console(); pr_debug("PM: writing image.\n"); error = swsusp_write(); if (!error) @@ -152,7 +148,6 @@ int pm_suspend_disk(void) swsusp_free(); Done: device_resume(); - resume_console(); unprepare_processes(); return error; } @@ -217,9 +212,7 @@ static int software_resume(void) pr_debug("PM: Preparing devices for restore.\n"); - suspend_console(); if ((error = device_suspend(PMSG_PRETHAW))) { - resume_console(); printk("Some devices failed to suspend\n"); swsusp_free(); goto Thaw; @@ -231,7 +224,6 @@ static int software_resume(void) swsusp_resume(); pr_debug("PM: Restore failed, recovering.n"); device_resume(); - resume_console(); Thaw: unprepare_processes(); Done: diff --git a/trunk/kernel/power/user.c b/trunk/kernel/power/user.c index d991d3b0e5a4..93b5dd283dea 100644 --- a/trunk/kernel/power/user.c +++ b/trunk/kernel/power/user.c @@ -19,7 +19,6 @@ #include #include #include -#include #include #include @@ -174,14 +173,12 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, /* Free memory before shutting down devices. */ error = swsusp_shrink_memory(); if (!error) { - suspend_console(); error = device_suspend(PMSG_FREEZE); if (!error) { in_suspend = 1; error = swsusp_suspend(); device_resume(); } - resume_console(); } up(&pm_sem); if (!error) @@ -199,13 +196,11 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, snapshot_free_unused_memory(&data->handle); down(&pm_sem); pm_prepare_console(); - suspend_console(); error = device_suspend(PMSG_PRETHAW); if (!error) { error = swsusp_resume(); device_resume(); } - resume_console(); pm_restore_console(); up(&pm_sem); break; @@ -294,7 +289,6 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, } /* Put devices to sleep */ - suspend_console(); error = device_suspend(PMSG_SUSPEND); if (error) { printk(KERN_ERR "Failed to suspend some devices.\n"); @@ -305,7 +299,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, /* Wake up devices */ device_resume(); } - resume_console(); + if (pm_ops->finish) pm_ops->finish(PM_SUSPEND_MEM); diff --git a/trunk/kernel/printk.c b/trunk/kernel/printk.c index f7d427ef5038..771f5e861bcd 100644 --- a/trunk/kernel/printk.c +++ b/trunk/kernel/printk.c @@ -820,8 +820,15 @@ void release_console_sem(void) console_locked = 0; up(&console_sem); spin_unlock_irqrestore(&logbuf_lock, flags); - if (wake_klogd && !oops_in_progress && waitqueue_active(&log_wait)) - wake_up_interruptible(&log_wait); + if (wake_klogd && !oops_in_progress && waitqueue_active(&log_wait)) { + /* + * If we printk from within the lock dependency code, + * from within the scheduler code, then do not lock + * up due to self-recursion: + */ + if (!lockdep_internal()) + wake_up_interruptible(&log_wait); + } } EXPORT_SYMBOL(release_console_sem); diff --git a/trunk/kernel/profile.c b/trunk/kernel/profile.c index f940b462eec9..857300a2afec 100644 --- a/trunk/kernel/profile.c +++ b/trunk/kernel/profile.c @@ -399,7 +399,7 @@ static int prof_cpu_mask_write_proc (struct file *file, const char __user *buffe unsigned long full_count = count, err; cpumask_t new_value; - err = cpumask_parse_user(buffer, count, new_value); + err = cpumask_parse(buffer, count, new_value); if (err) return err; diff --git a/trunk/kernel/sched.c b/trunk/kernel/sched.c index 094b5687eef6..53608a59d6e3 100644 --- a/trunk/kernel/sched.c +++ b/trunk/kernel/sched.c @@ -1822,14 +1822,14 @@ context_switch(struct rq *rq, struct task_struct *prev, struct mm_struct *mm = next->mm; struct mm_struct *oldmm = prev->active_mm; - if (!mm) { + if (unlikely(!mm)) { next->active_mm = oldmm; atomic_inc(&oldmm->mm_count); enter_lazy_tlb(oldmm, next); } else switch_mm(oldmm, mm, next); - if (!prev->mm) { + if (unlikely(!prev->mm)) { prev->active_mm = NULL; WARN_ON(rq->prev_mm); rq->prev_mm = oldmm; @@ -3491,7 +3491,7 @@ asmlinkage void __sched preempt_schedule(void) * If there is a non-zero preempt_count or interrupts are disabled, * we do not want to preempt the current task. Just return.. */ - if (likely(ti->preempt_count || irqs_disabled())) + if (unlikely(ti->preempt_count || irqs_disabled())) return; need_resched: diff --git a/trunk/kernel/workqueue.c b/trunk/kernel/workqueue.c index 3df9bfc7ff78..cfc737bffe6d 100644 --- a/trunk/kernel/workqueue.c +++ b/trunk/kernel/workqueue.c @@ -28,7 +28,6 @@ #include #include #include -#include /* * The per-CPU workqueue (if single thread, we always use the first @@ -246,12 +245,6 @@ static int worker_thread(void *__cwq) sigprocmask(SIG_BLOCK, &blocked, NULL); flush_signals(current); - /* - * We inherited MPOL_INTERLEAVE from the booting kernel. - * Set MPOL_DEFAULT to insure node local allocations. - */ - numa_default_policy(); - /* SIG_IGN makes children autoreap: see do_notify_parent(). */ sa.sa.sa_handler = SIG_IGN; sa.sa.sa_flags = 0; diff --git a/trunk/lib/Kconfig.debug b/trunk/lib/Kconfig.debug index 77491e311791..756a908c441d 100644 --- a/trunk/lib/Kconfig.debug +++ b/trunk/lib/Kconfig.debug @@ -71,7 +71,7 @@ config LOG_BUF_SHIFT config DETECT_SOFTLOCKUP bool "Detect Soft Lockups" - depends on DEBUG_KERNEL && !S390 + depends on DEBUG_KERNEL default y help Say Y here to enable the kernel to detect "soft lockups", @@ -371,20 +371,6 @@ config FORCED_INLINING become the default in the future, until then this option is there to test gcc for this. -config HEADERS_CHECK - bool "Run 'make headers_check' when building vmlinux" - depends on !UML - help - This option will extract the user-visible kernel headers whenever - building the kernel, and will run basic sanity checks on them to - ensure that exported files do not attempt to include files which - were not exported, etc. - - If you're making modifications to header files which are - relevant for userspace, say 'Y', and check the headers - exported to $(INSTALL_HDR_PATH) (usually 'usr/include' in - your build tree), to make sure they're suitable. - config RCU_TORTURE_TEST tristate "torture tests for RCU" depends on DEBUG_KERNEL diff --git a/trunk/lib/Makefile b/trunk/lib/Makefile index 59070dbfbeb4..8e6662bb9c37 100644 --- a/trunk/lib/Makefile +++ b/trunk/lib/Makefile @@ -5,7 +5,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ bust_spinlocks.o rbtree.o radix-tree.o dump_stack.o \ idr.o div64.o int_sqrt.o bitmap.o extable.o prio_tree.o \ - sha1.o irq_regs.o carta_random32.o + sha1.o irq_regs.o lib-$(CONFIG_MMU) += ioremap.o lib-$(CONFIG_SMP) += cpumask.o diff --git a/trunk/lib/bitmap.c b/trunk/lib/bitmap.c index 037fa9aa2ed7..d71e38c54ea5 100644 --- a/trunk/lib/bitmap.c +++ b/trunk/lib/bitmap.c @@ -316,11 +316,10 @@ int bitmap_scnprintf(char *buf, unsigned int buflen, EXPORT_SYMBOL(bitmap_scnprintf); /** - * __bitmap_parse - convert an ASCII hex string into a bitmap. - * @buf: pointer to buffer containing string. - * @buflen: buffer size in bytes. If string is smaller than this + * bitmap_parse - convert an ASCII hex string into a bitmap. + * @ubuf: pointer to buffer in user space containing string. + * @ubuflen: buffer size in bytes. If string is smaller than this * then it must be terminated with a \0. - * @is_user: location of buffer, 0 indicates kernel space * @maskp: pointer to bitmap array that will contain result. * @nmaskbits: size of bitmap, in bits. * @@ -331,13 +330,11 @@ EXPORT_SYMBOL(bitmap_scnprintf); * characters and for grouping errors such as "1,,5", ",44", "," and "". * Leading and trailing whitespace accepted, but not embedded whitespace. */ -int __bitmap_parse(const char *buf, unsigned int buflen, - int is_user, unsigned long *maskp, - int nmaskbits) +int bitmap_parse(const char __user *ubuf, unsigned int ubuflen, + unsigned long *maskp, int nmaskbits) { int c, old_c, totaldigits, ndigits, nchunks, nbits; u32 chunk; - const char __user *ubuf = buf; bitmap_zero(maskp, nmaskbits); @@ -346,15 +343,11 @@ int __bitmap_parse(const char *buf, unsigned int buflen, chunk = ndigits = 0; /* Get the next chunk of the bitmap */ - while (buflen) { + while (ubuflen) { old_c = c; - if (is_user) { - if (__get_user(c, ubuf++)) - return -EFAULT; - } - else - c = *buf++; - buflen--; + if (get_user(c, ubuf++)) + return -EFAULT; + ubuflen--; if (isspace(c)) continue; @@ -395,36 +388,11 @@ int __bitmap_parse(const char *buf, unsigned int buflen, nbits += (nchunks == 1) ? nbits_to_hold_value(chunk) : CHUNKSZ; if (nbits > nmaskbits) return -EOVERFLOW; - } while (buflen && c == ','); + } while (ubuflen && c == ','); return 0; } -EXPORT_SYMBOL(__bitmap_parse); - -/** - * bitmap_parse_user() - * - * @ubuf: pointer to user buffer containing string. - * @ulen: buffer size in bytes. If string is smaller than this - * then it must be terminated with a \0. - * @maskp: pointer to bitmap array that will contain result. - * @nmaskbits: size of bitmap, in bits. - * - * Wrapper for __bitmap_parse(), providing it with user buffer. - * - * We cannot have this as an inline function in bitmap.h because it needs - * linux/uaccess.h to get the access_ok() declaration and this causes - * cyclic dependencies. - */ -int bitmap_parse_user(const char __user *ubuf, - unsigned int ulen, unsigned long *maskp, - int nmaskbits) -{ - if (!access_ok(VERIFY_READ, ubuf, ulen)) - return -EFAULT; - return __bitmap_parse((const char *)ubuf, ulen, 1, maskp, nmaskbits); -} -EXPORT_SYMBOL(bitmap_parse_user); +EXPORT_SYMBOL(bitmap_parse); /* * bscnl_emit(buf, buflen, rbot, rtop, bp) diff --git a/trunk/lib/carta_random32.c b/trunk/lib/carta_random32.c deleted file mode 100644 index ca82df70eee4..000000000000 --- a/trunk/lib/carta_random32.c +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P. - * Contributed by David Mosberger-Tang - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - * 02111-1307 USA - */ -#include -#include - -/* - * Fast, simple, yet decent quality random number generator based on - * a paper by David G. Carta ("Two Fast Implementations of the - * `Minimal Standard' Random Number Generator," Communications of the - * ACM, January, 1990). - */ -u64 carta_random32 (u64 seed) -{ -# define A 16807 -# define M ((u32) 1 << 31) - u64 s, prod = A * seed, p, q; - - p = (prod >> 31) & (M - 1); - q = (prod >> 0) & (M - 1); - s = p + q; - if (s >= M) - s -= M - 1; - return s; -} -EXPORT_SYMBOL_GPL(carta_random32); diff --git a/trunk/lib/kobject.c b/trunk/lib/kobject.c index 1699eb9161f3..7dd5c0e9d996 100644 --- a/trunk/lib/kobject.c +++ b/trunk/lib/kobject.c @@ -119,6 +119,7 @@ char *kobject_get_path(struct kobject *kobj, gfp_t gfp_mask) return path; } +EXPORT_SYMBOL_GPL(kobject_get_path); /** * kobject_init - initialize object. diff --git a/trunk/mm/hugetlb.c b/trunk/mm/hugetlb.c index 2dbec90dc3ba..1d709ff528e1 100644 --- a/trunk/mm/hugetlb.c +++ b/trunk/mm/hugetlb.c @@ -356,8 +356,8 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, return -ENOMEM; } -void __unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, - unsigned long end) +void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end) { struct mm_struct *mm = vma->vm_mm; unsigned long address; @@ -398,24 +398,6 @@ void __unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, } } -void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, - unsigned long end) -{ - /* - * It is undesirable to test vma->vm_file as it should be non-null - * for valid hugetlb area. However, vm_file will be NULL in the error - * cleanup path of do_mmap_pgoff. When hugetlbfs ->mmap method fails, - * do_mmap_pgoff() nullifies vma->vm_file before calling this function - * to clean up. Since no pte has actually been setup, it is safe to - * do nothing in this case. - */ - if (vma->vm_file) { - spin_lock(&vma->vm_file->f_mapping->i_mmap_lock); - __unmap_hugepage_range(vma, start, end); - spin_unlock(&vma->vm_file->f_mapping->i_mmap_lock); - } -} - static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, pte_t *ptep, pte_t pte) { diff --git a/trunk/mm/mempolicy.c b/trunk/mm/mempolicy.c index 617fb31086ee..25788b1b7fcf 100644 --- a/trunk/mm/mempolicy.c +++ b/trunk/mm/mempolicy.c @@ -727,7 +727,7 @@ int do_migrate_pages(struct mm_struct *mm, return -ENOSYS; } -static struct page *new_vma_page(struct page *page, unsigned long private, int **x) +static struct page *new_vma_page(struct page *page, unsigned long private) { return NULL; } diff --git a/trunk/mm/mmap.c b/trunk/mm/mmap.c index 497e502dfd6b..eea8eefd51a8 100644 --- a/trunk/mm/mmap.c +++ b/trunk/mm/mmap.c @@ -900,6 +900,17 @@ unsigned long do_mmap_pgoff(struct file * file, unsigned long addr, int accountable = 1; unsigned long charged = 0, reqprot = prot; + if (file) { + if (is_file_hugepages(file)) + accountable = 0; + + if (!file->f_op || !file->f_op->mmap) + return -ENODEV; + + if ((prot & PROT_EXEC) && + (file->f_vfsmnt->mnt_flags & MNT_NOEXEC)) + return -EPERM; + } /* * Does the application expect PROT_READ to imply PROT_EXEC? * @@ -989,16 +1000,6 @@ unsigned long do_mmap_pgoff(struct file * file, unsigned long addr, case MAP_PRIVATE: if (!(file->f_mode & FMODE_READ)) return -EACCES; - if (file->f_vfsmnt->mnt_flags & MNT_NOEXEC) { - if (vm_flags & VM_EXEC) - return -EPERM; - vm_flags &= ~VM_MAYEXEC; - } - if (is_file_hugepages(file)) - accountable = 0; - - if (!file->f_op || !file->f_op->mmap) - return -ENODEV; break; default: diff --git a/trunk/mm/page_alloc.c b/trunk/mm/page_alloc.c index 40db96a655d0..a8c003e7b3d5 100644 --- a/trunk/mm/page_alloc.c +++ b/trunk/mm/page_alloc.c @@ -495,16 +495,17 @@ static void __free_pages_ok(struct page *page, unsigned int order) int i; int reserved = 0; + arch_free_page(page, order); + if (!PageHighMem(page)) + debug_check_no_locks_freed(page_address(page), + PAGE_SIZE<mapping = NULL; if (free_pages_check(page)) return; - if (!PageHighMem(page)) - debug_check_no_locks_freed(page_address(page), PAGE_SIZE); - arch_free_page(page, 0); kernel_map_pages(page, 1, 0); pcp = &zone_pcp(zone, get_cpu())->pcp[cold]; @@ -2294,6 +2294,19 @@ unsigned long __init zone_absent_pages_in_node(int nid, return __absent_pages_in_range(nid, zone_start_pfn, zone_end_pfn); } +/* Return the zone index a PFN is in */ +int memmap_zone_idx(struct page *lmem_map) +{ + int i; + unsigned long phys_addr = virt_to_phys(lmem_map); + unsigned long pfn = phys_addr >> PAGE_SHIFT; + + for (i = 0; i < MAX_NR_ZONES; i++) + if (pfn < arch_zone_highest_possible_pfn[i]) + break; + + return i; +} #else static inline unsigned long zone_spanned_pages_in_node(int nid, unsigned long zone_type, @@ -2312,6 +2325,10 @@ static inline unsigned long zone_absent_pages_in_node(int nid, return zholes_size[zone_type]; } +static inline int memmap_zone_idx(struct page *lmem_map) +{ + return MAX_NR_ZONES; +} #endif static void __init calculate_node_totalpages(struct pglist_data *pgdat, diff --git a/trunk/mm/rmap.c b/trunk/mm/rmap.c index a9136d8b7577..e2155d791d99 100644 --- a/trunk/mm/rmap.c +++ b/trunk/mm/rmap.c @@ -576,14 +576,15 @@ void page_add_file_rmap(struct page *page) void page_remove_rmap(struct page *page) { if (atomic_add_negative(-1, &page->_mapcount)) { +#ifdef CONFIG_DEBUG_VM if (unlikely(page_mapcount(page) < 0)) { printk (KERN_EMERG "Eeek! page_mapcount(page) went negative! (%d)\n", page_mapcount(page)); printk (KERN_EMERG " page->flags = %lx\n", page->flags); printk (KERN_EMERG " page->count = %x\n", page_count(page)); printk (KERN_EMERG " page->mapping = %p\n", page->mapping); - BUG(); } - +#endif + BUG_ON(page_mapcount(page) < 0); /* * It would be tidy to reset the PageAnon mapping here, * but that might overwrite a racing page_add_anon_rmap diff --git a/trunk/mm/shmem_acl.c b/trunk/mm/shmem_acl.c index f5664c5b9eb1..c946bf468718 100644 --- a/trunk/mm/shmem_acl.c +++ b/trunk/mm/shmem_acl.c @@ -35,7 +35,7 @@ shmem_get_acl(struct inode *inode, int type) } /** - * shmem_set_acl - generic_acl_operations->setacl() operation + * shmem_get_acl - generic_acl_operations->setacl() operation */ static void shmem_set_acl(struct inode *inode, int type, struct posix_acl *acl) diff --git a/trunk/mm/truncate.c b/trunk/mm/truncate.c index 11ca480701dd..f4edbc179d14 100644 --- a/trunk/mm/truncate.c +++ b/trunk/mm/truncate.c @@ -302,7 +302,7 @@ invalidate_complete_page2(struct address_space *mapping, struct page *page) if (page->mapping != mapping) return 0; - if (PagePrivate(page) && !try_to_release_page(page, GFP_KERNEL)) + if (PagePrivate(page) && !try_to_release_page(page, 0)) return 0; write_lock_irq(&mapping->tree_lock); @@ -396,7 +396,6 @@ int invalidate_inode_pages2_range(struct address_space *mapping, pagevec_release(&pvec); cond_resched(); } - WARN_ON_ONCE(ret); return ret; } EXPORT_SYMBOL_GPL(invalidate_inode_pages2_range); diff --git a/trunk/net/compat.c b/trunk/net/compat.c index 52d32f1bc728..d5d69fa15d07 100644 --- a/trunk/net/compat.c +++ b/trunk/net/compat.c @@ -285,7 +285,8 @@ void scm_detach_fds_compat(struct msghdr *kmsg, struct scm_cookie *scm) if (i > 0) { int cmlen = CMSG_COMPAT_LEN(i * sizeof(int)); - err = put_user(SOL_SOCKET, &cm->cmsg_level); + if (!err) + err = put_user(SOL_SOCKET, &cm->cmsg_level); if (!err) err = put_user(SCM_RIGHTS, &cm->cmsg_type); if (!err) diff --git a/trunk/net/core/flow.c b/trunk/net/core/flow.c index b16d31ae5e54..f23e7e386543 100644 --- a/trunk/net/core/flow.c +++ b/trunk/net/core/flow.c @@ -85,14 +85,6 @@ static void flow_cache_new_hashrnd(unsigned long arg) add_timer(&flow_hash_rnd_timer); } -static void flow_entry_kill(int cpu, struct flow_cache_entry *fle) -{ - if (fle->object) - atomic_dec(fle->object_ref); - kmem_cache_free(flow_cachep, fle); - flow_count(cpu)--; -} - static void __flow_cache_shrink(int cpu, int shrink_to) { struct flow_cache_entry *fle, **flp; @@ -108,7 +100,10 @@ static void __flow_cache_shrink(int cpu, int shrink_to) } while ((fle = *flp) != NULL) { *flp = fle->next; - flow_entry_kill(cpu, fle); + if (fle->object) + atomic_dec(fle->object_ref); + kmem_cache_free(flow_cachep, fle); + flow_count(cpu)--; } } } @@ -225,33 +220,24 @@ void *flow_cache_lookup(struct flowi *key, u16 family, u8 dir, nocache: { - int err; void *obj; atomic_t *obj_ref; - err = resolver(key, family, dir, &obj, &obj_ref); + resolver(key, family, dir, &obj, &obj_ref); if (fle) { - if (err) { - /* Force security policy check on next lookup */ - *head = fle->next; - flow_entry_kill(cpu, fle); - } else { - fle->genid = atomic_read(&flow_cache_genid); - - if (fle->object) - atomic_dec(fle->object_ref); - - fle->object = obj; - fle->object_ref = obj_ref; - if (obj) - atomic_inc(fle->object_ref); - } + fle->genid = atomic_read(&flow_cache_genid); + + if (fle->object) + atomic_dec(fle->object_ref); + + fle->object = obj; + fle->object_ref = obj_ref; + if (obj) + atomic_inc(fle->object_ref); } local_bh_enable(); - if (err) - obj = ERR_PTR(err); return obj; } } diff --git a/trunk/net/core/rtnetlink.c b/trunk/net/core/rtnetlink.c index 02f3c7947898..221e4038216b 100644 --- a/trunk/net/core/rtnetlink.c +++ b/trunk/net/core/rtnetlink.c @@ -602,7 +602,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) goto errout; } - err = rtnl_unicast(nskb, NETLINK_CB(skb).pid); + err = rtnl_unicast(skb, NETLINK_CB(skb).pid); errout: kfree(iw_buf); dev_put(dev); diff --git a/trunk/net/core/scm.c b/trunk/net/core/scm.c index 271cf060ef8c..649d01ef35b6 100644 --- a/trunk/net/core/scm.c +++ b/trunk/net/core/scm.c @@ -245,7 +245,8 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm) if (i > 0) { int cmlen = CMSG_LEN(i*sizeof(int)); - err = put_user(SOL_SOCKET, &cm->cmsg_level); + if (!err) + err = put_user(SOL_SOCKET, &cm->cmsg_level); if (!err) err = put_user(SCM_RIGHTS, &cm->cmsg_type); if (!err) diff --git a/trunk/net/dccp/ipv4.c b/trunk/net/dccp/ipv4.c index 7e746c4c1688..bf692c1c116f 100644 --- a/trunk/net/dccp/ipv4.c +++ b/trunk/net/dccp/ipv4.c @@ -311,7 +311,7 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info) } if (sk->sk_state == DCCP_TIME_WAIT) { - inet_twsk_put(inet_twsk(sk)); + inet_twsk_put((struct inet_timewait_sock *)sk); return; } @@ -614,7 +614,7 @@ static struct sock *dccp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) bh_lock_sock(nsk); return nsk; } - inet_twsk_put(inet_twsk(nsk)); + inet_twsk_put((struct inet_timewait_sock *)nsk); return NULL; } @@ -980,7 +980,7 @@ static int dccp_v4_rcv(struct sk_buff *skb) goto discard_it; do_time_wait: - inet_twsk_put(inet_twsk(sk)); + inet_twsk_put((struct inet_timewait_sock *)sk); goto no_dccp_socket; } diff --git a/trunk/net/dccp/ipv6.c b/trunk/net/dccp/ipv6.c index 7171a78671aa..7a47399cf31f 100644 --- a/trunk/net/dccp/ipv6.c +++ b/trunk/net/dccp/ipv6.c @@ -285,7 +285,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, } if (sk->sk_state == DCCP_TIME_WAIT) { - inet_twsk_put(inet_twsk(sk)); + inet_twsk_put((struct inet_timewait_sock *)sk); return; } @@ -663,7 +663,7 @@ static struct sock *dccp_v6_hnd_req(struct sock *sk,struct sk_buff *skb) bh_lock_sock(nsk); return nsk; } - inet_twsk_put(inet_twsk(nsk)); + inet_twsk_put((struct inet_timewait_sock *)nsk); return NULL; } @@ -1109,7 +1109,7 @@ static int dccp_v6_rcv(struct sk_buff **pskb) goto discard_it; do_time_wait: - inet_twsk_put(inet_twsk(sk)); + inet_twsk_put((struct inet_timewait_sock *)sk); goto no_dccp_socket; } diff --git a/trunk/net/decnet/af_decnet.c b/trunk/net/decnet/af_decnet.c index 3456cd331835..70e027375682 100644 --- a/trunk/net/decnet/af_decnet.c +++ b/trunk/net/decnet/af_decnet.c @@ -1178,10 +1178,8 @@ static int dn_getname(struct socket *sock, struct sockaddr *uaddr,int *uaddr_len if (peer) { if ((sock->state != SS_CONNECTED && sock->state != SS_CONNECTING) && - scp->accept_mode == ACC_IMMED) { - release_sock(sk); + scp->accept_mode == ACC_IMMED) return -ENOTCONN; - } memcpy(sa, &scp->peer, sizeof(struct sockaddr_dn)); } else { diff --git a/trunk/net/decnet/dn_route.c b/trunk/net/decnet/dn_route.c index a2a43d8d93fe..dd0761e3d280 100644 --- a/trunk/net/decnet/dn_route.c +++ b/trunk/net/decnet/dn_route.c @@ -267,14 +267,9 @@ static void dn_dst_link_failure(struct sk_buff *skb) static inline int compare_keys(struct flowi *fl1, struct flowi *fl2) { - return ((fl1->nl_u.dn_u.daddr ^ fl2->nl_u.dn_u.daddr) | - (fl1->nl_u.dn_u.saddr ^ fl2->nl_u.dn_u.saddr) | -#ifdef CONFIG_IP_ROUTE_FWMARK - (fl1->nl_u.dn_u.fwmark ^ fl2->nl_u.dn_u.fwmark) | -#endif - (fl1->nl_u.dn_u.scope ^ fl2->nl_u.dn_u.scope) | - (fl1->oif ^ fl2->oif) | - (fl1->iif ^ fl2->iif)) == 0; + return memcmp(&fl1->nl_u.dn_u, &fl2->nl_u.dn_u, sizeof(fl1->nl_u.dn_u)) == 0 && + fl1->oif == fl2->oif && + fl1->iif == fl2->iif; } static int dn_insert_route(struct dn_route *rt, unsigned hash, struct dn_route **rp) diff --git a/trunk/net/ipv4/cipso_ipv4.c b/trunk/net/ipv4/cipso_ipv4.c index bde8ccaa1531..a8e2e879a647 100644 --- a/trunk/net/ipv4/cipso_ipv4.c +++ b/trunk/net/ipv4/cipso_ipv4.c @@ -43,7 +43,6 @@ #include #include #include -#include #include struct cipso_v4_domhsh_entry { @@ -80,7 +79,7 @@ struct cipso_v4_map_cache_entry { unsigned char *key; size_t key_len; - struct netlbl_lsm_cache *lsm_data; + struct netlbl_lsm_cache lsm_data; u32 activity; struct list_head list; @@ -189,14 +188,13 @@ static void cipso_v4_doi_domhsh_free(struct rcu_head *entry) * @entry: the entry to free * * Description: - * This function frees the memory associated with a cache entry including the - * LSM cache data if there are no longer any users, i.e. reference count == 0. + * This function frees the memory associated with a cache entry. * */ static void cipso_v4_cache_entry_free(struct cipso_v4_map_cache_entry *entry) { - if (entry->lsm_data) - netlbl_secattr_cache_free(entry->lsm_data); + if (entry->lsm_data.free) + entry->lsm_data.free(entry->lsm_data.data); kfree(entry->key); kfree(entry); } @@ -317,8 +315,8 @@ static int cipso_v4_cache_check(const unsigned char *key, entry->key_len == key_len && memcmp(entry->key, key, key_len) == 0) { entry->activity += 1; - atomic_inc(&entry->lsm_data->refcount); - secattr->cache = entry->lsm_data; + secattr->cache.free = entry->lsm_data.free; + secattr->cache.data = entry->lsm_data.data; if (prev_entry == NULL) { spin_unlock_bh(&cipso_v4_cache[bkt].lock); return 0; @@ -385,8 +383,8 @@ int cipso_v4_cache_add(const struct sk_buff *skb, memcpy(entry->key, cipso_ptr, cipso_ptr_len); entry->key_len = cipso_ptr_len; entry->hash = cipso_v4_map_cache_hash(cipso_ptr, cipso_ptr_len); - atomic_inc(&secattr->cache->refcount); - entry->lsm_data = secattr->cache; + entry->lsm_data.free = secattr->cache.free; + entry->lsm_data.data = secattr->cache.data; bkt = entry->hash & (CIPSO_V4_CACHE_BUCKETBITS - 1); spin_lock_bh(&cipso_v4_cache[bkt].lock); diff --git a/trunk/net/ipv4/ip_gre.c b/trunk/net/ipv4/ip_gre.c index d5b5dec075b8..f5fba051df3d 100644 --- a/trunk/net/ipv4/ip_gre.c +++ b/trunk/net/ipv4/ip_gre.c @@ -611,8 +611,8 @@ static int ipgre_rcv(struct sk_buff *skb) * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header */ if (flags == 0 && - skb->protocol == htons(ETH_P_WCCP)) { - skb->protocol = htons(ETH_P_IP); + skb->protocol == __constant_htons(ETH_P_WCCP)) { + skb->protocol = __constant_htons(ETH_P_IP); if ((*(h + offset) & 0xF0) != 0x40) offset += 4; } diff --git a/trunk/net/ipv4/route.c b/trunk/net/ipv4/route.c index 925ee4dfc32c..c41ddba02e9d 100644 --- a/trunk/net/ipv4/route.c +++ b/trunk/net/ipv4/route.c @@ -566,15 +566,9 @@ static inline u32 rt_score(struct rtable *rt) static inline int compare_keys(struct flowi *fl1, struct flowi *fl2) { - return ((fl1->nl_u.ip4_u.daddr ^ fl2->nl_u.ip4_u.daddr) | - (fl1->nl_u.ip4_u.saddr ^ fl2->nl_u.ip4_u.saddr) | -#ifdef CONFIG_IP_ROUTE_FWMARK - (fl1->nl_u.ip4_u.fwmark ^ fl2->nl_u.ip4_u.fwmark) | -#endif - (*(u16 *)&fl1->nl_u.ip4_u.tos ^ - *(u16 *)&fl2->nl_u.ip4_u.tos) | - (fl1->oif ^ fl2->oif) | - (fl1->iif ^ fl2->iif)) == 0; + return memcmp(&fl1->nl_u.ip4_u, &fl2->nl_u.ip4_u, sizeof(fl1->nl_u.ip4_u)) == 0 && + fl1->oif == fl2->oif && + fl1->iif == fl2->iif; } #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED diff --git a/trunk/net/ipv4/tcp_ipv4.c b/trunk/net/ipv4/tcp_ipv4.c index 6bbd98575172..c83938b8fcb1 100644 --- a/trunk/net/ipv4/tcp_ipv4.c +++ b/trunk/net/ipv4/tcp_ipv4.c @@ -355,7 +355,7 @@ void tcp_v4_err(struct sk_buff *skb, u32 info) return; } if (sk->sk_state == TCP_TIME_WAIT) { - inet_twsk_put(inet_twsk(sk)); + inet_twsk_put((struct inet_timewait_sock *)sk); return; } @@ -578,7 +578,7 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, struct tcphdr *th = skb->h.th; struct { struct tcphdr th; - u32 tsopt[TCPOLEN_TSTAMP_ALIGNED >> 2]; + u32 tsopt[3]; } rep; struct ip_reply_arg arg; @@ -960,7 +960,7 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) bh_lock_sock(nsk); return nsk; } - inet_twsk_put(inet_twsk(nsk)); + inet_twsk_put((struct inet_timewait_sock *)nsk); return NULL; } @@ -1154,24 +1154,26 @@ int tcp_v4_rcv(struct sk_buff *skb) do_time_wait: if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { - inet_twsk_put(inet_twsk(sk)); + inet_twsk_put((struct inet_timewait_sock *) sk); goto discard_it; } if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) { TCP_INC_STATS_BH(TCP_MIB_INERRS); - inet_twsk_put(inet_twsk(sk)); + inet_twsk_put((struct inet_timewait_sock *) sk); goto discard_it; } - switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) { + switch (tcp_timewait_state_process((struct inet_timewait_sock *)sk, + skb, th)) { case TCP_TW_SYN: { struct sock *sk2 = inet_lookup_listener(&tcp_hashinfo, skb->nh.iph->daddr, th->dest, inet_iif(skb)); if (sk2) { - inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row); - inet_twsk_put(inet_twsk(sk)); + inet_twsk_deschedule((struct inet_timewait_sock *)sk, + &tcp_death_row); + inet_twsk_put((struct inet_timewait_sock *)sk); sk = sk2; goto process; } diff --git a/trunk/net/ipv4/tcp_output.c b/trunk/net/ipv4/tcp_output.c index f22536e32cb1..9a253faefc81 100644 --- a/trunk/net/ipv4/tcp_output.c +++ b/trunk/net/ipv4/tcp_output.c @@ -273,10 +273,10 @@ static void tcp_build_and_update_options(__be32 *ptr, struct tcp_sock *tp, __u32 tstamp) { if (tp->rx_opt.tstamp_ok) { - *ptr++ = htonl((TCPOPT_NOP << 24) | - (TCPOPT_NOP << 16) | - (TCPOPT_TIMESTAMP << 8) | - TCPOLEN_TIMESTAMP); + *ptr++ = __constant_htonl((TCPOPT_NOP << 24) | + (TCPOPT_NOP << 16) | + (TCPOPT_TIMESTAMP << 8) | + TCPOLEN_TIMESTAMP); *ptr++ = htonl(tstamp); *ptr++ = htonl(tp->rx_opt.ts_recent); } @@ -325,27 +325,18 @@ static void tcp_syn_build_options(__be32 *ptr, int mss, int ts, int sack, *ptr++ = htonl((TCPOPT_MSS << 24) | (TCPOLEN_MSS << 16) | mss); if (ts) { if(sack) - *ptr++ = htonl((TCPOPT_SACK_PERM << 24) | - (TCPOLEN_SACK_PERM << 16) | - (TCPOPT_TIMESTAMP << 8) | - TCPOLEN_TIMESTAMP); + *ptr++ = __constant_htonl((TCPOPT_SACK_PERM << 24) | (TCPOLEN_SACK_PERM << 16) | + (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP); else - *ptr++ = htonl((TCPOPT_NOP << 24) | - (TCPOPT_NOP << 16) | - (TCPOPT_TIMESTAMP << 8) | - TCPOLEN_TIMESTAMP); + *ptr++ = __constant_htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | + (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP); *ptr++ = htonl(tstamp); /* TSVAL */ *ptr++ = htonl(ts_recent); /* TSECR */ } else if(sack) - *ptr++ = htonl((TCPOPT_NOP << 24) | - (TCPOPT_NOP << 16) | - (TCPOPT_SACK_PERM << 8) | - TCPOLEN_SACK_PERM); + *ptr++ = __constant_htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | + (TCPOPT_SACK_PERM << 8) | TCPOLEN_SACK_PERM); if (offer_wscale) - *ptr++ = htonl((TCPOPT_NOP << 24) | - (TCPOPT_WINDOW << 16) | - (TCPOLEN_WINDOW << 8) | - (wscale)); + *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_WINDOW << 16) | (TCPOLEN_WINDOW << 8) | (wscale)); } /* This routine actually transmits TCP packets queued in by diff --git a/trunk/net/ipv4/xfrm4_policy.c b/trunk/net/ipv4/xfrm4_policy.c index 1bed0cdf53e3..7a7a00147e55 100644 --- a/trunk/net/ipv4/xfrm4_policy.c +++ b/trunk/net/ipv4/xfrm4_policy.c @@ -52,7 +52,7 @@ __xfrm4_find_bundle(struct flowi *fl, struct xfrm_policy *policy) xdst->u.rt.fl.fl4_dst == fl->fl4_dst && xdst->u.rt.fl.fl4_src == fl->fl4_src && xdst->u.rt.fl.fl4_tos == fl->fl4_tos && - xfrm_bundle_ok(policy, xdst, fl, AF_INET, 0)) { + xfrm_bundle_ok(xdst, fl, AF_INET, 0)) { dst_clone(dst); break; } diff --git a/trunk/net/ipv6/Kconfig b/trunk/net/ipv6/Kconfig index ef5eaad44851..a460e8132b4d 100644 --- a/trunk/net/ipv6/Kconfig +++ b/trunk/net/ipv6/Kconfig @@ -153,19 +153,6 @@ config INET6_XFRM_MODE_ROUTEOPTIMIZATION ---help--- Support for MIPv6 route optimization mode. -config IPV6_SIT - tristate "IPv6: IPv6-in-IPv4 tunnel (SIT driver)" - depends on IPV6 - default y - ---help--- - Tunneling means encapsulating data of one protocol type within - another protocol and sending it over a channel that understands the - encapsulating protocol. This driver implements encapsulation of IPv6 - into IPv4 packets. This is useful if you want to connect two IPv6 - networks over an IPv4-only path. - - Saying M here will produce a module called sit.ko. If unsure, say Y. - config IPV6_TUNNEL tristate "IPv6: IPv6-in-IPv6 tunnel" select INET6_TUNNEL diff --git a/trunk/net/ipv6/Makefile b/trunk/net/ipv6/Makefile index addcc011bc01..87274e47fe32 100644 --- a/trunk/net/ipv6/Makefile +++ b/trunk/net/ipv6/Makefile @@ -4,7 +4,7 @@ obj-$(CONFIG_IPV6) += ipv6.o -ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \ +ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o sit.o \ route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o raw.o \ protocol.o icmp.o mcast.o reassembly.o tcp_ipv6.o \ exthdrs.o sysctl_net_ipv6.o datagram.o proc.o \ @@ -29,7 +29,6 @@ obj-$(CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION) += xfrm6_mode_ro.o obj-$(CONFIG_INET6_XFRM_MODE_BEET) += xfrm6_mode_beet.o obj-$(CONFIG_NETFILTER) += netfilter/ -obj-$(CONFIG_IPV6_SIT) += sit.o obj-$(CONFIG_IPV6_TUNNEL) += ip6_tunnel.o obj-y += exthdrs_core.o diff --git a/trunk/net/ipv6/addrconf.c b/trunk/net/ipv6/addrconf.c index b312a5f7a759..e03c33b2465b 100644 --- a/trunk/net/ipv6/addrconf.c +++ b/trunk/net/ipv6/addrconf.c @@ -396,10 +396,8 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev) ndev->regen_timer.data = (unsigned long) ndev; if ((dev->flags&IFF_LOOPBACK) || dev->type == ARPHRD_TUNNEL || -#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE) - dev->type == ARPHRD_SIT || -#endif - dev->type == ARPHRD_NONE) { + dev->type == ARPHRD_NONE || + dev->type == ARPHRD_SIT) { printk(KERN_INFO "%s: Disabled Privacy Extensions\n", dev->name); @@ -1548,10 +1546,8 @@ addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev, This thing is done here expecting that the whole class of non-broadcast devices need not cloning. */ -#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE) if (dev->type == ARPHRD_SIT && (dev->flags & IFF_POINTOPOINT)) cfg.fc_flags |= RTF_NONEXTHOP; -#endif ip6_route_add(&cfg); } @@ -1573,7 +1569,6 @@ static void addrconf_add_mroute(struct net_device *dev) ip6_route_add(&cfg); } -#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE) static void sit_route_add(struct net_device *dev) { struct fib6_config cfg = { @@ -1587,7 +1582,6 @@ static void sit_route_add(struct net_device *dev) /* prefix length - 96 bits "::d.d.d.d" */ ip6_route_add(&cfg); } -#endif static void addrconf_add_lroute(struct net_device *dev) { @@ -1858,7 +1852,6 @@ int addrconf_set_dstaddr(void __user *arg) if (dev == NULL) goto err_exit; -#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE) if (dev->type == ARPHRD_SIT) { struct ifreq ifr; mm_segment_t oldfs; @@ -1888,7 +1881,6 @@ int addrconf_set_dstaddr(void __user *arg) err = dev_open(dev); } } -#endif err_exit: rtnl_unlock(); @@ -2018,7 +2010,6 @@ int addrconf_del_ifaddr(void __user *arg) return err; } -#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE) static void sit_add_v4_addrs(struct inet6_dev *idev) { struct inet6_ifaddr * ifp; @@ -2087,7 +2078,6 @@ static void sit_add_v4_addrs(struct inet6_dev *idev) } } } -#endif static void init_loopback(struct net_device *dev) { @@ -2151,7 +2141,6 @@ static void addrconf_dev_config(struct net_device *dev) addrconf_add_linklocal(idev, &addr); } -#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE) static void addrconf_sit_config(struct net_device *dev) { struct inet6_dev *idev; @@ -2177,7 +2166,6 @@ static void addrconf_sit_config(struct net_device *dev) } else sit_route_add(dev); } -#endif static inline int ipv6_inherit_linklocal(struct inet6_dev *idev, struct net_device *link_dev) @@ -2272,11 +2260,9 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, } switch(dev->type) { -#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE) case ARPHRD_SIT: addrconf_sit_config(dev); break; -#endif case ARPHRD_TUNNEL6: addrconf_ip6_tnl_config(dev); break; diff --git a/trunk/net/ipv6/af_inet6.c b/trunk/net/ipv6/af_inet6.c index 858cae29581c..e94eccb99707 100644 --- a/trunk/net/ipv6/af_inet6.c +++ b/trunk/net/ipv6/af_inet6.c @@ -850,6 +850,7 @@ static int __init inet6_init(void) err = addrconf_init(); if (err) goto addrconf_fail; + sit_init(); /* Init v6 extension headers. */ ipv6_rthdr_init(); @@ -926,6 +927,7 @@ static void __exit inet6_exit(void) mip6_fini(); #endif /* Cleanup code parts. */ + sit_cleanup(); ip6_flowlabel_cleanup(); addrconf_cleanup(); ip6_route_cleanup(); diff --git a/trunk/net/ipv6/sit.c b/trunk/net/ipv6/sit.c index dc5765b62b87..836eecd7e62b 100644 --- a/trunk/net/ipv6/sit.c +++ b/trunk/net/ipv6/sit.c @@ -850,6 +850,3 @@ int __init sit_init(void) inet_del_protocol(&sit_protocol, IPPROTO_IPV6); goto out; } - -module_init(sit_init); -module_exit(sit_cleanup); diff --git a/trunk/net/ipv6/tcp_ipv6.c b/trunk/net/ipv6/tcp_ipv6.c index 4c2a7c0cafef..3b6575478fcc 100644 --- a/trunk/net/ipv6/tcp_ipv6.c +++ b/trunk/net/ipv6/tcp_ipv6.c @@ -329,7 +329,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, } if (sk->sk_state == TCP_TIME_WAIT) { - inet_twsk_put(inet_twsk(sk)); + inet_twsk_put((struct inet_timewait_sock *)sk); return; } @@ -653,7 +653,7 @@ static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 int tot_len = sizeof(struct tcphdr); if (ts) - tot_len += TCPOLEN_TSTAMP_ALIGNED; + tot_len += 3*4; buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len, GFP_ATOMIC); @@ -749,7 +749,7 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb) bh_lock_sock(nsk); return nsk; } - inet_twsk_put(inet_twsk(nsk)); + inet_twsk_put((struct inet_timewait_sock *)nsk); return NULL; } @@ -1283,17 +1283,18 @@ static int tcp_v6_rcv(struct sk_buff **pskb) do_time_wait: if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { - inet_twsk_put(inet_twsk(sk)); + inet_twsk_put((struct inet_timewait_sock *)sk); goto discard_it; } if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) { TCP_INC_STATS_BH(TCP_MIB_INERRS); - inet_twsk_put(inet_twsk(sk)); + inet_twsk_put((struct inet_timewait_sock *)sk); goto discard_it; } - switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) { + switch (tcp_timewait_state_process((struct inet_timewait_sock *)sk, + skb, th)) { case TCP_TW_SYN: { struct sock *sk2; diff --git a/trunk/net/ipv6/xfrm6_policy.c b/trunk/net/ipv6/xfrm6_policy.c index 73cee2ec07e8..6a252e2134d1 100644 --- a/trunk/net/ipv6/xfrm6_policy.c +++ b/trunk/net/ipv6/xfrm6_policy.c @@ -73,7 +73,7 @@ __xfrm6_find_bundle(struct flowi *fl, struct xfrm_policy *policy) xdst->u.rt6.rt6i_src.plen); if (ipv6_addr_equal(&xdst->u.rt6.rt6i_dst.addr, &fl_dst_prefix) && ipv6_addr_equal(&xdst->u.rt6.rt6i_src.addr, &fl_src_prefix) && - xfrm_bundle_ok(policy, xdst, fl, AF_INET6, + xfrm_bundle_ok(xdst, fl, AF_INET6, (xdst->u.rt6.rt6i_dst.plen != 128 || xdst->u.rt6.rt6i_src.plen != 128))) { dst_clone(dst); diff --git a/trunk/net/key/af_key.c b/trunk/net/key/af_key.c index 20ff7cca1d07..ff98e70b0931 100644 --- a/trunk/net/key/af_key.c +++ b/trunk/net/key/af_key.c @@ -2928,6 +2928,11 @@ static struct xfrm_policy *pfkey_compile_policy(struct sock *sk, int opt, if (*dir) goto out; } + else { + *dir = security_xfrm_sock_policy_alloc(xp, sk); + if (*dir) + goto out; + } *dir = pol->sadb_x_policy_dir-1; return xp; diff --git a/trunk/net/netlabel/netlabel_kapi.c b/trunk/net/netlabel/netlabel_kapi.c index ff971103fd0c..54fb7de3c2b1 100644 --- a/trunk/net/netlabel/netlabel_kapi.c +++ b/trunk/net/netlabel/netlabel_kapi.c @@ -200,7 +200,7 @@ void netlbl_cache_invalidate(void) int netlbl_cache_add(const struct sk_buff *skb, const struct netlbl_lsm_secattr *secattr) { - if (secattr->cache == NULL) + if (secattr->cache.data == NULL) return -ENOMSG; if (CIPSO_V4_OPTEXIST(skb)) diff --git a/trunk/net/sched/sch_htb.c b/trunk/net/sched/sch_htb.c index 9b9c555c713f..bb3ddd4784b1 100644 --- a/trunk/net/sched/sch_htb.c +++ b/trunk/net/sched/sch_htb.c @@ -786,10 +786,11 @@ static long htb_do_events(struct htb_sched *q, int level) for (i = 0; i < 500; i++) { struct htb_class *cl; long diff; - struct rb_node *p = rb_first(&q->wait_pq[level]); - + struct rb_node *p = q->wait_pq[level].rb_node; if (!p) return 0; + while (p->rb_left) + p = p->rb_left; cl = rb_entry(p, struct htb_class, pq_node); if (time_after(cl->pq_key, q->jiffies)) { diff --git a/trunk/net/sctp/proc.c b/trunk/net/sctp/proc.c index 7f49e769080e..a356d8d310a9 100644 --- a/trunk/net/sctp/proc.c +++ b/trunk/net/sctp/proc.c @@ -344,7 +344,7 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v) assoc, sk, sctp_sk(sk)->type, sk->sk_state, assoc->state, hash, assoc->assoc_id, assoc->sndbuf_used, - atomic_read(&assoc->rmem_alloc), + (sk->sk_rcvbuf - assoc->rwnd), sock_i_uid(sk), sock_i_ino(sk), epb->bind_addr.port, assoc->peer.port); diff --git a/trunk/net/sctp/socket.c b/trunk/net/sctp/socket.c index 9deec4391187..3fe906d65069 100644 --- a/trunk/net/sctp/socket.c +++ b/trunk/net/sctp/socket.c @@ -5362,20 +5362,6 @@ static void sctp_wfree(struct sk_buff *skb) sctp_association_put(asoc); } -/* Do accounting for the receive space on the socket. - * Accounting for the association is done in ulpevent.c - * We set this as a destructor for the cloned data skbs so that - * accounting is done at the correct time. - */ -void sctp_sock_rfree(struct sk_buff *skb) -{ - struct sock *sk = skb->sk; - struct sctp_ulpevent *event = sctp_skb2event(skb); - - atomic_sub(event->rmem_len, &sk->sk_rmem_alloc); -} - - /* Helper function to wait for space in the sndbuf. */ static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p, size_t msg_len) @@ -5648,10 +5634,10 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk, sctp_skb_for_each(skb, &oldsk->sk_receive_queue, tmp) { event = sctp_skb2event(skb); if (event->asoc == assoc) { - sctp_sock_rfree(skb); + sock_rfree(skb); __skb_unlink(skb, &oldsk->sk_receive_queue); __skb_queue_tail(&newsk->sk_receive_queue, skb); - sctp_skb_set_owner_r(skb, newsk); + skb_set_owner_r(skb, newsk); } } @@ -5679,10 +5665,10 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk, sctp_skb_for_each(skb, &oldsp->pd_lobby, tmp) { event = sctp_skb2event(skb); if (event->asoc == assoc) { - sctp_sock_rfree(skb); + sock_rfree(skb); __skb_unlink(skb, &oldsp->pd_lobby); __skb_queue_tail(queue, skb); - sctp_skb_set_owner_r(skb, newsk); + skb_set_owner_r(skb, newsk); } } diff --git a/trunk/net/sctp/ulpevent.c b/trunk/net/sctp/ulpevent.c index a015283a9087..ee236784a6bb 100644 --- a/trunk/net/sctp/ulpevent.c +++ b/trunk/net/sctp/ulpevent.c @@ -55,13 +55,10 @@ static void sctp_ulpevent_release_frag_data(struct sctp_ulpevent *event); /* Initialize an ULP event from an given skb. */ -SCTP_STATIC void sctp_ulpevent_init(struct sctp_ulpevent *event, - int msg_flags, - unsigned int len) +SCTP_STATIC void sctp_ulpevent_init(struct sctp_ulpevent *event, int msg_flags) { memset(event, 0, sizeof(struct sctp_ulpevent)); event->msg_flags = msg_flags; - event->rmem_len = len; } /* Create a new sctp_ulpevent. */ @@ -76,7 +73,7 @@ SCTP_STATIC struct sctp_ulpevent *sctp_ulpevent_new(int size, int msg_flags, goto fail; event = sctp_skb2event(skb); - sctp_ulpevent_init(event, msg_flags, skb->truesize); + sctp_ulpevent_init(event, msg_flags); return event; @@ -104,16 +101,17 @@ static inline void sctp_ulpevent_set_owner(struct sctp_ulpevent *event, sctp_association_hold((struct sctp_association *)asoc); skb = sctp_event2skb(event); event->asoc = (struct sctp_association *)asoc; - atomic_add(event->rmem_len, &event->asoc->rmem_alloc); - sctp_skb_set_owner_r(skb, asoc->base.sk); + atomic_add(skb->truesize, &event->asoc->rmem_alloc); + skb_set_owner_r(skb, asoc->base.sk); } /* A simple destructor to give up the reference to the association. */ static inline void sctp_ulpevent_release_owner(struct sctp_ulpevent *event) { struct sctp_association *asoc = event->asoc; + struct sk_buff *skb = sctp_event2skb(event); - atomic_sub(event->rmem_len, &asoc->rmem_alloc); + atomic_sub(skb->truesize, &asoc->rmem_alloc); sctp_association_put(asoc); } @@ -374,7 +372,7 @@ struct sctp_ulpevent *sctp_ulpevent_make_remote_error( /* Embed the event fields inside the cloned skb. */ event = sctp_skb2event(skb); - sctp_ulpevent_init(event, MSG_NOTIFICATION, skb->truesize); + sctp_ulpevent_init(event, MSG_NOTIFICATION); sre = (struct sctp_remote_error *) skb_push(skb, sizeof(struct sctp_remote_error)); @@ -466,7 +464,7 @@ struct sctp_ulpevent *sctp_ulpevent_make_send_failed( /* Embed the event fields inside the cloned skb. */ event = sctp_skb2event(skb); - sctp_ulpevent_init(event, MSG_NOTIFICATION, skb->truesize); + sctp_ulpevent_init(event, MSG_NOTIFICATION); ssf = (struct sctp_send_failed *) skb_push(skb, sizeof(struct sctp_send_failed)); @@ -684,11 +682,8 @@ struct sctp_ulpevent *sctp_ulpevent_make_rcvmsg(struct sctp_association *asoc, /* Embed the event fields inside the cloned skb. */ event = sctp_skb2event(skb); - /* Initialize event with flags 0 and correct length - * Since this is a clone of the original skb, only account for - * the data of this chunk as other chunks will be accounted separately. - */ - sctp_ulpevent_init(event, 0, skb->len + sizeof(struct sk_buff)); + /* Initialize event with flags 0. */ + sctp_ulpevent_init(event, 0); sctp_ulpevent_receive_data(event, asoc); diff --git a/trunk/net/sctp/ulpqueue.c b/trunk/net/sctp/ulpqueue.c index e1d144275f97..575e556aeb3e 100644 --- a/trunk/net/sctp/ulpqueue.c +++ b/trunk/net/sctp/ulpqueue.c @@ -309,7 +309,7 @@ static struct sctp_ulpevent *sctp_make_reassembled_event(struct sk_buff_head *qu if (!new) return NULL; /* try again later */ - sctp_skb_set_owner_r(new, f_frag->sk); + new->sk = f_frag->sk; skb_shinfo(new)->frag_list = pos; } else diff --git a/trunk/net/xfrm/xfrm_policy.c b/trunk/net/xfrm/xfrm_policy.c index 7736b23c3f03..2a7861661f14 100644 --- a/trunk/net/xfrm/xfrm_policy.c +++ b/trunk/net/xfrm/xfrm_policy.c @@ -883,32 +883,30 @@ int xfrm_policy_walk(u8 type, int (*func)(struct xfrm_policy *, int, int, void*) } EXPORT_SYMBOL(xfrm_policy_walk); -/* - * Find policy to apply to this flow. - * - * Returns 0 if policy found, else an -errno. - */ +/* Find policy to apply to this flow. */ + static int xfrm_policy_match(struct xfrm_policy *pol, struct flowi *fl, u8 type, u16 family, int dir) { struct xfrm_selector *sel = &pol->selector; - int match, ret = -ESRCH; + int match; if (pol->family != family || pol->type != type) - return ret; + return 0; match = xfrm_selector_match(sel, fl, family); - if (match) - ret = security_xfrm_policy_lookup(pol, fl->secid, dir); + if (match) { + if (!security_xfrm_policy_lookup(pol, fl->secid, dir)) + return 1; + } - return ret; + return 0; } static struct xfrm_policy *xfrm_policy_lookup_bytype(u8 type, struct flowi *fl, u16 family, u8 dir) { - int err; struct xfrm_policy *pol, *ret; xfrm_address_t *daddr, *saddr; struct hlist_node *entry; @@ -924,15 +922,7 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(u8 type, struct flowi *fl, chain = policy_hash_direct(daddr, saddr, family, dir); ret = NULL; hlist_for_each_entry(pol, entry, chain, bydst) { - err = xfrm_policy_match(pol, fl, type, family, dir); - if (err) { - if (err == -ESRCH) - continue; - else { - ret = ERR_PTR(err); - goto fail; - } - } else { + if (xfrm_policy_match(pol, fl, type, family, dir)) { ret = pol; priority = ret->priority; break; @@ -940,53 +930,36 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(u8 type, struct flowi *fl, } chain = &xfrm_policy_inexact[dir]; hlist_for_each_entry(pol, entry, chain, bydst) { - err = xfrm_policy_match(pol, fl, type, family, dir); - if (err) { - if (err == -ESRCH) - continue; - else { - ret = ERR_PTR(err); - goto fail; - } - } else if (pol->priority < priority) { + if (xfrm_policy_match(pol, fl, type, family, dir) && + pol->priority < priority) { ret = pol; break; } } if (ret) xfrm_pol_hold(ret); -fail: read_unlock_bh(&xfrm_policy_lock); return ret; } -static int xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir, +static void xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir, void **objp, atomic_t **obj_refp) { struct xfrm_policy *pol; - int err = 0; #ifdef CONFIG_XFRM_SUB_POLICY pol = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_SUB, fl, family, dir); - if (IS_ERR(pol)) { - err = PTR_ERR(pol); - pol = NULL; - } - if (pol || err) + if (pol) goto end; #endif pol = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN, fl, family, dir); - if (IS_ERR(pol)) { - err = PTR_ERR(pol); - pol = NULL; - } + #ifdef CONFIG_XFRM_SUB_POLICY end: #endif if ((*objp = (void *) pol) != NULL) *obj_refp = &pol->refcnt; - return err; } static inline int policy_to_flow_dir(int dir) @@ -1016,16 +989,12 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struc sk->sk_family); int err = 0; - if (match) { - err = security_xfrm_policy_lookup(pol, fl->secid, - policy_to_flow_dir(dir)); - if (!err) - xfrm_pol_hold(pol); - else if (err == -ESRCH) - pol = NULL; - else - pol = ERR_PTR(err); - } else + if (match) + err = security_xfrm_policy_lookup(pol, fl->secid, policy_to_flow_dir(dir)); + + if (match && !err) + xfrm_pol_hold(pol); + else pol = NULL; } read_unlock_bh(&xfrm_policy_lock); @@ -1317,11 +1286,8 @@ int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl, pol_dead = 0; xfrm_nr = 0; - if (sk && sk->sk_policy[1]) { + if (sk && sk->sk_policy[1]) policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl); - if (IS_ERR(policy)) - return PTR_ERR(policy); - } if (!policy) { /* To accelerate a bit... */ @@ -1331,8 +1297,6 @@ int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl, policy = flow_cache_lookup(fl, dst_orig->ops->family, dir, xfrm_policy_lookup); - if (IS_ERR(policy)) - return PTR_ERR(policy); } if (!policy) @@ -1379,10 +1343,6 @@ int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl, fl, family, XFRM_POLICY_OUT); if (pols[1]) { - if (IS_ERR(pols[1])) { - err = PTR_ERR(pols[1]); - goto error; - } if (pols[1]->action == XFRM_POLICY_BLOCK) { err = -EPERM; goto error; @@ -1614,19 +1574,13 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, } pol = NULL; - if (sk && sk->sk_policy[dir]) { + if (sk && sk->sk_policy[dir]) pol = xfrm_sk_policy_lookup(sk, dir, &fl); - if (IS_ERR(pol)) - return 0; - } if (!pol) pol = flow_cache_lookup(&fl, family, fl_dir, xfrm_policy_lookup); - if (IS_ERR(pol)) - return 0; - if (!pol) { if (skb->sp && secpath_has_nontransport(skb->sp, 0, &xerr_idx)) { xfrm_secpath_reject(xerr_idx, skb, &fl); @@ -1645,8 +1599,6 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, &fl, family, XFRM_POLICY_IN); if (pols[1]) { - if (IS_ERR(pols[1])) - return 0; pols[1]->curlft.use_time = (unsigned long)xtime.tv_sec; npols ++; } @@ -1754,7 +1706,7 @@ static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie) static int stale_bundle(struct dst_entry *dst) { - return !xfrm_bundle_ok(NULL, (struct xfrm_dst *)dst, NULL, AF_UNSPEC, 0); + return !xfrm_bundle_ok((struct xfrm_dst *)dst, NULL, AF_UNSPEC, 0); } void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev) @@ -1876,8 +1828,7 @@ EXPORT_SYMBOL(xfrm_init_pmtu); * still valid. */ -int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first, - struct flowi *fl, int family, int strict) +int xfrm_bundle_ok(struct xfrm_dst *first, struct flowi *fl, int family, int strict) { struct dst_entry *dst = &first->u.dst; struct xfrm_dst *last; @@ -1894,7 +1845,7 @@ int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first, if (fl && !xfrm_selector_match(&dst->xfrm->sel, fl, family)) return 0; - if (fl && !security_xfrm_flow_state_match(fl, dst->xfrm, pol)) + if (fl && !security_xfrm_flow_state_match(fl, dst->xfrm)) return 0; if (dst->xfrm->km.state != XFRM_STATE_VALID) return 0; diff --git a/trunk/net/xfrm/xfrm_user.c b/trunk/net/xfrm/xfrm_user.c index 2b2e59d8ffbc..d54b3a70d5df 100644 --- a/trunk/net/xfrm/xfrm_user.c +++ b/trunk/net/xfrm/xfrm_user.c @@ -1992,6 +1992,15 @@ static struct xfrm_policy *xfrm_compile_policy(struct sock *sk, int opt, xp->type = XFRM_POLICY_TYPE_MAIN; copy_templates(xp, ut, nr); + if (!xp->security) { + int err = security_xfrm_sock_policy_alloc(xp, sk); + if (err) { + kfree(xp); + *dir = err; + return NULL; + } + } + *dir = p->dir; return xp; diff --git a/trunk/scripts/kconfig/lxdialog/dialog.h b/trunk/scripts/kconfig/lxdialog/dialog.h index fd695e1070f7..8dea47f9d3e4 100644 --- a/trunk/scripts/kconfig/lxdialog/dialog.h +++ b/trunk/scripts/kconfig/lxdialog/dialog.h @@ -24,7 +24,6 @@ #include #include #include -#include #ifdef __sun__ #define CURS_MACROS diff --git a/trunk/scripts/kernel-doc b/trunk/scripts/kernel-doc index 187f5de4612c..00d1ad19b2cc 100755 --- a/trunk/scripts/kernel-doc +++ b/trunk/scripts/kernel-doc @@ -1262,9 +1262,7 @@ sub output_intro_text(%) { } ## -# generic output function for all types (function, struct/union, typedef, enum); -# calls the generated, variable output_ function name based on -# functype and output_mode +# generic output function for typedefs sub output_declaration { no strict 'refs'; my $name = shift; @@ -1280,7 +1278,8 @@ sub output_declaration { } ## -# generic output function - calls the right one based on current output mode. +# generic output function - calls the right one based +# on current output mode. sub output_intro { no strict 'refs'; my $func = "output_intro_".$output_mode; @@ -1519,9 +1518,6 @@ sub dump_function($$) { $prototype =~ s/^asmlinkage +//; $prototype =~ s/^inline +//; $prototype =~ s/^__inline__ +//; - $prototype =~ s/^__inline +//; - $prototype =~ s/^__always_inline +//; - $prototype =~ s/^noinline +//; $prototype =~ s/__devinit +//; $prototype =~ s/^#define +//; #ak added $prototype =~ s/__attribute__ \(\([a-z,]*\)\)//; @@ -1782,9 +1778,8 @@ sub process_file($) { $in_doc_sect = 1; $contents = $newcontents; if ($contents ne "") { - while ((substr($contents, 0, 1) eq " ") || - substr($contents, 0, 1) eq "\t") { - $contents = substr($contents, 1); + if (substr($contents, 0, 1) eq " ") { + $contents = substr($contents, 1); } $contents .= "\n"; } diff --git a/trunk/security/dummy.c b/trunk/security/dummy.c index 43874c1e6e23..aeee70565509 100644 --- a/trunk/security/dummy.c +++ b/trunk/security/dummy.c @@ -881,8 +881,7 @@ static int dummy_xfrm_state_pol_flow_match(struct xfrm_state *x, return 1; } -static int dummy_xfrm_flow_state_match(struct flowi *fl, struct xfrm_state *xfrm, - struct xfrm_policy *xp) +static int dummy_xfrm_flow_state_match(struct flowi *fl, struct xfrm_state *xfrm) { return 1; } diff --git a/trunk/security/selinux/include/xfrm.h b/trunk/security/selinux/include/xfrm.h index 526b28019aca..81eb59890162 100644 --- a/trunk/security/selinux/include/xfrm.h +++ b/trunk/security/selinux/include/xfrm.h @@ -19,8 +19,7 @@ int selinux_xfrm_state_delete(struct xfrm_state *x); int selinux_xfrm_policy_lookup(struct xfrm_policy *xp, u32 fl_secid, u8 dir); int selinux_xfrm_state_pol_flow_match(struct xfrm_state *x, struct xfrm_policy *xp, struct flowi *fl); -int selinux_xfrm_flow_state_match(struct flowi *fl, struct xfrm_state *xfrm, - struct xfrm_policy *xp); +int selinux_xfrm_flow_state_match(struct flowi *fl, struct xfrm_state *xfrm); /* diff --git a/trunk/security/selinux/ss/policydb.c b/trunk/security/selinux/ss/policydb.c index ba48961f9d05..b18895302555 100644 --- a/trunk/security/selinux/ss/policydb.c +++ b/trunk/security/selinux/ss/policydb.c @@ -618,7 +618,6 @@ void policydb_destroy(struct policydb *p) c = c->next; ocontext_destroy(ctmp,i); } - p->ocontexts[i] = NULL; } g = p->genfs; @@ -634,7 +633,6 @@ void policydb_destroy(struct policydb *p) g = g->next; kfree(gtmp); } - p->genfs = NULL; cond_policydb_destroy(p); diff --git a/trunk/security/selinux/ss/services.c b/trunk/security/selinux/ss/services.c index 18274b005090..0c219a1b3243 100644 --- a/trunk/security/selinux/ss/services.c +++ b/trunk/security/selinux/ss/services.c @@ -2172,12 +2172,7 @@ struct netlbl_cache { */ static void selinux_netlbl_cache_free(const void *data) { - struct netlbl_cache *cache; - - if (data == NULL) - return; - - cache = NETLBL_CACHE(data); + struct netlbl_cache *cache = NETLBL_CACHE(data); switch (cache->type) { case NETLBL_CACHE_T_MLS: ebitmap_destroy(&cache->data.mls_label.level[0].cat); @@ -2202,20 +2197,17 @@ static void selinux_netlbl_cache_add(struct sk_buff *skb, struct context *ctx) struct netlbl_lsm_secattr secattr; netlbl_secattr_init(&secattr); - secattr.cache = netlbl_secattr_cache_alloc(GFP_ATOMIC); - if (secattr.cache == NULL) - goto netlbl_cache_add_return; cache = kzalloc(sizeof(*cache), GFP_ATOMIC); if (cache == NULL) - goto netlbl_cache_add_return; - secattr.cache->free = selinux_netlbl_cache_free; - secattr.cache->data = (void *)cache; + goto netlbl_cache_add_failure; + secattr.cache.free = selinux_netlbl_cache_free; + secattr.cache.data = (void *)cache; cache->type = NETLBL_CACHE_T_MLS; if (ebitmap_cpy(&cache->data.mls_label.level[0].cat, &ctx->range.level[0].cat) != 0) - goto netlbl_cache_add_return; + goto netlbl_cache_add_failure; cache->data.mls_label.level[1].cat.highbit = cache->data.mls_label.level[0].cat.highbit; cache->data.mls_label.level[1].cat.node = @@ -2223,10 +2215,13 @@ static void selinux_netlbl_cache_add(struct sk_buff *skb, struct context *ctx) cache->data.mls_label.level[0].sens = ctx->range.level[0].sens; cache->data.mls_label.level[1].sens = ctx->range.level[0].sens; - netlbl_cache_add(skb, &secattr); + if (netlbl_cache_add(skb, &secattr) != 0) + goto netlbl_cache_add_failure; -netlbl_cache_add_return: - netlbl_secattr_destroy(&secattr); + return; + +netlbl_cache_add_failure: + netlbl_secattr_destroy(&secattr, 1); } /** @@ -2268,8 +2263,8 @@ static int selinux_netlbl_secattr_to_sid(struct sk_buff *skb, POLICY_RDLOCK; - if (secattr->cache) { - cache = NETLBL_CACHE(secattr->cache->data); + if (secattr->cache.data) { + cache = NETLBL_CACHE(secattr->cache.data); switch (cache->type) { case NETLBL_CACHE_T_SID: *sid = cache->data.sid; @@ -2336,7 +2331,7 @@ static int selinux_netlbl_secattr_to_sid(struct sk_buff *skb, selinux_netlbl_cache_add(skb, &ctx_new); ebitmap_destroy(&ctx_new.range.level[0].cat); } else { - *sid = SECSID_NULL; + *sid = SECINITSID_UNLABELED; rc = 0; } @@ -2374,7 +2369,7 @@ static int selinux_netlbl_skbuff_getsid(struct sk_buff *skb, &secattr, base_sid, sid); - netlbl_secattr_destroy(&secattr); + netlbl_secattr_destroy(&secattr, 0); return rc; } @@ -2420,7 +2415,7 @@ static int selinux_netlbl_socket_setsid(struct socket *sock, u32 sid) if (rc == 0) sksec->nlbl_state = NLBL_LABELED; - netlbl_secattr_destroy(&secattr); + netlbl_secattr_destroy(&secattr, 0); netlbl_socket_setsid_return: POLICY_RDUNLOCK; @@ -2519,10 +2514,10 @@ void selinux_netlbl_sock_graft(struct sock *sk, struct socket *sock) if (netlbl_sock_getattr(sk, &secattr) == 0 && selinux_netlbl_secattr_to_sid(NULL, &secattr, - SECINITSID_UNLABELED, + sksec->sid, &nlbl_peer_sid) == 0) sksec->peer_sid = nlbl_peer_sid; - netlbl_secattr_destroy(&secattr); + netlbl_secattr_destroy(&secattr, 0); sksec->nlbl_state = NLBL_REQUIRE; @@ -2552,6 +2547,9 @@ u32 selinux_netlbl_inet_conn_request(struct sk_buff *skb, u32 sock_sid) if (rc != 0) return SECSID_NULL; + if (peer_sid == SECINITSID_UNLABELED) + return SECSID_NULL; + return peer_sid; } @@ -2613,13 +2611,11 @@ int selinux_netlbl_sock_rcv_skb(struct sk_security_struct *sksec, u32 netlbl_sid; u32 recv_perm; - rc = selinux_netlbl_skbuff_getsid(skb, - SECINITSID_UNLABELED, - &netlbl_sid); + rc = selinux_netlbl_skbuff_getsid(skb, SECINITSID_NETMSG, &netlbl_sid); if (rc != 0) return rc; - if (netlbl_sid == SECSID_NULL) + if (netlbl_sid == SECINITSID_UNLABELED) return 0; switch (sksec->sclass) { @@ -2657,6 +2653,10 @@ int selinux_netlbl_sock_rcv_skb(struct sk_security_struct *sksec, u32 selinux_netlbl_socket_getpeersec_stream(struct socket *sock) { struct sk_security_struct *sksec = sock->sk->sk_security; + + if (sksec->peer_sid == SECINITSID_UNLABELED) + return SECSID_NULL; + return sksec->peer_sid; } @@ -2672,10 +2672,16 @@ u32 selinux_netlbl_socket_getpeersec_stream(struct socket *sock) u32 selinux_netlbl_socket_getpeersec_dgram(struct sk_buff *skb) { int peer_sid; + struct sock *sk = skb->sk; + struct inode_security_struct *isec; - if (selinux_netlbl_skbuff_getsid(skb, - SECINITSID_UNLABELED, - &peer_sid) != 0) + if (sk == NULL || sk->sk_socket == NULL) + return SECSID_NULL; + + isec = SOCK_INODE(sk->sk_socket)->i_security; + if (selinux_netlbl_skbuff_getsid(skb, isec->sid, &peer_sid) != 0) + return SECSID_NULL; + if (peer_sid == SECINITSID_UNLABELED) return SECSID_NULL; return peer_sid; diff --git a/trunk/security/selinux/xfrm.c b/trunk/security/selinux/xfrm.c index 675b995a67c3..3e742b850af6 100644 --- a/trunk/security/selinux/xfrm.c +++ b/trunk/security/selinux/xfrm.c @@ -77,8 +77,8 @@ static inline int selinux_authorizable_xfrm(struct xfrm_state *x) */ int selinux_xfrm_policy_lookup(struct xfrm_policy *xp, u32 fl_secid, u8 dir) { - int rc; - u32 sel_sid; + int rc = 0; + u32 sel_sid = SECINITSID_UNLABELED; struct xfrm_sec_ctx *ctx; /* Context sid is either set to label or ANY_ASSOC */ @@ -88,21 +88,11 @@ int selinux_xfrm_policy_lookup(struct xfrm_policy *xp, u32 fl_secid, u8 dir) sel_sid = ctx->ctx_sid; } - else - /* - * All flows should be treated as polmatch'ing an - * otherwise applicable "non-labeled" policy. This - * would prevent inadvertent "leaks". - */ - return 0; rc = avc_has_perm(fl_secid, sel_sid, SECCLASS_ASSOCIATION, ASSOCIATION__POLMATCH, NULL); - if (rc == -EACCES) - rc = -ESRCH; - return rc; } @@ -118,20 +108,15 @@ int selinux_xfrm_state_pol_flow_match(struct xfrm_state *x, struct xfrm_policy * u32 pol_sid; int err; - if (xp->security) { - if (!x->security) - /* unlabeled SA and labeled policy can't match */ - return 0; - else - state_sid = x->security->ctx_sid; + if (x->security) + state_sid = x->security->ctx_sid; + else + state_sid = SECINITSID_UNLABELED; + + if (xp->security) pol_sid = xp->security->ctx_sid; - } else - if (x->security) - /* unlabeled policy and labeled SA can't match */ - return 0; - else - /* unlabeled policy and unlabeled SA match all flows */ - return 1; + else + pol_sid = SECINITSID_UNLABELED; err = avc_has_perm(state_sid, pol_sid, SECCLASS_ASSOCIATION, ASSOCIATION__POLMATCH, @@ -140,11 +125,7 @@ int selinux_xfrm_state_pol_flow_match(struct xfrm_state *x, struct xfrm_policy * if (err) return 0; - err = avc_has_perm(fl->secid, state_sid, SECCLASS_ASSOCIATION, - ASSOCIATION__SENDTO, - NULL)? 0:1; - - return err; + return selinux_xfrm_flow_state_match(fl, x); } /* @@ -152,22 +133,12 @@ int selinux_xfrm_state_pol_flow_match(struct xfrm_state *x, struct xfrm_policy * * can use a given security association. */ -int selinux_xfrm_flow_state_match(struct flowi *fl, struct xfrm_state *xfrm, - struct xfrm_policy *xp) +int selinux_xfrm_flow_state_match(struct flowi *fl, struct xfrm_state *xfrm) { int rc = 0; u32 sel_sid = SECINITSID_UNLABELED; struct xfrm_sec_ctx *ctx; - if (!xp->security) - if (!xfrm->security) - return 1; - else - return 0; - else - if (!xfrm->security) - return 0; - /* Context sid is either set to label or ANY_ASSOC */ if ((ctx = xfrm->security)) { if (!selinux_authorizable_ctx(ctx))