From 1e64f60c06dd235aa8cf6def067cfba5d0128ed7 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 11 Oct 2008 12:18:04 -0700 Subject: [PATCH] --- yaml --- r: 112974 b: refs/heads/master c: f901b64472fdabc72eca2b9426fa4e96972b64c4 h: refs/heads/master v: v3 --- [refs] | 2 +- trunk/Documentation/filesystems/ext4.txt | 14 +- trunk/Documentation/filesystems/fiemap.txt | 228 --- trunk/Documentation/filesystems/proc.txt | 73 +- trunk/Documentation/kernel-parameters.txt | 6 - trunk/MAINTAINERS | 5 +- trunk/arch/um/sys-x86_64/syscall_table.c | 4 +- trunk/arch/x86/Kconfig | 8 - trunk/arch/x86/Kconfig.cpu | 54 - trunk/arch/x86/boot/cpu.c | 17 +- trunk/arch/x86/boot/mkcpustr.c | 40 +- trunk/arch/x86/ia32/ia32_signal.c | 51 +- trunk/arch/x86/kernel/Makefile | 5 +- trunk/arch/x86/kernel/acpi/boot.c | 8 +- trunk/arch/x86/kernel/apic_32.c | 437 +++-- trunk/arch/x86/kernel/apic_64.c | 626 +------ trunk/arch/x86/kernel/cpu/Makefile | 34 +- .../x86/kernel/cpu/addon_cpuid_features.c | 88 - trunk/arch/x86/kernel/cpu/amd.c | 548 +++---- trunk/arch/x86/kernel/cpu/amd_64.c | 224 +++ trunk/arch/x86/kernel/cpu/centaur.c | 4 +- trunk/arch/x86/kernel/cpu/centaur_64.c | 6 +- trunk/arch/x86/kernel/cpu/cmpxchg.c | 72 - trunk/arch/x86/kernel/cpu/common.c | 973 ++++------- trunk/arch/x86/kernel/cpu/common_64.c | 763 +++++++++ trunk/arch/x86/kernel/cpu/cpu.h | 19 +- trunk/arch/x86/kernel/cpu/cyrix.c | 23 +- trunk/arch/x86/kernel/cpu/feature_names.c | 84 + trunk/arch/x86/kernel/cpu/intel.c | 364 ++-- trunk/arch/x86/kernel/cpu/intel_64.c | 95 ++ trunk/arch/x86/kernel/cpu/intel_cacheinfo.c | 169 +- trunk/arch/x86/kernel/cpu/mcheck/mce_64.c | 2 +- trunk/arch/x86/kernel/cpu/mkcapflags.pl | 32 - trunk/arch/x86/kernel/cpu/powerflags.c | 20 - trunk/arch/x86/kernel/cpu/transmeta.c | 32 +- trunk/arch/x86/kernel/cpu/umc.c | 3 +- trunk/arch/x86/kernel/e820.c | 28 +- trunk/arch/x86/kernel/genapic_64.c | 88 +- trunk/arch/x86/kernel/genapic_flat_64.c | 62 +- trunk/arch/x86/kernel/genx2apic_cluster.c | 159 -- trunk/arch/x86/kernel/genx2apic_phys.c | 154 -- trunk/arch/x86/kernel/genx2apic_uv_x.c | 70 +- trunk/arch/x86/kernel/i387.c | 154 +- trunk/arch/x86/kernel/i8259.c | 24 - trunk/arch/x86/kernel/io_apic_32.c | 47 +- trunk/arch/x86/kernel/io_apic_64.c | 639 +------- trunk/arch/x86/kernel/irqinit_32.c | 49 - trunk/arch/x86/kernel/mpparse.c | 2 - trunk/arch/x86/kernel/numaq_32.c | 7 - trunk/arch/x86/kernel/paravirt.c | 2 + trunk/arch/x86/kernel/process.c | 1 + trunk/arch/x86/kernel/setup.c | 2 - trunk/arch/x86/kernel/sigframe.h | 14 +- trunk/arch/x86/kernel/signal_32.c | 45 +- trunk/arch/x86/kernel/signal_64.c | 95 +- trunk/arch/x86/kernel/smpboot.c | 38 +- trunk/arch/x86/kernel/summit_32.c | 2 +- trunk/arch/x86/kernel/traps_32.c | 1 + trunk/arch/x86/kernel/traps_64.c | 6 +- trunk/arch/x86/kernel/vmi_32.c | 4 +- trunk/arch/x86/kernel/vmlinux_32.lds.S | 9 +- trunk/arch/x86/kernel/vmlinux_64.lds.S | 9 +- trunk/arch/x86/kernel/xsave.c | 316 ---- trunk/arch/x86/kvm/vmx.h | 15 + trunk/arch/x86/lguest/boot.c | 38 +- trunk/arch/x86/lib/Makefile | 3 + trunk/arch/x86/lib/usercopy_32.c | 7 - trunk/arch/x86/mach-default/setup.c | 15 + trunk/arch/x86/mach-es7000/Makefile | 5 + trunk/arch/x86/mach-es7000/es7000.h | 114 ++ .../es7000_32.c => mach-es7000/es7000plat.c} | 87 +- trunk/arch/x86/mach-generic/Makefile | 1 + trunk/arch/x86/mach-generic/bigsmp.c | 9 +- trunk/arch/x86/mach-generic/es7000.c | 13 +- trunk/arch/x86/mach-generic/numaq.c | 12 +- trunk/arch/x86/mach-generic/summit.c | 11 +- trunk/arch/x86/mm/init_64.c | 56 - trunk/arch/x86/pci/acpi.c | 5 + trunk/arch/x86/pci/i386.c | 3 - trunk/arch/x86/pci/mmconfig-shared.c | 12 +- trunk/arch/x86/power/cpu_32.c | 7 - trunk/arch/x86/power/cpu_64.c | 7 - trunk/arch/x86/xen/enlighten.c | 45 +- trunk/drivers/acpi/glue.c | 3 +- trunk/drivers/ata/libata-scsi.c | 2 +- trunk/drivers/block/hd.c | 9 +- trunk/drivers/ide/Kconfig | 23 + trunk/drivers/ide/Makefile | 7 +- trunk/drivers/ide/arm/icside.c | 5 +- trunk/drivers/ide/arm/palm_bk3710.c | 8 +- trunk/drivers/ide/ide-acpi.c | 6 +- trunk/drivers/ide/ide-atapi.c | 236 +-- trunk/drivers/ide/ide-cd.c | 72 +- trunk/drivers/ide/ide-disk.c | 340 ++-- trunk/drivers/ide/ide-dma.c | 52 +- trunk/drivers/ide/ide-floppy.c | 668 ++++++-- trunk/drivers/ide/ide-floppy.h | 63 - trunk/drivers/ide/ide-floppy_ioctl.c | 243 --- trunk/drivers/ide/ide-generic.c | 55 +- trunk/drivers/ide/ide-io.c | 117 +- trunk/drivers/ide/ide-ioctls.c | 290 ---- trunk/drivers/ide/ide-iops.c | 225 ++- trunk/drivers/ide/ide-lib.c | 73 +- trunk/drivers/ide/ide-probe.c | 252 +-- trunk/drivers/ide/ide-proc.c | 306 ++-- trunk/drivers/ide/ide-tape.c | 478 ++++-- trunk/drivers/ide/ide-taskfile.c | 156 +- trunk/drivers/ide/ide-timings.c | 22 +- trunk/drivers/ide/ide.c | 238 ++- trunk/drivers/ide/legacy/ali14xx.c | 1 + trunk/drivers/ide/legacy/buddha.c | 1 + trunk/drivers/ide/legacy/dtc2278.c | 1 + trunk/drivers/ide/legacy/falconide.c | 1 + trunk/drivers/ide/legacy/gayle.c | 1 + trunk/drivers/ide/legacy/ht6560b.c | 1 + trunk/drivers/ide/legacy/ide-cs.c | 1 + trunk/drivers/ide/legacy/macide.c | 1 + trunk/drivers/ide/legacy/q40ide.c | 2 + trunk/drivers/ide/legacy/qd65xx.c | 23 +- trunk/drivers/ide/legacy/umc8672.c | 1 + trunk/drivers/ide/pci/aec62xx.c | 5 +- trunk/drivers/ide/pci/alim15x3.c | 9 +- trunk/drivers/ide/pci/amd74xx.c | 8 +- trunk/drivers/ide/pci/atiixp.c | 3 +- trunk/drivers/ide/pci/cmd640.c | 43 +- trunk/drivers/ide/pci/cmd64x.c | 5 +- trunk/drivers/ide/pci/cs5520.c | 3 +- trunk/drivers/ide/pci/cs5530.c | 19 +- trunk/drivers/ide/pci/cs5535.c | 14 +- trunk/drivers/ide/pci/cy82c693.c | 4 +- trunk/drivers/ide/pci/delkin_cb.c | 1 + trunk/drivers/ide/pci/generic.c | 3 +- trunk/drivers/ide/pci/hpt34x.c | 5 +- trunk/drivers/ide/pci/hpt366.c | 77 +- trunk/drivers/ide/pci/it8213.c | 3 +- trunk/drivers/ide/pci/it821x.c | 58 +- trunk/drivers/ide/pci/jmicron.c | 3 +- trunk/drivers/ide/pci/ns87415.c | 9 +- trunk/drivers/ide/pci/opti621.c | 7 +- trunk/drivers/ide/pci/pdc202xx_new.c | 15 +- trunk/drivers/ide/pci/pdc202xx_old.c | 11 +- trunk/drivers/ide/pci/piix.c | 5 +- trunk/drivers/ide/pci/rz1000.c | 1 + trunk/drivers/ide/pci/sc1200.c | 15 +- trunk/drivers/ide/pci/scc_pata.c | 5 +- trunk/drivers/ide/pci/serverworks.c | 9 +- trunk/drivers/ide/pci/sgiioc4.c | 1 + trunk/drivers/ide/pci/siimage.c | 17 +- trunk/drivers/ide/pci/sis5513.c | 5 +- trunk/drivers/ide/pci/sl82c105.c | 7 +- trunk/drivers/ide/pci/slc90e66.c | 3 +- trunk/drivers/ide/pci/triflex.c | 3 +- trunk/drivers/ide/pci/trm290.c | 1 + trunk/drivers/ide/pci/via82cxxx.c | 10 +- trunk/drivers/ide/ppc/pmac.c | 6 +- trunk/drivers/ide/setup-pci.c | 33 - trunk/drivers/pci/Makefile | 2 - trunk/drivers/pci/dma_remapping.h | 157 -- trunk/drivers/pci/dmar.c | 397 +---- trunk/drivers/pci/intel-iommu.c | 185 ++- trunk/drivers/pci/intel-iommu.h | 233 +-- trunk/drivers/pci/intr_remapping.c | 471 ------ trunk/drivers/pci/intr_remapping.h | 8 - trunk/drivers/scsi/ide-scsi.c | 120 +- trunk/fs/Kconfig | 88 +- trunk/fs/Makefile | 2 +- trunk/fs/ext2/ext2.h | 2 - trunk/fs/ext2/file.c | 1 - trunk/fs/ext2/inode.c | 8 - trunk/fs/ext3/file.c | 1 - trunk/fs/ext3/inode.c | 8 - trunk/fs/ext4/Makefile | 10 +- trunk/fs/ext4/acl.h | 12 +- trunk/fs/ext4/balloc.c | 1457 ++++++++++++++++- trunk/fs/ext4/bitmap.c | 6 +- trunk/fs/ext4/dir.c | 64 +- trunk/fs/ext4/ext4.h | 131 +- trunk/fs/ext4/ext4_extents.h | 15 - trunk/fs/ext4/ext4_i.h | 39 +- trunk/fs/ext4/ext4_sb.h | 25 +- trunk/fs/ext4/extents.c | 281 +--- trunk/fs/ext4/file.c | 10 +- trunk/fs/ext4/fsync.c | 7 +- trunk/fs/ext4/hash.c | 8 +- trunk/fs/ext4/ialloc.c | 71 +- trunk/fs/ext4/inode.c | 620 +++---- trunk/fs/ext4/ioctl.c | 84 +- trunk/fs/ext4/mballoc.c | 220 ++- trunk/fs/ext4/mballoc.h | 1 + trunk/fs/ext4/migrate.c | 10 +- trunk/fs/ext4/namei.c | 402 +++-- trunk/fs/ext4/resize.c | 33 +- trunk/fs/ext4/super.c | 274 +--- trunk/fs/ext4/symlink.c | 8 +- trunk/fs/ext4/xattr.c | 14 +- trunk/fs/ext4/xattr.h | 12 +- trunk/fs/ioctl.c | 273 --- trunk/fs/jbd2/checkpoint.c | 22 +- trunk/fs/jbd2/commit.c | 22 +- trunk/fs/jbd2/journal.c | 75 +- trunk/fs/ocfs2/alloc.c | 9 + trunk/fs/ocfs2/alloc.h | 9 - trunk/fs/ocfs2/extent_map.c | 346 +--- trunk/fs/ocfs2/extent_map.h | 3 - trunk/fs/ocfs2/file.c | 1 - trunk/include/asm-generic/vmlinux.lds.h | 1 - trunk/include/asm-x86/apic.h | 65 +- trunk/include/asm-x86/apicdef.h | 3 - trunk/include/asm-x86/arch_hooks.h | 2 + trunk/include/asm-x86/bigsmp/apicdef.h | 13 - trunk/include/asm-x86/bugs.h | 5 - trunk/include/asm-x86/cpufeature.h | 118 +- trunk/include/asm-x86/e820.h | 2 - trunk/include/asm-x86/es7000/apicdef.h | 13 - trunk/include/asm-x86/genapic_64.h | 8 - trunk/include/asm-x86/hw_irq.h | 3 +- trunk/include/asm-x86/i387.h | 84 +- trunk/include/asm-x86/i8259.h | 3 - trunk/include/asm-x86/io_apic.h | 20 - trunk/include/asm-x86/ipi.h | 16 +- trunk/include/asm-x86/irq_remapping.h | 8 - .../apic.h => mach-bigsmp/mach_apic.h} | 10 +- .../asm-x86/mach-bigsmp/mach_apicdef.h | 13 + .../{bigsmp/ipi.h => mach-bigsmp/mach_ipi.h} | 6 +- .../include/asm-x86/mach-default/mach_apic.h | 4 +- .../asm-x86/mach-default/mach_apicdef.h | 6 +- .../apic.h => mach-es7000/mach_apic.h} | 32 +- .../asm-x86/mach-es7000/mach_apicdef.h | 13 + .../{es7000/ipi.h => mach-es7000/mach_ipi.h} | 6 +- .../mpparse.h => mach-es7000/mach_mpparse.h} | 6 +- .../wakecpu.h => mach-es7000/mach_wakecpu.h} | 8 +- .../{numaq/apic.h => mach-numaq/mach_apic.h} | 6 +- .../apicdef.h => mach-numaq/mach_apicdef.h} | 6 +- .../{numaq/ipi.h => mach-numaq/mach_ipi.h} | 6 +- .../include/asm-x86/mach-numaq/mach_mpparse.h | 7 + .../wakecpu.h => mach-numaq/mach_wakecpu.h} | 6 +- .../irq_vectors_limits.h | 6 +- .../apic.h => mach-summit/mach_apic.h} | 24 +- .../asm-x86/mach-summit/mach_apicdef.h | 13 + .../{summit/ipi.h => mach-summit/mach_ipi.h} | 6 +- .../mpparse.h => mach-summit/mach_mpparse.h} | 13 +- trunk/include/asm-x86/mpspec.h | 3 +- trunk/include/asm-x86/msidef.h | 4 - trunk/include/asm-x86/msr-index.h | 16 - trunk/include/asm-x86/numaq/mpparse.h | 7 - trunk/include/asm-x86/paravirt.h | 19 + trunk/include/asm-x86/processor-cyrix.h | 8 - trunk/include/asm-x86/processor-flags.h | 1 - trunk/include/asm-x86/processor.h | 27 +- trunk/include/asm-x86/setup.h | 1 - trunk/include/asm-x86/sigcontext.h | 87 +- trunk/include/asm-x86/sigcontext32.h | 6 +- trunk/include/asm-x86/smp.h | 17 +- trunk/include/asm-x86/summit/apicdef.h | 13 - trunk/include/asm-x86/thread_info.h | 1 - trunk/include/asm-x86/ucontext.h | 6 - trunk/include/asm-x86/xcr.h | 49 - trunk/include/asm-x86/xsave.h | 118 -- trunk/include/linux/ata.h | 112 +- trunk/include/linux/dmar.h | 127 +- trunk/include/linux/ext3_fs.h | 2 - trunk/include/linux/fiemap.h | 64 - trunk/include/linux/fs.h | 21 - trunk/include/linux/ide.h | 316 ++-- trunk/include/linux/ioport.h | 3 - trunk/include/linux/irq.h | 1 - trunk/include/linux/jbd2.h | 3 +- trunk/include/linux/percpu.h | 7 - trunk/include/linux/percpu_counter.h | 12 +- trunk/kernel/irq/manage.c | 9 +- trunk/kernel/resource.c | 68 - trunk/lib/percpu_counter.c | 8 +- trunk/net/netfilter/ipvs/Kconfig | 4 +- 273 files changed, 8391 insertions(+), 11601 deletions(-) delete mode 100644 trunk/Documentation/filesystems/fiemap.txt create mode 100644 trunk/arch/x86/kernel/cpu/amd_64.c delete mode 100644 trunk/arch/x86/kernel/cpu/cmpxchg.c create mode 100644 trunk/arch/x86/kernel/cpu/common_64.c create mode 100644 trunk/arch/x86/kernel/cpu/feature_names.c create mode 100644 trunk/arch/x86/kernel/cpu/intel_64.c delete mode 100644 trunk/arch/x86/kernel/cpu/mkcapflags.pl delete mode 100644 trunk/arch/x86/kernel/cpu/powerflags.c delete mode 100644 trunk/arch/x86/kernel/genx2apic_cluster.c delete mode 100644 trunk/arch/x86/kernel/genx2apic_phys.c delete mode 100644 trunk/arch/x86/kernel/xsave.c create mode 100644 trunk/arch/x86/mach-es7000/Makefile create mode 100644 trunk/arch/x86/mach-es7000/es7000.h rename trunk/arch/x86/{kernel/es7000_32.c => mach-es7000/es7000plat.c} (78%) delete mode 100644 trunk/drivers/ide/ide-floppy.h delete mode 100644 trunk/drivers/ide/ide-floppy_ioctl.c delete mode 100644 trunk/drivers/ide/ide-ioctls.c delete mode 100644 trunk/drivers/pci/dma_remapping.h delete mode 100644 trunk/drivers/pci/intr_remapping.c delete mode 100644 trunk/drivers/pci/intr_remapping.h delete mode 100644 trunk/include/asm-x86/bigsmp/apicdef.h delete mode 100644 trunk/include/asm-x86/es7000/apicdef.h delete mode 100644 trunk/include/asm-x86/irq_remapping.h rename trunk/include/asm-x86/{bigsmp/apic.h => mach-bigsmp/mach_apic.h} (94%) create mode 100644 trunk/include/asm-x86/mach-bigsmp/mach_apicdef.h rename trunk/include/asm-x86/{bigsmp/ipi.h => mach-bigsmp/mach_ipi.h} (77%) rename trunk/include/asm-x86/{es7000/apic.h => mach-es7000/mach_apic.h} (91%) create mode 100644 trunk/include/asm-x86/mach-es7000/mach_apicdef.h rename trunk/include/asm-x86/{es7000/ipi.h => mach-es7000/mach_ipi.h} (77%) rename trunk/include/asm-x86/{es7000/mpparse.h => mach-es7000/mach_mpparse.h} (81%) rename trunk/include/asm-x86/{es7000/wakecpu.h => mach-es7000/mach_wakecpu.h} (89%) rename trunk/include/asm-x86/{numaq/apic.h => mach-numaq/mach_apic.h} (96%) rename trunk/include/asm-x86/{numaq/apicdef.h => mach-numaq/mach_apicdef.h} (55%) rename trunk/include/asm-x86/{numaq/ipi.h => mach-numaq/mach_ipi.h} (77%) create mode 100644 trunk/include/asm-x86/mach-numaq/mach_mpparse.h rename trunk/include/asm-x86/{numaq/wakecpu.h => mach-numaq/mach_wakecpu.h} (88%) rename trunk/include/asm-x86/{summit => mach-summit}/irq_vectors_limits.h (65%) rename trunk/include/asm-x86/{summit/apic.h => mach-summit/mach_apic.h} (93%) create mode 100644 trunk/include/asm-x86/mach-summit/mach_apicdef.h rename trunk/include/asm-x86/{summit/ipi.h => mach-summit/mach_ipi.h} (77%) rename trunk/include/asm-x86/{summit/mpparse.h => mach-summit/mach_mpparse.h} (95%) delete mode 100644 trunk/include/asm-x86/numaq/mpparse.h delete mode 100644 trunk/include/asm-x86/summit/apicdef.h delete mode 100644 trunk/include/asm-x86/xcr.h delete mode 100644 trunk/include/asm-x86/xsave.h delete mode 100644 trunk/include/linux/fiemap.h diff --git a/[refs] b/[refs] index 16e214e57d36..aecd10d72539 100644 --- a/[refs] +++ b/[refs] @@ -1,2 +1,2 @@ --- -refs/heads/master: 1efd325fbadc02c1338e0ef676f0a6669b251c7a +refs/heads/master: f901b64472fdabc72eca2b9426fa4e96972b64c4 diff --git a/trunk/Documentation/filesystems/ext4.txt b/trunk/Documentation/filesystems/ext4.txt index 74484e696405..0d5394920a31 100644 --- a/trunk/Documentation/filesystems/ext4.txt +++ b/trunk/Documentation/filesystems/ext4.txt @@ -32,9 +32,9 @@ Mailing list: linux-ext4@vger.kernel.org you will need to merge your changes with the version from e2fsprogs 1.41.x. - - Create a new filesystem using the ext4 filesystem type: + - Create a new filesystem using the ext4dev filesystem type: - # mke2fs -t ext4 /dev/hda1 + # mke2fs -t ext4dev /dev/hda1 Or configure an existing ext3 filesystem to support extents and set the test_fs flag to indicate that it's ok for an in-development @@ -47,13 +47,13 @@ Mailing list: linux-ext4@vger.kernel.org # tune2fs -I 256 /dev/hda1 - (Note: we currently do not have tools to convert an ext4 + (Note: we currently do not have tools to convert an ext4dev filesystem back to ext3; so please do not do try this on production filesystems.) - Mounting: - # mount -t ext4 /dev/hda1 /wherever + # mount -t ext4dev /dev/hda1 /wherever - When comparing performance with other filesystems, remember that ext3/4 by default offers higher data integrity guarantees than most. @@ -177,11 +177,6 @@ barrier=<0|1(*)> This enables/disables the use of write barriers in your disks are battery-backed in one way or another, disabling barriers may safely improve performance. -inode_readahead=n This tuning parameter controls the maximum - number of inode table blocks that ext4's inode - table readahead algorithm will pre-read into - the buffer cache. The default value is 32 blocks. - orlov (*) This enables the new Orlov block allocator. It is enabled by default. @@ -257,7 +252,6 @@ stripe=n Number of filesystem blocks that mballoc will try delalloc (*) Deferring block allocation until write-out time. nodelalloc Disable delayed allocation. Blocks are allocation when data is copied from user to page cache. - Data Mode ========= There are 3 different data modes: diff --git a/trunk/Documentation/filesystems/fiemap.txt b/trunk/Documentation/filesystems/fiemap.txt deleted file mode 100644 index 1e3defcfe50b..000000000000 --- a/trunk/Documentation/filesystems/fiemap.txt +++ /dev/null @@ -1,228 +0,0 @@ -============ -Fiemap Ioctl -============ - -The fiemap ioctl is an efficient method for userspace to get file -extent mappings. Instead of block-by-block mapping (such as bmap), fiemap -returns a list of extents. - - -Request Basics --------------- - -A fiemap request is encoded within struct fiemap: - -struct fiemap { - __u64 fm_start; /* logical offset (inclusive) at - * which to start mapping (in) */ - __u64 fm_length; /* logical length of mapping which - * userspace cares about (in) */ - __u32 fm_flags; /* FIEMAP_FLAG_* flags for request (in/out) */ - __u32 fm_mapped_extents; /* number of extents that were - * mapped (out) */ - __u32 fm_extent_count; /* size of fm_extents array (in) */ - __u32 fm_reserved; - struct fiemap_extent fm_extents[0]; /* array of mapped extents (out) */ -}; - - -fm_start, and fm_length specify the logical range within the file -which the process would like mappings for. Extents returned mirror -those on disk - that is, the logical offset of the 1st returned extent -may start before fm_start, and the range covered by the last returned -extent may end after fm_length. All offsets and lengths are in bytes. - -Certain flags to modify the way in which mappings are looked up can be -set in fm_flags. If the kernel doesn't understand some particular -flags, it will return EBADR and the contents of fm_flags will contain -the set of flags which caused the error. If the kernel is compatible -with all flags passed, the contents of fm_flags will be unmodified. -It is up to userspace to determine whether rejection of a particular -flag is fatal to it's operation. This scheme is intended to allow the -fiemap interface to grow in the future but without losing -compatibility with old software. - -fm_extent_count specifies the number of elements in the fm_extents[] array -that can be used to return extents. If fm_extent_count is zero, then the -fm_extents[] array is ignored (no extents will be returned), and the -fm_mapped_extents count will hold the number of extents needed in -fm_extents[] to hold the file's current mapping. Note that there is -nothing to prevent the file from changing between calls to FIEMAP. - -The following flags can be set in fm_flags: - -* FIEMAP_FLAG_SYNC -If this flag is set, the kernel will sync the file before mapping extents. - -* FIEMAP_FLAG_XATTR -If this flag is set, the extents returned will describe the inodes -extended attribute lookup tree, instead of it's data tree. - - -Extent Mapping --------------- - -Extent information is returned within the embedded fm_extents array -which userspace must allocate along with the fiemap structure. The -number of elements in the fiemap_extents[] array should be passed via -fm_extent_count. The number of extents mapped by kernel will be -returned via fm_mapped_extents. If the number of fiemap_extents -allocated is less than would be required to map the requested range, -the maximum number of extents that can be mapped in the fm_extent[] -array will be returned and fm_mapped_extents will be equal to -fm_extent_count. In that case, the last extent in the array will not -complete the requested range and will not have the FIEMAP_EXTENT_LAST -flag set (see the next section on extent flags). - -Each extent is described by a single fiemap_extent structure as -returned in fm_extents. - -struct fiemap_extent { - __u64 fe_logical; /* logical offset in bytes for the start of - * the extent */ - __u64 fe_physical; /* physical offset in bytes for the start - * of the extent */ - __u64 fe_length; /* length in bytes for the extent */ - __u64 fe_reserved64[2]; - __u32 fe_flags; /* FIEMAP_EXTENT_* flags for this extent */ - __u32 fe_reserved[3]; -}; - -All offsets and lengths are in bytes and mirror those on disk. It is valid -for an extents logical offset to start before the request or it's logical -length to extend past the request. Unless FIEMAP_EXTENT_NOT_ALIGNED is -returned, fe_logical, fe_physical, and fe_length will be aligned to the -block size of the file system. With the exception of extents flagged as -FIEMAP_EXTENT_MERGED, adjacent extents will not be merged. - -The fe_flags field contains flags which describe the extent returned. -A special flag, FIEMAP_EXTENT_LAST is always set on the last extent in -the file so that the process making fiemap calls can determine when no -more extents are available, without having to call the ioctl again. - -Some flags are intentionally vague and will always be set in the -presence of other more specific flags. This way a program looking for -a general property does not have to know all existing and future flags -which imply that property. - -For example, if FIEMAP_EXTENT_DATA_INLINE or FIEMAP_EXTENT_DATA_TAIL -are set, FIEMAP_EXTENT_NOT_ALIGNED will also be set. A program looking -for inline or tail-packed data can key on the specific flag. Software -which simply cares not to try operating on non-aligned extents -however, can just key on FIEMAP_EXTENT_NOT_ALIGNED, and not have to -worry about all present and future flags which might imply unaligned -data. Note that the opposite is not true - it would be valid for -FIEMAP_EXTENT_NOT_ALIGNED to appear alone. - -* FIEMAP_EXTENT_LAST -This is the last extent in the file. A mapping attempt past this -extent will return nothing. - -* FIEMAP_EXTENT_UNKNOWN -The location of this extent is currently unknown. This may indicate -the data is stored on an inaccessible volume or that no storage has -been allocated for the file yet. - -* FIEMAP_EXTENT_DELALLOC - - This will also set FIEMAP_EXTENT_UNKNOWN. -Delayed allocation - while there is data for this extent, it's -physical location has not been allocated yet. - -* FIEMAP_EXTENT_ENCODED -This extent does not consist of plain filesystem blocks but is -encoded (e.g. encrypted or compressed). Reading the data in this -extent via I/O to the block device will have undefined results. - -Note that it is *always* undefined to try to update the data -in-place by writing to the indicated location without the -assistance of the filesystem, or to access the data using the -information returned by the FIEMAP interface while the filesystem -is mounted. In other words, user applications may only read the -extent data via I/O to the block device while the filesystem is -unmounted, and then only if the FIEMAP_EXTENT_ENCODED flag is -clear; user applications must not try reading or writing to the -filesystem via the block device under any other circumstances. - -* FIEMAP_EXTENT_DATA_ENCRYPTED - - This will also set FIEMAP_EXTENT_ENCODED -The data in this extent has been encrypted by the file system. - -* FIEMAP_EXTENT_NOT_ALIGNED -Extent offsets and length are not guaranteed to be block aligned. - -* FIEMAP_EXTENT_DATA_INLINE - This will also set FIEMAP_EXTENT_NOT_ALIGNED -Data is located within a meta data block. - -* FIEMAP_EXTENT_DATA_TAIL - This will also set FIEMAP_EXTENT_NOT_ALIGNED -Data is packed into a block with data from other files. - -* FIEMAP_EXTENT_UNWRITTEN -Unwritten extent - the extent is allocated but it's data has not been -initialized. This indicates the extent's data will be all zero if read -through the filesystem but the contents are undefined if read directly from -the device. - -* FIEMAP_EXTENT_MERGED -This will be set when a file does not support extents, i.e., it uses a block -based addressing scheme. Since returning an extent for each block back to -userspace would be highly inefficient, the kernel will try to merge most -adjacent blocks into 'extents'. - - -VFS -> File System Implementation ---------------------------------- - -File systems wishing to support fiemap must implement a ->fiemap callback on -their inode_operations structure. The fs ->fiemap call is responsible for -defining it's set of supported fiemap flags, and calling a helper function on -each discovered extent: - -struct inode_operations { - ... - - int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, - u64 len); - -->fiemap is passed struct fiemap_extent_info which describes the -fiemap request: - -struct fiemap_extent_info { - unsigned int fi_flags; /* Flags as passed from user */ - unsigned int fi_extents_mapped; /* Number of mapped extents */ - unsigned int fi_extents_max; /* Size of fiemap_extent array */ - struct fiemap_extent *fi_extents_start; /* Start of fiemap_extent array */ -}; - -It is intended that the file system should not need to access any of this -structure directly. - - -Flag checking should be done at the beginning of the ->fiemap callback via the -fiemap_check_flags() helper: - -int fiemap_check_flags(struct fiemap_extent_info *fieinfo, u32 fs_flags); - -The struct fieinfo should be passed in as recieved from ioctl_fiemap(). The -set of fiemap flags which the fs understands should be passed via fs_flags. If -fiemap_check_flags finds invalid user flags, it will place the bad values in -fieinfo->fi_flags and return -EBADR. If the file system gets -EBADR, from -fiemap_check_flags(), it should immediately exit, returning that error back to -ioctl_fiemap(). - - -For each extent in the request range, the file system should call -the helper function, fiemap_fill_next_extent(): - -int fiemap_fill_next_extent(struct fiemap_extent_info *info, u64 logical, - u64 phys, u64 len, u32 flags, u32 dev); - -fiemap_fill_next_extent() will use the passed values to populate the -next free extent in the fm_extents array. 'General' extent flags will -automatically be set from specific flags on behalf of the calling file -system so that the userspace API is not broken. - -fiemap_fill_next_extent() returns 0 on success, and 1 when the -user-supplied fm_extents array is full. If an error is encountered -while copying the extent to user memory, -EFAULT will be returned. diff --git a/trunk/Documentation/filesystems/proc.txt b/trunk/Documentation/filesystems/proc.txt index d831d24d2a6c..f566ad9bcb7b 100644 --- a/trunk/Documentation/filesystems/proc.txt +++ b/trunk/Documentation/filesystems/proc.txt @@ -923,44 +923,45 @@ CPUs. The "procs_blocked" line gives the number of processes currently blocked, waiting for I/O to complete. - 1.9 Ext4 file system parameters ------------------------------ - -Information about mounted ext4 file systems can be found in -/proc/fs/ext4. Each mounted filesystem will have a directory in -/proc/fs/ext4 based on its device name (i.e., /proc/fs/ext4/hdc or -/proc/fs/ext4/dm-0). The files in each per-device directory are shown -in Table 1-10, below. - -Table 1-10: Files in /proc/fs/ext4/ -.............................................................................. - File Content - mb_groups details of multiblock allocator buddy cache of free blocks - mb_history multiblock allocation history - stats controls whether the multiblock allocator should start - collecting statistics, which are shown during the unmount - group_prealloc the multiblock allocator will round up allocation - requests to a multiple of this tuning parameter if the - stripe size is not set in the ext4 superblock - max_to_scan The maximum number of extents the multiblock allocator - will search to find the best extent - min_to_scan The minimum number of extents the multiblock allocator - will search to find the best extent - order2_req Tuning parameter which controls the minimum size for - requests (as a power of 2) where the buddy cache is - used - stream_req Files which have fewer blocks than this tunable - parameter will have their blocks allocated out of a - block group specific preallocation pool, so that small - files are packed closely together. Each large file - will have its blocks allocated out of its own unique - preallocation pool. -inode_readahead Tuning parameter which controls the maximum number of - inode table blocks that ext4's inode table readahead - algorithm will pre-read into the buffer cache -.............................................................................. - +Ext4 file system have one directory per partition under /proc/fs/ext4/ +# ls /proc/fs/ext4/hdc/ +group_prealloc max_to_scan mb_groups mb_history min_to_scan order2_req +stats stream_req + +mb_groups: +This file gives the details of multiblock allocator buddy cache of free blocks + +mb_history: +Multiblock allocation history. + +stats: +This file indicate whether the multiblock allocator should start collecting +statistics. The statistics are shown during unmount + +group_prealloc: +The multiblock allocator normalize the block allocation request to +group_prealloc filesystem blocks if we don't have strip value set. +The stripe value can be specified at mount time or during mke2fs. + +max_to_scan: +How long multiblock allocator can look for a best extent (in found extents) + +min_to_scan: +How long multiblock allocator must look for a best extent + +order2_req: +Multiblock allocator use 2^N search using buddies only for requests greater +than or equal to order2_req. The request size is specfied in file system +blocks. A value of 2 indicate only if the requests are greater than or equal +to 4 blocks. + +stream_req: +Files smaller than stream_req are served by the stream allocator, whose +purpose is to pack requests as close each to other as possible to +produce smooth I/O traffic. Avalue of 16 indicate that file smaller than 16 +filesystem block size will use group based preallocation. ------------------------------------------------------------------------------ Summary diff --git a/trunk/Documentation/kernel-parameters.txt b/trunk/Documentation/kernel-parameters.txt index 25efbaf1f59b..2ca9c8f8c8d8 100644 --- a/trunk/Documentation/kernel-parameters.txt +++ b/trunk/Documentation/kernel-parameters.txt @@ -1428,12 +1428,6 @@ and is between 256 and 4096 characters. It is defined in the file nolapic_timer [X86-32,APIC] Do not use the local APIC timer. - nox2apic [X86-64,APIC] Do not enable x2APIC mode. - - x2apic_phys [X86-64,APIC] Use x2apic physical mode instead of - default x2apic cluster mode on platforms - supporting x2apic. - noltlbs [PPC] Do not use large page/tlb entries for kernel lowmem mapping on PPC40x. diff --git a/trunk/MAINTAINERS b/trunk/MAINTAINERS index 587f418ed00d..68781ed2b734 100644 --- a/trunk/MAINTAINERS +++ b/trunk/MAINTAINERS @@ -1659,10 +1659,9 @@ L: linux-ext4@vger.kernel.org S: Maintained EXT4 FILE SYSTEM -P: Theodore Ts'o -M: tytso@mit.edu, adilger@sun.com +P: Stephen Tweedie, Andrew Morton +M: sct@redhat.com, akpm@linux-foundation.org, adilger@sun.com L: linux-ext4@vger.kernel.org -W: http://ext4.wiki.kernel.org S: Maintained F71805F HARDWARE MONITORING DRIVER diff --git a/trunk/arch/um/sys-x86_64/syscall_table.c b/trunk/arch/um/sys-x86_64/syscall_table.c index 32f5fbe2d0d2..c128eb897008 100644 --- a/trunk/arch/um/sys-x86_64/syscall_table.c +++ b/trunk/arch/um/sys-x86_64/syscall_table.c @@ -41,12 +41,12 @@ #define stub_rt_sigreturn sys_rt_sigreturn #define __SYSCALL(nr, sym) extern asmlinkage void sym(void) ; -#undef ASM_X86__UNISTD_64_H +#undef _ASM_X86_64_UNISTD_H_ #include #undef __SYSCALL #define __SYSCALL(nr, sym) [ nr ] = sym, -#undef ASM_X86__UNISTD_64_H +#undef _ASM_X86_64_UNISTD_H_ typedef void (*sys_call_ptr_t)(void); diff --git a/trunk/arch/x86/Kconfig b/trunk/arch/x86/Kconfig index 44d4f2130d01..0d7cdbbfc1ee 100644 --- a/trunk/arch/x86/Kconfig +++ b/trunk/arch/x86/Kconfig @@ -1689,14 +1689,6 @@ config DMAR_FLOPPY_WA workaround will setup a 1:1 mapping for the first 16M to make floppy (an ISA device) work. -config INTR_REMAP - bool "Support for Interrupt Remapping (EXPERIMENTAL)" - depends on X86_64 && X86_IO_APIC && PCI_MSI && ACPI && EXPERIMENTAL - help - Supports Interrupt remapping for IO-APIC and MSI devices. - To use x2apic mode in the CPU's which support x2APIC enhancements or - to support platforms with CPU's having > 8 bit APIC ID, say Y. - source "drivers/pci/pcie/Kconfig" source "drivers/pci/Kconfig" diff --git a/trunk/arch/x86/Kconfig.cpu b/trunk/arch/x86/Kconfig.cpu index f8843c3ae77d..60a85768cfcb 100644 --- a/trunk/arch/x86/Kconfig.cpu +++ b/trunk/arch/x86/Kconfig.cpu @@ -419,60 +419,6 @@ config X86_DEBUGCTLMSR def_bool y depends on !(MK6 || MWINCHIPC6 || MWINCHIP2 || MWINCHIP3D || MCYRIXIII || M586MMX || M586TSC || M586 || M486 || M386) -menuconfig PROCESSOR_SELECT - default y - bool "Supported processor vendors" if EMBEDDED - help - This lets you choose what x86 vendor support code your kernel - will include. - -config CPU_SUP_INTEL - default y - bool "Support Intel processors" if PROCESSOR_SELECT - help - This enables extended support for Intel processors - -config CPU_SUP_CYRIX_32 - default y - bool "Support Cyrix processors" if PROCESSOR_SELECT - depends on !64BIT - help - This enables extended support for Cyrix processors - -config CPU_SUP_AMD - default y - bool "Support AMD processors" if PROCESSOR_SELECT - help - This enables extended support for AMD processors - -config CPU_SUP_CENTAUR_32 - default y - bool "Support Centaur processors" if PROCESSOR_SELECT - depends on !64BIT - help - This enables extended support for Centaur processors - -config CPU_SUP_CENTAUR_64 - default y - bool "Support Centaur processors" if PROCESSOR_SELECT - depends on 64BIT - help - This enables extended support for Centaur processors - -config CPU_SUP_TRANSMETA_32 - default y - bool "Support Transmeta processors" if PROCESSOR_SELECT - depends on !64BIT - help - This enables extended support for Transmeta processors - -config CPU_SUP_UMC_32 - default y - bool "Support UMC processors" if PROCESSOR_SELECT - depends on !64BIT - help - This enables extended support for UMC processors - config X86_DS bool "Debug Store support" default y diff --git a/trunk/arch/x86/boot/cpu.c b/trunk/arch/x86/boot/cpu.c index 6ec6bb6e9957..75298fe2edca 100644 --- a/trunk/arch/x86/boot/cpu.c +++ b/trunk/arch/x86/boot/cpu.c @@ -59,18 +59,17 @@ int validate_cpu(void) u32 e = err_flags[i]; for (j = 0; j < 32; j++) { - if (msg_strs[0] < i || - (msg_strs[0] == i && msg_strs[1] < j)) { + int n = (i << 5)+j; + if (*msg_strs < n) { /* Skip to the next string */ - msg_strs += 2; - while (*msg_strs++) - ; + do { + msg_strs++; + } while (*msg_strs); + msg_strs++; } if (e & 1) { - if (msg_strs[0] == i && - msg_strs[1] == j && - msg_strs[2]) - printf("%s ", msg_strs+2); + if (*msg_strs == n && msg_strs[1]) + printf("%s ", msg_strs+1); else printf("%d:%d ", i, j); } diff --git a/trunk/arch/x86/boot/mkcpustr.c b/trunk/arch/x86/boot/mkcpustr.c index 8ef60f20b371..bbe76953bae9 100644 --- a/trunk/arch/x86/boot/mkcpustr.c +++ b/trunk/arch/x86/boot/mkcpustr.c @@ -15,33 +15,33 @@ #include -#include "../kernel/cpu/capflags.c" +#include "../kernel/cpu/feature_names.c" + +#if NCAPFLAGS > 8 +# error "Need to adjust the boot code handling of CPUID strings" +#endif int main(void) { - int i, j; + int i; const char *str; printf("static const char x86_cap_strs[] = \n"); - for (i = 0; i < NCAPINTS; i++) { - for (j = 0; j < 32; j++) { - str = x86_cap_flags[i*32+j]; - - if (i == NCAPINTS-1 && j == 31) { - /* The last entry must be unconditional; this - also consumes the compiler-added null - character */ - if (!str) - str = ""; - printf("\t\"\\x%02x\\x%02x\"\"%s\"\n", - i, j, str); - } else if (str) { - printf("#if REQUIRED_MASK%d & (1 << %d)\n" - "\t\"\\x%02x\\x%02x\"\"%s\\0\"\n" - "#endif\n", - i, j, i, j, str); - } + for (i = 0; i < NCAPINTS*32; i++) { + str = x86_cap_flags[i]; + + if (i == NCAPINTS*32-1) { + /* The last entry must be unconditional; this + also consumes the compiler-added null character */ + if (!str) + str = ""; + printf("\t\"\\x%02x\"\"%s\"\n", i, str); + } else if (str) { + printf("#if REQUIRED_MASK%d & (1 << %d)\n" + "\t\"\\x%02x\"\"%s\\0\"\n" + "#endif\n", + i >> 5, i & 31, i, str); } } printf("\t;\n"); diff --git a/trunk/arch/x86/ia32/ia32_signal.c b/trunk/arch/x86/ia32/ia32_signal.c index 8d64c1bc8474..f1a2ac777faf 100644 --- a/trunk/arch/x86/ia32/ia32_signal.c +++ b/trunk/arch/x86/ia32/ia32_signal.c @@ -179,10 +179,9 @@ struct sigframe u32 pretcode; int sig; struct sigcontext_ia32 sc; - struct _fpstate_ia32 fpstate_unused; /* look at kernel/sigframe.h */ + struct _fpstate_ia32 fpstate; unsigned int extramask[_COMPAT_NSIG_WORDS-1]; char retcode[8]; - /* fp state follows here */ }; struct rt_sigframe @@ -193,8 +192,8 @@ struct rt_sigframe u32 puc; compat_siginfo_t info; struct ucontext_ia32 uc; + struct _fpstate_ia32 fpstate; char retcode[8]; - /* fp state follows here */ }; #define COPY(x) { \ @@ -216,7 +215,7 @@ static int ia32_restore_sigcontext(struct pt_regs *regs, unsigned int *peax) { unsigned int tmpflags, gs, oldgs, err = 0; - void __user *buf; + struct _fpstate_ia32 __user *buf; u32 tmp; /* Always make any pending restarted system calls return -EINTR */ @@ -260,12 +259,26 @@ static int ia32_restore_sigcontext(struct pt_regs *regs, err |= __get_user(tmp, &sc->fpstate); buf = compat_ptr(tmp); - err |= restore_i387_xstate_ia32(buf); + if (buf) { + if (!access_ok(VERIFY_READ, buf, sizeof(*buf))) + goto badframe; + err |= restore_i387_ia32(buf); + } else { + struct task_struct *me = current; + + if (used_math()) { + clear_fpu(me); + clear_used_math(); + } + } err |= __get_user(tmp, &sc->ax); *peax = tmp; return err; + +badframe: + return 1; } asmlinkage long sys32_sigreturn(struct pt_regs *regs) @@ -337,7 +350,7 @@ asmlinkage long sys32_rt_sigreturn(struct pt_regs *regs) */ static int ia32_setup_sigcontext(struct sigcontext_ia32 __user *sc, - void __user *fpstate, + struct _fpstate_ia32 __user *fpstate, struct pt_regs *regs, unsigned int mask) { int tmp, err = 0; @@ -367,7 +380,7 @@ static int ia32_setup_sigcontext(struct sigcontext_ia32 __user *sc, err |= __put_user((u32)regs->flags, &sc->flags); err |= __put_user((u32)regs->sp, &sc->sp_at_signal); - tmp = save_i387_xstate_ia32(fpstate); + tmp = save_i387_ia32(fpstate); if (tmp < 0) err = -EFAULT; else { @@ -388,8 +401,7 @@ static int ia32_setup_sigcontext(struct sigcontext_ia32 __user *sc, * Determine which stack to use.. */ static void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, - size_t frame_size, - void **fpstate) + size_t frame_size) { unsigned long sp; @@ -408,11 +420,6 @@ static void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, ka->sa.sa_restorer) sp = (unsigned long) ka->sa.sa_restorer; - if (used_math()) { - sp = sp - sig_xstate_ia32_size; - *fpstate = (struct _fpstate_ia32 *) sp; - } - sp -= frame_size; /* Align the stack pointer according to the i386 ABI, * i.e. so that on function entry ((sp + 4) & 15) == 0. */ @@ -426,7 +433,6 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka, struct sigframe __user *frame; void __user *restorer; int err = 0; - void __user *fpstate = NULL; /* copy_to_user optimizes that into a single 8 byte store */ static const struct { @@ -441,7 +447,7 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka, 0, }; - frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate); + frame = get_sigframe(ka, regs, sizeof(*frame)); if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) goto give_sigsegv; @@ -450,7 +456,8 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka, if (err) goto give_sigsegv; - err |= ia32_setup_sigcontext(&frame->sc, fpstate, regs, set->sig[0]); + err |= ia32_setup_sigcontext(&frame->sc, &frame->fpstate, regs, + set->sig[0]); if (err) goto give_sigsegv; @@ -514,7 +521,6 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, struct rt_sigframe __user *frame; void __user *restorer; int err = 0; - void __user *fpstate = NULL; /* __copy_to_user optimizes that into a single 8 byte store */ static const struct { @@ -530,7 +536,7 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, 0, }; - frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate); + frame = get_sigframe(ka, regs, sizeof(*frame)); if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) goto give_sigsegv; @@ -543,16 +549,13 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, goto give_sigsegv; /* Create the ucontext. */ - if (cpu_has_xsave) - err |= __put_user(UC_FP_XSTATE, &frame->uc.uc_flags); - else - err |= __put_user(0, &frame->uc.uc_flags); + err |= __put_user(0, &frame->uc.uc_flags); err |= __put_user(0, &frame->uc.uc_link); err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); err |= __put_user(sas_ss_flags(regs->sp), &frame->uc.uc_stack.ss_flags); err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size); - err |= ia32_setup_sigcontext(&frame->uc.uc_mcontext, fpstate, + err |= ia32_setup_sigcontext(&frame->uc.uc_mcontext, &frame->fpstate, regs, set->sig[0]); err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); if (err) diff --git a/trunk/arch/x86/kernel/Makefile b/trunk/arch/x86/kernel/Makefile index c9be69fedb70..3db651fc8ec5 100644 --- a/trunk/arch/x86/kernel/Makefile +++ b/trunk/arch/x86/kernel/Makefile @@ -38,7 +38,7 @@ obj-y += tsc.o io_delay.o rtc.o obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o obj-y += process.o -obj-y += i387.o xsave.o +obj-y += i387.o obj-y += ptrace.o obj-y += ds.o obj-$(CONFIG_X86_32) += tls.o @@ -69,7 +69,6 @@ obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o obj-$(CONFIG_X86_NUMAQ) += numaq_32.o -obj-$(CONFIG_X86_ES7000) += es7000_32.o obj-$(CONFIG_X86_SUMMIT_NUMA) += summit_32.o obj-y += vsmp_64.o obj-$(CONFIG_KPROBES) += kprobes.o @@ -105,8 +104,6 @@ obj-$(CONFIG_OLPC) += olpc.o ifeq ($(CONFIG_X86_64),y) obj-y += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o tlb_uv.o obj-y += bios_uv.o - obj-y += genx2apic_cluster.o - obj-y += genx2apic_phys.o obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o obj-$(CONFIG_AUDIT) += audit_64.o diff --git a/trunk/arch/x86/kernel/acpi/boot.c b/trunk/arch/x86/kernel/acpi/boot.c index c2ac1b4515a0..7d40ef7b36e3 100644 --- a/trunk/arch/x86/kernel/acpi/boot.c +++ b/trunk/arch/x86/kernel/acpi/boot.c @@ -252,8 +252,10 @@ static void __cpuinit acpi_register_lapic(int id, u8 enabled) return; } +#ifdef CONFIG_X86_32 if (boot_cpu_physical_apicid != -1U) ver = apic_version[boot_cpu_physical_apicid]; +#endif generic_processor_info(id, ver); } @@ -772,9 +774,11 @@ static void __init acpi_register_lapic_address(unsigned long address) set_fixmap_nocache(FIX_APIC_BASE, address); if (boot_cpu_physical_apicid == -1U) { - boot_cpu_physical_apicid = read_apic_id(); + boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); +#ifdef CONFIG_X86_32 apic_version[boot_cpu_physical_apicid] = GET_APIC_VERSION(apic_read(APIC_LVR)); +#endif } } @@ -1346,9 +1350,7 @@ static void __init acpi_process_madt(void) acpi_ioapic = 1; smp_found_config = 1; -#ifdef CONFIG_X86_32 setup_apic_routing(); -#endif } } if (error == -EINVAL) { diff --git a/trunk/arch/x86/kernel/apic_32.c b/trunk/arch/x86/kernel/apic_32.c index a91c57cb666a..f88bd0d982b0 100644 --- a/trunk/arch/x86/kernel/apic_32.c +++ b/trunk/arch/x86/kernel/apic_32.c @@ -60,8 +60,10 @@ unsigned long mp_lapic_addr; static int force_enable_local_apic; int disable_apic; +/* Local APIC timer verification ok */ +static int local_apic_timer_verify_ok; /* Disable local APIC timer from the kernel commandline or via dmi quirk */ -static int disable_apic_timer __cpuinitdata; +static int local_apic_timer_disabled; /* Local APIC timer works in C2 */ int local_apic_timer_c2_ok; EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok); @@ -128,11 +130,7 @@ static inline int lapic_get_version(void) */ static inline int lapic_is_integrated(void) { -#ifdef CONFIG_X86_64 - return 1; -#else return APIC_INTEGRATED(lapic_get_version()); -#endif } /* @@ -147,18 +145,13 @@ static int modern_apic(void) return lapic_get_version() >= 0x14; } -/* - * Paravirt kernels also might be using these below ops. So we still - * use generic apic_read()/apic_write(), which might be pointing to different - * ops in PARAVIRT case. - */ -void xapic_wait_icr_idle(void) +void apic_wait_icr_idle(void) { while (apic_read(APIC_ICR) & APIC_ICR_BUSY) cpu_relax(); } -u32 safe_xapic_wait_icr_idle(void) +u32 safe_apic_wait_icr_idle(void) { u32 send_status; int timeout; @@ -174,48 +167,16 @@ u32 safe_xapic_wait_icr_idle(void) return send_status; } -void xapic_icr_write(u32 low, u32 id) -{ - apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(id)); - apic_write(APIC_ICR, low); -} - -u64 xapic_icr_read(void) -{ - u32 icr1, icr2; - - icr2 = apic_read(APIC_ICR2); - icr1 = apic_read(APIC_ICR); - - return icr1 | ((u64)icr2 << 32); -} - -static struct apic_ops xapic_ops = { - .read = native_apic_mem_read, - .write = native_apic_mem_write, - .icr_read = xapic_icr_read, - .icr_write = xapic_icr_write, - .wait_icr_idle = xapic_wait_icr_idle, - .safe_wait_icr_idle = safe_xapic_wait_icr_idle, -}; - -struct apic_ops __read_mostly *apic_ops = &xapic_ops; -EXPORT_SYMBOL_GPL(apic_ops); - /** * enable_NMI_through_LVT0 - enable NMI through local vector table 0 */ void __cpuinit enable_NMI_through_LVT0(void) { - unsigned int v; + unsigned int v = APIC_DM_NMI; - /* unmask and set to NMI */ - v = APIC_DM_NMI; - - /* Level triggered for 82489DX (32bit mode) */ + /* Level triggered for 82489DX */ if (!lapic_is_integrated()) v |= APIC_LVT_LEVEL_TRIGGER; - apic_write(APIC_LVT0, v); } @@ -232,13 +193,9 @@ int get_physical_broadcast(void) */ int lapic_get_maxlvt(void) { - unsigned int v; + unsigned int v = apic_read(APIC_LVR); - v = apic_read(APIC_LVR); - /* - * - we always have APIC integrated on 64bit mode - * - 82489DXs do not report # of LVT entries - */ + /* 82489DXs do not report # of LVT entries. */ return APIC_INTEGRATED(GET_APIC_VERSION(v)) ? GET_APIC_MAXLVT(v) : 2; } @@ -246,12 +203,8 @@ int lapic_get_maxlvt(void) * Local APIC timer */ -/* Clock divisor */ -#ifdef CONFG_X86_64 -#define APIC_DIVISOR 1 -#else +/* Clock divisor is set to 16 */ #define APIC_DIVISOR 16 -#endif /* * This function sets up the local APIC timer, with a timeout of @@ -259,9 +212,6 @@ int lapic_get_maxlvt(void) * this function twice on the boot CPU, once with a bogus timeout * value, second time for real. The other (noncalibrating) CPUs * call this function only once, with the real, calibrated value. - * - * We do reads before writes even if unnecessary, to get around the - * P5 APIC double write bug. */ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) { @@ -283,43 +233,13 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) */ tmp_value = apic_read(APIC_TDCR); apic_write(APIC_TDCR, - (tmp_value & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) | - APIC_TDR_DIV_16); + (tmp_value & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) | + APIC_TDR_DIV_16); if (!oneshot) apic_write(APIC_TMICT, clocks / APIC_DIVISOR); } -/* - * Setup extended LVT, AMD specific (K8, family 10h) - * - * Vector mappings are hard coded. On K8 only offset 0 (APIC500) and - * MCE interrupts are supported. Thus MCE offset must be set to 0. - */ - -#define APIC_EILVT_LVTOFF_MCE 0 -#define APIC_EILVT_LVTOFF_IBS 1 - -static void setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask) -{ - unsigned long reg = (lvt_off << 4) + APIC_EILVT0; - unsigned int v = (mask << 16) | (msg_type << 8) | vector; - - apic_write(reg, v); -} - -u8 setup_APIC_eilvt_mce(u8 vector, u8 msg_type, u8 mask) -{ - setup_APIC_eilvt(APIC_EILVT_LVTOFF_MCE, vector, msg_type, mask); - return APIC_EILVT_LVTOFF_MCE; -} - -u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask) -{ - setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask); - return APIC_EILVT_LVTOFF_IBS; -} - /* * Program the next event, relative to now */ @@ -339,8 +259,8 @@ static void lapic_timer_setup(enum clock_event_mode mode, unsigned long flags; unsigned int v; - /* Lapic used as dummy for broadcast ? */ - if (evt->features & CLOCK_EVT_FEAT_DUMMY) + /* Lapic used for broadcast ? */ + if (!local_apic_timer_verify_ok) return; local_irq_save(flags); @@ -553,7 +473,7 @@ static int __init calibrate_APIC_clock(void) return -1; } - levt->features &= ~CLOCK_EVT_FEAT_DUMMY; + local_apic_timer_verify_ok = 1; /* We trust the pm timer based calibration */ if (!pm_referenced) { @@ -587,11 +507,11 @@ static int __init calibrate_APIC_clock(void) if (deltaj >= LAPIC_CAL_LOOPS-2 && deltaj <= LAPIC_CAL_LOOPS+2) apic_printk(APIC_VERBOSE, "... jiffies result ok\n"); else - levt->features |= CLOCK_EVT_FEAT_DUMMY; + local_apic_timer_verify_ok = 0; } else local_irq_enable(); - if (levt->features & CLOCK_EVT_FEAT_DUMMY) { + if (!local_apic_timer_verify_ok) { printk(KERN_WARNING "APIC timer disabled due to verification failure.\n"); return -1; @@ -613,8 +533,7 @@ void __init setup_boot_APIC_clock(void) * timer as a dummy clock event source on SMP systems, so the * broadcast mechanism is used. On UP systems simply ignore it. */ - if (disable_apic_timer) { - printk(KERN_INFO "Disabling APIC timer\n"); + if (local_apic_timer_disabled) { /* No broadcast on UP ! */ if (num_possible_cpus() > 1) { lapic_clockevent.mult = 1; @@ -683,11 +602,7 @@ static void local_apic_timer_interrupt(void) /* * the NMI deadlock-detector uses this. */ -#ifdef CONFIG_X86_64 - add_pda(apic_timer_irqs, 1); -#else per_cpu(irq_stat, cpu).apic_timer_irqs++; -#endif evt->event_handler(evt); } @@ -726,6 +641,35 @@ int setup_profiling_timer(unsigned int multiplier) return -EINVAL; } +/* + * Setup extended LVT, AMD specific (K8, family 10h) + * + * Vector mappings are hard coded. On K8 only offset 0 (APIC500) and + * MCE interrupts are supported. Thus MCE offset must be set to 0. + */ + +#define APIC_EILVT_LVTOFF_MCE 0 +#define APIC_EILVT_LVTOFF_IBS 1 + +static void setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask) +{ + unsigned long reg = (lvt_off << 4) + APIC_EILVT0; + unsigned int v = (mask << 16) | (msg_type << 8) | vector; + apic_write(reg, v); +} + +u8 setup_APIC_eilvt_mce(u8 vector, u8 msg_type, u8 mask) +{ + setup_APIC_eilvt(APIC_EILVT_LVTOFF_MCE, vector, msg_type, mask); + return APIC_EILVT_LVTOFF_MCE; +} + +u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask) +{ + setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask); + return APIC_EILVT_LVTOFF_IBS; +} + /* * Local APIC start and shutdown */ @@ -771,7 +715,7 @@ void clear_local_APIC(void) } /* lets not touch this if we didn't frob it */ -#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(X86_MCE_INTEL) +#ifdef CONFIG_X86_MCE_P4THERMAL if (maxlvt >= 5) { v = apic_read(APIC_LVTTHMR); apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED); @@ -788,6 +732,10 @@ void clear_local_APIC(void) if (maxlvt >= 4) apic_write(APIC_LVTPC, APIC_LVT_MASKED); +#ifdef CONFIG_X86_MCE_P4THERMAL + if (maxlvt >= 5) + apic_write(APIC_LVTTHMR, APIC_LVT_MASKED); +#endif /* Integrated APIC (!82489DX) ? */ if (lapic_is_integrated()) { if (maxlvt > 3) @@ -802,7 +750,7 @@ void clear_local_APIC(void) */ void disable_local_APIC(void) { - unsigned int value; + unsigned long value; clear_local_APIC(); @@ -814,7 +762,6 @@ void disable_local_APIC(void) value &= ~APIC_SPIV_APIC_ENABLED; apic_write(APIC_SPIV, value); -#ifdef CONFIG_X86_32 /* * When LAPIC was disabled by the BIOS and enabled by the kernel, * restore the disabled state. @@ -826,7 +773,6 @@ void disable_local_APIC(void) l &= ~MSR_IA32_APICBASE_ENABLE; wrmsr(MSR_IA32_APICBASE, l, h); } -#endif } /* @@ -843,15 +789,11 @@ void lapic_shutdown(void) return; local_irq_save(flags); + clear_local_APIC(); -#ifdef CONFIG_X86_32 - if (!enabled_via_apicbase) - clear_local_APIC(); - else -#endif + if (enabled_via_apicbase) disable_local_APIC(); - local_irq_restore(flags); } @@ -896,12 +838,6 @@ int __init verify_local_APIC(void) */ reg0 = apic_read(APIC_ID); apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0); - apic_write(APIC_ID, reg0 ^ APIC_ID_MASK); - reg1 = apic_read(APIC_ID); - apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1); - apic_write(APIC_ID, reg0); - if (reg1 != (reg0 ^ APIC_ID_MASK)) - return 0; /* * The next two are just to see if we have sane values. @@ -927,15 +863,14 @@ void __init sync_Arb_IDs(void) */ if (modern_apic() || boot_cpu_data.x86_vendor == X86_VENDOR_AMD) return; - /* * Wait for idle. */ apic_wait_icr_idle(); apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n"); - apic_write(APIC_ICR, APIC_DEST_ALLINC | - APIC_INT_LEVELTRIG | APIC_DM_INIT); + apic_write(APIC_ICR, + APIC_DEST_ALLINC | APIC_INT_LEVELTRIG | APIC_DM_INIT); } /* @@ -943,7 +878,7 @@ void __init sync_Arb_IDs(void) */ void __init init_bsp_APIC(void) { - unsigned int value; + unsigned long value; /* * Don't do the setup now if we have a SMP BIOS as the @@ -964,13 +899,11 @@ void __init init_bsp_APIC(void) value &= ~APIC_VECTOR_MASK; value |= APIC_SPIV_APIC_ENABLED; -#ifdef CONFIG_X86_32 /* This bit is reserved on P4/Xeon and should be cleared */ if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && (boot_cpu_data.x86 == 15)) value &= ~APIC_SPIV_FOCUS_DISABLED; else -#endif value |= APIC_SPIV_FOCUS_DISABLED; value |= SPURIOUS_APIC_VECTOR; apic_write(APIC_SPIV, value); @@ -989,16 +922,6 @@ static void __cpuinit lapic_setup_esr(void) { unsigned long oldvalue, value, maxlvt; if (lapic_is_integrated() && !esr_disable) { - if (esr_disable) { - /* - * Something untraceable is creating bad interrupts on - * secondary quads ... for the moment, just leave the - * ESR disabled - we can't do anything useful with the - * errors anyway - mbligh - */ - printk(KERN_INFO "Leaving ESR disabled.\n"); - return; - } /* !82489DX */ maxlvt = lapic_get_maxlvt(); if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ @@ -1019,7 +942,16 @@ static void __cpuinit lapic_setup_esr(void) "vector: 0x%08lx after: 0x%08lx\n", oldvalue, value); } else { - printk(KERN_INFO "No ESR for 82489DX.\n"); + if (esr_disable) + /* + * Something untraceable is creating bad interrupts on + * secondary quads ... for the moment, just leave the + * ESR disabled - we can't do anything useful with the + * errors anyway - mbligh + */ + printk(KERN_INFO "Leaving ESR disabled.\n"); + else + printk(KERN_INFO "No ESR for 82489DX.\n"); } } @@ -1157,17 +1089,13 @@ void __cpuinit setup_local_APIC(void) void __cpuinit end_local_APIC_setup(void) { - lapic_setup_esr(); + unsigned long value; -#ifdef CONFIG_X86_32 - { - unsigned int value; - /* Disable the local apic timer */ - value = apic_read(APIC_LVTT); - value |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); - apic_write(APIC_LVTT, value); - } -#endif + lapic_setup_esr(); + /* Disable the local apic timer */ + value = apic_read(APIC_LVTT); + value |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); + apic_write(APIC_LVTT, value); setup_apic_nmi_watchdog(NULL); apic_pm_activate(); @@ -1277,7 +1205,7 @@ void __init init_apic_mappings(void) * default configuration (or the MP table is broken). */ if (boot_cpu_physical_apicid == -1U) - boot_cpu_physical_apicid = read_apic_id(); + boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); } @@ -1314,7 +1242,7 @@ int __init APIC_init_uniprocessor(void) * might be zero if read from MP tables. Get it from LAPIC. */ #ifdef CONFIG_CRASH_DUMP - boot_cpu_physical_apicid = read_apic_id(); + boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); #endif physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map); @@ -1393,12 +1321,59 @@ void smp_error_interrupt(struct pt_regs *regs) irq_exit(); } +#ifdef CONFIG_SMP +void __init smp_intr_init(void) +{ + /* + * IRQ0 must be given a fixed assignment and initialized, + * because it's used before the IO-APIC is set up. + */ + set_intr_gate(FIRST_DEVICE_VECTOR, interrupt[0]); + + /* + * The reschedule interrupt is a CPU-to-CPU reschedule-helper + * IPI, driven by wakeup. + */ + alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt); + + /* IPI for invalidation */ + alloc_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt); + + /* IPI for generic function call */ + alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); + + /* IPI for single call function */ + set_intr_gate(CALL_FUNCTION_SINGLE_VECTOR, + call_function_single_interrupt); +} +#endif + +/* + * Initialize APIC interrupts + */ +void __init apic_intr_init(void) +{ +#ifdef CONFIG_SMP + smp_intr_init(); +#endif + /* self generated IPI for local APIC timer */ + alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt); + + /* IPI vectors for APIC spurious and error interrupts */ + alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); + alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt); + + /* thermal monitor LVT interrupt */ +#ifdef CONFIG_X86_MCE_P4THERMAL + alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); +#endif +} + /** * connect_bsp_APIC - attach the APIC to the interrupt system */ void __init connect_bsp_APIC(void) { -#ifdef CONFIG_X86_32 if (pic_mode) { /* * Do not trust the local APIC being empty at bootup. @@ -1413,7 +1388,6 @@ void __init connect_bsp_APIC(void) outb(0x70, 0x22); outb(0x01, 0x23); } -#endif enable_apic_mode(); } @@ -1426,9 +1400,6 @@ void __init connect_bsp_APIC(void) */ void disconnect_bsp_APIC(int virt_wire_setup) { - unsigned int value; - -#ifdef CONFIG_X86_32 if (pic_mode) { /* * Put the board back into PIC mode (has an effect only on @@ -1440,53 +1411,54 @@ void disconnect_bsp_APIC(int virt_wire_setup) "entering PIC mode.\n"); outb(0x70, 0x22); outb(0x00, 0x23); - return; - } -#endif + } else { + /* Go back to Virtual Wire compatibility mode */ + unsigned long value; - /* Go back to Virtual Wire compatibility mode */ + /* For the spurious interrupt use vector F, and enable it */ + value = apic_read(APIC_SPIV); + value &= ~APIC_VECTOR_MASK; + value |= APIC_SPIV_APIC_ENABLED; + value |= 0xf; + apic_write(APIC_SPIV, value); - /* For the spurious interrupt use vector F, and enable it */ - value = apic_read(APIC_SPIV); - value &= ~APIC_VECTOR_MASK; - value |= APIC_SPIV_APIC_ENABLED; - value |= 0xf; - apic_write(APIC_SPIV, value); + if (!virt_wire_setup) { + /* + * For LVT0 make it edge triggered, active high, + * external and enabled + */ + value = apic_read(APIC_LVT0); + value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | + APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | + APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); + value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; + value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT); + apic_write(APIC_LVT0, value); + } else { + /* Disable LVT0 */ + apic_write(APIC_LVT0, APIC_LVT_MASKED); + } - if (!virt_wire_setup) { /* - * For LVT0 make it edge triggered, active high, - * external and enabled + * For LVT1 make it edge triggered, active high, nmi and + * enabled */ - value = apic_read(APIC_LVT0); - value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | + value = apic_read(APIC_LVT1); + value &= ~( + APIC_MODE_MASK | APIC_SEND_PENDING | APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; - value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT); - apic_write(APIC_LVT0, value); - } else { - /* Disable LVT0 */ - apic_write(APIC_LVT0, APIC_LVT_MASKED); + value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI); + apic_write(APIC_LVT1, value); } - - /* - * For LVT1 make it edge triggered, active high, - * nmi and enabled - */ - value = apic_read(APIC_LVT1); - value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | - APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | - APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); - value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; - value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI); - apic_write(APIC_LVT1, value); } void __cpuinit generic_processor_info(int apicid, int version) { int cpu; cpumask_t tmp_map; + physid_mask_t phys_cpu; /* * Validate version @@ -1499,6 +1471,9 @@ void __cpuinit generic_processor_info(int apicid, int version) } apic_version[apicid] = version; + phys_cpu = apicid_to_cpu_present(apicid); + physids_or(phys_cpu_present_map, phys_cpu_present_map, phys_cpu); + if (num_processors >= NR_CPUS) { printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached." " Processor ignored.\n", NR_CPUS); @@ -1509,19 +1484,17 @@ void __cpuinit generic_processor_info(int apicid, int version) cpus_complement(tmp_map, cpu_present_map); cpu = first_cpu(tmp_map); - physid_set(apicid, phys_cpu_present_map); - if (apicid == boot_cpu_physical_apicid) { + if (apicid == boot_cpu_physical_apicid) /* * x86_bios_cpu_apicid is required to have processors listed * in same order as logical cpu numbers. Hence the first * entry is BSP, and so on. */ cpu = 0; - } + if (apicid > max_physical_apicid) max_physical_apicid = apicid; -#ifdef CONFIG_X86_32 /* * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y * but we need to work other dependencies like SMP_SUSPEND etc @@ -1541,9 +1514,7 @@ void __cpuinit generic_processor_info(int apicid, int version) def_to_bigsmp = 1; } } -#endif - -#if defined(CONFIG_X86_SMP) || defined(CONFIG_X86_64) +#ifdef CONFIG_SMP /* are we being called early in kernel startup? */ if (early_per_cpu_ptr(x86_cpu_to_apicid)) { u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid); @@ -1556,7 +1527,6 @@ void __cpuinit generic_processor_info(int apicid, int version) per_cpu(x86_bios_cpu_apicid, cpu) = apicid; } #endif - cpu_set(cpu, cpu_possible_map); cpu_set(cpu, cpu_present_map); } @@ -1567,11 +1537,6 @@ void __cpuinit generic_processor_info(int apicid, int version) #ifdef CONFIG_PM static struct { - /* - * 'active' is true if the local APIC was enabled by us and - * not the BIOS; this signifies that we are also responsible - * for disabling it before entering apm/acpi suspend - */ int active; /* r/w apic fields */ unsigned int apic_id; @@ -1612,7 +1577,7 @@ static int lapic_suspend(struct sys_device *dev, pm_message_t state) apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR); apic_pm_state.apic_tmict = apic_read(APIC_TMICT); apic_pm_state.apic_tdcr = apic_read(APIC_TDCR); -#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL) +#ifdef CONFIG_X86_MCE_P4THERMAL if (maxlvt >= 5) apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR); #endif @@ -1636,23 +1601,16 @@ static int lapic_resume(struct sys_device *dev) local_irq_save(flags); -#ifdef CONFIG_X86_64 - if (x2apic) - enable_x2apic(); - else -#endif - { - /* - * Make sure the APICBASE points to the right address - * - * FIXME! This will be wrong if we ever support suspend on - * SMP! We'll need to do this as part of the CPU restore! - */ - rdmsr(MSR_IA32_APICBASE, l, h); - l &= ~MSR_IA32_APICBASE_BASE; - l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; - wrmsr(MSR_IA32_APICBASE, l, h); - } + /* + * Make sure the APICBASE points to the right address + * + * FIXME! This will be wrong if we ever support suspend on + * SMP! We'll need to do this as part of the CPU restore! + */ + rdmsr(MSR_IA32_APICBASE, l, h); + l &= ~MSR_IA32_APICBASE_BASE; + l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; + wrmsr(MSR_IA32_APICBASE, l, h); apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED); apic_write(APIC_ID, apic_pm_state.apic_id); @@ -1662,7 +1620,7 @@ static int lapic_resume(struct sys_device *dev) apic_write(APIC_SPIV, apic_pm_state.apic_spiv); apic_write(APIC_LVT0, apic_pm_state.apic_lvt0); apic_write(APIC_LVT1, apic_pm_state.apic_lvt1); -#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL) +#ifdef CONFIG_X86_MCE_P4THERMAL if (maxlvt >= 5) apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr); #endif @@ -1676,9 +1634,7 @@ static int lapic_resume(struct sys_device *dev) apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr); apic_write(APIC_ESR, 0); apic_read(APIC_ESR); - local_irq_restore(flags); - return 0; } @@ -1734,20 +1690,20 @@ static int __init parse_lapic(char *arg) } early_param("lapic", parse_lapic); -static int __init setup_disableapic(char *arg) +static int __init parse_nolapic(char *arg) { disable_apic = 1; setup_clear_cpu_cap(X86_FEATURE_APIC); return 0; } -early_param("disableapic", setup_disableapic); +early_param("nolapic", parse_nolapic); -/* same as disableapic, for compatibility */ -static int __init setup_nolapic(char *arg) +static int __init parse_disable_lapic_timer(char *arg) { - return setup_disableapic(arg); + local_apic_timer_disabled = 1; + return 0; } -early_param("nolapic", setup_nolapic); +early_param("nolapic_timer", parse_disable_lapic_timer); static int __init parse_lapic_timer_c2_ok(char *arg) { @@ -1756,40 +1712,15 @@ static int __init parse_lapic_timer_c2_ok(char *arg) } early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok); -static int __init parse_disable_apic_timer(char *arg) -{ - disable_apic_timer = 1; - return 0; -} -early_param("noapictimer", parse_disable_apic_timer); - -static int __init parse_nolapic_timer(char *arg) -{ - disable_apic_timer = 1; - return 0; -} -early_param("nolapic_timer", parse_nolapic_timer); - static int __init apic_set_verbosity(char *arg) { - if (!arg) { -#ifdef CONFIG_X86_64 - skip_ioapic_setup = 0; - ioapic_force = 1; - return 0; -#endif + if (!arg) return -EINVAL; - } - if (strcmp("debug", arg) == 0) + if (strcmp(arg, "debug") == 0) apic_verbosity = APIC_DEBUG; - else if (strcmp("verbose", arg) == 0) + else if (strcmp(arg, "verbose") == 0) apic_verbosity = APIC_VERBOSE; - else { - printk(KERN_WARNING "APIC Verbosity level %s not recognised" - " use apic=verbose or apic=debug\n", arg); - return -EINVAL; - } return 0; } diff --git a/trunk/arch/x86/kernel/apic_64.c b/trunk/arch/x86/kernel/apic_64.c index 53898b65a6ae..446c062e831c 100644 --- a/trunk/arch/x86/kernel/apic_64.c +++ b/trunk/arch/x86/kernel/apic_64.c @@ -27,7 +27,6 @@ #include #include #include -#include #include #include @@ -40,20 +39,13 @@ #include #include #include -#include #include #include -/* Disable local APIC timer from the kernel commandline or via dmi quirk */ static int disable_apic_timer __cpuinitdata; static int apic_calibrate_pmtmr __initdata; int disable_apic; -int disable_x2apic; -int x2apic; - -/* x2apic enabled before OS handover */ -int x2apic_preenabled; /* Local APIC timer works in C2 */ int local_apic_timer_c2_ok; @@ -81,9 +73,6 @@ static void lapic_timer_setup(enum clock_event_mode mode, static void lapic_timer_broadcast(cpumask_t mask); static void apic_pm_activate(void); -/* - * The local apic timer can be used for any function which is CPU local. - */ static struct clock_event_device lapic_clockevent = { .name = "lapic", .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT @@ -110,15 +99,11 @@ static inline int lapic_get_version(void) } /* - * Check, if the APIC is integrated or a separate chip + * Check, if the APIC is integrated or a seperate chip */ static inline int lapic_is_integrated(void) { -#ifdef CONFIG_X86_64 return 1; -#else - return APIC_INTEGRATED(lapic_get_version()); -#endif } /* @@ -133,18 +118,13 @@ static int modern_apic(void) return lapic_get_version() >= 0x14; } -/* - * Paravirt kernels also might be using these below ops. So we still - * use generic apic_read()/apic_write(), which might be pointing to different - * ops in PARAVIRT case. - */ -void xapic_wait_icr_idle(void) +void apic_wait_icr_idle(void) { while (apic_read(APIC_ICR) & APIC_ICR_BUSY) cpu_relax(); } -u32 safe_xapic_wait_icr_idle(void) +u32 safe_apic_wait_icr_idle(void) { u32 send_status; int timeout; @@ -160,68 +140,6 @@ u32 safe_xapic_wait_icr_idle(void) return send_status; } -void xapic_icr_write(u32 low, u32 id) -{ - apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(id)); - apic_write(APIC_ICR, low); -} - -u64 xapic_icr_read(void) -{ - u32 icr1, icr2; - - icr2 = apic_read(APIC_ICR2); - icr1 = apic_read(APIC_ICR); - - return icr1 | ((u64)icr2 << 32); -} - -static struct apic_ops xapic_ops = { - .read = native_apic_mem_read, - .write = native_apic_mem_write, - .icr_read = xapic_icr_read, - .icr_write = xapic_icr_write, - .wait_icr_idle = xapic_wait_icr_idle, - .safe_wait_icr_idle = safe_xapic_wait_icr_idle, -}; - -struct apic_ops __read_mostly *apic_ops = &xapic_ops; -EXPORT_SYMBOL_GPL(apic_ops); - -static void x2apic_wait_icr_idle(void) -{ - /* no need to wait for icr idle in x2apic */ - return; -} - -static u32 safe_x2apic_wait_icr_idle(void) -{ - /* no need to wait for icr idle in x2apic */ - return 0; -} - -void x2apic_icr_write(u32 low, u32 id) -{ - wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low); -} - -u64 x2apic_icr_read(void) -{ - unsigned long val; - - rdmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), val); - return val; -} - -static struct apic_ops x2apic_ops = { - .read = native_apic_msr_read, - .write = native_apic_msr_write, - .icr_read = x2apic_icr_read, - .icr_write = x2apic_icr_write, - .wait_icr_idle = x2apic_wait_icr_idle, - .safe_wait_icr_idle = safe_x2apic_wait_icr_idle, -}; - /** * enable_NMI_through_LVT0 - enable NMI through local vector table 0 */ @@ -231,11 +149,6 @@ void __cpuinit enable_NMI_through_LVT0(void) /* unmask and set to NMI */ v = APIC_DM_NMI; - - /* Level triggered for 82489DX (32bit mode) */ - if (!lapic_is_integrated()) - v |= APIC_LVT_LEVEL_TRIGGER; - apic_write(APIC_LVT0, v); } @@ -244,27 +157,13 @@ void __cpuinit enable_NMI_through_LVT0(void) */ int lapic_get_maxlvt(void) { - unsigned int v; + unsigned int v, maxlvt; v = apic_read(APIC_LVR); - /* - * - we always have APIC integrated on 64bit mode - * - 82489DXs do not report # of LVT entries - */ - return APIC_INTEGRATED(GET_APIC_VERSION(v)) ? GET_APIC_MAXLVT(v) : 2; + maxlvt = GET_APIC_MAXLVT(v); + return maxlvt; } -/* - * Local APIC timer - */ - -/* Clock divisor */ -#ifdef CONFG_X86_64 -#define APIC_DIVISOR 1 -#else -#define APIC_DIVISOR 16 -#endif - /* * This function sets up the local APIC timer, with a timeout of * 'clocks' APIC bus clock. During calibration we actually call @@ -275,6 +174,7 @@ int lapic_get_maxlvt(void) * We do reads before writes even if unnecessary, to get around the * P5 APIC double write bug. */ + static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) { unsigned int lvtt_value, tmp_value; @@ -282,9 +182,6 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) lvtt_value = LOCAL_TIMER_VECTOR; if (!oneshot) lvtt_value |= APIC_LVT_TIMER_PERIODIC; - if (!lapic_is_integrated()) - lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV); - if (!irqen) lvtt_value |= APIC_LVT_MASKED; @@ -294,12 +191,12 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) * Divide PICLK by 16 */ tmp_value = apic_read(APIC_TDCR); - apic_write(APIC_TDCR, - (tmp_value & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) | - APIC_TDR_DIV_16); + apic_write(APIC_TDCR, (tmp_value + & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) + | APIC_TDR_DIV_16); if (!oneshot) - apic_write(APIC_TMICT, clocks / APIC_DIVISOR); + apic_write(APIC_TMICT, clocks); } /* @@ -469,7 +366,7 @@ static int __init calibrate_APIC_clock(void) lapic_clockevent.min_delta_ns = clockevent_delta2ns(0xF, &lapic_clockevent); - calibration_result = (result * APIC_DIVISOR) / HZ; + calibration_result = result / HZ; /* * Do a sanity check on the APIC calibration result @@ -491,10 +388,10 @@ static int __init calibrate_APIC_clock(void) void __init setup_boot_APIC_clock(void) { /* - * The local apic timer can be disabled via the kernel - * commandline or from the CPU detection code. Register the lapic - * timer as a dummy clock event source on SMP systems, so the - * broadcast mechanism is used. On UP systems simply ignore it. + * The local apic timer can be disabled via the kernel commandline. + * Register the lapic timer as a dummy clock event source on SMP + * systems, so the broadcast mechanism is used. On UP systems simply + * ignore it. */ if (disable_apic_timer) { printk(KERN_INFO "Disabling APIC timer\n"); @@ -506,9 +403,7 @@ void __init setup_boot_APIC_clock(void) return; } - apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n" - "calibrating APIC timer ...\n"); - + printk(KERN_INFO "Using local APIC timer interrupts.\n"); if (calibrate_APIC_clock()) { /* No broadcast on UP ! */ if (num_possible_cpus() > 1) @@ -527,7 +422,6 @@ void __init setup_boot_APIC_clock(void) printk(KERN_WARNING "APIC timer registered as dummy," " due to nmi_watchdog=%d!\n", nmi_watchdog); - /* Setup the lapic or request the broadcast */ setup_APIC_timer(); } @@ -566,11 +460,7 @@ static void local_apic_timer_interrupt(void) /* * the NMI deadlock-detector uses this. */ -#ifdef CONFIG_X86_64 add_pda(apic_timer_irqs, 1); -#else - per_cpu(irq_stat, cpu).apic_timer_irqs++; -#endif evt->event_handler(evt); } @@ -601,7 +491,6 @@ void smp_apic_timer_interrupt(struct pt_regs *regs) irq_enter(); local_apic_timer_interrupt(); irq_exit(); - set_irq_regs(old_regs); } @@ -655,13 +544,6 @@ void clear_local_APIC(void) apic_write(APIC_LVTPC, v | APIC_LVT_MASKED); } - /* lets not touch this if we didn't frob it */ -#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(X86_MCE_INTEL) - if (maxlvt >= 5) { - v = apic_read(APIC_LVTTHMR); - apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED); - } -#endif /* * Clean APIC state for other OSs: */ @@ -672,14 +554,8 @@ void clear_local_APIC(void) apic_write(APIC_LVTERR, APIC_LVT_MASKED); if (maxlvt >= 4) apic_write(APIC_LVTPC, APIC_LVT_MASKED); - - /* Integrated APIC (!82489DX) ? */ - if (lapic_is_integrated()) { - if (maxlvt > 3) - /* Clear ESR due to Pentium errata 3AP and 11AP */ - apic_write(APIC_ESR, 0); - apic_read(APIC_ESR); - } + apic_write(APIC_ESR, 0); + apic_read(APIC_ESR); } /** @@ -698,28 +574,8 @@ void disable_local_APIC(void) value = apic_read(APIC_SPIV); value &= ~APIC_SPIV_APIC_ENABLED; apic_write(APIC_SPIV, value); - -#ifdef CONFIG_X86_32 - /* - * When LAPIC was disabled by the BIOS and enabled by the kernel, - * restore the disabled state. - */ - if (enabled_via_apicbase) { - unsigned int l, h; - - rdmsr(MSR_IA32_APICBASE, l, h); - l &= ~MSR_IA32_APICBASE_ENABLE; - wrmsr(MSR_IA32_APICBASE, l, h); - } -#endif } -/* - * If Linux enabled the LAPIC against the BIOS default disable it down before - * re-entering the BIOS on shutdown. Otherwise the BIOS may get confused and - * not power-off. Additionally clear all LVT entries before disable_local_APIC - * for the case where Linux didn't enable the LAPIC. - */ void lapic_shutdown(void) { unsigned long flags; @@ -729,13 +585,7 @@ void lapic_shutdown(void) local_irq_save(flags); -#ifdef CONFIG_X86_32 - if (!enabled_via_apicbase) - clear_local_APIC(); - else -#endif - disable_local_APIC(); - + disable_local_APIC(); local_irq_restore(flags); } @@ -779,10 +629,10 @@ int __init verify_local_APIC(void) /* * The ID register is read/write in a real APIC. */ - reg0 = apic_read(APIC_ID); + reg0 = read_apic_id(); apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0); apic_write(APIC_ID, reg0 ^ APIC_ID_MASK); - reg1 = apic_read(APIC_ID); + reg1 = read_apic_id(); apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1); apic_write(APIC_ID, reg0); if (reg1 != (reg0 ^ APIC_ID_MASK)) @@ -806,11 +656,8 @@ int __init verify_local_APIC(void) */ void __init sync_Arb_IDs(void) { - /* - * Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1 And not - * needed on AMD. - */ - if (modern_apic() || boot_cpu_data.x86_vendor == X86_VENDOR_AMD) + /* Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1 */ + if (modern_apic()) return; /* @@ -819,8 +666,8 @@ void __init sync_Arb_IDs(void) apic_wait_icr_idle(); apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n"); - apic_write(APIC_ICR, APIC_DEST_ALLINC | - APIC_INT_LEVELTRIG | APIC_DM_INIT); + apic_write(APIC_ICR, APIC_DEST_ALLINC | APIC_INT_LEVELTRIG + | APIC_DM_INIT); } /* @@ -837,6 +684,8 @@ void __init init_bsp_APIC(void) if (smp_found_config || !cpu_has_apic) return; + value = apic_read(APIC_LVR); + /* * Do not trust the local APIC being empty at bootup. */ @@ -848,15 +697,7 @@ void __init init_bsp_APIC(void) value = apic_read(APIC_SPIV); value &= ~APIC_VECTOR_MASK; value |= APIC_SPIV_APIC_ENABLED; - -#ifdef CONFIG_X86_32 - /* This bit is reserved on P4/Xeon and should be cleared */ - if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && - (boot_cpu_data.x86 == 15)) - value &= ~APIC_SPIV_FOCUS_DISABLED; - else -#endif - value |= APIC_SPIV_FOCUS_DISABLED; + value |= APIC_SPIV_FOCUS_DISABLED; value |= SPURIOUS_APIC_VECTOR; apic_write(APIC_SPIV, value); @@ -865,50 +706,9 @@ void __init init_bsp_APIC(void) */ apic_write(APIC_LVT0, APIC_DM_EXTINT); value = APIC_DM_NMI; - if (!lapic_is_integrated()) /* 82489DX */ - value |= APIC_LVT_LEVEL_TRIGGER; apic_write(APIC_LVT1, value); } -static void __cpuinit lapic_setup_esr(void) -{ - unsigned long oldvalue, value, maxlvt; - if (lapic_is_integrated() && !esr_disable) { - if (esr_disable) { - /* - * Something untraceable is creating bad interrupts on - * secondary quads ... for the moment, just leave the - * ESR disabled - we can't do anything useful with the - * errors anyway - mbligh - */ - printk(KERN_INFO "Leaving ESR disabled.\n"); - return; - } - /* !82489DX */ - maxlvt = lapic_get_maxlvt(); - if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ - apic_write(APIC_ESR, 0); - oldvalue = apic_read(APIC_ESR); - - /* enables sending errors */ - value = ERROR_APIC_VECTOR; - apic_write(APIC_LVTERR, value); - /* - * spec says clear errors after enabling vector. - */ - if (maxlvt > 3) - apic_write(APIC_ESR, 0); - value = apic_read(APIC_ESR); - if (value != oldvalue) - apic_printk(APIC_VERBOSE, "ESR value before enabling " - "vector: 0x%08lx after: 0x%08lx\n", - oldvalue, value); - } else { - printk(KERN_INFO "No ESR for 82489DX.\n"); - } -} - - /** * setup_local_APIC - setup the local APIC */ @@ -1014,141 +814,23 @@ void __cpuinit setup_local_APIC(void) preempt_enable(); } -void __cpuinit end_local_APIC_setup(void) -{ - lapic_setup_esr(); - -#ifdef CONFIG_X86_32 - { - unsigned int value; - /* Disable the local apic timer */ - value = apic_read(APIC_LVTT); - value |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); - apic_write(APIC_LVTT, value); - } -#endif - - setup_apic_nmi_watchdog(NULL); - apic_pm_activate(); -} - -void check_x2apic(void) -{ - int msr, msr2; - - rdmsr(MSR_IA32_APICBASE, msr, msr2); - - if (msr & X2APIC_ENABLE) { - printk("x2apic enabled by BIOS, switching to x2apic ops\n"); - x2apic_preenabled = x2apic = 1; - apic_ops = &x2apic_ops; - } -} - -void enable_x2apic(void) +static void __cpuinit lapic_setup_esr(void) { - int msr, msr2; + unsigned maxlvt = lapic_get_maxlvt(); - rdmsr(MSR_IA32_APICBASE, msr, msr2); - if (!(msr & X2APIC_ENABLE)) { - printk("Enabling x2apic\n"); - wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, 0); - } + apic_write(APIC_LVTERR, ERROR_APIC_VECTOR); + /* + * spec says clear errors after enabling vector. + */ + if (maxlvt > 3) + apic_write(APIC_ESR, 0); } -void enable_IR_x2apic(void) +void __cpuinit end_local_APIC_setup(void) { -#ifdef CONFIG_INTR_REMAP - int ret; - unsigned long flags; - - if (!cpu_has_x2apic) - return; - - if (!x2apic_preenabled && disable_x2apic) { - printk(KERN_INFO - "Skipped enabling x2apic and Interrupt-remapping " - "because of nox2apic\n"); - return; - } - - if (x2apic_preenabled && disable_x2apic) - panic("Bios already enabled x2apic, can't enforce nox2apic"); - - if (!x2apic_preenabled && skip_ioapic_setup) { - printk(KERN_INFO - "Skipped enabling x2apic and Interrupt-remapping " - "because of skipping io-apic setup\n"); - return; - } - - ret = dmar_table_init(); - if (ret) { - printk(KERN_INFO - "dmar_table_init() failed with %d:\n", ret); - - if (x2apic_preenabled) - panic("x2apic enabled by bios. But IR enabling failed"); - else - printk(KERN_INFO - "Not enabling x2apic,Intr-remapping\n"); - return; - } - - local_irq_save(flags); - mask_8259A(); - save_mask_IO_APIC_setup(); - - ret = enable_intr_remapping(1); - - if (ret && x2apic_preenabled) { - local_irq_restore(flags); - panic("x2apic enabled by bios. But IR enabling failed"); - } - - if (ret) - goto end; - - if (!x2apic) { - x2apic = 1; - apic_ops = &x2apic_ops; - enable_x2apic(); - } -end: - if (ret) - /* - * IR enabling failed - */ - restore_IO_APIC_setup(); - else - reinit_intr_remapped_IO_APIC(x2apic_preenabled); - - unmask_8259A(); - local_irq_restore(flags); - - if (!ret) { - if (!x2apic_preenabled) - printk(KERN_INFO - "Enabled x2apic and interrupt-remapping\n"); - else - printk(KERN_INFO - "Enabled Interrupt-remapping\n"); - } else - printk(KERN_ERR - "Failed to enable Interrupt-remapping and x2apic\n"); -#else - if (!cpu_has_x2apic) - return; - - if (x2apic_preenabled) - panic("x2apic enabled prior OS handover," - " enable CONFIG_INTR_REMAP"); - - printk(KERN_INFO "Enable CONFIG_INTR_REMAP for enabling intr-remapping " - " and x2apic\n"); -#endif - - return; + lapic_setup_esr(); + setup_apic_nmi_watchdog(NULL); + apic_pm_activate(); } /* @@ -1190,7 +872,7 @@ void __init early_init_lapic_mapping(void) * Fetch the APIC ID of the BSP in case we have a * default configuration (or the MP table is broken). */ - boot_cpu_physical_apicid = read_apic_id(); + boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); } /** @@ -1198,11 +880,6 @@ void __init early_init_lapic_mapping(void) */ void __init init_apic_mappings(void) { - if (x2apic) { - boot_cpu_physical_apicid = read_apic_id(); - return; - } - /* * If no local APIC can be found then set up a fake all * zeroes page to simulate the local APIC and another @@ -1222,15 +899,13 @@ void __init init_apic_mappings(void) * Fetch the APIC ID of the BSP in case we have a * default configuration (or the MP table is broken). */ - boot_cpu_physical_apicid = read_apic_id(); + boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); } /* * This initializes the IO-APIC and APIC hardware if this is * a UP kernel. */ -int apic_version[MAX_APICS]; - int __init APIC_init_uniprocessor(void) { if (disable_apic) { @@ -1243,9 +918,6 @@ int __init APIC_init_uniprocessor(void) return -1; } - enable_IR_x2apic(); - setup_apic_routing(); - verify_local_APIC(); connect_bsp_APIC(); @@ -1332,57 +1004,17 @@ asmlinkage void smp_error_interrupt(void) } /** - * connect_bsp_APIC - attach the APIC to the interrupt system - */ + * * connect_bsp_APIC - attach the APIC to the interrupt system + * */ void __init connect_bsp_APIC(void) { -#ifdef CONFIG_X86_32 - if (pic_mode) { - /* - * Do not trust the local APIC being empty at bootup. - */ - clear_local_APIC(); - /* - * PIC mode, enable APIC mode in the IMCR, i.e. connect BSP's - * local APIC to INT and NMI lines. - */ - apic_printk(APIC_VERBOSE, "leaving PIC mode, " - "enabling APIC mode.\n"); - outb(0x70, 0x22); - outb(0x01, 0x23); - } -#endif enable_apic_mode(); } -/** - * disconnect_bsp_APIC - detach the APIC from the interrupt system - * @virt_wire_setup: indicates, whether virtual wire mode is selected - * - * Virtual wire mode is necessary to deliver legacy interrupts even when the - * APIC is disabled. - */ void disconnect_bsp_APIC(int virt_wire_setup) { - unsigned int value; - -#ifdef CONFIG_X86_32 - if (pic_mode) { - /* - * Put the board back into PIC mode (has an effect only on - * certain older boards). Note that APIC interrupts, including - * IPIs, won't work beyond this point! The only exception are - * INIT IPIs. - */ - apic_printk(APIC_VERBOSE, "disabling APIC mode, " - "entering PIC mode.\n"); - outb(0x70, 0x22); - outb(0x00, 0x23); - return; - } -#endif - /* Go back to Virtual Wire compatibility mode */ + unsigned long value; /* For the spurious interrupt use vector F, and enable it */ value = apic_read(APIC_SPIV); @@ -1408,10 +1040,7 @@ void disconnect_bsp_APIC(int virt_wire_setup) apic_write(APIC_LVT0, APIC_LVT_MASKED); } - /* - * For LVT1 make it edge triggered, active high, - * nmi and enabled - */ + /* For LVT1 make it edge triggered, active high, nmi and enabled */ value = apic_read(APIC_LVT1); value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | @@ -1426,20 +1055,9 @@ void __cpuinit generic_processor_info(int apicid, int version) int cpu; cpumask_t tmp_map; - /* - * Validate version - */ - if (version == 0x0) { - printk(KERN_WARNING "BIOS bug, APIC version is 0 for CPU#%d! " - "fixing up to 0x10. (tell your hw vendor)\n", - version); - version = 0x10; - } - apic_version[apicid] = version; - if (num_processors >= NR_CPUS) { printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached." - " Processor ignored.\n", NR_CPUS); + " Processor ignored.\n", NR_CPUS); return; } @@ -1459,29 +1077,6 @@ void __cpuinit generic_processor_info(int apicid, int version) if (apicid > max_physical_apicid) max_physical_apicid = apicid; -#ifdef CONFIG_X86_32 - /* - * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y - * but we need to work other dependencies like SMP_SUSPEND etc - * before this can be done without some confusion. - * if (CPU_HOTPLUG_ENABLED || num_processors > 8) - * - Ashok Raj - */ - if (max_physical_apicid >= 8) { - switch (boot_cpu_data.x86_vendor) { - case X86_VENDOR_INTEL: - if (!APIC_XAPIC(version)) { - def_to_bigsmp = 0; - break; - } - /* If P4 and above fall through */ - case X86_VENDOR_AMD: - def_to_bigsmp = 1; - } - } -#endif - -#if defined(CONFIG_X86_SMP) || defined(CONFIG_X86_64) /* are we being called early in kernel startup? */ if (early_per_cpu_ptr(x86_cpu_to_apicid)) { u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid); @@ -1493,28 +1088,20 @@ void __cpuinit generic_processor_info(int apicid, int version) per_cpu(x86_cpu_to_apicid, cpu) = apicid; per_cpu(x86_bios_cpu_apicid, cpu) = apicid; } -#endif cpu_set(cpu, cpu_possible_map); cpu_set(cpu, cpu_present_map); } -int hard_smp_processor_id(void) -{ - return read_apic_id(); -} - /* * Power management */ #ifdef CONFIG_PM static struct { - /* - * 'active' is true if the local APIC was enabled by us and - * not the BIOS; this signifies that we are also responsible - * for disabling it before entering apm/acpi suspend - */ + /* 'active' is true if the local APIC was enabled by us and + not the BIOS; this signifies that we are also responsible + for disabling it before entering apm/acpi suspend */ int active; /* r/w apic fields */ unsigned int apic_id; @@ -1542,7 +1129,7 @@ static int lapic_suspend(struct sys_device *dev, pm_message_t state) maxlvt = lapic_get_maxlvt(); - apic_pm_state.apic_id = apic_read(APIC_ID); + apic_pm_state.apic_id = read_apic_id(); apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI); apic_pm_state.apic_ldr = apic_read(APIC_LDR); apic_pm_state.apic_dfr = apic_read(APIC_DFR); @@ -1555,11 +1142,10 @@ static int lapic_suspend(struct sys_device *dev, pm_message_t state) apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR); apic_pm_state.apic_tmict = apic_read(APIC_TMICT); apic_pm_state.apic_tdcr = apic_read(APIC_TDCR); -#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL) +#ifdef CONFIG_X86_MCE_INTEL if (maxlvt >= 5) apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR); #endif - local_irq_save(flags); disable_local_APIC(); local_irq_restore(flags); @@ -1578,25 +1164,10 @@ static int lapic_resume(struct sys_device *dev) maxlvt = lapic_get_maxlvt(); local_irq_save(flags); - -#ifdef CONFIG_X86_64 - if (x2apic) - enable_x2apic(); - else -#endif - { - /* - * Make sure the APICBASE points to the right address - * - * FIXME! This will be wrong if we ever support suspend on - * SMP! We'll need to do this as part of the CPU restore! - */ - rdmsr(MSR_IA32_APICBASE, l, h); - l &= ~MSR_IA32_APICBASE_BASE; - l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; - wrmsr(MSR_IA32_APICBASE, l, h); - } - + rdmsr(MSR_IA32_APICBASE, l, h); + l &= ~MSR_IA32_APICBASE_BASE; + l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; + wrmsr(MSR_IA32_APICBASE, l, h); apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED); apic_write(APIC_ID, apic_pm_state.apic_id); apic_write(APIC_DFR, apic_pm_state.apic_dfr); @@ -1605,7 +1176,7 @@ static int lapic_resume(struct sys_device *dev) apic_write(APIC_SPIV, apic_pm_state.apic_spiv); apic_write(APIC_LVT0, apic_pm_state.apic_lvt0); apic_write(APIC_LVT1, apic_pm_state.apic_lvt1); -#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL) +#ifdef CONFIG_X86_MCE_INTEL if (maxlvt >= 5) apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr); #endif @@ -1619,17 +1190,10 @@ static int lapic_resume(struct sys_device *dev) apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr); apic_write(APIC_ESR, 0); apic_read(APIC_ESR); - local_irq_restore(flags); - return 0; } -/* - * This device has no shutdown method - fully functioning local APICs - * are needed on every CPU up until machine_halt/restart/poweroff. - */ - static struct sysdev_class lapic_sysclass = { .name = "lapic", .resume = lapic_resume, @@ -1743,19 +1307,31 @@ __cpuinit int apic_is_clustered_box(void) return (clusters > 2); } -static __init int setup_nox2apic(char *str) +/* + * APIC command line parameters + */ +static int __init apic_set_verbosity(char *str) { - disable_x2apic = 1; - clear_cpu_cap(&boot_cpu_data, X86_FEATURE_X2APIC); + if (str == NULL) { + skip_ioapic_setup = 0; + ioapic_force = 1; + return 0; + } + if (strcmp("debug", str) == 0) + apic_verbosity = APIC_DEBUG; + else if (strcmp("verbose", str) == 0) + apic_verbosity = APIC_VERBOSE; + else { + printk(KERN_WARNING "APIC Verbosity level %s not recognised" + " use apic=verbose or apic=debug\n", str); + return -EINVAL; + } + return 0; } -early_param("nox2apic", setup_nox2apic); - +early_param("apic", apic_set_verbosity); -/* - * APIC command line parameters - */ -static int __init setup_disableapic(char *arg) +static __init int setup_disableapic(char *str) { disable_apic = 1; setup_clear_cpu_cap(X86_FEATURE_APIC); @@ -1764,9 +1340,9 @@ static int __init setup_disableapic(char *arg) early_param("disableapic", setup_disableapic); /* same as disableapic, for compatibility */ -static int __init setup_nolapic(char *arg) +static __init int setup_nolapic(char *str) { - return setup_disableapic(arg); + return setup_disableapic(str); } early_param("nolapic", setup_nolapic); @@ -1777,19 +1353,14 @@ static int __init parse_lapic_timer_c2_ok(char *arg) } early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok); -static int __init parse_disable_apic_timer(char *arg) -{ - disable_apic_timer = 1; - return 0; -} -early_param("noapictimer", parse_disable_apic_timer); - -static int __init parse_nolapic_timer(char *arg) +static __init int setup_noapictimer(char *str) { + if (str[0] != ' ' && str[0] != 0) + return 0; disable_apic_timer = 1; - return 0; + return 1; } -early_param("nolapic_timer", parse_nolapic_timer); +__setup("noapictimer", setup_noapictimer); static __init int setup_apicpmtimer(char *s) { @@ -1799,31 +1370,6 @@ static __init int setup_apicpmtimer(char *s) } __setup("apicpmtimer", setup_apicpmtimer); -static int __init apic_set_verbosity(char *arg) -{ - if (!arg) { -#ifdef CONFIG_X86_64 - skip_ioapic_setup = 0; - ioapic_force = 1; - return 0; -#endif - return -EINVAL; - } - - if (strcmp("debug", arg) == 0) - apic_verbosity = APIC_DEBUG; - else if (strcmp("verbose", arg) == 0) - apic_verbosity = APIC_VERBOSE; - else { - printk(KERN_WARNING "APIC Verbosity level %s not recognised" - " use apic=verbose or apic=debug\n", arg); - return -EINVAL; - } - - return 0; -} -early_param("apic", apic_set_verbosity); - static int __init lapic_insert_resource(void) { if (!apic_phys) diff --git a/trunk/arch/x86/kernel/cpu/Makefile b/trunk/arch/x86/kernel/cpu/Makefile index 7f0b45a5d788..ee76eaad3001 100644 --- a/trunk/arch/x86/kernel/cpu/Makefile +++ b/trunk/arch/x86/kernel/cpu/Makefile @@ -3,30 +3,22 @@ # obj-y := intel_cacheinfo.o addon_cpuid_features.o -obj-y += proc.o capflags.o powerflags.o common.o - -obj-$(CONFIG_X86_32) += bugs.o cmpxchg.o -obj-$(CONFIG_X86_64) += bugs_64.o - -obj-$(CONFIG_CPU_SUP_INTEL) += intel.o -obj-$(CONFIG_CPU_SUP_AMD) += amd.o -obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o -obj-$(CONFIG_CPU_SUP_CENTAUR_32) += centaur.o -obj-$(CONFIG_CPU_SUP_CENTAUR_64) += centaur_64.o -obj-$(CONFIG_CPU_SUP_TRANSMETA_32) += transmeta.o -obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o +obj-y += proc.o feature_names.o + +obj-$(CONFIG_X86_32) += common.o bugs.o +obj-$(CONFIG_X86_64) += common_64.o bugs_64.o +obj-$(CONFIG_X86_32) += amd.o +obj-$(CONFIG_X86_64) += amd_64.o +obj-$(CONFIG_X86_32) += cyrix.o +obj-$(CONFIG_X86_32) += centaur.o +obj-$(CONFIG_X86_64) += centaur_64.o +obj-$(CONFIG_X86_32) += transmeta.o +obj-$(CONFIG_X86_32) += intel.o +obj-$(CONFIG_X86_64) += intel_64.o +obj-$(CONFIG_X86_32) += umc.o obj-$(CONFIG_X86_MCE) += mcheck/ obj-$(CONFIG_MTRR) += mtrr/ obj-$(CONFIG_CPU_FREQ) += cpufreq/ obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o - -quiet_cmd_mkcapflags = MKCAP $@ - cmd_mkcapflags = $(PERL) $(srctree)/$(src)/mkcapflags.pl $< $@ - -cpufeature = $(src)/../../../../include/asm-x86/cpufeature.h - -targets += capflags.c -$(obj)/capflags.c: $(cpufeature) $(src)/mkcapflags.pl FORCE - $(call if_changed,mkcapflags) diff --git a/trunk/arch/x86/kernel/cpu/addon_cpuid_features.c b/trunk/arch/x86/kernel/cpu/addon_cpuid_features.c index 0d9c993aa93e..a6ef672adbba 100644 --- a/trunk/arch/x86/kernel/cpu/addon_cpuid_features.c +++ b/trunk/arch/x86/kernel/cpu/addon_cpuid_features.c @@ -7,8 +7,6 @@ #include #include -#include - struct cpuid_bit { u16 feature; u8 reg; @@ -50,92 +48,6 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c) } } -/* leaf 0xb SMT level */ -#define SMT_LEVEL 0 - -/* leaf 0xb sub-leaf types */ -#define INVALID_TYPE 0 -#define SMT_TYPE 1 -#define CORE_TYPE 2 - -#define LEAFB_SUBTYPE(ecx) (((ecx) >> 8) & 0xff) -#define BITS_SHIFT_NEXT_LEVEL(eax) ((eax) & 0x1f) -#define LEVEL_MAX_SIBLINGS(ebx) ((ebx) & 0xffff) - -/* - * Check for extended topology enumeration cpuid leaf 0xb and if it - * exists, use it for populating initial_apicid and cpu topology - * detection. - */ -void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c) -{ -#ifdef CONFIG_SMP - unsigned int eax, ebx, ecx, edx, sub_index; - unsigned int ht_mask_width, core_plus_mask_width; - unsigned int core_select_mask, core_level_siblings; - - if (c->cpuid_level < 0xb) - return; - - cpuid_count(0xb, SMT_LEVEL, &eax, &ebx, &ecx, &edx); - - /* - * check if the cpuid leaf 0xb is actually implemented. - */ - if (ebx == 0 || (LEAFB_SUBTYPE(ecx) != SMT_TYPE)) - return; - - set_cpu_cap(c, X86_FEATURE_XTOPOLOGY); - - /* - * initial apic id, which also represents 32-bit extended x2apic id. - */ - c->initial_apicid = edx; - - /* - * Populate HT related information from sub-leaf level 0. - */ - core_level_siblings = smp_num_siblings = LEVEL_MAX_SIBLINGS(ebx); - core_plus_mask_width = ht_mask_width = BITS_SHIFT_NEXT_LEVEL(eax); - - sub_index = 1; - do { - cpuid_count(0xb, sub_index, &eax, &ebx, &ecx, &edx); - - /* - * Check for the Core type in the implemented sub leaves. - */ - if (LEAFB_SUBTYPE(ecx) == CORE_TYPE) { - core_level_siblings = LEVEL_MAX_SIBLINGS(ebx); - core_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax); - break; - } - - sub_index++; - } while (LEAFB_SUBTYPE(ecx) != INVALID_TYPE); - - core_select_mask = (~(-1 << core_plus_mask_width)) >> ht_mask_width; - -#ifdef CONFIG_X86_32 - c->cpu_core_id = phys_pkg_id(c->initial_apicid, ht_mask_width) - & core_select_mask; - c->phys_proc_id = phys_pkg_id(c->initial_apicid, core_plus_mask_width); -#else - c->cpu_core_id = phys_pkg_id(ht_mask_width) & core_select_mask; - c->phys_proc_id = phys_pkg_id(core_plus_mask_width); -#endif - c->x86_max_cores = (core_level_siblings / smp_num_siblings); - - - printk(KERN_INFO "CPU: Physical Processor ID: %d\n", - c->phys_proc_id); - if (c->x86_max_cores > 1) - printk(KERN_INFO "CPU: Processor Core ID: %d\n", - c->cpu_core_id); - return; -#endif -} - #ifdef CONFIG_X86_PAT void __cpuinit validate_pat_support(struct cpuinfo_x86 *c) { diff --git a/trunk/arch/x86/kernel/cpu/amd.c b/trunk/arch/x86/kernel/cpu/amd.c index 32e73520adf7..18514ed26104 100644 --- a/trunk/arch/x86/kernel/cpu/amd.c +++ b/trunk/arch/x86/kernel/cpu/amd.c @@ -1,22 +1,13 @@ #include #include #include - #include #include #include -#ifdef CONFIG_X86_64 -# include -# include -# include -#endif - #include - #include "cpu.h" -#ifdef CONFIG_X86_32 /* * B step AMD K6 before B 9730xxxx have hardware bugs that can cause * misexecution of code under Linux. Owners of such processors should @@ -33,273 +24,26 @@ extern void vide(void); __asm__(".align 4\nvide: ret"); -static void __cpuinit init_amd_k5(struct cpuinfo_x86 *c) -{ -/* - * General Systems BIOSen alias the cpu frequency registers - * of the Elan at 0x000df000. Unfortuantly, one of the Linux - * drivers subsequently pokes it, and changes the CPU speed. - * Workaround : Remove the unneeded alias. - */ -#define CBAR (0xfffc) /* Configuration Base Address (32-bit) */ -#define CBAR_ENB (0x80000000) -#define CBAR_KEY (0X000000CB) - if (c->x86_model == 9 || c->x86_model == 10) { - if (inl (CBAR) & CBAR_ENB) - outl (0 | CBAR_KEY, CBAR); - } -} - - -static void __cpuinit init_amd_k6(struct cpuinfo_x86 *c) -{ - u32 l, h; - int mbytes = num_physpages >> (20-PAGE_SHIFT); - - if (c->x86_model < 6) { - /* Based on AMD doc 20734R - June 2000 */ - if (c->x86_model == 0) { - clear_cpu_cap(c, X86_FEATURE_APIC); - set_cpu_cap(c, X86_FEATURE_PGE); - } - return; - } - - if (c->x86_model == 6 && c->x86_mask == 1) { - const int K6_BUG_LOOP = 1000000; - int n; - void (*f_vide)(void); - unsigned long d, d2; - - printk(KERN_INFO "AMD K6 stepping B detected - "); - - /* - * It looks like AMD fixed the 2.6.2 bug and improved indirect - * calls at the same time. - */ - - n = K6_BUG_LOOP; - f_vide = vide; - rdtscl(d); - while (n--) - f_vide(); - rdtscl(d2); - d = d2-d; - - if (d > 20*K6_BUG_LOOP) - printk("system stability may be impaired when more than 32 MB are used.\n"); - else - printk("probably OK (after B9730xxxx).\n"); - printk(KERN_INFO "Please see http://membres.lycos.fr/poulot/k6bug.html\n"); - } - - /* K6 with old style WHCR */ - if (c->x86_model < 8 || - (c->x86_model == 8 && c->x86_mask < 8)) { - /* We can only write allocate on the low 508Mb */ - if (mbytes > 508) - mbytes = 508; - - rdmsr(MSR_K6_WHCR, l, h); - if ((l&0x0000FFFF) == 0) { - unsigned long flags; - l = (1<<0)|((mbytes/4)<<1); - local_irq_save(flags); - wbinvd(); - wrmsr(MSR_K6_WHCR, l, h); - local_irq_restore(flags); - printk(KERN_INFO "Enabling old style K6 write allocation for %d Mb\n", - mbytes); - } - return; - } - - if ((c->x86_model == 8 && c->x86_mask > 7) || - c->x86_model == 9 || c->x86_model == 13) { - /* The more serious chips .. */ - - if (mbytes > 4092) - mbytes = 4092; - - rdmsr(MSR_K6_WHCR, l, h); - if ((l&0xFFFF0000) == 0) { - unsigned long flags; - l = ((mbytes>>2)<<22)|(1<<16); - local_irq_save(flags); - wbinvd(); - wrmsr(MSR_K6_WHCR, l, h); - local_irq_restore(flags); - printk(KERN_INFO "Enabling new style K6 write allocation for %d Mb\n", - mbytes); - } - - return; - } - - if (c->x86_model == 10) { - /* AMD Geode LX is model 10 */ - /* placeholder for any needed mods */ - return; - } -} - -static void __cpuinit init_amd_k7(struct cpuinfo_x86 *c) -{ - u32 l, h; - - /* - * Bit 15 of Athlon specific MSR 15, needs to be 0 - * to enable SSE on Palomino/Morgan/Barton CPU's. - * If the BIOS didn't enable it already, enable it here. - */ - if (c->x86_model >= 6 && c->x86_model <= 10) { - if (!cpu_has(c, X86_FEATURE_XMM)) { - printk(KERN_INFO "Enabling disabled K7/SSE Support.\n"); - rdmsr(MSR_K7_HWCR, l, h); - l &= ~0x00008000; - wrmsr(MSR_K7_HWCR, l, h); - set_cpu_cap(c, X86_FEATURE_XMM); - } - } - - /* - * It's been determined by AMD that Athlons since model 8 stepping 1 - * are more robust with CLK_CTL set to 200xxxxx instead of 600xxxxx - * As per AMD technical note 27212 0.2 - */ - if ((c->x86_model == 8 && c->x86_mask >= 1) || (c->x86_model > 8)) { - rdmsr(MSR_K7_CLK_CTL, l, h); - if ((l & 0xfff00000) != 0x20000000) { - printk ("CPU: CLK_CTL MSR was %x. Reprogramming to %x\n", l, - ((l & 0x000fffff)|0x20000000)); - wrmsr(MSR_K7_CLK_CTL, (l & 0x000fffff)|0x20000000, h); - } - } - - set_cpu_cap(c, X86_FEATURE_K7); -} -#endif - -#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) -static int __cpuinit nearby_node(int apicid) -{ - int i, node; - - for (i = apicid - 1; i >= 0; i--) { - node = apicid_to_node[i]; - if (node != NUMA_NO_NODE && node_online(node)) - return node; - } - for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) { - node = apicid_to_node[i]; - if (node != NUMA_NO_NODE && node_online(node)) - return node; - } - return first_node(node_online_map); /* Shouldn't happen */ -} -#endif - -/* - * On a AMD dual core setup the lower bits of the APIC id distingush the cores. - * Assumes number of cores is a power of two. - */ -static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c) -{ -#ifdef CONFIG_X86_HT - unsigned bits; - - bits = c->x86_coreid_bits; - - /* Low order bits define the core id (index of core in socket) */ - c->cpu_core_id = c->initial_apicid & ((1 << bits)-1); - /* Convert the initial APIC ID into the socket ID */ - c->phys_proc_id = c->initial_apicid >> bits; -#endif -} - -static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c) -{ -#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) - int cpu = smp_processor_id(); - int node; - unsigned apicid = hard_smp_processor_id(); - - node = c->phys_proc_id; - if (apicid_to_node[apicid] != NUMA_NO_NODE) - node = apicid_to_node[apicid]; - if (!node_online(node)) { - /* Two possibilities here: - - The CPU is missing memory and no node was created. - In that case try picking one from a nearby CPU - - The APIC IDs differ from the HyperTransport node IDs - which the K8 northbridge parsing fills in. - Assume they are all increased by a constant offset, - but in the same order as the HT nodeids. - If that doesn't result in a usable node fall back to the - path for the previous case. */ - - int ht_nodeid = c->initial_apicid; - - if (ht_nodeid >= 0 && - apicid_to_node[ht_nodeid] != NUMA_NO_NODE) - node = apicid_to_node[ht_nodeid]; - /* Pick a nearby node */ - if (!node_online(node)) - node = nearby_node(apicid); - } - numa_set_node(cpu, node); - - printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node); -#endif -} - -static void __cpuinit early_init_amd_mc(struct cpuinfo_x86 *c) -{ -#ifdef CONFIG_X86_HT - unsigned bits, ecx; - - /* Multi core CPU? */ - if (c->extended_cpuid_level < 0x80000008) - return; - - ecx = cpuid_ecx(0x80000008); - - c->x86_max_cores = (ecx & 0xff) + 1; - - /* CPU telling us the core id bits shift? */ - bits = (ecx >> 12) & 0xF; - - /* Otherwise recompute */ - if (bits == 0) { - while ((1 << bits) < c->x86_max_cores) - bits++; - } - - c->x86_coreid_bits = bits; -#endif -} - static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) { - early_init_amd_mc(c); - - /* c->x86_power is 8000_0007 edx. Bit 8 is constant TSC */ - if (c->x86_power & (1<<8)) - set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); + if (cpuid_eax(0x80000000) >= 0x80000007) { + c->x86_power = cpuid_edx(0x80000007); + if (c->x86_power & (1<<8)) + set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); + } -#ifdef CONFIG_X86_64 - set_cpu_cap(c, X86_FEATURE_SYSCALL32); -#else /* Set MTRR capability flag if appropriate */ - if (c->x86 == 5) - if (c->x86_model == 13 || c->x86_model == 9 || - (c->x86_model == 8 && c->x86_mask >= 8)) - set_cpu_cap(c, X86_FEATURE_K6_MTRR); -#endif + if (c->x86_model == 13 || c->x86_model == 9 || + (c->x86_model == 8 && c->x86_mask >= 8)) + set_cpu_cap(c, X86_FEATURE_K6_MTRR); } static void __cpuinit init_amd(struct cpuinfo_x86 *c) { + u32 l, h; + int mbytes = num_physpages >> (20-PAGE_SHIFT); + int r; + #ifdef CONFIG_SMP unsigned long long value; @@ -310,7 +54,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) * Errata 63 for SH-B3 steppings * Errata 122 for all steppings (F+ have it disabled by default) */ - if (c->x86 == 0xf) { + if (c->x86 == 15) { rdmsrl(MSR_K7_HWCR, value); value |= 1 << 6; wrmsrl(MSR_K7_HWCR, value); @@ -319,120 +63,210 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) early_init_amd(c); + /* + * FIXME: We should handle the K5 here. Set up the write + * range and also turn on MSR 83 bits 4 and 31 (write alloc, + * no bus pipeline) + */ + /* * Bit 31 in normal CPUID used for nonstandard 3DNow ID; * 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */ clear_cpu_cap(c, 0*32+31); -#ifdef CONFIG_X86_64 - /* On C+ stepping K8 rep microcode works well for copy/memset */ - if (c->x86 == 0xf) { - u32 level; - - level = cpuid_eax(1); - if((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58) - set_cpu_cap(c, X86_FEATURE_REP_GOOD); - } - if (c->x86 == 0x10 || c->x86 == 0x11) - set_cpu_cap(c, X86_FEATURE_REP_GOOD); -#else - - /* - * FIXME: We should handle the K5 here. Set up the write - * range and also turn on MSR 83 bits 4 and 31 (write alloc, - * no bus pipeline) - */ + r = get_model_name(c); switch (c->x86) { case 4: - init_amd_k5(c); - break; + /* + * General Systems BIOSen alias the cpu frequency registers + * of the Elan at 0x000df000. Unfortuantly, one of the Linux + * drivers subsequently pokes it, and changes the CPU speed. + * Workaround : Remove the unneeded alias. + */ +#define CBAR (0xfffc) /* Configuration Base Address (32-bit) */ +#define CBAR_ENB (0x80000000) +#define CBAR_KEY (0X000000CB) + if (c->x86_model == 9 || c->x86_model == 10) { + if (inl (CBAR) & CBAR_ENB) + outl (0 | CBAR_KEY, CBAR); + } + break; case 5: - init_amd_k6(c); - break; + if (c->x86_model < 6) { + /* Based on AMD doc 20734R - June 2000 */ + if (c->x86_model == 0) { + clear_cpu_cap(c, X86_FEATURE_APIC); + set_cpu_cap(c, X86_FEATURE_PGE); + } + break; + } + + if (c->x86_model == 6 && c->x86_mask == 1) { + const int K6_BUG_LOOP = 1000000; + int n; + void (*f_vide)(void); + unsigned long d, d2; + + printk(KERN_INFO "AMD K6 stepping B detected - "); + + /* + * It looks like AMD fixed the 2.6.2 bug and improved indirect + * calls at the same time. + */ + + n = K6_BUG_LOOP; + f_vide = vide; + rdtscl(d); + while (n--) + f_vide(); + rdtscl(d2); + d = d2-d; + + if (d > 20*K6_BUG_LOOP) + printk("system stability may be impaired when more than 32 MB are used.\n"); + else + printk("probably OK (after B9730xxxx).\n"); + printk(KERN_INFO "Please see http://membres.lycos.fr/poulot/k6bug.html\n"); + } + + /* K6 with old style WHCR */ + if (c->x86_model < 8 || + (c->x86_model == 8 && c->x86_mask < 8)) { + /* We can only write allocate on the low 508Mb */ + if (mbytes > 508) + mbytes = 508; + + rdmsr(MSR_K6_WHCR, l, h); + if ((l&0x0000FFFF) == 0) { + unsigned long flags; + l = (1<<0)|((mbytes/4)<<1); + local_irq_save(flags); + wbinvd(); + wrmsr(MSR_K6_WHCR, l, h); + local_irq_restore(flags); + printk(KERN_INFO "Enabling old style K6 write allocation for %d Mb\n", + mbytes); + } + break; + } + + if ((c->x86_model == 8 && c->x86_mask > 7) || + c->x86_model == 9 || c->x86_model == 13) { + /* The more serious chips .. */ + + if (mbytes > 4092) + mbytes = 4092; + + rdmsr(MSR_K6_WHCR, l, h); + if ((l&0xFFFF0000) == 0) { + unsigned long flags; + l = ((mbytes>>2)<<22)|(1<<16); + local_irq_save(flags); + wbinvd(); + wrmsr(MSR_K6_WHCR, l, h); + local_irq_restore(flags); + printk(KERN_INFO "Enabling new style K6 write allocation for %d Mb\n", + mbytes); + } + + break; + } + + if (c->x86_model == 10) { + /* AMD Geode LX is model 10 */ + /* placeholder for any needed mods */ + break; + } + break; case 6: /* An Athlon/Duron */ - init_amd_k7(c); - break; - } - /* K6s reports MCEs but don't actually have all the MSRs */ - if (c->x86 < 6) - clear_cpu_cap(c, X86_FEATURE_MCE); -#endif + /* + * Bit 15 of Athlon specific MSR 15, needs to be 0 + * to enable SSE on Palomino/Morgan/Barton CPU's. + * If the BIOS didn't enable it already, enable it here. + */ + if (c->x86_model >= 6 && c->x86_model <= 10) { + if (!cpu_has(c, X86_FEATURE_XMM)) { + printk(KERN_INFO "Enabling disabled K7/SSE Support.\n"); + rdmsr(MSR_K7_HWCR, l, h); + l &= ~0x00008000; + wrmsr(MSR_K7_HWCR, l, h); + set_cpu_cap(c, X86_FEATURE_XMM); + } + } + + /* + * It's been determined by AMD that Athlons since model 8 stepping 1 + * are more robust with CLK_CTL set to 200xxxxx instead of 600xxxxx + * As per AMD technical note 27212 0.2 + */ + if ((c->x86_model == 8 && c->x86_mask >= 1) || (c->x86_model > 8)) { + rdmsr(MSR_K7_CLK_CTL, l, h); + if ((l & 0xfff00000) != 0x20000000) { + printk ("CPU: CLK_CTL MSR was %x. Reprogramming to %x\n", l, + ((l & 0x000fffff)|0x20000000)); + wrmsr(MSR_K7_CLK_CTL, (l & 0x000fffff)|0x20000000, h); + } + } + break; + } - /* Enable workaround for FXSAVE leak */ + switch (c->x86) { + case 15: + /* Use K8 tuning for Fam10h and Fam11h */ + case 0x10: + case 0x11: + set_cpu_cap(c, X86_FEATURE_K8); + break; + case 6: + set_cpu_cap(c, X86_FEATURE_K7); + break; + } if (c->x86 >= 6) set_cpu_cap(c, X86_FEATURE_FXSAVE_LEAK); - if (!c->x86_model_id[0]) { - switch (c->x86) { - case 0xf: - /* Should distinguish Models here, but this is only - a fallback anyways. */ - strcpy(c->x86_model_id, "Hammer"); - break; - } - } - display_cacheinfo(c); - /* Multi core CPU? */ - if (c->extended_cpuid_level >= 0x80000008) { - amd_detect_cmp(c); - srat_detect_node(c); - } + if (cpuid_eax(0x80000000) >= 0x80000008) + c->x86_max_cores = (cpuid_ecx(0x80000008) & 0xff) + 1; -#ifdef CONFIG_X86_32 - detect_ht(c); +#ifdef CONFIG_X86_HT + /* + * On a AMD multi core setup the lower bits of the APIC id + * distinguish the cores. + */ + if (c->x86_max_cores > 1) { + int cpu = smp_processor_id(); + unsigned bits = (cpuid_ecx(0x80000008) >> 12) & 0xf; + + if (bits == 0) { + while ((1 << bits) < c->x86_max_cores) + bits++; + } + c->cpu_core_id = c->phys_proc_id & ((1<phys_proc_id >>= bits; + printk(KERN_INFO "CPU %d(%d) -> Core %d\n", + cpu, c->x86_max_cores, c->cpu_core_id); + } #endif - if (c->extended_cpuid_level >= 0x80000006) { - if ((c->x86 >= 0x0f) && (cpuid_edx(0x80000006) & 0xf000)) + if (cpuid_eax(0x80000000) >= 0x80000006) { + if ((c->x86 == 0x10) && (cpuid_edx(0x80000006) & 0xf000)) num_cache_leaves = 4; else num_cache_leaves = 3; } - if (c->x86 >= 0xf && c->x86 <= 0x11) - set_cpu_cap(c, X86_FEATURE_K8); + /* K6s reports MCEs but don't actually have all the MSRs */ + if (c->x86 < 6) + clear_cpu_cap(c, X86_FEATURE_MCE); - if (cpu_has_xmm2) { - /* MFENCE stops RDTSC speculation */ + if (cpu_has_xmm2) set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC); - } - -#ifdef CONFIG_X86_64 - if (c->x86 == 0x10) { - /* do this for boot cpu */ - if (c == &boot_cpu_data) - check_enable_amd_mmconf_dmi(); - - fam10h_check_enable_mmcfg(); - } - - if (c == &boot_cpu_data && c->x86 >= 0xf && c->x86 <= 0x11) { - unsigned long long tseg; - - /* - * Split up direct mapping around the TSEG SMM area. - * Don't do it for gbpages because there seems very little - * benefit in doing so. - */ - if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg)) { - printk(KERN_DEBUG "tseg: %010llx\n", tseg); - if ((tseg>>PMD_SHIFT) < - (max_low_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) || - ((tseg>>PMD_SHIFT) < - (max_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) && - (tseg>>PMD_SHIFT) >= (1ULL<<(32 - PMD_SHIFT)))) - set_memory_4k((unsigned long)__va(tseg), 1); - } - } -#endif } -#ifdef CONFIG_X86_32 static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c, unsigned int size) { /* AMD errata T13 (order #21922) */ @@ -445,12 +279,10 @@ static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c, unsigned int } return size; } -#endif static struct cpu_dev amd_cpu_dev __cpuinitdata = { .c_vendor = "AMD", .c_ident = { "AuthenticAMD" }, -#ifdef CONFIG_X86_32 .c_models = { { .vendor = X86_VENDOR_AMD, .family = 4, .model_names = { @@ -463,11 +295,9 @@ static struct cpu_dev amd_cpu_dev __cpuinitdata = { } }, }, - .c_size_cache = amd_size_cache, -#endif .c_early_init = early_init_amd, .c_init = init_amd, - .c_x86_vendor = X86_VENDOR_AMD, + .c_size_cache = amd_size_cache, }; -cpu_dev_register(amd_cpu_dev); +cpu_vendor_dev_register(X86_VENDOR_AMD, &amd_cpu_dev); diff --git a/trunk/arch/x86/kernel/cpu/amd_64.c b/trunk/arch/x86/kernel/cpu/amd_64.c new file mode 100644 index 000000000000..d1692b2a41ff --- /dev/null +++ b/trunk/arch/x86/kernel/cpu/amd_64.c @@ -0,0 +1,224 @@ +#include +#include + +#include +#include +#include + +#include + +#include "cpu.h" + +int force_mwait __cpuinitdata; + +#ifdef CONFIG_NUMA +static int __cpuinit nearby_node(int apicid) +{ + int i, node; + + for (i = apicid - 1; i >= 0; i--) { + node = apicid_to_node[i]; + if (node != NUMA_NO_NODE && node_online(node)) + return node; + } + for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) { + node = apicid_to_node[i]; + if (node != NUMA_NO_NODE && node_online(node)) + return node; + } + return first_node(node_online_map); /* Shouldn't happen */ +} +#endif + +/* + * On a AMD dual core setup the lower bits of the APIC id distingush the cores. + * Assumes number of cores is a power of two. + */ +static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c) +{ +#ifdef CONFIG_SMP + unsigned bits; +#ifdef CONFIG_NUMA + int cpu = smp_processor_id(); + int node = 0; + unsigned apicid = hard_smp_processor_id(); +#endif + bits = c->x86_coreid_bits; + + /* Low order bits define the core id (index of core in socket) */ + c->cpu_core_id = c->initial_apicid & ((1 << bits)-1); + /* Convert the initial APIC ID into the socket ID */ + c->phys_proc_id = c->initial_apicid >> bits; + +#ifdef CONFIG_NUMA + node = c->phys_proc_id; + if (apicid_to_node[apicid] != NUMA_NO_NODE) + node = apicid_to_node[apicid]; + if (!node_online(node)) { + /* Two possibilities here: + - The CPU is missing memory and no node was created. + In that case try picking one from a nearby CPU + - The APIC IDs differ from the HyperTransport node IDs + which the K8 northbridge parsing fills in. + Assume they are all increased by a constant offset, + but in the same order as the HT nodeids. + If that doesn't result in a usable node fall back to the + path for the previous case. */ + + int ht_nodeid = c->initial_apicid; + + if (ht_nodeid >= 0 && + apicid_to_node[ht_nodeid] != NUMA_NO_NODE) + node = apicid_to_node[ht_nodeid]; + /* Pick a nearby node */ + if (!node_online(node)) + node = nearby_node(apicid); + } + numa_set_node(cpu, node); + + printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node); +#endif +#endif +} + +static void __cpuinit early_init_amd_mc(struct cpuinfo_x86 *c) +{ +#ifdef CONFIG_SMP + unsigned bits, ecx; + + /* Multi core CPU? */ + if (c->extended_cpuid_level < 0x80000008) + return; + + ecx = cpuid_ecx(0x80000008); + + c->x86_max_cores = (ecx & 0xff) + 1; + + /* CPU telling us the core id bits shift? */ + bits = (ecx >> 12) & 0xF; + + /* Otherwise recompute */ + if (bits == 0) { + while ((1 << bits) < c->x86_max_cores) + bits++; + } + + c->x86_coreid_bits = bits; + +#endif +} + +static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) +{ + early_init_amd_mc(c); + + /* c->x86_power is 8000_0007 edx. Bit 8 is constant TSC */ + if (c->x86_power & (1<<8)) + set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); + + set_cpu_cap(c, X86_FEATURE_SYSCALL32); +} + +static void __cpuinit init_amd(struct cpuinfo_x86 *c) +{ + unsigned level; + +#ifdef CONFIG_SMP + unsigned long value; + + /* + * Disable TLB flush filter by setting HWCR.FFDIS on K8 + * bit 6 of msr C001_0015 + * + * Errata 63 for SH-B3 steppings + * Errata 122 for all steppings (F+ have it disabled by default) + */ + if (c->x86 == 0xf) { + rdmsrl(MSR_K8_HWCR, value); + value |= 1 << 6; + wrmsrl(MSR_K8_HWCR, value); + } +#endif + + /* Bit 31 in normal CPUID used for nonstandard 3DNow ID; + 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */ + clear_cpu_cap(c, 0*32+31); + + /* On C+ stepping K8 rep microcode works well for copy/memset */ + if (c->x86 == 0xf) { + level = cpuid_eax(1); + if((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58) + set_cpu_cap(c, X86_FEATURE_REP_GOOD); + } + if (c->x86 == 0x10 || c->x86 == 0x11) + set_cpu_cap(c, X86_FEATURE_REP_GOOD); + + /* Enable workaround for FXSAVE leak */ + if (c->x86 >= 6) + set_cpu_cap(c, X86_FEATURE_FXSAVE_LEAK); + + level = get_model_name(c); + if (!level) { + switch (c->x86) { + case 0xf: + /* Should distinguish Models here, but this is only + a fallback anyways. */ + strcpy(c->x86_model_id, "Hammer"); + break; + } + } + display_cacheinfo(c); + + /* Multi core CPU? */ + if (c->extended_cpuid_level >= 0x80000008) + amd_detect_cmp(c); + + if (c->extended_cpuid_level >= 0x80000006 && + (cpuid_edx(0x80000006) & 0xf000)) + num_cache_leaves = 4; + else + num_cache_leaves = 3; + + if (c->x86 >= 0xf && c->x86 <= 0x11) + set_cpu_cap(c, X86_FEATURE_K8); + + /* MFENCE stops RDTSC speculation */ + set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC); + + if (c->x86 == 0x10) { + /* do this for boot cpu */ + if (c == &boot_cpu_data) + check_enable_amd_mmconf_dmi(); + + fam10h_check_enable_mmcfg(); + } + + if (c == &boot_cpu_data && c->x86 >= 0xf && c->x86 <= 0x11) { + unsigned long long tseg; + + /* + * Split up direct mapping around the TSEG SMM area. + * Don't do it for gbpages because there seems very little + * benefit in doing so. + */ + if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg)) { + printk(KERN_DEBUG "tseg: %010llx\n", tseg); + if ((tseg>>PMD_SHIFT) < + (max_low_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) || + ((tseg>>PMD_SHIFT) < + (max_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) && + (tseg>>PMD_SHIFT) >= (1ULL<<(32 - PMD_SHIFT)))) + set_memory_4k((unsigned long)__va(tseg), 1); + } + } +} + +static struct cpu_dev amd_cpu_dev __cpuinitdata = { + .c_vendor = "AMD", + .c_ident = { "AuthenticAMD" }, + .c_early_init = early_init_amd, + .c_init = init_amd, +}; + +cpu_vendor_dev_register(X86_VENDOR_AMD, &amd_cpu_dev); + diff --git a/trunk/arch/x86/kernel/cpu/centaur.c b/trunk/arch/x86/kernel/cpu/centaur.c index 89bfdd9cacc6..a0534c04d38a 100644 --- a/trunk/arch/x86/kernel/cpu/centaur.c +++ b/trunk/arch/x86/kernel/cpu/centaur.c @@ -289,6 +289,7 @@ static void __cpuinit init_c3(struct cpuinfo_x86 *c) if (c->x86_model >= 6 && c->x86_model < 9) set_cpu_cap(c, X86_FEATURE_3DNOW); + get_model_name(c); display_cacheinfo(c); } @@ -474,7 +475,6 @@ static struct cpu_dev centaur_cpu_dev __cpuinitdata = { .c_early_init = early_init_centaur, .c_init = init_centaur, .c_size_cache = centaur_size_cache, - .c_x86_vendor = X86_VENDOR_CENTAUR, }; -cpu_dev_register(centaur_cpu_dev); +cpu_vendor_dev_register(X86_VENDOR_CENTAUR, ¢aur_cpu_dev); diff --git a/trunk/arch/x86/kernel/cpu/centaur_64.c b/trunk/arch/x86/kernel/cpu/centaur_64.c index a1625f5a1e78..1d181c40e2e1 100644 --- a/trunk/arch/x86/kernel/cpu/centaur_64.c +++ b/trunk/arch/x86/kernel/cpu/centaur_64.c @@ -16,10 +16,9 @@ static void __cpuinit early_init_centaur(struct cpuinfo_x86 *c) static void __cpuinit init_centaur(struct cpuinfo_x86 *c) { - early_init_centaur(c); - if (c->x86 == 0x6 && c->x86_model >= 0xf) { c->x86_cache_alignment = c->x86_clflush_size * 2; + set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); set_cpu_cap(c, X86_FEATURE_REP_GOOD); } set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); @@ -30,8 +29,7 @@ static struct cpu_dev centaur_cpu_dev __cpuinitdata = { .c_ident = { "CentaurHauls" }, .c_early_init = early_init_centaur, .c_init = init_centaur, - .c_x86_vendor = X86_VENDOR_CENTAUR, }; -cpu_dev_register(centaur_cpu_dev); +cpu_vendor_dev_register(X86_VENDOR_CENTAUR, ¢aur_cpu_dev); diff --git a/trunk/arch/x86/kernel/cpu/cmpxchg.c b/trunk/arch/x86/kernel/cpu/cmpxchg.c deleted file mode 100644 index 2056ccf572cc..000000000000 --- a/trunk/arch/x86/kernel/cpu/cmpxchg.c +++ /dev/null @@ -1,72 +0,0 @@ -/* - * cmpxchg*() fallbacks for CPU not supporting these instructions - */ - -#include -#include -#include - -#ifndef CONFIG_X86_CMPXCHG -unsigned long cmpxchg_386_u8(volatile void *ptr, u8 old, u8 new) -{ - u8 prev; - unsigned long flags; - - /* Poor man's cmpxchg for 386. Unsuitable for SMP */ - local_irq_save(flags); - prev = *(u8 *)ptr; - if (prev == old) - *(u8 *)ptr = new; - local_irq_restore(flags); - return prev; -} -EXPORT_SYMBOL(cmpxchg_386_u8); - -unsigned long cmpxchg_386_u16(volatile void *ptr, u16 old, u16 new) -{ - u16 prev; - unsigned long flags; - - /* Poor man's cmpxchg for 386. Unsuitable for SMP */ - local_irq_save(flags); - prev = *(u16 *)ptr; - if (prev == old) - *(u16 *)ptr = new; - local_irq_restore(flags); - return prev; -} -EXPORT_SYMBOL(cmpxchg_386_u16); - -unsigned long cmpxchg_386_u32(volatile void *ptr, u32 old, u32 new) -{ - u32 prev; - unsigned long flags; - - /* Poor man's cmpxchg for 386. Unsuitable for SMP */ - local_irq_save(flags); - prev = *(u32 *)ptr; - if (prev == old) - *(u32 *)ptr = new; - local_irq_restore(flags); - return prev; -} -EXPORT_SYMBOL(cmpxchg_386_u32); -#endif - -#ifndef CONFIG_X86_CMPXCHG64 -unsigned long long cmpxchg_486_u64(volatile void *ptr, u64 old, u64 new) -{ - u64 prev; - unsigned long flags; - - /* Poor man's cmpxchg8b for 386 and 486. Unsuitable for SMP */ - local_irq_save(flags); - prev = *(u64 *)ptr; - if (prev == old) - *(u64 *)ptr = new; - local_irq_restore(flags); - return prev; -} -EXPORT_SYMBOL(cmpxchg_486_u64); -#endif - diff --git a/trunk/arch/x86/kernel/cpu/common.c b/trunk/arch/x86/kernel/cpu/common.c index 7581b62df184..4e456bd955bb 100644 --- a/trunk/arch/x86/kernel/cpu/common.c +++ b/trunk/arch/x86/kernel/cpu/common.c @@ -1,62 +1,28 @@ #include -#include -#include #include -#include -#include -#include -#include -#include #include #include +#include #include +#include +#include #include #include #include -#include #include #include #include #include #include -#include #ifdef CONFIG_X86_LOCAL_APIC #include #include #include -#include #endif -#include -#include -#include -#include -#include -#include -#include -#include - #include "cpu.h" -static struct cpu_dev *this_cpu __cpuinitdata; - -#ifdef CONFIG_X86_64 -/* We need valid kernel segments for data and code in long mode too - * IRET will check the segment types kkeil 2000/10/28 - * Also sysret mandates a special GDT layout - */ -/* The TLS descriptors are currently at a different place compared to i386. - Hopefully nobody expects them at a fixed place (Wine?) */ DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = { - [GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } }, - [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } }, - [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } }, - [GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } }, - [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } }, - [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } }, -} }; -#else -DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } }, [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } }, [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00cffa00 } } }, @@ -90,150 +56,17 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { [GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } }, [GDT_ENTRY_PERCPU] = { { { 0x00000000, 0x00000000 } } }, } }; -#endif EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); -#ifdef CONFIG_X86_32 -static int cachesize_override __cpuinitdata = -1; -static int disable_x86_serial_nr __cpuinitdata = 1; - -static int __init cachesize_setup(char *str) -{ - get_option(&str, &cachesize_override); - return 1; -} -__setup("cachesize=", cachesize_setup); - -static int __init x86_fxsr_setup(char *s) -{ - setup_clear_cpu_cap(X86_FEATURE_FXSR); - setup_clear_cpu_cap(X86_FEATURE_XMM); - return 1; -} -__setup("nofxsr", x86_fxsr_setup); - -static int __init x86_sep_setup(char *s) -{ - setup_clear_cpu_cap(X86_FEATURE_SEP); - return 1; -} -__setup("nosep", x86_sep_setup); - -/* Standard macro to see if a specific flag is changeable */ -static inline int flag_is_changeable_p(u32 flag) -{ - u32 f1, f2; - - asm("pushfl\n\t" - "pushfl\n\t" - "popl %0\n\t" - "movl %0,%1\n\t" - "xorl %2,%0\n\t" - "pushl %0\n\t" - "popfl\n\t" - "pushfl\n\t" - "popl %0\n\t" - "popfl\n\t" - : "=&r" (f1), "=&r" (f2) - : "ir" (flag)); - - return ((f1^f2) & flag) != 0; -} - -/* Probe for the CPUID instruction */ -static int __cpuinit have_cpuid_p(void) -{ - return flag_is_changeable_p(X86_EFLAGS_ID); -} - -static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c) -{ - if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr) { - /* Disable processor serial number */ - unsigned long lo, hi; - rdmsr(MSR_IA32_BBL_CR_CTL, lo, hi); - lo |= 0x200000; - wrmsr(MSR_IA32_BBL_CR_CTL, lo, hi); - printk(KERN_NOTICE "CPU serial number disabled.\n"); - clear_cpu_cap(c, X86_FEATURE_PN); - - /* Disabling the serial number may affect the cpuid level */ - c->cpuid_level = cpuid_eax(0); - } -} - -static int __init x86_serial_nr_setup(char *s) -{ - disable_x86_serial_nr = 0; - return 1; -} -__setup("serialnumber", x86_serial_nr_setup); -#else -static inline int flag_is_changeable_p(u32 flag) -{ - return 1; -} -/* Probe for the CPUID instruction */ -static inline int have_cpuid_p(void) -{ - return 1; -} -static inline void squash_the_stupid_serial_number(struct cpuinfo_x86 *c) -{ -} -#endif - -/* - * Naming convention should be: [()] - * This table only is used unless init_() below doesn't set it; - * in particular, if CPUID levels 0x80000002..4 are supported, this isn't used - * - */ - -/* Look up CPU names by table lookup. */ -static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c) -{ - struct cpu_model_info *info; - - if (c->x86_model >= 16) - return NULL; /* Range check */ - - if (!this_cpu) - return NULL; - - info = this_cpu->c_models; - - while (info && info->family) { - if (info->family == c->x86) - return info->model_names[c->x86_model]; - info++; - } - return NULL; /* Not found */ -} - __u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata; -/* Current gdt points %fs at the "master" per-cpu area: after this, - * it's on the real one. */ -void switch_to_new_gdt(void) -{ - struct desc_ptr gdt_descr; - - gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id()); - gdt_descr.size = GDT_SIZE - 1; - load_gdt(&gdt_descr); -#ifdef CONFIG_X86_32 - asm("mov %0, %%fs" : : "r" (__KERNEL_PERCPU) : "memory"); -#endif -} +static int cachesize_override __cpuinitdata = -1; +static int disable_x86_serial_nr __cpuinitdata = 1; -static struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {}; +struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {}; static void __cpuinit default_init(struct cpuinfo_x86 *c) { -#ifdef CONFIG_X86_64 - display_cacheinfo(c); -#else /* Not much we can do here... */ /* Check if at least it has cpuid */ if (c->cpuid_level == -1) { @@ -243,22 +76,28 @@ static void __cpuinit default_init(struct cpuinfo_x86 *c) else if (c->x86 == 3) strcpy(c->x86_model_id, "386"); } -#endif } static struct cpu_dev __cpuinitdata default_cpu = { .c_init = default_init, .c_vendor = "Unknown", - .c_x86_vendor = X86_VENDOR_UNKNOWN, }; +static struct cpu_dev *this_cpu __cpuinitdata = &default_cpu; -static void __cpuinit get_model_name(struct cpuinfo_x86 *c) +static int __init cachesize_setup(char *str) +{ + get_option(&str, &cachesize_override); + return 1; +} +__setup("cachesize=", cachesize_setup); + +int __cpuinit get_model_name(struct cpuinfo_x86 *c) { unsigned int *v; char *p, *q; - if (c->extended_cpuid_level < 0x80000004) - return; + if (cpuid_eax(0x80000000) < 0x80000004) + return 0; v = (unsigned int *) c->x86_model_id; cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]); @@ -277,34 +116,30 @@ static void __cpuinit get_model_name(struct cpuinfo_x86 *c) while (q <= &c->x86_model_id[48]) *q++ = '\0'; /* Zero-pad the rest */ } + + return 1; } + void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) { - unsigned int n, dummy, ebx, ecx, edx, l2size; + unsigned int n, dummy, ecx, edx, l2size; - n = c->extended_cpuid_level; + n = cpuid_eax(0x80000000); if (n >= 0x80000005) { - cpuid(0x80000005, &dummy, &ebx, &ecx, &edx); + cpuid(0x80000005, &dummy, &dummy, &ecx, &edx); printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n", - edx>>24, edx&0xFF, ecx>>24, ecx&0xFF); - c->x86_cache_size = (ecx>>24) + (edx>>24); -#ifdef CONFIG_X86_64 - /* On K8 L1 TLB is inclusive, so don't count it */ - c->x86_tlbsize = 0; -#endif + edx>>24, edx&0xFF, ecx>>24, ecx&0xFF); + c->x86_cache_size = (ecx>>24)+(edx>>24); } if (n < 0x80000006) /* Some chips just has a large L1. */ return; - cpuid(0x80000006, &dummy, &ebx, &ecx, &edx); + ecx = cpuid_ecx(0x80000006); l2size = ecx >> 16; -#ifdef CONFIG_X86_64 - c->x86_tlbsize += ((ebx >> 16) & 0xfff) + (ebx & 0xfff); -#else /* do processor-specific cache resizing */ if (this_cpu->c_size_cache) l2size = this_cpu->c_size_cache(c, l2size); @@ -315,106 +150,116 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) if (l2size == 0) return; /* Again, no L2 cache is possible */ -#endif c->x86_cache_size = l2size; printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n", - l2size, ecx & 0xFF); + l2size, ecx & 0xFF); } -void __cpuinit detect_ht(struct cpuinfo_x86 *c) -{ -#ifdef CONFIG_X86_HT - u32 eax, ebx, ecx, edx; - int index_msb, core_bits; - - if (!cpu_has(c, X86_FEATURE_HT)) - return; - - if (cpu_has(c, X86_FEATURE_CMP_LEGACY)) - goto out; - - if (cpu_has(c, X86_FEATURE_XTOPOLOGY)) - return; - - cpuid(1, &eax, &ebx, &ecx, &edx); - - smp_num_siblings = (ebx & 0xff0000) >> 16; - - if (smp_num_siblings == 1) { - printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); - } else if (smp_num_siblings > 1) { - - if (smp_num_siblings > NR_CPUS) { - printk(KERN_WARNING "CPU: Unsupported number of siblings %d", - smp_num_siblings); - smp_num_siblings = 1; - return; - } - - index_msb = get_count_order(smp_num_siblings); -#ifdef CONFIG_X86_64 - c->phys_proc_id = phys_pkg_id(index_msb); -#else - c->phys_proc_id = phys_pkg_id(c->initial_apicid, index_msb); -#endif +/* + * Naming convention should be: [()] + * This table only is used unless init_() below doesn't set it; + * in particular, if CPUID levels 0x80000002..4 are supported, this isn't used + * + */ - smp_num_siblings = smp_num_siblings / c->x86_max_cores; +/* Look up CPU names by table lookup. */ +static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c) +{ + struct cpu_model_info *info; - index_msb = get_count_order(smp_num_siblings); + if (c->x86_model >= 16) + return NULL; /* Range check */ - core_bits = get_count_order(c->x86_max_cores); + if (!this_cpu) + return NULL; -#ifdef CONFIG_X86_64 - c->cpu_core_id = phys_pkg_id(index_msb) & - ((1 << core_bits) - 1); -#else - c->cpu_core_id = phys_pkg_id(c->initial_apicid, index_msb) & - ((1 << core_bits) - 1); -#endif - } + info = this_cpu->c_models; -out: - if ((c->x86_max_cores * smp_num_siblings) > 1) { - printk(KERN_INFO "CPU: Physical Processor ID: %d\n", - c->phys_proc_id); - printk(KERN_INFO "CPU: Processor Core ID: %d\n", - c->cpu_core_id); + while (info && info->family) { + if (info->family == c->x86) + return info->model_names[c->x86_model]; + info++; } -#endif + return NULL; /* Not found */ } -static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c) + +static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c, int early) { char *v = c->x86_vendor_id; int i; static int printed; for (i = 0; i < X86_VENDOR_NUM; i++) { - if (!cpu_devs[i]) - break; - - if (!strcmp(v, cpu_devs[i]->c_ident[0]) || - (cpu_devs[i]->c_ident[1] && - !strcmp(v, cpu_devs[i]->c_ident[1]))) { - this_cpu = cpu_devs[i]; - c->x86_vendor = this_cpu->c_x86_vendor; - return; + if (cpu_devs[i]) { + if (!strcmp(v, cpu_devs[i]->c_ident[0]) || + (cpu_devs[i]->c_ident[1] && + !strcmp(v, cpu_devs[i]->c_ident[1]))) { + c->x86_vendor = i; + if (!early) + this_cpu = cpu_devs[i]; + return; + } } } - if (!printed) { printed++; - printk(KERN_ERR "CPU: vendor_id '%s' unknown, using generic init.\n", v); + printk(KERN_ERR "CPU: Vendor unknown, using generic init.\n"); printk(KERN_ERR "CPU: Your system may be unstable.\n"); } - c->x86_vendor = X86_VENDOR_UNKNOWN; this_cpu = &default_cpu; } -void __cpuinit cpu_detect(struct cpuinfo_x86 *c) + +static int __init x86_fxsr_setup(char *s) +{ + setup_clear_cpu_cap(X86_FEATURE_FXSR); + setup_clear_cpu_cap(X86_FEATURE_XMM); + return 1; +} +__setup("nofxsr", x86_fxsr_setup); + + +static int __init x86_sep_setup(char *s) +{ + setup_clear_cpu_cap(X86_FEATURE_SEP); + return 1; +} +__setup("nosep", x86_sep_setup); + + +/* Standard macro to see if a specific flag is changeable */ +static inline int flag_is_changeable_p(u32 flag) +{ + u32 f1, f2; + + asm("pushfl\n\t" + "pushfl\n\t" + "popl %0\n\t" + "movl %0,%1\n\t" + "xorl %2,%0\n\t" + "pushl %0\n\t" + "popfl\n\t" + "pushfl\n\t" + "popl %0\n\t" + "popfl\n\t" + : "=&r" (f1), "=&r" (f2) + : "ir" (flag)); + + return ((f1^f2) & flag) != 0; +} + + +/* Probe for the CPUID instruction */ +static int __cpuinit have_cpuid_p(void) +{ + return flag_is_changeable_p(X86_EFLAGS_ID); +} + +void __init cpu_detect(struct cpuinfo_x86 *c) { /* Get vendor name */ cpuid(0x00000000, (unsigned int *)&c->cpuid_level, @@ -423,87 +268,50 @@ void __cpuinit cpu_detect(struct cpuinfo_x86 *c) (unsigned int *)&c->x86_vendor_id[4]); c->x86 = 4; - /* Intel-defined flags: level 0x00000001 */ if (c->cpuid_level >= 0x00000001) { u32 junk, tfms, cap0, misc; cpuid(0x00000001, &tfms, &misc, &junk, &cap0); - c->x86 = (tfms >> 8) & 0xf; - c->x86_model = (tfms >> 4) & 0xf; - c->x86_mask = tfms & 0xf; + c->x86 = (tfms >> 8) & 15; + c->x86_model = (tfms >> 4) & 15; if (c->x86 == 0xf) c->x86 += (tfms >> 20) & 0xff; if (c->x86 >= 0x6) - c->x86_model += ((tfms >> 16) & 0xf) << 4; + c->x86_model += ((tfms >> 16) & 0xF) << 4; + c->x86_mask = tfms & 15; if (cap0 & (1<<19)) { + c->x86_cache_alignment = ((misc >> 8) & 0xff) * 8; c->x86_clflush_size = ((misc >> 8) & 0xff) * 8; - c->x86_cache_alignment = c->x86_clflush_size; } } } - -static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) +static void __cpuinit early_get_cap(struct cpuinfo_x86 *c) { u32 tfms, xlvl; - u32 ebx; + unsigned int ebx; - /* Intel-defined flags: level 0x00000001 */ - if (c->cpuid_level >= 0x00000001) { - u32 capability, excap; - cpuid(0x00000001, &tfms, &ebx, &excap, &capability); - c->x86_capability[0] = capability; - c->x86_capability[4] = excap; - } - - /* AMD-defined flags: level 0x80000001 */ - xlvl = cpuid_eax(0x80000000); - c->extended_cpuid_level = xlvl; - if ((xlvl & 0xffff0000) == 0x80000000) { - if (xlvl >= 0x80000001) { - c->x86_capability[1] = cpuid_edx(0x80000001); - c->x86_capability[6] = cpuid_ecx(0x80000001); + memset(&c->x86_capability, 0, sizeof c->x86_capability); + if (have_cpuid_p()) { + /* Intel-defined flags: level 0x00000001 */ + if (c->cpuid_level >= 0x00000001) { + u32 capability, excap; + cpuid(0x00000001, &tfms, &ebx, &excap, &capability); + c->x86_capability[0] = capability; + c->x86_capability[4] = excap; } - } -#ifdef CONFIG_X86_64 - if (c->extended_cpuid_level >= 0x80000008) { - u32 eax = cpuid_eax(0x80000008); + /* AMD-defined flags: level 0x80000001 */ + xlvl = cpuid_eax(0x80000000); + if ((xlvl & 0xffff0000) == 0x80000000) { + if (xlvl >= 0x80000001) { + c->x86_capability[1] = cpuid_edx(0x80000001); + c->x86_capability[6] = cpuid_ecx(0x80000001); + } + } - c->x86_virt_bits = (eax >> 8) & 0xff; - c->x86_phys_bits = eax & 0xff; } -#endif - - if (c->extended_cpuid_level >= 0x80000007) - c->x86_power = cpuid_edx(0x80000007); } -static void __cpuinit identify_cpu_without_cpuid(struct cpuinfo_x86 *c) -{ -#ifdef CONFIG_X86_32 - int i; - - /* - * First of all, decide if this is a 486 or higher - * It's a 486 if we can modify the AC flag - */ - if (flag_is_changeable_p(X86_EFLAGS_AC)) - c->x86 = 4; - else - c->x86 = 3; - - for (i = 0; i < X86_VENDOR_NUM; i++) - if (cpu_devs[i] && cpu_devs[i]->c_identify) { - c->x86_vendor_id[0] = 0; - cpu_devs[i]->c_identify(c); - if (c->x86_vendor_id[0]) { - get_cpu_vendor(c); - break; - } - } -#endif -} - /* * Do minimum CPU detection early. * Fields really needed: vendor, cpuid_level, family, model, mask, @@ -513,61 +321,25 @@ static void __cpuinit identify_cpu_without_cpuid(struct cpuinfo_x86 *c) * WARNING: this function is only called on the BP. Don't add code here * that is supposed to run on all CPUs. */ -static void __init early_identify_cpu(struct cpuinfo_x86 *c) +static void __init early_cpu_detect(void) { -#ifdef CONFIG_X86_64 - c->x86_clflush_size = 64; -#else - c->x86_clflush_size = 32; -#endif - c->x86_cache_alignment = c->x86_clflush_size; - - memset(&c->x86_capability, 0, sizeof c->x86_capability); - c->extended_cpuid_level = 0; + struct cpuinfo_x86 *c = &boot_cpu_data; - if (!have_cpuid_p()) - identify_cpu_without_cpuid(c); + c->x86_cache_alignment = 32; + c->x86_clflush_size = 32; - /* cyrix could have cpuid enabled via c_identify()*/ if (!have_cpuid_p()) return; cpu_detect(c); - get_cpu_vendor(c); + get_cpu_vendor(c, 1); - get_cpu_cap(c); + early_get_cap(c); - if (this_cpu->c_early_init) - this_cpu->c_early_init(c); - - validate_pat_support(c); -} - -void __init early_cpu_init(void) -{ - struct cpu_dev **cdev; - int count = 0; - - printk("KERNEL supported cpus:\n"); - for (cdev = __x86_cpu_dev_start; cdev < __x86_cpu_dev_end; cdev++) { - struct cpu_dev *cpudev = *cdev; - unsigned int j; - - if (count >= X86_VENDOR_NUM) - break; - cpu_devs[count] = cpudev; - count++; - - for (j = 0; j < 2; j++) { - if (!cpudev->c_ident[j]) - continue; - printk(" %s %s\n", cpudev->c_vendor, - cpudev->c_ident[j]); - } - } - - early_identify_cpu(&boot_cpu_data); + if (c->x86_vendor != X86_VENDOR_UNKNOWN && + cpu_devs[c->x86_vendor]->c_early_init) + cpu_devs[c->x86_vendor]->c_early_init(c); } /* @@ -585,41 +357,86 @@ static void __cpuinit detect_nopl(struct cpuinfo_x86 *c) static void __cpuinit generic_identify(struct cpuinfo_x86 *c) { - c->extended_cpuid_level = 0; - - if (!have_cpuid_p()) - identify_cpu_without_cpuid(c); - - /* cyrix could have cpuid enabled via c_identify()*/ - if (!have_cpuid_p()) - return; - - cpu_detect(c); + u32 tfms, xlvl; + unsigned int ebx; + + if (have_cpuid_p()) { + /* Get vendor name */ + cpuid(0x00000000, (unsigned int *)&c->cpuid_level, + (unsigned int *)&c->x86_vendor_id[0], + (unsigned int *)&c->x86_vendor_id[8], + (unsigned int *)&c->x86_vendor_id[4]); + + get_cpu_vendor(c, 0); + /* Initialize the standard set of capabilities */ + /* Note that the vendor-specific code below might override */ + /* Intel-defined flags: level 0x00000001 */ + if (c->cpuid_level >= 0x00000001) { + u32 capability, excap; + cpuid(0x00000001, &tfms, &ebx, &excap, &capability); + c->x86_capability[0] = capability; + c->x86_capability[4] = excap; + c->x86 = (tfms >> 8) & 15; + c->x86_model = (tfms >> 4) & 15; + if (c->x86 == 0xf) + c->x86 += (tfms >> 20) & 0xff; + if (c->x86 >= 0x6) + c->x86_model += ((tfms >> 16) & 0xF) << 4; + c->x86_mask = tfms & 15; + c->initial_apicid = (ebx >> 24) & 0xFF; +#ifdef CONFIG_X86_HT + c->apicid = phys_pkg_id(c->initial_apicid, 0); + c->phys_proc_id = c->initial_apicid; +#else + c->apicid = c->initial_apicid; +#endif + if (test_cpu_cap(c, X86_FEATURE_CLFLSH)) + c->x86_clflush_size = ((ebx >> 8) & 0xff) * 8; + } else { + /* Have CPUID level 0 only - unheard of */ + c->x86 = 4; + } - get_cpu_vendor(c); + /* AMD-defined flags: level 0x80000001 */ + xlvl = cpuid_eax(0x80000000); + if ((xlvl & 0xffff0000) == 0x80000000) { + if (xlvl >= 0x80000001) { + c->x86_capability[1] = cpuid_edx(0x80000001); + c->x86_capability[6] = cpuid_ecx(0x80000001); + } + if (xlvl >= 0x80000004) + get_model_name(c); /* Default name */ + } - get_cpu_cap(c); + init_scattered_cpuid_features(c); + detect_nopl(c); + } +} - if (c->cpuid_level >= 0x00000001) { - c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xFF; -#ifdef CONFIG_X86_32 -# ifdef CONFIG_X86_HT - c->apicid = phys_pkg_id(c->initial_apicid, 0); -# else - c->apicid = c->initial_apicid; -# endif -#endif +static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c) +{ + if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr) { + /* Disable processor serial number */ + unsigned long lo, hi; + rdmsr(MSR_IA32_BBL_CR_CTL, lo, hi); + lo |= 0x200000; + wrmsr(MSR_IA32_BBL_CR_CTL, lo, hi); + printk(KERN_NOTICE "CPU serial number disabled.\n"); + clear_cpu_cap(c, X86_FEATURE_PN); -#ifdef CONFIG_X86_HT - c->phys_proc_id = c->initial_apicid; -#endif + /* Disabling the serial number may affect the cpuid level */ + c->cpuid_level = cpuid_eax(0); } +} - get_model_name(c); /* Default name */ - - init_scattered_cpuid_features(c); - detect_nopl(c); +static int __init x86_serial_nr_setup(char *s) +{ + disable_x86_serial_nr = 0; + return 1; } +__setup("serialnumber", x86_serial_nr_setup); + + /* * This does the hard work of actually picking apart the CPU stuff... @@ -631,29 +448,30 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) c->loops_per_jiffy = loops_per_jiffy; c->x86_cache_size = -1; c->x86_vendor = X86_VENDOR_UNKNOWN; + c->cpuid_level = -1; /* CPUID not detected */ c->x86_model = c->x86_mask = 0; /* So far unknown... */ c->x86_vendor_id[0] = '\0'; /* Unset */ c->x86_model_id[0] = '\0'; /* Unset */ c->x86_max_cores = 1; - c->x86_coreid_bits = 0; -#ifdef CONFIG_X86_64 - c->x86_clflush_size = 64; -#else - c->cpuid_level = -1; /* CPUID not detected */ c->x86_clflush_size = 32; -#endif - c->x86_cache_alignment = c->x86_clflush_size; memset(&c->x86_capability, 0, sizeof c->x86_capability); + if (!have_cpuid_p()) { + /* + * First of all, decide if this is a 486 or higher + * It's a 486 if we can modify the AC flag + */ + if (flag_is_changeable_p(X86_EFLAGS_AC)) + c->x86 = 4; + else + c->x86 = 3; + } + generic_identify(c); if (this_cpu->c_identify) this_cpu->c_identify(c); -#ifdef CONFIG_X86_64 - c->apicid = phys_pkg_id(0); -#endif - /* * Vendor-specific initialization. In this section we * canonicalize the feature flags, meaning if there are @@ -687,10 +505,6 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) c->x86, c->x86_model); } -#ifdef CONFIG_X86_64 - detect_ht(c); -#endif - /* * On SMP, boot_cpu_data holds the common feature set between * all CPUs; so make sure that we indicate which features are @@ -699,7 +513,7 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) */ if (c != &boot_cpu_data) { /* AND the already accumulated flags with these */ - for (i = 0; i < NCAPINTS; i++) + for (i = 0 ; i < NCAPINTS ; i++) boot_cpu_data.x86_capability[i] &= c->x86_capability[i]; } @@ -707,79 +521,72 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) for (i = 0; i < NCAPINTS; i++) c->x86_capability[i] &= ~cleared_cpu_caps[i]; -#ifdef CONFIG_X86_MCE /* Init Machine Check Exception if available. */ mcheck_init(c); -#endif select_idle_routine(c); - -#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) - numa_add_cpu(smp_processor_id()); -#endif } void __init identify_boot_cpu(void) { identify_cpu(&boot_cpu_data); -#ifdef CONFIG_X86_32 sysenter_setup(); enable_sep_cpu(); -#endif } void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) { BUG_ON(c == &boot_cpu_data); identify_cpu(c); -#ifdef CONFIG_X86_32 enable_sep_cpu(); -#endif mtrr_ap_init(); } -struct msr_range { - unsigned min; - unsigned max; -}; +#ifdef CONFIG_X86_HT +void __cpuinit detect_ht(struct cpuinfo_x86 *c) +{ + u32 eax, ebx, ecx, edx; + int index_msb, core_bits; -static struct msr_range msr_range_array[] __cpuinitdata = { - { 0x00000000, 0x00000418}, - { 0xc0000000, 0xc000040b}, - { 0xc0010000, 0xc0010142}, - { 0xc0011000, 0xc001103b}, -}; + cpuid(1, &eax, &ebx, &ecx, &edx); -static void __cpuinit print_cpu_msr(void) -{ - unsigned index; - u64 val; - int i; - unsigned index_min, index_max; - - for (i = 0; i < ARRAY_SIZE(msr_range_array); i++) { - index_min = msr_range_array[i].min; - index_max = msr_range_array[i].max; - for (index = index_min; index < index_max; index++) { - if (rdmsrl_amd_safe(index, &val)) - continue; - printk(KERN_INFO " MSR%08x: %016llx\n", index, val); + if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY)) + return; + + smp_num_siblings = (ebx & 0xff0000) >> 16; + + if (smp_num_siblings == 1) { + printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); + } else if (smp_num_siblings > 1) { + + if (smp_num_siblings > NR_CPUS) { + printk(KERN_WARNING "CPU: Unsupported number of the " + "siblings %d", smp_num_siblings); + smp_num_siblings = 1; + return; } - } -} -static int show_msr __cpuinitdata; -static __init int setup_show_msr(char *arg) -{ - int num; + index_msb = get_count_order(smp_num_siblings); + c->phys_proc_id = phys_pkg_id(c->initial_apicid, index_msb); - get_option(&arg, &num); + printk(KERN_INFO "CPU: Physical Processor ID: %d\n", + c->phys_proc_id); - if (num > 0) - show_msr = num; - return 1; + smp_num_siblings = smp_num_siblings / c->x86_max_cores; + + index_msb = get_count_order(smp_num_siblings) ; + + core_bits = get_count_order(c->x86_max_cores); + + c->cpu_core_id = phys_pkg_id(c->initial_apicid, index_msb) & + ((1 << core_bits) - 1); + + if (c->x86_max_cores > 1) + printk(KERN_INFO "CPU: Processor Core ID: %d\n", + c->cpu_core_id); + } } -__setup("show_msr=", setup_show_msr); +#endif static __init int setup_noclflush(char *arg) { @@ -798,25 +605,17 @@ void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) vendor = c->x86_vendor_id; if (vendor && strncmp(c->x86_model_id, vendor, strlen(vendor))) - printk(KERN_CONT "%s ", vendor); + printk("%s ", vendor); - if (c->x86_model_id[0]) - printk(KERN_CONT "%s", c->x86_model_id); + if (!c->x86_model_id[0]) + printk("%d86", c->x86); else - printk(KERN_CONT "%d86", c->x86); + printk("%s", c->x86_model_id); if (c->x86_mask || c->cpuid_level >= 0) - printk(KERN_CONT " stepping %02x\n", c->x86_mask); + printk(" stepping %02x\n", c->x86_mask); else - printk(KERN_CONT "\n"); - -#ifdef CONFIG_SMP - if (c->cpu_index < show_msr) - print_cpu_msr(); -#else - if (show_msr) - print_cpu_msr(); -#endif + printk("\n"); } static __init int setup_disablecpuid(char *arg) @@ -832,89 +631,19 @@ __setup("clearcpuid=", setup_disablecpuid); cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE; -#ifdef CONFIG_X86_64 -struct x8664_pda **_cpu_pda __read_mostly; -EXPORT_SYMBOL(_cpu_pda); - -struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; - -char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss; - -void __cpuinit pda_init(int cpu) -{ - struct x8664_pda *pda = cpu_pda(cpu); - - /* Setup up data that may be needed in __get_free_pages early */ - loadsegment(fs, 0); - loadsegment(gs, 0); - /* Memory clobbers used to order PDA accessed */ - mb(); - wrmsrl(MSR_GS_BASE, pda); - mb(); - - pda->cpunumber = cpu; - pda->irqcount = -1; - pda->kernelstack = (unsigned long)stack_thread_info() - - PDA_STACKOFFSET + THREAD_SIZE; - pda->active_mm = &init_mm; - pda->mmu_state = 0; - - if (cpu == 0) { - /* others are initialized in smpboot.c */ - pda->pcurrent = &init_task; - pda->irqstackptr = boot_cpu_stack; - pda->irqstackptr += IRQSTACKSIZE - 64; - } else { - if (!pda->irqstackptr) { - pda->irqstackptr = (char *) - __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER); - if (!pda->irqstackptr) - panic("cannot allocate irqstack for cpu %d", - cpu); - pda->irqstackptr += IRQSTACKSIZE - 64; - } - - if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE) - pda->nodenumber = cpu_to_node(cpu); - } -} - -char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + - DEBUG_STKSZ] __page_aligned_bss; - -extern asmlinkage void ignore_sysret(void); - -/* May not be marked __init: used by software suspend */ -void syscall_init(void) +void __init early_cpu_init(void) { - /* - * LSTAR and STAR live in a bit strange symbiosis. - * They both write to the same internal register. STAR allows to - * set CS/DS but only a 32bit target. LSTAR sets the 64bit rip. - */ - wrmsrl(MSR_STAR, ((u64)__USER32_CS)<<48 | ((u64)__KERNEL_CS)<<32); - wrmsrl(MSR_LSTAR, system_call); - wrmsrl(MSR_CSTAR, ignore_sysret); + struct cpu_vendor_dev *cvdev; -#ifdef CONFIG_IA32_EMULATION - syscall32_cpu_init(); -#endif + for (cvdev = __x86cpuvendor_start ; + cvdev < __x86cpuvendor_end ; + cvdev++) + cpu_devs[cvdev->vendor] = cvdev->cpu_dev; - /* Flags to clear on syscall */ - wrmsrl(MSR_SYSCALL_MASK, - X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF|X86_EFLAGS_IOPL); + early_cpu_detect(); + validate_pat_support(&boot_cpu_data); } -unsigned long kernel_eflags; - -/* - * Copies of the original ist values from the tss are only accessed during - * debugging, no special alignment required. - */ -DEFINE_PER_CPU(struct orig_ist, orig_ist); - -#else - /* Make sure %fs is initialized properly in idle threads */ struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs) { @@ -922,136 +651,25 @@ struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs) regs->fs = __KERNEL_PERCPU; return regs; } -#endif + +/* Current gdt points %fs at the "master" per-cpu area: after this, + * it's on the real one. */ +void switch_to_new_gdt(void) +{ + struct desc_ptr gdt_descr; + + gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id()); + gdt_descr.size = GDT_SIZE - 1; + load_gdt(&gdt_descr); + asm("mov %0, %%fs" : : "r" (__KERNEL_PERCPU) : "memory"); +} /* * cpu_init() initializes state that is per-CPU. Some data is already * initialized (naturally) in the bootstrap process, such as the GDT * and IDT. We reload them nevertheless, this function acts as a * 'CPU state barrier', nothing should get across. - * A lot of state is already set up in PDA init for 64 bit */ -#ifdef CONFIG_X86_64 -void __cpuinit cpu_init(void) -{ - int cpu = stack_smp_processor_id(); - struct tss_struct *t = &per_cpu(init_tss, cpu); - struct orig_ist *orig_ist = &per_cpu(orig_ist, cpu); - unsigned long v; - char *estacks = NULL; - struct task_struct *me; - int i; - - /* CPU 0 is initialised in head64.c */ - if (cpu != 0) - pda_init(cpu); - else - estacks = boot_exception_stacks; - - me = current; - - if (cpu_test_and_set(cpu, cpu_initialized)) - panic("CPU#%d already initialized!\n", cpu); - - printk(KERN_INFO "Initializing CPU#%d\n", cpu); - - clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); - - /* - * Initialize the per-CPU GDT with the boot GDT, - * and set up the GDT descriptor: - */ - - switch_to_new_gdt(); - load_idt((const struct desc_ptr *)&idt_descr); - - memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8); - syscall_init(); - - wrmsrl(MSR_FS_BASE, 0); - wrmsrl(MSR_KERNEL_GS_BASE, 0); - barrier(); - - check_efer(); - if (cpu != 0 && x2apic) - enable_x2apic(); - - /* - * set up and load the per-CPU TSS - */ - if (!orig_ist->ist[0]) { - static const unsigned int order[N_EXCEPTION_STACKS] = { - [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER, - [DEBUG_STACK - 1] = DEBUG_STACK_ORDER - }; - for (v = 0; v < N_EXCEPTION_STACKS; v++) { - if (cpu) { - estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]); - if (!estacks) - panic("Cannot allocate exception " - "stack %ld %d\n", v, cpu); - } - estacks += PAGE_SIZE << order[v]; - orig_ist->ist[v] = t->x86_tss.ist[v] = - (unsigned long)estacks; - } - } - - t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap); - /* - * <= is required because the CPU will access up to - * 8 bits beyond the end of the IO permission bitmap. - */ - for (i = 0; i <= IO_BITMAP_LONGS; i++) - t->io_bitmap[i] = ~0UL; - - atomic_inc(&init_mm.mm_count); - me->active_mm = &init_mm; - if (me->mm) - BUG(); - enter_lazy_tlb(&init_mm, me); - - load_sp0(t, ¤t->thread); - set_tss_desc(cpu, t); - load_TR_desc(); - load_LDT(&init_mm.context); - -#ifdef CONFIG_KGDB - /* - * If the kgdb is connected no debug regs should be altered. This - * is only applicable when KGDB and a KGDB I/O module are built - * into the kernel and you are using early debugging with - * kgdbwait. KGDB will control the kernel HW breakpoint registers. - */ - if (kgdb_connected && arch_kgdb_ops.correct_hw_break) - arch_kgdb_ops.correct_hw_break(); - else { -#endif - /* - * Clear all 6 debug registers: - */ - - set_debugreg(0UL, 0); - set_debugreg(0UL, 1); - set_debugreg(0UL, 2); - set_debugreg(0UL, 3); - set_debugreg(0UL, 6); - set_debugreg(0UL, 7); -#ifdef CONFIG_KGDB - /* If the kgdb is connected no debug regs should be altered. */ - } -#endif - - fpu_init(); - - raw_local_save_flags(kernel_eflags); - - if (is_uv_system()) - uv_cpu_init(); -} - -#else - void __cpuinit cpu_init(void) { int cpu = smp_processor_id(); @@ -1105,20 +723,9 @@ void __cpuinit cpu_init(void) /* * Force FPU initialization: */ - if (cpu_has_xsave) - current_thread_info()->status = TS_XSAVE; - else - current_thread_info()->status = 0; + current_thread_info()->status = 0; clear_used_math(); mxcsr_feature_mask_init(); - - /* - * Boot processor to setup the FP and extended state context info. - */ - if (!smp_processor_id()) - init_thread_xstate(); - - xsave_init(); } #ifdef CONFIG_HOTPLUG_CPU @@ -1132,5 +739,3 @@ void __cpuinit cpu_uninit(void) per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm; } #endif - -#endif diff --git a/trunk/arch/x86/kernel/cpu/common_64.c b/trunk/arch/x86/kernel/cpu/common_64.c new file mode 100644 index 000000000000..305b465889b0 --- /dev/null +++ b/trunk/arch/x86/kernel/cpu/common_64.c @@ -0,0 +1,763 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef CONFIG_X86_LOCAL_APIC +#include +#include +#include +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "cpu.h" + +/* We need valid kernel segments for data and code in long mode too + * IRET will check the segment types kkeil 2000/10/28 + * Also sysret mandates a special GDT layout + */ +/* The TLS descriptors are currently at a different place compared to i386. + Hopefully nobody expects them at a fixed place (Wine?) */ +DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = { + [GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } }, + [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } }, + [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } }, + [GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } }, + [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } }, + [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } }, +} }; +EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); + +__u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata; + +/* Current gdt points %fs at the "master" per-cpu area: after this, + * it's on the real one. */ +void switch_to_new_gdt(void) +{ + struct desc_ptr gdt_descr; + + gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id()); + gdt_descr.size = GDT_SIZE - 1; + load_gdt(&gdt_descr); +} + +struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {}; + +static void __cpuinit default_init(struct cpuinfo_x86 *c) +{ + display_cacheinfo(c); +} + +static struct cpu_dev __cpuinitdata default_cpu = { + .c_init = default_init, + .c_vendor = "Unknown", +}; +static struct cpu_dev *this_cpu __cpuinitdata = &default_cpu; + +int __cpuinit get_model_name(struct cpuinfo_x86 *c) +{ + unsigned int *v; + + if (c->extended_cpuid_level < 0x80000004) + return 0; + + v = (unsigned int *) c->x86_model_id; + cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]); + cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]); + cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]); + c->x86_model_id[48] = 0; + return 1; +} + + +void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) +{ + unsigned int n, dummy, ebx, ecx, edx; + + n = c->extended_cpuid_level; + + if (n >= 0x80000005) { + cpuid(0x80000005, &dummy, &ebx, &ecx, &edx); + printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), " + "D cache %dK (%d bytes/line)\n", + edx>>24, edx&0xFF, ecx>>24, ecx&0xFF); + c->x86_cache_size = (ecx>>24) + (edx>>24); + /* On K8 L1 TLB is inclusive, so don't count it */ + c->x86_tlbsize = 0; + } + + if (n >= 0x80000006) { + cpuid(0x80000006, &dummy, &ebx, &ecx, &edx); + ecx = cpuid_ecx(0x80000006); + c->x86_cache_size = ecx >> 16; + c->x86_tlbsize += ((ebx >> 16) & 0xfff) + (ebx & 0xfff); + + printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n", + c->x86_cache_size, ecx & 0xFF); + } +} + +void __cpuinit detect_ht(struct cpuinfo_x86 *c) +{ +#ifdef CONFIG_SMP + u32 eax, ebx, ecx, edx; + int index_msb, core_bits; + + cpuid(1, &eax, &ebx, &ecx, &edx); + + + if (!cpu_has(c, X86_FEATURE_HT)) + return; + if (cpu_has(c, X86_FEATURE_CMP_LEGACY)) + goto out; + + smp_num_siblings = (ebx & 0xff0000) >> 16; + + if (smp_num_siblings == 1) { + printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); + } else if (smp_num_siblings > 1) { + + if (smp_num_siblings > NR_CPUS) { + printk(KERN_WARNING "CPU: Unsupported number of " + "siblings %d", smp_num_siblings); + smp_num_siblings = 1; + return; + } + + index_msb = get_count_order(smp_num_siblings); + c->phys_proc_id = phys_pkg_id(index_msb); + + smp_num_siblings = smp_num_siblings / c->x86_max_cores; + + index_msb = get_count_order(smp_num_siblings); + + core_bits = get_count_order(c->x86_max_cores); + + c->cpu_core_id = phys_pkg_id(index_msb) & + ((1 << core_bits) - 1); + } +out: + if ((c->x86_max_cores * smp_num_siblings) > 1) { + printk(KERN_INFO "CPU: Physical Processor ID: %d\n", + c->phys_proc_id); + printk(KERN_INFO "CPU: Processor Core ID: %d\n", + c->cpu_core_id); + } + +#endif +} + +static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c) +{ + char *v = c->x86_vendor_id; + int i; + static int printed; + + for (i = 0; i < X86_VENDOR_NUM; i++) { + if (cpu_devs[i]) { + if (!strcmp(v, cpu_devs[i]->c_ident[0]) || + (cpu_devs[i]->c_ident[1] && + !strcmp(v, cpu_devs[i]->c_ident[1]))) { + c->x86_vendor = i; + this_cpu = cpu_devs[i]; + return; + } + } + } + if (!printed) { + printed++; + printk(KERN_ERR "CPU: Vendor unknown, using generic init.\n"); + printk(KERN_ERR "CPU: Your system may be unstable.\n"); + } + c->x86_vendor = X86_VENDOR_UNKNOWN; +} + +static void __init early_cpu_support_print(void) +{ + int i,j; + struct cpu_dev *cpu_devx; + + printk("KERNEL supported cpus:\n"); + for (i = 0; i < X86_VENDOR_NUM; i++) { + cpu_devx = cpu_devs[i]; + if (!cpu_devx) + continue; + for (j = 0; j < 2; j++) { + if (!cpu_devx->c_ident[j]) + continue; + printk(" %s %s\n", cpu_devx->c_vendor, + cpu_devx->c_ident[j]); + } + } +} + +/* + * The NOPL instruction is supposed to exist on all CPUs with + * family >= 6, unfortunately, that's not true in practice because + * of early VIA chips and (more importantly) broken virtualizers that + * are not easy to detect. Hence, probe for it based on first + * principles. + * + * Note: no 64-bit chip is known to lack these, but put the code here + * for consistency with 32 bits, and to make it utterly trivial to + * diagnose the problem should it ever surface. + */ +static void __cpuinit detect_nopl(struct cpuinfo_x86 *c) +{ + const u32 nopl_signature = 0x888c53b1; /* Random number */ + u32 has_nopl = nopl_signature; + + clear_cpu_cap(c, X86_FEATURE_NOPL); + if (c->x86 >= 6) { + asm volatile("\n" + "1: .byte 0x0f,0x1f,0xc0\n" /* nopl %eax */ + "2:\n" + " .section .fixup,\"ax\"\n" + "3: xor %0,%0\n" + " jmp 2b\n" + " .previous\n" + _ASM_EXTABLE(1b,3b) + : "+a" (has_nopl)); + + if (has_nopl == nopl_signature) + set_cpu_cap(c, X86_FEATURE_NOPL); + } +} + +static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c); + +void __init early_cpu_init(void) +{ + struct cpu_vendor_dev *cvdev; + + for (cvdev = __x86cpuvendor_start ; + cvdev < __x86cpuvendor_end ; + cvdev++) + cpu_devs[cvdev->vendor] = cvdev->cpu_dev; + early_cpu_support_print(); + early_identify_cpu(&boot_cpu_data); +} + +/* Do some early cpuid on the boot CPU to get some parameter that are + needed before check_bugs. Everything advanced is in identify_cpu + below. */ +static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c) +{ + u32 tfms, xlvl; + + c->loops_per_jiffy = loops_per_jiffy; + c->x86_cache_size = -1; + c->x86_vendor = X86_VENDOR_UNKNOWN; + c->x86_model = c->x86_mask = 0; /* So far unknown... */ + c->x86_vendor_id[0] = '\0'; /* Unset */ + c->x86_model_id[0] = '\0'; /* Unset */ + c->x86_clflush_size = 64; + c->x86_cache_alignment = c->x86_clflush_size; + c->x86_max_cores = 1; + c->x86_coreid_bits = 0; + c->extended_cpuid_level = 0; + memset(&c->x86_capability, 0, sizeof c->x86_capability); + + /* Get vendor name */ + cpuid(0x00000000, (unsigned int *)&c->cpuid_level, + (unsigned int *)&c->x86_vendor_id[0], + (unsigned int *)&c->x86_vendor_id[8], + (unsigned int *)&c->x86_vendor_id[4]); + + get_cpu_vendor(c); + + /* Initialize the standard set of capabilities */ + /* Note that the vendor-specific code below might override */ + + /* Intel-defined flags: level 0x00000001 */ + if (c->cpuid_level >= 0x00000001) { + __u32 misc; + cpuid(0x00000001, &tfms, &misc, &c->x86_capability[4], + &c->x86_capability[0]); + c->x86 = (tfms >> 8) & 0xf; + c->x86_model = (tfms >> 4) & 0xf; + c->x86_mask = tfms & 0xf; + if (c->x86 == 0xf) + c->x86 += (tfms >> 20) & 0xff; + if (c->x86 >= 0x6) + c->x86_model += ((tfms >> 16) & 0xF) << 4; + if (test_cpu_cap(c, X86_FEATURE_CLFLSH)) + c->x86_clflush_size = ((misc >> 8) & 0xff) * 8; + } else { + /* Have CPUID level 0 only - unheard of */ + c->x86 = 4; + } + + c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xff; +#ifdef CONFIG_SMP + c->phys_proc_id = c->initial_apicid; +#endif + /* AMD-defined flags: level 0x80000001 */ + xlvl = cpuid_eax(0x80000000); + c->extended_cpuid_level = xlvl; + if ((xlvl & 0xffff0000) == 0x80000000) { + if (xlvl >= 0x80000001) { + c->x86_capability[1] = cpuid_edx(0x80000001); + c->x86_capability[6] = cpuid_ecx(0x80000001); + } + if (xlvl >= 0x80000004) + get_model_name(c); /* Default name */ + } + + /* Transmeta-defined flags: level 0x80860001 */ + xlvl = cpuid_eax(0x80860000); + if ((xlvl & 0xffff0000) == 0x80860000) { + /* Don't set x86_cpuid_level here for now to not confuse. */ + if (xlvl >= 0x80860001) + c->x86_capability[2] = cpuid_edx(0x80860001); + } + + if (c->extended_cpuid_level >= 0x80000007) + c->x86_power = cpuid_edx(0x80000007); + + if (c->extended_cpuid_level >= 0x80000008) { + u32 eax = cpuid_eax(0x80000008); + + c->x86_virt_bits = (eax >> 8) & 0xff; + c->x86_phys_bits = eax & 0xff; + } + + detect_nopl(c); + + if (c->x86_vendor != X86_VENDOR_UNKNOWN && + cpu_devs[c->x86_vendor]->c_early_init) + cpu_devs[c->x86_vendor]->c_early_init(c); + + validate_pat_support(c); +} + +/* + * This does the hard work of actually picking apart the CPU stuff... + */ +static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) +{ + int i; + + early_identify_cpu(c); + + init_scattered_cpuid_features(c); + + c->apicid = phys_pkg_id(0); + + /* + * Vendor-specific initialization. In this section we + * canonicalize the feature flags, meaning if there are + * features a certain CPU supports which CPUID doesn't + * tell us, CPUID claiming incorrect flags, or other bugs, + * we handle them here. + * + * At the end of this section, c->x86_capability better + * indicate the features this CPU genuinely supports! + */ + if (this_cpu->c_init) + this_cpu->c_init(c); + + detect_ht(c); + + /* + * On SMP, boot_cpu_data holds the common feature set between + * all CPUs; so make sure that we indicate which features are + * common between the CPUs. The first time this routine gets + * executed, c == &boot_cpu_data. + */ + if (c != &boot_cpu_data) { + /* AND the already accumulated flags with these */ + for (i = 0; i < NCAPINTS; i++) + boot_cpu_data.x86_capability[i] &= c->x86_capability[i]; + } + + /* Clear all flags overriden by options */ + for (i = 0; i < NCAPINTS; i++) + c->x86_capability[i] &= ~cleared_cpu_caps[i]; + +#ifdef CONFIG_X86_MCE + mcheck_init(c); +#endif + select_idle_routine(c); + +#ifdef CONFIG_NUMA + numa_add_cpu(smp_processor_id()); +#endif + +} + +void __cpuinit identify_boot_cpu(void) +{ + identify_cpu(&boot_cpu_data); +} + +void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) +{ + BUG_ON(c == &boot_cpu_data); + identify_cpu(c); + mtrr_ap_init(); +} + +static __init int setup_noclflush(char *arg) +{ + setup_clear_cpu_cap(X86_FEATURE_CLFLSH); + return 1; +} +__setup("noclflush", setup_noclflush); + +struct msr_range { + unsigned min; + unsigned max; +}; + +static struct msr_range msr_range_array[] __cpuinitdata = { + { 0x00000000, 0x00000418}, + { 0xc0000000, 0xc000040b}, + { 0xc0010000, 0xc0010142}, + { 0xc0011000, 0xc001103b}, +}; + +static void __cpuinit print_cpu_msr(void) +{ + unsigned index; + u64 val; + int i; + unsigned index_min, index_max; + + for (i = 0; i < ARRAY_SIZE(msr_range_array); i++) { + index_min = msr_range_array[i].min; + index_max = msr_range_array[i].max; + for (index = index_min; index < index_max; index++) { + if (rdmsrl_amd_safe(index, &val)) + continue; + printk(KERN_INFO " MSR%08x: %016llx\n", index, val); + } + } +} + +static int show_msr __cpuinitdata; +static __init int setup_show_msr(char *arg) +{ + int num; + + get_option(&arg, &num); + + if (num > 0) + show_msr = num; + return 1; +} +__setup("show_msr=", setup_show_msr); + +void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) +{ + if (c->x86_model_id[0]) + printk(KERN_CONT "%s", c->x86_model_id); + + if (c->x86_mask || c->cpuid_level >= 0) + printk(KERN_CONT " stepping %02x\n", c->x86_mask); + else + printk(KERN_CONT "\n"); + +#ifdef CONFIG_SMP + if (c->cpu_index < show_msr) + print_cpu_msr(); +#else + if (show_msr) + print_cpu_msr(); +#endif +} + +static __init int setup_disablecpuid(char *arg) +{ + int bit; + if (get_option(&arg, &bit) && bit < NCAPINTS*32) + setup_clear_cpu_cap(bit); + else + return 0; + return 1; +} +__setup("clearcpuid=", setup_disablecpuid); + +cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE; + +struct x8664_pda **_cpu_pda __read_mostly; +EXPORT_SYMBOL(_cpu_pda); + +struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; + +char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss; + +unsigned long __supported_pte_mask __read_mostly = ~0UL; +EXPORT_SYMBOL_GPL(__supported_pte_mask); + +static int do_not_nx __cpuinitdata; + +/* noexec=on|off +Control non executable mappings for 64bit processes. + +on Enable(default) +off Disable +*/ +static int __init nonx_setup(char *str) +{ + if (!str) + return -EINVAL; + if (!strncmp(str, "on", 2)) { + __supported_pte_mask |= _PAGE_NX; + do_not_nx = 0; + } else if (!strncmp(str, "off", 3)) { + do_not_nx = 1; + __supported_pte_mask &= ~_PAGE_NX; + } + return 0; +} +early_param("noexec", nonx_setup); + +int force_personality32; + +/* noexec32=on|off +Control non executable heap for 32bit processes. +To control the stack too use noexec=off + +on PROT_READ does not imply PROT_EXEC for 32bit processes (default) +off PROT_READ implies PROT_EXEC +*/ +static int __init nonx32_setup(char *str) +{ + if (!strcmp(str, "on")) + force_personality32 &= ~READ_IMPLIES_EXEC; + else if (!strcmp(str, "off")) + force_personality32 |= READ_IMPLIES_EXEC; + return 1; +} +__setup("noexec32=", nonx32_setup); + +void pda_init(int cpu) +{ + struct x8664_pda *pda = cpu_pda(cpu); + + /* Setup up data that may be needed in __get_free_pages early */ + loadsegment(fs, 0); + loadsegment(gs, 0); + /* Memory clobbers used to order PDA accessed */ + mb(); + wrmsrl(MSR_GS_BASE, pda); + mb(); + + pda->cpunumber = cpu; + pda->irqcount = -1; + pda->kernelstack = (unsigned long)stack_thread_info() - + PDA_STACKOFFSET + THREAD_SIZE; + pda->active_mm = &init_mm; + pda->mmu_state = 0; + + if (cpu == 0) { + /* others are initialized in smpboot.c */ + pda->pcurrent = &init_task; + pda->irqstackptr = boot_cpu_stack; + pda->irqstackptr += IRQSTACKSIZE - 64; + } else { + if (!pda->irqstackptr) { + pda->irqstackptr = (char *) + __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER); + if (!pda->irqstackptr) + panic("cannot allocate irqstack for cpu %d", + cpu); + pda->irqstackptr += IRQSTACKSIZE - 64; + } + + if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE) + pda->nodenumber = cpu_to_node(cpu); + } +} + +char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + + DEBUG_STKSZ] __page_aligned_bss; + +extern asmlinkage void ignore_sysret(void); + +/* May not be marked __init: used by software suspend */ +void syscall_init(void) +{ + /* + * LSTAR and STAR live in a bit strange symbiosis. + * They both write to the same internal register. STAR allows to + * set CS/DS but only a 32bit target. LSTAR sets the 64bit rip. + */ + wrmsrl(MSR_STAR, ((u64)__USER32_CS)<<48 | ((u64)__KERNEL_CS)<<32); + wrmsrl(MSR_LSTAR, system_call); + wrmsrl(MSR_CSTAR, ignore_sysret); + +#ifdef CONFIG_IA32_EMULATION + syscall32_cpu_init(); +#endif + + /* Flags to clear on syscall */ + wrmsrl(MSR_SYSCALL_MASK, + X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF|X86_EFLAGS_IOPL); +} + +void __cpuinit check_efer(void) +{ + unsigned long efer; + + rdmsrl(MSR_EFER, efer); + if (!(efer & EFER_NX) || do_not_nx) + __supported_pte_mask &= ~_PAGE_NX; +} + +unsigned long kernel_eflags; + +/* + * Copies of the original ist values from the tss are only accessed during + * debugging, no special alignment required. + */ +DEFINE_PER_CPU(struct orig_ist, orig_ist); + +/* + * cpu_init() initializes state that is per-CPU. Some data is already + * initialized (naturally) in the bootstrap process, such as the GDT + * and IDT. We reload them nevertheless, this function acts as a + * 'CPU state barrier', nothing should get across. + * A lot of state is already set up in PDA init. + */ +void __cpuinit cpu_init(void) +{ + int cpu = stack_smp_processor_id(); + struct tss_struct *t = &per_cpu(init_tss, cpu); + struct orig_ist *orig_ist = &per_cpu(orig_ist, cpu); + unsigned long v; + char *estacks = NULL; + struct task_struct *me; + int i; + + /* CPU 0 is initialised in head64.c */ + if (cpu != 0) + pda_init(cpu); + else + estacks = boot_exception_stacks; + + me = current; + + if (cpu_test_and_set(cpu, cpu_initialized)) + panic("CPU#%d already initialized!\n", cpu); + + printk(KERN_INFO "Initializing CPU#%d\n", cpu); + + clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); + + /* + * Initialize the per-CPU GDT with the boot GDT, + * and set up the GDT descriptor: + */ + + switch_to_new_gdt(); + load_idt((const struct desc_ptr *)&idt_descr); + + memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8); + syscall_init(); + + wrmsrl(MSR_FS_BASE, 0); + wrmsrl(MSR_KERNEL_GS_BASE, 0); + barrier(); + + check_efer(); + + /* + * set up and load the per-CPU TSS + */ + if (!orig_ist->ist[0]) { + static const unsigned int order[N_EXCEPTION_STACKS] = { + [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER, + [DEBUG_STACK - 1] = DEBUG_STACK_ORDER + }; + for (v = 0; v < N_EXCEPTION_STACKS; v++) { + if (cpu) { + estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]); + if (!estacks) + panic("Cannot allocate exception " + "stack %ld %d\n", v, cpu); + } + estacks += PAGE_SIZE << order[v]; + orig_ist->ist[v] = t->x86_tss.ist[v] = + (unsigned long)estacks; + } + } + + t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap); + /* + * <= is required because the CPU will access up to + * 8 bits beyond the end of the IO permission bitmap. + */ + for (i = 0; i <= IO_BITMAP_LONGS; i++) + t->io_bitmap[i] = ~0UL; + + atomic_inc(&init_mm.mm_count); + me->active_mm = &init_mm; + if (me->mm) + BUG(); + enter_lazy_tlb(&init_mm, me); + + load_sp0(t, ¤t->thread); + set_tss_desc(cpu, t); + load_TR_desc(); + load_LDT(&init_mm.context); + +#ifdef CONFIG_KGDB + /* + * If the kgdb is connected no debug regs should be altered. This + * is only applicable when KGDB and a KGDB I/O module are built + * into the kernel and you are using early debugging with + * kgdbwait. KGDB will control the kernel HW breakpoint registers. + */ + if (kgdb_connected && arch_kgdb_ops.correct_hw_break) + arch_kgdb_ops.correct_hw_break(); + else { +#endif + /* + * Clear all 6 debug registers: + */ + + set_debugreg(0UL, 0); + set_debugreg(0UL, 1); + set_debugreg(0UL, 2); + set_debugreg(0UL, 3); + set_debugreg(0UL, 6); + set_debugreg(0UL, 7); +#ifdef CONFIG_KGDB + /* If the kgdb is connected no debug regs should be altered. */ + } +#endif + + fpu_init(); + + raw_local_save_flags(kernel_eflags); + + if (is_uv_system()) + uv_cpu_init(); +} diff --git a/trunk/arch/x86/kernel/cpu/cpu.h b/trunk/arch/x86/kernel/cpu/cpu.h index de4094a39210..4d894e8565fe 100644 --- a/trunk/arch/x86/kernel/cpu/cpu.h +++ b/trunk/arch/x86/kernel/cpu/cpu.h @@ -21,16 +21,23 @@ struct cpu_dev { void (*c_init)(struct cpuinfo_x86 * c); void (*c_identify)(struct cpuinfo_x86 * c); unsigned int (*c_size_cache)(struct cpuinfo_x86 * c, unsigned int size); - int c_x86_vendor; }; -#define cpu_dev_register(cpu_devX) \ - static struct cpu_dev *__cpu_dev_##cpu_devX __used \ - __attribute__((__section__(".x86_cpu_dev.init"))) = \ - &cpu_devX; +extern struct cpu_dev * cpu_devs [X86_VENDOR_NUM]; -extern struct cpu_dev *__x86_cpu_dev_start[], *__x86_cpu_dev_end[]; +struct cpu_vendor_dev { + int vendor; + struct cpu_dev *cpu_dev; +}; + +#define cpu_vendor_dev_register(cpu_vendor_id, cpu_dev) \ + static struct cpu_vendor_dev __cpu_vendor_dev_##cpu_vendor_id __used \ + __attribute__((__section__(".x86cpuvendor.init"))) = \ + { cpu_vendor_id, cpu_dev } + +extern struct cpu_vendor_dev __x86cpuvendor_start[], __x86cpuvendor_end[]; +extern int get_model_name(struct cpuinfo_x86 *c); extern void display_cacheinfo(struct cpuinfo_x86 *c); #endif diff --git a/trunk/arch/x86/kernel/cpu/cyrix.c b/trunk/arch/x86/kernel/cpu/cyrix.c index ffd0f5ed071a..898a5a2002ed 100644 --- a/trunk/arch/x86/kernel/cpu/cyrix.c +++ b/trunk/arch/x86/kernel/cpu/cyrix.c @@ -121,7 +121,7 @@ static void __cpuinit set_cx86_reorder(void) setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ /* Load/Store Serialize to mem access disable (=reorder it) */ - setCx86_old(CX86_PCR0, getCx86_old(CX86_PCR0) & ~0x80); + setCx86(CX86_PCR0, getCx86(CX86_PCR0) & ~0x80); /* set load/store serialize from 1GB to 4GB */ ccr3 |= 0xe0; setCx86(CX86_CCR3, ccr3); @@ -132,11 +132,11 @@ static void __cpuinit set_cx86_memwb(void) printk(KERN_INFO "Enable Memory-Write-back mode on Cyrix/NSC processor.\n"); /* CCR2 bit 2: unlock NW bit */ - setCx86_old(CX86_CCR2, getCx86_old(CX86_CCR2) & ~0x04); + setCx86(CX86_CCR2, getCx86(CX86_CCR2) & ~0x04); /* set 'Not Write-through' */ write_cr0(read_cr0() | X86_CR0_NW); /* CCR2 bit 2: lock NW bit and set WT1 */ - setCx86_old(CX86_CCR2, getCx86_old(CX86_CCR2) | 0x14); + setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x14); } /* @@ -150,14 +150,14 @@ static void __cpuinit geode_configure(void) local_irq_save(flags); /* Suspend on halt power saving and enable #SUSP pin */ - setCx86_old(CX86_CCR2, getCx86_old(CX86_CCR2) | 0x88); + setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x88); ccr3 = getCx86(CX86_CCR3); setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ /* FPU fast, DTE cache, Mem bypass */ - setCx86_old(CX86_CCR4, getCx86_old(CX86_CCR4) | 0x38); + setCx86(CX86_CCR4, getCx86(CX86_CCR4) | 0x38); setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ set_cx86_memwb(); @@ -291,7 +291,7 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c) /* GXm supports extended cpuid levels 'ala' AMD */ if (c->cpuid_level == 2) { /* Enable cxMMX extensions (GX1 Datasheet 54) */ - setCx86_old(CX86_CCR7, getCx86_old(CX86_CCR7) | 1); + setCx86(CX86_CCR7, getCx86(CX86_CCR7) | 1); /* * GXm : 0x30 ... 0x5f GXm datasheet 51 @@ -301,6 +301,7 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c) */ if ((0x30 <= dir1 && dir1 <= 0x6f) || (0x80 <= dir1 && dir1 <= 0x8f)) geode_configure(); + get_model_name(c); /* get CPU marketing name */ return; } else { /* MediaGX */ Cx86_cb[2] = (dir0_lsn & 1) ? '3' : '4'; @@ -313,7 +314,7 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c) if (dir1 > 7) { dir0_msn++; /* M II */ /* Enable MMX extensions (App note 108) */ - setCx86_old(CX86_CCR7, getCx86_old(CX86_CCR7)|1); + setCx86(CX86_CCR7, getCx86(CX86_CCR7)|1); } else { c->coma_bug = 1; /* 6x86MX, it has the bug. */ } @@ -428,7 +429,7 @@ static void __cpuinit cyrix_identify(struct cpuinfo_x86 *c) local_irq_save(flags); ccr3 = getCx86(CX86_CCR3); setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ - setCx86_old(CX86_CCR4, getCx86_old(CX86_CCR4) | 0x80); /* enable cpuid */ + setCx86(CX86_CCR4, getCx86(CX86_CCR4) | 0x80); /* enable cpuid */ setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ local_irq_restore(flags); } @@ -441,16 +442,14 @@ static struct cpu_dev cyrix_cpu_dev __cpuinitdata = { .c_early_init = early_init_cyrix, .c_init = init_cyrix, .c_identify = cyrix_identify, - .c_x86_vendor = X86_VENDOR_CYRIX, }; -cpu_dev_register(cyrix_cpu_dev); +cpu_vendor_dev_register(X86_VENDOR_CYRIX, &cyrix_cpu_dev); static struct cpu_dev nsc_cpu_dev __cpuinitdata = { .c_vendor = "NSC", .c_ident = { "Geode by NSC" }, .c_init = init_nsc, - .c_x86_vendor = X86_VENDOR_NSC, }; -cpu_dev_register(nsc_cpu_dev); +cpu_vendor_dev_register(X86_VENDOR_NSC, &nsc_cpu_dev); diff --git a/trunk/arch/x86/kernel/cpu/feature_names.c b/trunk/arch/x86/kernel/cpu/feature_names.c new file mode 100644 index 000000000000..c9017799497c --- /dev/null +++ b/trunk/arch/x86/kernel/cpu/feature_names.c @@ -0,0 +1,84 @@ +/* + * Strings for the various x86 capability flags. + * + * This file must not contain any executable code. + */ + +#include + +/* + * These flag bits must match the definitions in . + * NULL means this bit is undefined or reserved; either way it doesn't + * have meaning as far as Linux is concerned. Note that it's important + * to realize there is a difference between this table and CPUID -- if + * applications want to get the raw CPUID data, they should access + * /dev/cpu//cpuid instead. + */ +const char * const x86_cap_flags[NCAPINTS*32] = { + /* Intel-defined */ + "fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce", + "cx8", "apic", NULL, "sep", "mtrr", "pge", "mca", "cmov", + "pat", "pse36", "pn", "clflush", NULL, "dts", "acpi", "mmx", + "fxsr", "sse", "sse2", "ss", "ht", "tm", "ia64", "pbe", + + /* AMD-defined */ + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL, + NULL, NULL, NULL, "mp", "nx", NULL, "mmxext", NULL, + NULL, "fxsr_opt", "pdpe1gb", "rdtscp", NULL, "lm", + "3dnowext", "3dnow", + + /* Transmeta-defined */ + "recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + + /* Other (Linux-defined) */ + "cxmmx", "k6_mtrr", "cyrix_arr", "centaur_mcr", + NULL, NULL, NULL, NULL, + "constant_tsc", "up", NULL, "arch_perfmon", + "pebs", "bts", NULL, NULL, + "rep_good", NULL, NULL, NULL, + "nopl", NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + + /* Intel-defined (#2) */ + "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est", + "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL, + NULL, NULL, "dca", "sse4_1", "sse4_2", NULL, NULL, "popcnt", + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + + /* VIA/Cyrix/Centaur-defined */ + NULL, NULL, "rng", "rng_en", NULL, NULL, "ace", "ace_en", + "ace2", "ace2_en", "phe", "phe_en", "pmm", "pmm_en", NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + + /* AMD-defined (#2) */ + "lahf_lm", "cmp_legacy", "svm", "extapic", + "cr8_legacy", "abm", "sse4a", "misalignsse", + "3dnowprefetch", "osvw", "ibs", "sse5", + "skinit", "wdt", NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + + /* Auxiliary (Linux-defined) */ + "ida", NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +}; + +const char *const x86_power_flags[32] = { + "ts", /* temperature sensor */ + "fid", /* frequency id control */ + "vid", /* voltage id control */ + "ttp", /* thermal trip */ + "tm", + "stc", + "100mhzsteps", + "hwpstate", + "", /* tsc invariant mapped to constant_tsc */ + /* nothing */ +}; diff --git a/trunk/arch/x86/kernel/cpu/intel.c b/trunk/arch/x86/kernel/cpu/intel.c index 99468dbd08da..f113ef4595f6 100644 --- a/trunk/arch/x86/kernel/cpu/intel.c +++ b/trunk/arch/x86/kernel/cpu/intel.c @@ -15,11 +15,6 @@ #include #include -#ifdef CONFIG_X86_64 -#include -#include -#endif - #include "cpu.h" #ifdef CONFIG_X86_LOCAL_APIC @@ -28,22 +23,23 @@ #include #endif +#ifdef CONFIG_X86_INTEL_USERCOPY +/* + * Alignment at which movsl is preferred for bulk memory copies. + */ +struct movsl_mask movsl_mask __read_mostly; +#endif + static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) { - if ((c->x86 == 0xf && c->x86_model >= 0x03) || - (c->x86 == 0x6 && c->x86_model >= 0x0e)) - set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); - -#ifdef CONFIG_X86_64 - set_cpu_cap(c, X86_FEATURE_SYSENTER32); -#else /* Netburst reports 64 bytes clflush size, but does IO in 128 bytes */ if (c->x86 == 15 && c->x86_cache_alignment == 64) c->x86_cache_alignment = 128; -#endif + if ((c->x86 == 0xf && c->x86_model >= 0x03) || + (c->x86 == 0x6 && c->x86_model >= 0x0e)) + set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); } -#ifdef CONFIG_X86_32 /* * Early probe support logic for ppro memory erratum #50 * @@ -63,54 +59,15 @@ int __cpuinit ppro_with_ram_bug(void) return 0; } -#ifdef CONFIG_X86_F00F_BUG -static void __cpuinit trap_init_f00f_bug(void) -{ - __set_fixmap(FIX_F00F_IDT, __pa(&idt_table), PAGE_KERNEL_RO); - /* - * Update the IDT descriptor and reload the IDT so that - * it uses the read-only mapped virtual address. - */ - idt_descr.address = fix_to_virt(FIX_F00F_IDT); - load_idt(&idt_descr); -} -#endif - -static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c) +/* + * P4 Xeon errata 037 workaround. + * Hardware prefetcher may cause stale data to be loaded into the cache. + */ +static void __cpuinit Intel_errata_workarounds(struct cpuinfo_x86 *c) { unsigned long lo, hi; -#ifdef CONFIG_X86_F00F_BUG - /* - * All current models of Pentium and Pentium with MMX technology CPUs - * have the F0 0F bug, which lets nonprivileged users lock up the system. - * Note that the workaround only should be initialized once... - */ - c->f00f_bug = 0; - if (!paravirt_enabled() && c->x86 == 5) { - static int f00f_workaround_enabled; - - c->f00f_bug = 1; - if (!f00f_workaround_enabled) { - trap_init_f00f_bug(); - printk(KERN_NOTICE "Intel Pentium with F0 0F bug - workaround enabled.\n"); - f00f_workaround_enabled = 1; - } - } -#endif - - /* - * SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until - * model 3 mask 3 - */ - if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633) - clear_cpu_cap(c, X86_FEATURE_SEP); - - /* - * P4 Xeon errata 037 workaround. - * Hardware prefetcher may cause stale data to be loaded into the cache. - */ if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_mask == 1)) { rdmsr(MSR_IA32_MISC_ENABLE, lo, hi); if ((lo & (1<<9)) == 0) { @@ -120,68 +77,13 @@ static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c) wrmsr (MSR_IA32_MISC_ENABLE, lo, hi); } } - - /* - * See if we have a good local APIC by checking for buggy Pentia, - * i.e. all B steppings and the C2 stepping of P54C when using their - * integrated APIC (see 11AP erratum in "Pentium Processor - * Specification Update"). - */ - if (cpu_has_apic && (c->x86<<8 | c->x86_model<<4) == 0x520 && - (c->x86_mask < 0x6 || c->x86_mask == 0xb)) - set_cpu_cap(c, X86_FEATURE_11AP); - - -#ifdef CONFIG_X86_INTEL_USERCOPY - /* - * Set up the preferred alignment for movsl bulk memory moves - */ - switch (c->x86) { - case 4: /* 486: untested */ - break; - case 5: /* Old Pentia: untested */ - break; - case 6: /* PII/PIII only like movsl with 8-byte alignment */ - movsl_mask.mask = 7; - break; - case 15: /* P4 is OK down to 8-byte alignment */ - movsl_mask.mask = 7; - break; - } -#endif - -#ifdef CONFIG_X86_NUMAQ - numaq_tsc_disable(); -#endif } -#else -static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c) -{ -} -#endif -static void __cpuinit srat_detect_node(void) -{ -#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) - unsigned node; - int cpu = smp_processor_id(); - int apicid = hard_smp_processor_id(); - - /* Don't do the funky fallback heuristics the AMD version employs - for now. */ - node = apicid_to_node[apicid]; - if (node == NUMA_NO_NODE || !node_online(node)) - node = first_node(node_online_map); - numa_set_node(cpu, node); - - printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node); -#endif -} /* * find out the number of processor cores on the die */ -static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c) +static int __cpuinit num_cpu_cores(struct cpuinfo_x86 *c) { unsigned int eax, ebx, ecx, edx; @@ -196,51 +98,45 @@ static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c) return 1; } -static void __cpuinit detect_vmx_virtcap(struct cpuinfo_x86 *c) +#ifdef CONFIG_X86_F00F_BUG +static void __cpuinit trap_init_f00f_bug(void) { - /* Intel VMX MSR indicated features */ -#define X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW 0x00200000 -#define X86_VMX_FEATURE_PROC_CTLS_VNMI 0x00400000 -#define X86_VMX_FEATURE_PROC_CTLS_2ND_CTLS 0x80000000 -#define X86_VMX_FEATURE_PROC_CTLS2_VIRT_APIC 0x00000001 -#define X86_VMX_FEATURE_PROC_CTLS2_EPT 0x00000002 -#define X86_VMX_FEATURE_PROC_CTLS2_VPID 0x00000020 - - u32 vmx_msr_low, vmx_msr_high, msr_ctl, msr_ctl2; - - clear_cpu_cap(c, X86_FEATURE_TPR_SHADOW); - clear_cpu_cap(c, X86_FEATURE_VNMI); - clear_cpu_cap(c, X86_FEATURE_FLEXPRIORITY); - clear_cpu_cap(c, X86_FEATURE_EPT); - clear_cpu_cap(c, X86_FEATURE_VPID); - - rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, vmx_msr_low, vmx_msr_high); - msr_ctl = vmx_msr_high | vmx_msr_low; - if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW) - set_cpu_cap(c, X86_FEATURE_TPR_SHADOW); - if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_VNMI) - set_cpu_cap(c, X86_FEATURE_VNMI); - if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_2ND_CTLS) { - rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2, - vmx_msr_low, vmx_msr_high); - msr_ctl2 = vmx_msr_high | vmx_msr_low; - if ((msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_VIRT_APIC) && - (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW)) - set_cpu_cap(c, X86_FEATURE_FLEXPRIORITY); - if (msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_EPT) - set_cpu_cap(c, X86_FEATURE_EPT); - if (msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_VPID) - set_cpu_cap(c, X86_FEATURE_VPID); - } + __set_fixmap(FIX_F00F_IDT, __pa(&idt_table), PAGE_KERNEL_RO); + + /* + * Update the IDT descriptor and reload the IDT so that + * it uses the read-only mapped virtual address. + */ + idt_descr.address = fix_to_virt(FIX_F00F_IDT); + load_idt(&idt_descr); } +#endif static void __cpuinit init_intel(struct cpuinfo_x86 *c) { unsigned int l2 = 0; + char *p = NULL; early_init_intel(c); - intel_workarounds(c); +#ifdef CONFIG_X86_F00F_BUG + /* + * All current models of Pentium and Pentium with MMX technology CPUs + * have the F0 0F bug, which lets nonprivileged users lock up the system. + * Note that the workaround only should be initialized once... + */ + c->f00f_bug = 0; + if (!paravirt_enabled() && c->x86 == 5) { + static int f00f_workaround_enabled; + + c->f00f_bug = 1; + if (!f00f_workaround_enabled) { + trap_init_f00f_bug(); + printk(KERN_NOTICE "Intel Pentium with F0 0F bug - workaround enabled.\n"); + f00f_workaround_enabled = 1; + } + } +#endif l2 = init_intel_cacheinfo(c); if (c->cpuid_level > 9) { @@ -250,32 +146,16 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON); } - if (cpu_has_xmm2) - set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); - if (cpu_has_ds) { - unsigned int l1; - rdmsr(MSR_IA32_MISC_ENABLE, l1, l2); - if (!(l1 & (1<<11))) - set_cpu_cap(c, X86_FEATURE_BTS); - if (!(l1 & (1<<12))) - set_cpu_cap(c, X86_FEATURE_PEBS); - ds_init_intel(c); - } + /* SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until model 3 mask 3 */ + if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633) + clear_cpu_cap(c, X86_FEATURE_SEP); -#ifdef CONFIG_X86_64 - if (c->x86 == 15) - c->x86_cache_alignment = c->x86_clflush_size * 2; - if (c->x86 == 6) - set_cpu_cap(c, X86_FEATURE_REP_GOOD); -#else /* * Names for the Pentium II/Celeron processors * detectable only by also checking the cache size. * Dixon is NOT a Celeron. */ if (c->x86 == 6) { - char *p = NULL; - switch (c->x86_model) { case 5: if (c->x86_mask == 0) { @@ -298,41 +178,71 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) p = "Celeron (Coppermine)"; break; } + } + + if (p) + strcpy(c->x86_model_id, p); + + c->x86_max_cores = num_cpu_cores(c); + + detect_ht(c); - if (p) - strcpy(c->x86_model_id, p); + /* Work around errata */ + Intel_errata_workarounds(c); + +#ifdef CONFIG_X86_INTEL_USERCOPY + /* + * Set up the preferred alignment for movsl bulk memory moves + */ + switch (c->x86) { + case 4: /* 486: untested */ + break; + case 5: /* Old Pentia: untested */ + break; + case 6: /* PII/PIII only like movsl with 8-byte alignment */ + movsl_mask.mask = 7; + break; + case 15: /* P4 is OK down to 8-byte alignment */ + movsl_mask.mask = 7; + break; } +#endif - if (c->x86 == 15) + if (cpu_has_xmm2) + set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); + if (c->x86 == 15) { set_cpu_cap(c, X86_FEATURE_P4); + } if (c->x86 == 6) set_cpu_cap(c, X86_FEATURE_P3); + if (cpu_has_ds) { + unsigned int l1; + rdmsr(MSR_IA32_MISC_ENABLE, l1, l2); + if (!(l1 & (1<<11))) + set_cpu_cap(c, X86_FEATURE_BTS); + if (!(l1 & (1<<12))) + set_cpu_cap(c, X86_FEATURE_PEBS); + ds_init_intel(c); + } if (cpu_has_bts) ptrace_bts_init_intel(c); -#endif + /* + * See if we have a good local APIC by checking for buggy Pentia, + * i.e. all B steppings and the C2 stepping of P54C when using their + * integrated APIC (see 11AP erratum in "Pentium Processor + * Specification Update"). + */ + if (cpu_has_apic && (c->x86<<8 | c->x86_model<<4) == 0x520 && + (c->x86_mask < 0x6 || c->x86_mask == 0xb)) + set_cpu_cap(c, X86_FEATURE_11AP); - detect_extended_topology(c); - if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) { - /* - * let's use the legacy cpuid vector 0x1 and 0x4 for topology - * detection. - */ - c->x86_max_cores = intel_num_cpu_cores(c); -#ifdef CONFIG_X86_32 - detect_ht(c); +#ifdef CONFIG_X86_NUMAQ + numaq_tsc_disable(); #endif - } - - /* Work around errata */ - srat_detect_node(); - - if (cpu_has(c, X86_FEATURE_VMX)) - detect_vmx_virtcap(c); } -#ifdef CONFIG_X86_32 static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 *c, unsigned int size) { /* @@ -345,12 +255,10 @@ static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 *c, unsigned i size = 256; return size; } -#endif static struct cpu_dev intel_cpu_dev __cpuinitdata = { .c_vendor = "Intel", .c_ident = { "GenuineIntel" }, -#ifdef CONFIG_X86_32 .c_models = { { .vendor = X86_VENDOR_INTEL, .family = 4, .model_names = { @@ -400,12 +308,76 @@ static struct cpu_dev intel_cpu_dev __cpuinitdata = { } }, }, - .c_size_cache = intel_size_cache, -#endif .c_early_init = early_init_intel, .c_init = init_intel, - .c_x86_vendor = X86_VENDOR_INTEL, + .c_size_cache = intel_size_cache, }; -cpu_dev_register(intel_cpu_dev); +cpu_vendor_dev_register(X86_VENDOR_INTEL, &intel_cpu_dev); + +#ifndef CONFIG_X86_CMPXCHG +unsigned long cmpxchg_386_u8(volatile void *ptr, u8 old, u8 new) +{ + u8 prev; + unsigned long flags; + + /* Poor man's cmpxchg for 386. Unsuitable for SMP */ + local_irq_save(flags); + prev = *(u8 *)ptr; + if (prev == old) + *(u8 *)ptr = new; + local_irq_restore(flags); + return prev; +} +EXPORT_SYMBOL(cmpxchg_386_u8); + +unsigned long cmpxchg_386_u16(volatile void *ptr, u16 old, u16 new) +{ + u16 prev; + unsigned long flags; + + /* Poor man's cmpxchg for 386. Unsuitable for SMP */ + local_irq_save(flags); + prev = *(u16 *)ptr; + if (prev == old) + *(u16 *)ptr = new; + local_irq_restore(flags); + return prev; +} +EXPORT_SYMBOL(cmpxchg_386_u16); + +unsigned long cmpxchg_386_u32(volatile void *ptr, u32 old, u32 new) +{ + u32 prev; + unsigned long flags; + + /* Poor man's cmpxchg for 386. Unsuitable for SMP */ + local_irq_save(flags); + prev = *(u32 *)ptr; + if (prev == old) + *(u32 *)ptr = new; + local_irq_restore(flags); + return prev; +} +EXPORT_SYMBOL(cmpxchg_386_u32); +#endif + +#ifndef CONFIG_X86_CMPXCHG64 +unsigned long long cmpxchg_486_u64(volatile void *ptr, u64 old, u64 new) +{ + u64 prev; + unsigned long flags; + + /* Poor man's cmpxchg8b for 386 and 486. Unsuitable for SMP */ + local_irq_save(flags); + prev = *(u64 *)ptr; + if (prev == old) + *(u64 *)ptr = new; + local_irq_restore(flags); + return prev; +} +EXPORT_SYMBOL(cmpxchg_486_u64); +#endif + +/* arch_initcall(intel_cpu_init); */ diff --git a/trunk/arch/x86/kernel/cpu/intel_64.c b/trunk/arch/x86/kernel/cpu/intel_64.c new file mode 100644 index 000000000000..1019c58d39f0 --- /dev/null +++ b/trunk/arch/x86/kernel/cpu/intel_64.c @@ -0,0 +1,95 @@ +#include +#include +#include +#include +#include +#include + +#include "cpu.h" + +static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) +{ + if ((c->x86 == 0xf && c->x86_model >= 0x03) || + (c->x86 == 0x6 && c->x86_model >= 0x0e)) + set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); + + set_cpu_cap(c, X86_FEATURE_SYSENTER32); +} + +/* + * find out the number of processor cores on the die + */ +static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c) +{ + unsigned int eax, t; + + if (c->cpuid_level < 4) + return 1; + + cpuid_count(4, 0, &eax, &t, &t, &t); + + if (eax & 0x1f) + return ((eax >> 26) + 1); + else + return 1; +} + +static void __cpuinit srat_detect_node(void) +{ +#ifdef CONFIG_NUMA + unsigned node; + int cpu = smp_processor_id(); + int apicid = hard_smp_processor_id(); + + /* Don't do the funky fallback heuristics the AMD version employs + for now. */ + node = apicid_to_node[apicid]; + if (node == NUMA_NO_NODE || !node_online(node)) + node = first_node(node_online_map); + numa_set_node(cpu, node); + + printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node); +#endif +} + +static void __cpuinit init_intel(struct cpuinfo_x86 *c) +{ + init_intel_cacheinfo(c); + if (c->cpuid_level > 9) { + unsigned eax = cpuid_eax(10); + /* Check for version and the number of counters */ + if ((eax & 0xff) && (((eax>>8) & 0xff) > 1)) + set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON); + } + + if (cpu_has_ds) { + unsigned int l1, l2; + rdmsr(MSR_IA32_MISC_ENABLE, l1, l2); + if (!(l1 & (1<<11))) + set_cpu_cap(c, X86_FEATURE_BTS); + if (!(l1 & (1<<12))) + set_cpu_cap(c, X86_FEATURE_PEBS); + } + + + if (cpu_has_bts) + ds_init_intel(c); + + if (c->x86 == 15) + c->x86_cache_alignment = c->x86_clflush_size * 2; + if (c->x86 == 6) + set_cpu_cap(c, X86_FEATURE_REP_GOOD); + set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); + c->x86_max_cores = intel_num_cpu_cores(c); + + srat_detect_node(); +} + +static struct cpu_dev intel_cpu_dev __cpuinitdata = { + .c_vendor = "Intel", + .c_ident = { "GenuineIntel" }, + .c_early_init = early_init_intel, + .c_init = init_intel, +}; +cpu_vendor_dev_register(X86_VENDOR_INTEL, &intel_cpu_dev); + diff --git a/trunk/arch/x86/kernel/cpu/intel_cacheinfo.c b/trunk/arch/x86/kernel/cpu/intel_cacheinfo.c index 3f46afbb1cf1..6b0a10b002f1 100644 --- a/trunk/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/trunk/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -1,8 +1,8 @@ /* - * Routines to indentify caches on Intel CPU. + * Routines to indentify caches on Intel CPU. * - * Changes: - * Venkatesh Pallipadi : Adding cache identification through cpuid(4) + * Changes: + * Venkatesh Pallipadi : Adding cache identification through cpuid(4) * Ashok Raj : Work with CPU hotplug infrastructure. * Andi Kleen / Andreas Herrmann : CPUID4 emulation on AMD. */ @@ -13,7 +13,6 @@ #include #include #include -#include #include #include @@ -131,18 +130,9 @@ struct _cpuid4_info { union _cpuid4_leaf_ebx ebx; union _cpuid4_leaf_ecx ecx; unsigned long size; - unsigned long can_disable; cpumask_t shared_cpu_map; /* future?: only cpus/node is needed */ }; -#ifdef CONFIG_PCI -static struct pci_device_id k8_nb_id[] = { - { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1103) }, - { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1203) }, - {} -}; -#endif - unsigned short num_cache_leaves; /* AMD doesn't have CPUID4. Emulate it here to report the same @@ -192,10 +182,9 @@ static unsigned short assocs[] __cpuinitdata = { static unsigned char levels[] __cpuinitdata = { 1, 1, 2, 3 }; static unsigned char types[] __cpuinitdata = { 1, 2, 3, 3 }; -static void __cpuinit -amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, - union _cpuid4_leaf_ebx *ebx, - union _cpuid4_leaf_ecx *ecx) +static void __cpuinit amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, + union _cpuid4_leaf_ebx *ebx, + union _cpuid4_leaf_ecx *ecx) { unsigned dummy; unsigned line_size, lines_per_tag, assoc, size_in_kb; @@ -262,40 +251,27 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, (ebx->split.ways_of_associativity + 1) - 1; } -static void __cpuinit -amd_check_l3_disable(int index, struct _cpuid4_info *this_leaf) -{ - if (index < 3) - return; - this_leaf->can_disable = 1; -} - -static int -__cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf) +static int __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf) { union _cpuid4_leaf_eax eax; union _cpuid4_leaf_ebx ebx; union _cpuid4_leaf_ecx ecx; unsigned edx; - if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) amd_cpuid4(index, &eax, &ebx, &ecx); - if (boot_cpu_data.x86 >= 0x10) - amd_check_l3_disable(index, this_leaf); - } else { - cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx); - } - + else + cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx); if (eax.split.type == CACHE_TYPE_NULL) return -EIO; /* better error ? */ this_leaf->eax = eax; this_leaf->ebx = ebx; this_leaf->ecx = ecx; - this_leaf->size = (ecx.split.number_of_sets + 1) * - (ebx.split.coherency_line_size + 1) * - (ebx.split.physical_line_partition + 1) * - (ebx.split.ways_of_associativity + 1); + this_leaf->size = (ecx.split.number_of_sets + 1) * + (ebx.split.coherency_line_size + 1) * + (ebx.split.physical_line_partition + 1) * + (ebx.split.ways_of_associativity + 1); return 0; } @@ -477,7 +453,7 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) /* pointer to _cpuid4_info array (for each cache leaf) */ static DEFINE_PER_CPU(struct _cpuid4_info *, cpuid4_info); -#define CPUID4_INFO_IDX(x, y) (&((per_cpu(cpuid4_info, x))[y])) +#define CPUID4_INFO_IDX(x, y) (&((per_cpu(cpuid4_info, x))[y])) #ifdef CONFIG_SMP static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) @@ -514,7 +490,7 @@ static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index) this_leaf = CPUID4_INFO_IDX(cpu, index); for_each_cpu_mask_nr(sibling, this_leaf->shared_cpu_map) { - sibling_leaf = CPUID4_INFO_IDX(sibling, index); + sibling_leaf = CPUID4_INFO_IDX(sibling, index); cpu_clear(cpu, sibling_leaf->shared_cpu_map); } } @@ -596,7 +572,7 @@ struct _index_kobject { /* pointer to array of kobjects for cpuX/cache/indexY */ static DEFINE_PER_CPU(struct _index_kobject *, index_kobject); -#define INDEX_KOBJECT_PTR(x, y) (&((per_cpu(index_kobject, x))[y])) +#define INDEX_KOBJECT_PTR(x, y) (&((per_cpu(index_kobject, x))[y])) #define show_one_plus(file_name, object, val) \ static ssize_t show_##file_name \ @@ -661,99 +637,6 @@ static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf) { } } -#define to_object(k) container_of(k, struct _index_kobject, kobj) -#define to_attr(a) container_of(a, struct _cache_attr, attr) - -#ifdef CONFIG_PCI -static struct pci_dev *get_k8_northbridge(int node) -{ - struct pci_dev *dev = NULL; - int i; - - for (i = 0; i <= node; i++) { - do { - dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev); - if (!dev) - break; - } while (!pci_match_id(&k8_nb_id[0], dev)); - if (!dev) - break; - } - return dev; -} -#else -static struct pci_dev *get_k8_northbridge(int node) -{ - return NULL; -} -#endif - -static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf) -{ - int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map)); - struct pci_dev *dev = NULL; - ssize_t ret = 0; - int i; - - if (!this_leaf->can_disable) - return sprintf(buf, "Feature not enabled\n"); - - dev = get_k8_northbridge(node); - if (!dev) { - printk(KERN_ERR "Attempting AMD northbridge operation on a system with no northbridge\n"); - return -EINVAL; - } - - for (i = 0; i < 2; i++) { - unsigned int reg; - - pci_read_config_dword(dev, 0x1BC + i * 4, ®); - - ret += sprintf(buf, "%sEntry: %d\n", buf, i); - ret += sprintf(buf, "%sReads: %s\tNew Entries: %s\n", - buf, - reg & 0x80000000 ? "Disabled" : "Allowed", - reg & 0x40000000 ? "Disabled" : "Allowed"); - ret += sprintf(buf, "%sSubCache: %x\tIndex: %x\n", - buf, (reg & 0x30000) >> 16, reg & 0xfff); - } - return ret; -} - -static ssize_t -store_cache_disable(struct _cpuid4_info *this_leaf, const char *buf, - size_t count) -{ - int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map)); - struct pci_dev *dev = NULL; - unsigned int ret, index, val; - - if (!this_leaf->can_disable) - return 0; - - if (strlen(buf) > 15) - return -EINVAL; - - ret = sscanf(buf, "%x %x", &index, &val); - if (ret != 2) - return -EINVAL; - if (index > 1) - return -EINVAL; - - val |= 0xc0000000; - dev = get_k8_northbridge(node); - if (!dev) { - printk(KERN_ERR "Attempting AMD northbridge operation on a system with no northbridge\n"); - return -EINVAL; - } - - pci_write_config_dword(dev, 0x1BC + index * 4, val & ~0x40000000); - wbinvd(); - pci_write_config_dword(dev, 0x1BC + index * 4, val); - - return 1; -} - struct _cache_attr { struct attribute attr; ssize_t (*show)(struct _cpuid4_info *, char *); @@ -774,8 +657,6 @@ define_one_ro(size); define_one_ro(shared_cpu_map); define_one_ro(shared_cpu_list); -static struct _cache_attr cache_disable = __ATTR(cache_disable, 0644, show_cache_disable, store_cache_disable); - static struct attribute * default_attrs[] = { &type.attr, &level.attr, @@ -786,10 +667,12 @@ static struct attribute * default_attrs[] = { &size.attr, &shared_cpu_map.attr, &shared_cpu_list.attr, - &cache_disable.attr, NULL }; +#define to_object(k) container_of(k, struct _index_kobject, kobj) +#define to_attr(a) container_of(a, struct _cache_attr, attr) + static ssize_t show(struct kobject * kobj, struct attribute * attr, char * buf) { struct _cache_attr *fattr = to_attr(attr); @@ -799,22 +682,14 @@ static ssize_t show(struct kobject * kobj, struct attribute * attr, char * buf) ret = fattr->show ? fattr->show(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index), buf) : - 0; + 0; return ret; } static ssize_t store(struct kobject * kobj, struct attribute * attr, const char * buf, size_t count) { - struct _cache_attr *fattr = to_attr(attr); - struct _index_kobject *this_leaf = to_object(kobj); - ssize_t ret; - - ret = fattr->store ? - fattr->store(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index), - buf, count) : - 0; - return ret; + return 0; } static struct sysfs_ops sysfs_ops = { diff --git a/trunk/arch/x86/kernel/cpu/mcheck/mce_64.c b/trunk/arch/x86/kernel/cpu/mcheck/mce_64.c index 4b031a4ac856..726a5fcdf341 100644 --- a/trunk/arch/x86/kernel/cpu/mcheck/mce_64.c +++ b/trunk/arch/x86/kernel/cpu/mcheck/mce_64.c @@ -860,7 +860,7 @@ static __cpuinit int mce_create_device(unsigned int cpu) return err; } -static __cpuinit void mce_remove_device(unsigned int cpu) +static void mce_remove_device(unsigned int cpu) { int i; diff --git a/trunk/arch/x86/kernel/cpu/mkcapflags.pl b/trunk/arch/x86/kernel/cpu/mkcapflags.pl deleted file mode 100644 index dfea390e1608..000000000000 --- a/trunk/arch/x86/kernel/cpu/mkcapflags.pl +++ /dev/null @@ -1,32 +0,0 @@ -#!/usr/bin/perl -# -# Generate the x86_cap_flags[] array from include/asm-x86/cpufeature.h -# - -($in, $out) = @ARGV; - -open(IN, "< $in\0") or die "$0: cannot open: $in: $!\n"; -open(OUT, "> $out\0") or die "$0: cannot create: $out: $!\n"; - -print OUT "#include \n\n"; -print OUT "const char * const x86_cap_flags[NCAPINTS*32] = {\n"; - -while (defined($line = )) { - if ($line =~ /^\s*\#\s*define\s+(X86_FEATURE_(\S+))\s+(.*)$/) { - $macro = $1; - $feature = $2; - $tail = $3; - if ($tail =~ /\/\*\s*\"([^"]*)\".*\*\//) { - $feature = $1; - } - - if ($feature ne '') { - printf OUT "\t%-32s = \"%s\",\n", - "[$macro]", "\L$feature"; - } - } -} -print OUT "};\n"; - -close(IN); -close(OUT); diff --git a/trunk/arch/x86/kernel/cpu/powerflags.c b/trunk/arch/x86/kernel/cpu/powerflags.c deleted file mode 100644 index 5abbea297e0c..000000000000 --- a/trunk/arch/x86/kernel/cpu/powerflags.c +++ /dev/null @@ -1,20 +0,0 @@ -/* - * Strings for the various x86 power flags - * - * This file must not contain any executable code. - */ - -#include - -const char *const x86_power_flags[32] = { - "ts", /* temperature sensor */ - "fid", /* frequency id control */ - "vid", /* voltage id control */ - "ttp", /* thermal trip */ - "tm", - "stc", - "100mhzsteps", - "hwpstate", - "", /* tsc invariant mapped to constant_tsc */ - /* nothing */ -}; diff --git a/trunk/arch/x86/kernel/cpu/transmeta.c b/trunk/arch/x86/kernel/cpu/transmeta.c index 52b3fefbd5af..b911a2c61b8f 100644 --- a/trunk/arch/x86/kernel/cpu/transmeta.c +++ b/trunk/arch/x86/kernel/cpu/transmeta.c @@ -5,18 +5,6 @@ #include #include "cpu.h" -static void __cpuinit early_init_transmeta(struct cpuinfo_x86 *c) -{ - u32 xlvl; - - /* Transmeta-defined flags: level 0x80860001 */ - xlvl = cpuid_eax(0x80860000); - if ((xlvl & 0xffff0000) == 0x80860000) { - if (xlvl >= 0x80860001) - c->x86_capability[2] = cpuid_edx(0x80860001); - } -} - static void __cpuinit init_transmeta(struct cpuinfo_x86 *c) { unsigned int cap_mask, uk, max, dummy; @@ -24,8 +12,7 @@ static void __cpuinit init_transmeta(struct cpuinfo_x86 *c) unsigned int cpu_rev, cpu_freq = 0, cpu_flags, new_cpu_rev; char cpu_info[65]; - early_init_transmeta(c); - + get_model_name(c); /* Same as AMD/Cyrix */ display_cacheinfo(c); /* Print CMS and CPU revision */ @@ -98,12 +85,23 @@ static void __cpuinit init_transmeta(struct cpuinfo_x86 *c) #endif } +static void __cpuinit transmeta_identify(struct cpuinfo_x86 *c) +{ + u32 xlvl; + + /* Transmeta-defined flags: level 0x80860001 */ + xlvl = cpuid_eax(0x80860000); + if ((xlvl & 0xffff0000) == 0x80860000) { + if (xlvl >= 0x80860001) + c->x86_capability[2] = cpuid_edx(0x80860001); + } +} + static struct cpu_dev transmeta_cpu_dev __cpuinitdata = { .c_vendor = "Transmeta", .c_ident = { "GenuineTMx86", "TransmetaCPU" }, - .c_early_init = early_init_transmeta, .c_init = init_transmeta, - .c_x86_vendor = X86_VENDOR_TRANSMETA, + .c_identify = transmeta_identify, }; -cpu_dev_register(transmeta_cpu_dev); +cpu_vendor_dev_register(X86_VENDOR_TRANSMETA, &transmeta_cpu_dev); diff --git a/trunk/arch/x86/kernel/cpu/umc.c b/trunk/arch/x86/kernel/cpu/umc.c index e777f79e0960..b1fc90989d75 100644 --- a/trunk/arch/x86/kernel/cpu/umc.c +++ b/trunk/arch/x86/kernel/cpu/umc.c @@ -19,8 +19,7 @@ static struct cpu_dev umc_cpu_dev __cpuinitdata = { } }, }, - .c_x86_vendor = X86_VENDOR_UMC, }; -cpu_dev_register(umc_cpu_dev); +cpu_vendor_dev_register(X86_VENDOR_UMC, &umc_cpu_dev); diff --git a/trunk/arch/x86/kernel/e820.c b/trunk/arch/x86/kernel/e820.c index 78e642feac30..66e48aa2dd1b 100644 --- a/trunk/arch/x86/kernel/e820.c +++ b/trunk/arch/x86/kernel/e820.c @@ -148,9 +148,6 @@ void __init e820_print_map(char *who) case E820_NVS: printk(KERN_CONT "(ACPI NVS)\n"); break; - case E820_UNUSABLE: - printk("(unusable)\n"); - break; default: printk(KERN_CONT "type %u\n", e820.map[i].type); break; @@ -1263,7 +1260,6 @@ static inline const char *e820_type_to_string(int e820_type) case E820_RAM: return "System RAM"; case E820_ACPI: return "ACPI Tables"; case E820_NVS: return "ACPI Non-volatile Storage"; - case E820_UNUSABLE: return "Unusable memory"; default: return "reserved"; } } @@ -1271,7 +1267,6 @@ static inline const char *e820_type_to_string(int e820_type) /* * Mark e820 reserved areas as busy for the resource manager. */ -static struct resource __initdata *e820_res; void __init e820_reserve_resources(void) { int i; @@ -1279,7 +1274,6 @@ void __init e820_reserve_resources(void) u64 end; res = alloc_bootmem_low(sizeof(struct resource) * e820.nr_map); - e820_res = res; for (i = 0; i < e820.nr_map; i++) { end = e820.map[i].addr + e820.map[i].size - 1; #ifndef CONFIG_RESOURCES_64BIT @@ -1293,14 +1287,7 @@ void __init e820_reserve_resources(void) res->end = end; res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; - - /* - * don't register the region that could be conflicted with - * pci device BAR resource and insert them later in - * pcibios_resource_survey() - */ - if (e820.map[i].type != E820_RESERVED || res->start < (1ULL<<20)) - insert_resource(&iomem_resource, res); + insert_resource(&iomem_resource, res); res++; } @@ -1312,19 +1299,6 @@ void __init e820_reserve_resources(void) } } -void __init e820_reserve_resources_late(void) -{ - int i; - struct resource *res; - - res = e820_res; - for (i = 0; i < e820.nr_map; i++) { - if (!res->parent && res->end) - reserve_region_with_split(&iomem_resource, res->start, res->end, res->name); - res++; - } -} - char *__init default_machine_specific_memory_setup(void) { char *who = "BIOS-e820"; diff --git a/trunk/arch/x86/kernel/genapic_64.c b/trunk/arch/x86/kernel/genapic_64.c index 6c9bfc9e1e95..eaff0bbb1444 100644 --- a/trunk/arch/x86/kernel/genapic_64.c +++ b/trunk/arch/x86/kernel/genapic_64.c @@ -16,63 +16,87 @@ #include #include #include -#include #include #include #include -extern struct genapic apic_flat; -extern struct genapic apic_physflat; -extern struct genapic apic_x2xpic_uv_x; -extern struct genapic apic_x2apic_phys; -extern struct genapic apic_x2apic_cluster; +#ifdef CONFIG_ACPI +#include +#endif + +DEFINE_PER_CPU(int, x2apic_extra_bits); struct genapic __read_mostly *genapic = &apic_flat; -static struct genapic *apic_probe[] __initdata = { - &apic_x2apic_uv_x, - &apic_x2apic_phys, - &apic_x2apic_cluster, - &apic_physflat, - NULL, -}; +static enum uv_system_type uv_system_type; /* * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode. */ void __init setup_apic_routing(void) { - if (genapic == &apic_x2apic_phys || genapic == &apic_x2apic_cluster) { - if (!intr_remapping_enabled) - genapic = &apic_flat; - } + if (uv_system_type == UV_NON_UNIQUE_APIC) + genapic = &apic_x2apic_uv_x; + else +#ifdef CONFIG_ACPI + /* + * Quirk: some x86_64 machines can only use physical APIC mode + * regardless of how many processors are present (x86_64 ES7000 + * is an example). + */ + if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID && + (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL)) + genapic = &apic_physflat; + else +#endif - if (genapic == &apic_flat) { - if (max_physical_apicid >= 8) - genapic = &apic_physflat; - printk(KERN_INFO "Setting APIC routing to %s\n", genapic->name); - } + if (max_physical_apicid < 8) + genapic = &apic_flat; + else + genapic = &apic_physflat; + + printk(KERN_INFO "Setting APIC routing to %s\n", genapic->name); } /* Same for both flat and physical. */ -void apic_send_IPI_self(int vector) +void send_IPI_self(int vector) { __send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL); } int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) { - int i; - - for (i = 0; apic_probe[i]; ++i) { - if (apic_probe[i]->acpi_madt_oem_check(oem_id, oem_table_id)) { - genapic = apic_probe[i]; - printk(KERN_INFO "Setting APIC routing to %s.\n", - genapic->name); - return 1; - } + if (!strcmp(oem_id, "SGI")) { + if (!strcmp(oem_table_id, "UVL")) + uv_system_type = UV_LEGACY_APIC; + else if (!strcmp(oem_table_id, "UVX")) + uv_system_type = UV_X2APIC; + else if (!strcmp(oem_table_id, "UVH")) + uv_system_type = UV_NON_UNIQUE_APIC; } return 0; } + +unsigned int read_apic_id(void) +{ + unsigned int id; + + WARN_ON(preemptible() && num_online_cpus() > 1); + id = apic_read(APIC_ID); + if (uv_system_type >= UV_X2APIC) + id |= __get_cpu_var(x2apic_extra_bits); + return id; +} + +enum uv_system_type get_uv_system_type(void) +{ + return uv_system_type; +} + +int is_uv_system(void) +{ + return uv_system_type != UV_NONE; +} +EXPORT_SYMBOL_GPL(is_uv_system); diff --git a/trunk/arch/x86/kernel/genapic_flat_64.c b/trunk/arch/x86/kernel/genapic_flat_64.c index 9eca5ba7a6b1..786548a62d38 100644 --- a/trunk/arch/x86/kernel/genapic_flat_64.c +++ b/trunk/arch/x86/kernel/genapic_flat_64.c @@ -15,20 +15,9 @@ #include #include #include -#include #include #include #include -#include - -#ifdef CONFIG_ACPI -#include -#endif - -static int __init flat_acpi_madt_oem_check(char *oem_id, char *oem_table_id) -{ - return 1; -} static cpumask_t flat_target_cpus(void) { @@ -106,33 +95,9 @@ static void flat_send_IPI_all(int vector) __send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL); } -static unsigned int get_apic_id(unsigned long x) -{ - unsigned int id; - - id = (((x)>>24) & 0xFFu); - return id; -} - -static unsigned long set_apic_id(unsigned int id) -{ - unsigned long x; - - x = ((id & 0xFFu)<<24); - return x; -} - -static unsigned int read_xapic_id(void) -{ - unsigned int id; - - id = get_apic_id(apic_read(APIC_ID)); - return id; -} - static int flat_apic_id_registered(void) { - return physid_isset(read_xapic_id(), phys_cpu_present_map); + return physid_isset(GET_APIC_ID(read_apic_id()), phys_cpu_present_map); } static unsigned int flat_cpu_mask_to_apicid(cpumask_t cpumask) @@ -147,7 +112,6 @@ static unsigned int phys_pkg_id(int index_msb) struct genapic apic_flat = { .name = "flat", - .acpi_madt_oem_check = flat_acpi_madt_oem_check, .int_delivery_mode = dest_LowestPrio, .int_dest_mode = (APIC_DEST_LOGICAL != 0), .target_cpus = flat_target_cpus, @@ -157,12 +121,8 @@ struct genapic apic_flat = { .send_IPI_all = flat_send_IPI_all, .send_IPI_allbutself = flat_send_IPI_allbutself, .send_IPI_mask = flat_send_IPI_mask, - .send_IPI_self = apic_send_IPI_self, .cpu_mask_to_apicid = flat_cpu_mask_to_apicid, .phys_pkg_id = phys_pkg_id, - .get_apic_id = get_apic_id, - .set_apic_id = set_apic_id, - .apic_id_mask = (0xFFu<<24), }; /* @@ -170,21 +130,6 @@ struct genapic apic_flat = { * We cannot use logical delivery in this case because the mask * overflows, so use physical mode. */ -static int __init physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id) -{ -#ifdef CONFIG_ACPI - /* - * Quirk: some x86_64 machines can only use physical APIC mode - * regardless of how many processors are present (x86_64 ES7000 - * is an example). - */ - if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID && - (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL)) - return 1; -#endif - - return 0; -} static cpumask_t physflat_target_cpus(void) { @@ -231,7 +176,6 @@ static unsigned int physflat_cpu_mask_to_apicid(cpumask_t cpumask) struct genapic apic_physflat = { .name = "physical flat", - .acpi_madt_oem_check = physflat_acpi_madt_oem_check, .int_delivery_mode = dest_Fixed, .int_dest_mode = (APIC_DEST_PHYSICAL != 0), .target_cpus = physflat_target_cpus, @@ -241,10 +185,6 @@ struct genapic apic_physflat = { .send_IPI_all = physflat_send_IPI_all, .send_IPI_allbutself = physflat_send_IPI_allbutself, .send_IPI_mask = physflat_send_IPI_mask, - .send_IPI_self = apic_send_IPI_self, .cpu_mask_to_apicid = physflat_cpu_mask_to_apicid, .phys_pkg_id = phys_pkg_id, - .get_apic_id = get_apic_id, - .set_apic_id = set_apic_id, - .apic_id_mask = (0xFFu<<24), }; diff --git a/trunk/arch/x86/kernel/genx2apic_cluster.c b/trunk/arch/x86/kernel/genx2apic_cluster.c deleted file mode 100644 index e4bf2cc0d743..000000000000 --- a/trunk/arch/x86/kernel/genx2apic_cluster.c +++ /dev/null @@ -1,159 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -DEFINE_PER_CPU(u32, x86_cpu_to_logical_apicid); - -static int __init x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) -{ - if (cpu_has_x2apic) - return 1; - - return 0; -} - -/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ - -static cpumask_t x2apic_target_cpus(void) -{ - return cpumask_of_cpu(0); -} - -/* - * for now each logical cpu is in its own vector allocation domain. - */ -static cpumask_t x2apic_vector_allocation_domain(int cpu) -{ - cpumask_t domain = CPU_MASK_NONE; - cpu_set(cpu, domain); - return domain; -} - -static void __x2apic_send_IPI_dest(unsigned int apicid, int vector, - unsigned int dest) -{ - unsigned long cfg; - - cfg = __prepare_ICR(0, vector, dest); - - /* - * send the IPI. - */ - x2apic_icr_write(cfg, apicid); -} - -/* - * for now, we send the IPI's one by one in the cpumask. - * TBD: Based on the cpu mask, we can send the IPI's to the cluster group - * at once. We have 16 cpu's in a cluster. This will minimize IPI register - * writes. - */ -static void x2apic_send_IPI_mask(cpumask_t mask, int vector) -{ - unsigned long flags; - unsigned long query_cpu; - - local_irq_save(flags); - for_each_cpu_mask(query_cpu, mask) { - __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_logical_apicid, query_cpu), - vector, APIC_DEST_LOGICAL); - } - local_irq_restore(flags); -} - -static void x2apic_send_IPI_allbutself(int vector) -{ - cpumask_t mask = cpu_online_map; - - cpu_clear(smp_processor_id(), mask); - - if (!cpus_empty(mask)) - x2apic_send_IPI_mask(mask, vector); -} - -static void x2apic_send_IPI_all(int vector) -{ - x2apic_send_IPI_mask(cpu_online_map, vector); -} - -static int x2apic_apic_id_registered(void) -{ - return 1; -} - -static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask) -{ - int cpu; - - /* - * We're using fixed IRQ delivery, can only return one phys APIC ID. - * May as well be the first. - */ - cpu = first_cpu(cpumask); - if ((unsigned)cpu < NR_CPUS) - return per_cpu(x86_cpu_to_logical_apicid, cpu); - else - return BAD_APICID; -} - -static unsigned int get_apic_id(unsigned long x) -{ - unsigned int id; - - id = x; - return id; -} - -static unsigned long set_apic_id(unsigned int id) -{ - unsigned long x; - - x = id; - return x; -} - -static unsigned int phys_pkg_id(int index_msb) -{ - return current_cpu_data.initial_apicid >> index_msb; -} - -static void x2apic_send_IPI_self(int vector) -{ - apic_write(APIC_SELF_IPI, vector); -} - -static void init_x2apic_ldr(void) -{ - int cpu = smp_processor_id(); - - per_cpu(x86_cpu_to_logical_apicid, cpu) = apic_read(APIC_LDR); - return; -} - -struct genapic apic_x2apic_cluster = { - .name = "cluster x2apic", - .acpi_madt_oem_check = x2apic_acpi_madt_oem_check, - .int_delivery_mode = dest_LowestPrio, - .int_dest_mode = (APIC_DEST_LOGICAL != 0), - .target_cpus = x2apic_target_cpus, - .vector_allocation_domain = x2apic_vector_allocation_domain, - .apic_id_registered = x2apic_apic_id_registered, - .init_apic_ldr = init_x2apic_ldr, - .send_IPI_all = x2apic_send_IPI_all, - .send_IPI_allbutself = x2apic_send_IPI_allbutself, - .send_IPI_mask = x2apic_send_IPI_mask, - .send_IPI_self = x2apic_send_IPI_self, - .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, - .phys_pkg_id = phys_pkg_id, - .get_apic_id = get_apic_id, - .set_apic_id = set_apic_id, - .apic_id_mask = (0xFFFFFFFFu), -}; diff --git a/trunk/arch/x86/kernel/genx2apic_phys.c b/trunk/arch/x86/kernel/genx2apic_phys.c deleted file mode 100644 index 8f1343df2627..000000000000 --- a/trunk/arch/x86/kernel/genx2apic_phys.c +++ /dev/null @@ -1,154 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -static int x2apic_phys; - -static int set_x2apic_phys_mode(char *arg) -{ - x2apic_phys = 1; - return 0; -} -early_param("x2apic_phys", set_x2apic_phys_mode); - -static int __init x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) -{ - if (cpu_has_x2apic && x2apic_phys) - return 1; - - return 0; -} - -/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ - -static cpumask_t x2apic_target_cpus(void) -{ - return cpumask_of_cpu(0); -} - -static cpumask_t x2apic_vector_allocation_domain(int cpu) -{ - cpumask_t domain = CPU_MASK_NONE; - cpu_set(cpu, domain); - return domain; -} - -static void __x2apic_send_IPI_dest(unsigned int apicid, int vector, - unsigned int dest) -{ - unsigned long cfg; - - cfg = __prepare_ICR(0, vector, dest); - - /* - * send the IPI. - */ - x2apic_icr_write(cfg, apicid); -} - -static void x2apic_send_IPI_mask(cpumask_t mask, int vector) -{ - unsigned long flags; - unsigned long query_cpu; - - local_irq_save(flags); - for_each_cpu_mask(query_cpu, mask) { - __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu), - vector, APIC_DEST_PHYSICAL); - } - local_irq_restore(flags); -} - -static void x2apic_send_IPI_allbutself(int vector) -{ - cpumask_t mask = cpu_online_map; - - cpu_clear(smp_processor_id(), mask); - - if (!cpus_empty(mask)) - x2apic_send_IPI_mask(mask, vector); -} - -static void x2apic_send_IPI_all(int vector) -{ - x2apic_send_IPI_mask(cpu_online_map, vector); -} - -static int x2apic_apic_id_registered(void) -{ - return 1; -} - -static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask) -{ - int cpu; - - /* - * We're using fixed IRQ delivery, can only return one phys APIC ID. - * May as well be the first. - */ - cpu = first_cpu(cpumask); - if ((unsigned)cpu < NR_CPUS) - return per_cpu(x86_cpu_to_apicid, cpu); - else - return BAD_APICID; -} - -static unsigned int get_apic_id(unsigned long x) -{ - unsigned int id; - - id = x; - return id; -} - -static unsigned long set_apic_id(unsigned int id) -{ - unsigned long x; - - x = id; - return x; -} - -static unsigned int phys_pkg_id(int index_msb) -{ - return current_cpu_data.initial_apicid >> index_msb; -} - -void x2apic_send_IPI_self(int vector) -{ - apic_write(APIC_SELF_IPI, vector); -} - -void init_x2apic_ldr(void) -{ - return; -} - -struct genapic apic_x2apic_phys = { - .name = "physical x2apic", - .acpi_madt_oem_check = x2apic_acpi_madt_oem_check, - .int_delivery_mode = dest_Fixed, - .int_dest_mode = (APIC_DEST_PHYSICAL != 0), - .target_cpus = x2apic_target_cpus, - .vector_allocation_domain = x2apic_vector_allocation_domain, - .apic_id_registered = x2apic_apic_id_registered, - .init_apic_ldr = init_x2apic_ldr, - .send_IPI_all = x2apic_send_IPI_all, - .send_IPI_allbutself = x2apic_send_IPI_allbutself, - .send_IPI_mask = x2apic_send_IPI_mask, - .send_IPI_self = x2apic_send_IPI_self, - .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, - .phys_pkg_id = phys_pkg_id, - .get_apic_id = get_apic_id, - .set_apic_id = set_apic_id, - .apic_id_mask = (0xFFFFFFFFu), -}; diff --git a/trunk/arch/x86/kernel/genx2apic_uv_x.c b/trunk/arch/x86/kernel/genx2apic_uv_x.c index ae2ffc8a400c..bfa837cb16be 100644 --- a/trunk/arch/x86/kernel/genx2apic_uv_x.c +++ b/trunk/arch/x86/kernel/genx2apic_uv_x.c @@ -12,12 +12,12 @@ #include #include #include +#include #include #include #include #include #include -#include #include #include #include @@ -26,36 +26,6 @@ #include #include -DEFINE_PER_CPU(int, x2apic_extra_bits); - -static enum uv_system_type uv_system_type; - -static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) -{ - if (!strcmp(oem_id, "SGI")) { - if (!strcmp(oem_table_id, "UVL")) - uv_system_type = UV_LEGACY_APIC; - else if (!strcmp(oem_table_id, "UVX")) - uv_system_type = UV_X2APIC; - else if (!strcmp(oem_table_id, "UVH")) { - uv_system_type = UV_NON_UNIQUE_APIC; - return 1; - } - } - return 0; -} - -enum uv_system_type get_uv_system_type(void) -{ - return uv_system_type; -} - -int is_uv_system(void) -{ - return uv_system_type != UV_NONE; -} -EXPORT_SYMBOL_GPL(is_uv_system); - DEFINE_PER_CPU(struct uv_hub_info_s, __uv_hub_info); EXPORT_PER_CPU_SYMBOL_GPL(__uv_hub_info); @@ -153,10 +123,6 @@ static int uv_apic_id_registered(void) return 1; } -static void uv_init_apic_ldr(void) -{ -} - static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask) { int cpu; @@ -172,34 +138,9 @@ static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask) return BAD_APICID; } -static unsigned int get_apic_id(unsigned long x) -{ - unsigned int id; - - WARN_ON(preemptible() && num_online_cpus() > 1); - id = x | __get_cpu_var(x2apic_extra_bits); - - return id; -} - -static unsigned long set_apic_id(unsigned int id) -{ - unsigned long x; - - /* maskout x2apic_extra_bits ? */ - x = id; - return x; -} - -static unsigned int uv_read_apic_id(void) -{ - - return get_apic_id(apic_read(APIC_ID)); -} - static unsigned int phys_pkg_id(int index_msb) { - return uv_read_apic_id() >> index_msb; + return GET_APIC_ID(read_apic_id()) >> index_msb; } #ifdef ZZZ /* Needs x2apic patch */ @@ -211,22 +152,17 @@ static void uv_send_IPI_self(int vector) struct genapic apic_x2apic_uv_x = { .name = "UV large system", - .acpi_madt_oem_check = uv_acpi_madt_oem_check, .int_delivery_mode = dest_Fixed, .int_dest_mode = (APIC_DEST_PHYSICAL != 0), .target_cpus = uv_target_cpus, .vector_allocation_domain = uv_vector_allocation_domain,/* Fixme ZZZ */ .apic_id_registered = uv_apic_id_registered, - .init_apic_ldr = uv_init_apic_ldr, .send_IPI_all = uv_send_IPI_all, .send_IPI_allbutself = uv_send_IPI_allbutself, .send_IPI_mask = uv_send_IPI_mask, /* ZZZ.send_IPI_self = uv_send_IPI_self, */ .cpu_mask_to_apicid = uv_cpu_mask_to_apicid, .phys_pkg_id = phys_pkg_id, /* Fixme ZZZ */ - .get_apic_id = get_apic_id, - .set_apic_id = set_apic_id, - .apic_id_mask = (0xFFFFFFFFu), }; static __cpuinit void set_x2apic_extra_bits(int pnode) @@ -465,5 +401,3 @@ void __cpuinit uv_cpu_init(void) if (get_uv_system_type() == UV_NON_UNIQUE_APIC) set_x2apic_extra_bits(uv_hub_info->pnode); } - - diff --git a/trunk/arch/x86/kernel/i387.c b/trunk/arch/x86/kernel/i387.c index 45723f1fe198..eb9ddd8efb82 100644 --- a/trunk/arch/x86/kernel/i387.c +++ b/trunk/arch/x86/kernel/i387.c @@ -21,12 +21,9 @@ # include # include #else -# define save_i387_xstate_ia32 save_i387_xstate -# define restore_i387_xstate_ia32 restore_i387_xstate +# define save_i387_ia32 save_i387 +# define restore_i387_ia32 restore_i387 # define _fpstate_ia32 _fpstate -# define _xstate_ia32 _xstate -# define sig_xstate_ia32_size sig_xstate_size -# define fx_sw_reserved_ia32 fx_sw_reserved # define user_i387_ia32_struct user_i387_struct # define user32_fxsr_struct user_fxsr_struct #endif @@ -39,7 +36,6 @@ static unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu; unsigned int xstate_size; -unsigned int sig_xstate_ia32_size = sizeof(struct _fpstate_ia32); static struct i387_fxsave_struct fx_scratch __cpuinitdata; void __cpuinit mxcsr_feature_mask_init(void) @@ -65,11 +61,6 @@ void __init init_thread_xstate(void) return; } - if (cpu_has_xsave) { - xsave_cntxt_init(); - return; - } - if (cpu_has_fxsr) xstate_size = sizeof(struct i387_fxsave_struct); #ifdef CONFIG_X86_32 @@ -92,19 +83,9 @@ void __cpuinit fpu_init(void) write_cr0(oldcr0 & ~(X86_CR0_TS|X86_CR0_EM)); /* clear TS and EM */ - /* - * Boot processor to setup the FP and extended state context info. - */ - if (!smp_processor_id()) - init_thread_xstate(); - xsave_init(); - mxcsr_feature_mask_init(); /* clean state in init */ - if (cpu_has_xsave) - current_thread_info()->status = TS_XSAVE; - else - current_thread_info()->status = 0; + current_thread_info()->status = 0; clear_used_math(); } #endif /* CONFIG_X86_64 */ @@ -214,13 +195,6 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset, */ target->thread.xstate->fxsave.mxcsr &= mxcsr_feature_mask; - /* - * update the header bits in the xsave header, indicating the - * presence of FP and SSE state. - */ - if (cpu_has_xsave) - target->thread.xstate->xsave.xsave_hdr.xstate_bv |= XSTATE_FPSSE; - return ret; } @@ -421,12 +395,6 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset, if (!ret) convert_to_fxsr(target, &env); - /* - * update the header bit in the xsave header, indicating the - * presence of FP. - */ - if (cpu_has_xsave) - target->thread.xstate->xsave.xsave_hdr.xstate_bv |= XSTATE_FP; return ret; } @@ -439,6 +407,7 @@ static inline int save_i387_fsave(struct _fpstate_ia32 __user *buf) struct task_struct *tsk = current; struct i387_fsave_struct *fp = &tsk->thread.xstate->fsave; + unlazy_fpu(tsk); fp->status = fp->swd; if (__copy_to_user(buf, fp, sizeof(struct i387_fsave_struct))) return -1; @@ -452,6 +421,8 @@ static int save_i387_fxsave(struct _fpstate_ia32 __user *buf) struct user_i387_ia32_struct env; int err = 0; + unlazy_fpu(tsk); + convert_from_fxsr(&env, tsk); if (__copy_to_user(buf, &env, sizeof(env))) return -1; @@ -461,40 +432,16 @@ static int save_i387_fxsave(struct _fpstate_ia32 __user *buf) if (err) return -1; - if (__copy_to_user(&buf->_fxsr_env[0], fx, xstate_size)) + if (__copy_to_user(&buf->_fxsr_env[0], fx, + sizeof(struct i387_fxsave_struct))) return -1; return 1; } -static int save_i387_xsave(void __user *buf) -{ - struct _fpstate_ia32 __user *fx = buf; - int err = 0; - - if (save_i387_fxsave(fx) < 0) - return -1; - - err = __copy_to_user(&fx->sw_reserved, &fx_sw_reserved_ia32, - sizeof(struct _fpx_sw_bytes)); - err |= __put_user(FP_XSTATE_MAGIC2, - (__u32 __user *) (buf + sig_xstate_ia32_size - - FP_XSTATE_MAGIC2_SIZE)); - if (err) - return -1; - - return 1; -} - -int save_i387_xstate_ia32(void __user *buf) +int save_i387_ia32(struct _fpstate_ia32 __user *buf) { - struct _fpstate_ia32 __user *fp = (struct _fpstate_ia32 __user *) buf; - struct task_struct *tsk = current; - if (!used_math()) return 0; - - if (!access_ok(VERIFY_WRITE, buf, sig_xstate_ia32_size)) - return -EACCES; /* * This will cause a "finit" to be triggered by the next * attempted FPU operation by the 'current' process. @@ -504,17 +451,13 @@ int save_i387_xstate_ia32(void __user *buf) if (!HAVE_HWFP) { return fpregs_soft_get(current, NULL, 0, sizeof(struct user_i387_ia32_struct), - NULL, fp) ? -1 : 1; + NULL, buf) ? -1 : 1; } - unlazy_fpu(tsk); - - if (cpu_has_xsave) - return save_i387_xsave(fp); if (cpu_has_fxsr) - return save_i387_fxsave(fp); + return save_i387_fxsave(buf); else - return save_i387_fsave(fp); + return save_i387_fsave(buf); } static inline int restore_i387_fsave(struct _fpstate_ia32 __user *buf) @@ -525,15 +468,14 @@ static inline int restore_i387_fsave(struct _fpstate_ia32 __user *buf) sizeof(struct i387_fsave_struct)); } -static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf, - unsigned int size) +static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf) { struct task_struct *tsk = current; struct user_i387_ia32_struct env; int err; err = __copy_from_user(&tsk->thread.xstate->fxsave, &buf->_fxsr_env[0], - size); + sizeof(struct i387_fxsave_struct)); /* mxcsr reserved bits must be masked to zero for security reasons */ tsk->thread.xstate->fxsave.mxcsr &= mxcsr_feature_mask; if (err || __copy_from_user(&env, buf, sizeof(env))) @@ -543,69 +485,14 @@ static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf, return 0; } -static int restore_i387_xsave(void __user *buf) -{ - struct _fpx_sw_bytes fx_sw_user; - struct _fpstate_ia32 __user *fx_user = - ((struct _fpstate_ia32 __user *) buf); - struct i387_fxsave_struct __user *fx = - (struct i387_fxsave_struct __user *) &fx_user->_fxsr_env[0]; - struct xsave_hdr_struct *xsave_hdr = - ¤t->thread.xstate->xsave.xsave_hdr; - u64 mask; - int err; - - if (check_for_xstate(fx, buf, &fx_sw_user)) - goto fx_only; - - mask = fx_sw_user.xstate_bv; - - err = restore_i387_fxsave(buf, fx_sw_user.xstate_size); - - xsave_hdr->xstate_bv &= pcntxt_mask; - /* - * These bits must be zero. - */ - xsave_hdr->reserved1[0] = xsave_hdr->reserved1[1] = 0; - - /* - * Init the state that is not present in the memory layout - * and enabled by the OS. - */ - mask = ~(pcntxt_mask & ~mask); - xsave_hdr->xstate_bv &= mask; - - return err; -fx_only: - /* - * Couldn't find the extended state information in the memory - * layout. Restore the FP/SSE and init the other extended state - * enabled by the OS. - */ - xsave_hdr->xstate_bv = XSTATE_FPSSE; - return restore_i387_fxsave(buf, sizeof(struct i387_fxsave_struct)); -} - -int restore_i387_xstate_ia32(void __user *buf) +int restore_i387_ia32(struct _fpstate_ia32 __user *buf) { int err; struct task_struct *tsk = current; - struct _fpstate_ia32 __user *fp = (struct _fpstate_ia32 __user *) buf; if (HAVE_HWFP) clear_fpu(tsk); - if (!buf) { - if (used_math()) { - clear_fpu(tsk); - clear_used_math(); - } - - return 0; - } else - if (!access_ok(VERIFY_READ, buf, sig_xstate_ia32_size)) - return -EACCES; - if (!used_math()) { err = init_fpu(tsk); if (err) @@ -613,17 +500,14 @@ int restore_i387_xstate_ia32(void __user *buf) } if (HAVE_HWFP) { - if (cpu_has_xsave) - err = restore_i387_xsave(buf); - else if (cpu_has_fxsr) - err = restore_i387_fxsave(fp, sizeof(struct - i387_fxsave_struct)); + if (cpu_has_fxsr) + err = restore_i387_fxsave(buf); else - err = restore_i387_fsave(fp); + err = restore_i387_fsave(buf); } else { err = fpregs_soft_set(current, NULL, 0, sizeof(struct user_i387_ia32_struct), - NULL, fp) != 0; + NULL, buf) != 0; } set_used_math(); diff --git a/trunk/arch/x86/kernel/i8259.c b/trunk/arch/x86/kernel/i8259.c index 4b8a53d841f7..dc92b49d9204 100644 --- a/trunk/arch/x86/kernel/i8259.c +++ b/trunk/arch/x86/kernel/i8259.c @@ -282,30 +282,6 @@ static int __init i8259A_init_sysfs(void) device_initcall(i8259A_init_sysfs); -void mask_8259A(void) -{ - unsigned long flags; - - spin_lock_irqsave(&i8259A_lock, flags); - - outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ - outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */ - - spin_unlock_irqrestore(&i8259A_lock, flags); -} - -void unmask_8259A(void) -{ - unsigned long flags; - - spin_lock_irqsave(&i8259A_lock, flags); - - outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */ - outb(cached_slave_mask, PIC_SLAVE_IMR); /* restore slave IRQ mask */ - - spin_unlock_irqrestore(&i8259A_lock, flags); -} - void init_8259A(int auto_eoi) { unsigned long flags; diff --git a/trunk/arch/x86/kernel/io_apic_32.c b/trunk/arch/x86/kernel/io_apic_32.c index e710289f673e..09cddb57bec4 100644 --- a/trunk/arch/x86/kernel/io_apic_32.c +++ b/trunk/arch/x86/kernel/io_apic_32.c @@ -46,13 +46,10 @@ #include #include #include -#include #include #include -#define __apicdebuginit(type) static type __init - int (*ioapic_renumber_irq)(int ioapic, int irq); atomic_t irq_mis_count; @@ -1344,8 +1341,7 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin, ioapic_write_entry(apic, pin, entry); } - -__apicdebuginit(void) print_IO_APIC(void) +void __init print_IO_APIC(void) { int apic, i; union IO_APIC_reg_00 reg_00; @@ -1460,7 +1456,9 @@ __apicdebuginit(void) print_IO_APIC(void) return; } -__apicdebuginit(void) print_APIC_bitfield(int base) +#if 0 + +static void print_APIC_bitfield(int base) { unsigned int v; int i, j; @@ -1481,10 +1479,9 @@ __apicdebuginit(void) print_APIC_bitfield(int base) } } -__apicdebuginit(void) print_local_APIC(void *dummy) +void /*__init*/ print_local_APIC(void *dummy) { unsigned int v, ver, maxlvt; - u64 icr; if (apic_verbosity == APIC_QUIET) return; @@ -1493,7 +1490,7 @@ __apicdebuginit(void) print_local_APIC(void *dummy) smp_processor_id(), hard_smp_processor_id()); v = apic_read(APIC_ID); printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, - GET_APIC_ID(v)); + GET_APIC_ID(read_apic_id())); v = apic_read(APIC_LVR); printk(KERN_INFO "... APIC VERSION: %08x\n", v); ver = GET_APIC_VERSION(v); @@ -1535,9 +1532,10 @@ __apicdebuginit(void) print_local_APIC(void *dummy) printk(KERN_DEBUG "... APIC ESR: %08x\n", v); } - icr = apic_icr_read(); - printk(KERN_DEBUG "... APIC ICR: %08x\n", icr); - printk(KERN_DEBUG "... APIC ICR2: %08x\n", icr >> 32); + v = apic_read(APIC_ICR); + printk(KERN_DEBUG "... APIC ICR: %08x\n", v); + v = apic_read(APIC_ICR2); + printk(KERN_DEBUG "... APIC ICR2: %08x\n", v); v = apic_read(APIC_LVTT); printk(KERN_DEBUG "... APIC LVTT: %08x\n", v); @@ -1565,12 +1563,12 @@ __apicdebuginit(void) print_local_APIC(void *dummy) printk("\n"); } -__apicdebuginit(void) print_all_local_APICs(void) +void print_all_local_APICs(void) { on_each_cpu(print_local_APIC, NULL, 1); } -__apicdebuginit(void) print_PIC(void) +void /*__init*/ print_PIC(void) { unsigned int v; unsigned long flags; @@ -1602,17 +1600,7 @@ __apicdebuginit(void) print_PIC(void) printk(KERN_DEBUG "... PIC ELCR: %04x\n", v); } -__apicdebuginit(int) print_all_ICs(void) -{ - print_PIC(); - print_all_local_APICs(); - print_IO_APIC(); - - return 0; -} - -fs_initcall(print_all_ICs); - +#endif /* 0 */ static void __init enable_IO_APIC(void) { @@ -1710,7 +1698,8 @@ void disable_IO_APIC(void) entry.dest_mode = 0; /* Physical */ entry.delivery_mode = dest_ExtINT; /* ExtInt */ entry.vector = 0; - entry.dest.physical.physical_dest = read_apic_id(); + entry.dest.physical.physical_dest = + GET_APIC_ID(read_apic_id()); /* * Add it to the IO-APIC irq-routing table: @@ -1736,8 +1725,10 @@ static void __init setup_ioapic_ids_from_mpc(void) unsigned char old_id; unsigned long flags; - if (x86_quirks->setup_ioapic_ids && x86_quirks->setup_ioapic_ids()) +#ifdef CONFIG_X86_NUMAQ + if (found_numaq) return; +#endif /* * Don't check I/O APIC IDs for xAPIC systems. They have @@ -2338,6 +2329,8 @@ void __init setup_IO_APIC(void) setup_IO_APIC_irqs(); init_IO_APIC_traps(); check_timer(); + if (!acpi_ioapic) + print_IO_APIC(); } /* diff --git a/trunk/arch/x86/kernel/io_apic_64.c b/trunk/arch/x86/kernel/io_apic_64.c index a1bec2969c6a..61a83b70c18f 100644 --- a/trunk/arch/x86/kernel/io_apic_64.c +++ b/trunk/arch/x86/kernel/io_apic_64.c @@ -37,7 +37,6 @@ #include #endif #include -#include #include #include @@ -50,13 +49,10 @@ #include #include #include -#include #include #include -#define __apicdebuginit(type) static type __init - struct irq_cfg { cpumask_t domain; cpumask_t old_domain; @@ -91,6 +87,8 @@ int first_system_vector = 0xfe; char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE}; +#define __apicdebuginit __init + int sis_apic_bug; /* not actually supported, dummy for compile */ static int no_timer_check; @@ -110,9 +108,6 @@ static DEFINE_SPINLOCK(vector_lock); */ int nr_ioapic_registers[MAX_IO_APICS]; -/* I/O APIC RTE contents at the OS boot up */ -struct IO_APIC_route_entry *early_ioapic_entries[MAX_IO_APICS]; - /* I/O APIC entries */ struct mp_config_ioapic mp_ioapics[MAX_IO_APICS]; int nr_ioapics; @@ -308,12 +303,7 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector) pin = entry->pin; if (pin == -1) break; - /* - * With interrupt-remapping, destination information comes - * from interrupt-remapping table entry. - */ - if (!irq_remapped(irq)) - io_apic_write(apic, 0x11 + pin*2, dest); + io_apic_write(apic, 0x11 + pin*2, dest); reg = io_apic_read(apic, 0x10 + pin*2); reg &= ~IO_APIC_REDIR_VECTOR_MASK; reg |= vector; @@ -450,69 +440,6 @@ static void clear_IO_APIC (void) clear_IO_APIC_pin(apic, pin); } -/* - * Saves and masks all the unmasked IO-APIC RTE's - */ -int save_mask_IO_APIC_setup(void) -{ - union IO_APIC_reg_01 reg_01; - unsigned long flags; - int apic, pin; - - /* - * The number of IO-APIC IRQ registers (== #pins): - */ - for (apic = 0; apic < nr_ioapics; apic++) { - spin_lock_irqsave(&ioapic_lock, flags); - reg_01.raw = io_apic_read(apic, 1); - spin_unlock_irqrestore(&ioapic_lock, flags); - nr_ioapic_registers[apic] = reg_01.bits.entries+1; - } - - for (apic = 0; apic < nr_ioapics; apic++) { - early_ioapic_entries[apic] = - kzalloc(sizeof(struct IO_APIC_route_entry) * - nr_ioapic_registers[apic], GFP_KERNEL); - if (!early_ioapic_entries[apic]) - return -ENOMEM; - } - - for (apic = 0; apic < nr_ioapics; apic++) - for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { - struct IO_APIC_route_entry entry; - - entry = early_ioapic_entries[apic][pin] = - ioapic_read_entry(apic, pin); - if (!entry.mask) { - entry.mask = 1; - ioapic_write_entry(apic, pin, entry); - } - } - return 0; -} - -void restore_IO_APIC_setup(void) -{ - int apic, pin; - - for (apic = 0; apic < nr_ioapics; apic++) - for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) - ioapic_write_entry(apic, pin, - early_ioapic_entries[apic][pin]); -} - -void reinit_intr_remapped_IO_APIC(int intr_remapping) -{ - /* - * for now plain restore of previous settings. - * TBD: In the case of OS enabling interrupt-remapping, - * IO-APIC RTE's need to be setup to point to interrupt-remapping - * table entries. for now, do a plain restore, and wait for - * the setup_IO_APIC_irqs() to do proper initialization. - */ - restore_IO_APIC_setup(); -} - int skip_ioapic_setup; int ioapic_force; @@ -912,98 +839,18 @@ void __setup_vector_irq(int cpu) } static struct irq_chip ioapic_chip; -#ifdef CONFIG_INTR_REMAP -static struct irq_chip ir_ioapic_chip; -#endif static void ioapic_register_intr(int irq, unsigned long trigger) { - if (trigger) + if (trigger) { irq_desc[irq].status |= IRQ_LEVEL; - else - irq_desc[irq].status &= ~IRQ_LEVEL; - -#ifdef CONFIG_INTR_REMAP - if (irq_remapped(irq)) { - irq_desc[irq].status |= IRQ_MOVE_PCNTXT; - if (trigger) - set_irq_chip_and_handler_name(irq, &ir_ioapic_chip, - handle_fasteoi_irq, - "fasteoi"); - else - set_irq_chip_and_handler_name(irq, &ir_ioapic_chip, - handle_edge_irq, "edge"); - return; - } -#endif - if (trigger) set_irq_chip_and_handler_name(irq, &ioapic_chip, - handle_fasteoi_irq, - "fasteoi"); - else + handle_fasteoi_irq, "fasteoi"); + } else { + irq_desc[irq].status &= ~IRQ_LEVEL; set_irq_chip_and_handler_name(irq, &ioapic_chip, handle_edge_irq, "edge"); -} - -static int setup_ioapic_entry(int apic, int irq, - struct IO_APIC_route_entry *entry, - unsigned int destination, int trigger, - int polarity, int vector) -{ - /* - * add it to the IO-APIC irq-routing table: - */ - memset(entry,0,sizeof(*entry)); - -#ifdef CONFIG_INTR_REMAP - if (intr_remapping_enabled) { - struct intel_iommu *iommu = map_ioapic_to_ir(apic); - struct irte irte; - struct IR_IO_APIC_route_entry *ir_entry = - (struct IR_IO_APIC_route_entry *) entry; - int index; - - if (!iommu) - panic("No mapping iommu for ioapic %d\n", apic); - - index = alloc_irte(iommu, irq, 1); - if (index < 0) - panic("Failed to allocate IRTE for ioapic %d\n", apic); - - memset(&irte, 0, sizeof(irte)); - - irte.present = 1; - irte.dst_mode = INT_DEST_MODE; - irte.trigger_mode = trigger; - irte.dlvry_mode = INT_DELIVERY_MODE; - irte.vector = vector; - irte.dest_id = IRTE_DEST(destination); - - modify_irte(irq, &irte); - - ir_entry->index2 = (index >> 15) & 0x1; - ir_entry->zero = 0; - ir_entry->format = 1; - ir_entry->index = (index & 0x7fff); - } else -#endif - { - entry->delivery_mode = INT_DELIVERY_MODE; - entry->dest_mode = INT_DEST_MODE; - entry->dest = destination; } - - entry->mask = 0; /* enable IRQ */ - entry->trigger = trigger; - entry->polarity = polarity; - entry->vector = vector; - - /* Mask level triggered irqs. - * Use IRQ_DELAYED_DISABLE for edge triggered irqs. - */ - if (trigger) - entry->mask = 1; - return 0; } static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, @@ -1028,15 +875,24 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector, irq, trigger, polarity); + /* + * add it to the IO-APIC irq-routing table: + */ + memset(&entry,0,sizeof(entry)); + + entry.delivery_mode = INT_DELIVERY_MODE; + entry.dest_mode = INT_DEST_MODE; + entry.dest = cpu_mask_to_apicid(mask); + entry.mask = 0; /* enable IRQ */ + entry.trigger = trigger; + entry.polarity = polarity; + entry.vector = cfg->vector; - if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry, - cpu_mask_to_apicid(mask), trigger, polarity, - cfg->vector)) { - printk("Failed to setup ioapic entry for ioapic %d, pin %d\n", - mp_ioapics[apic].mp_apicid, pin); - __clear_irq_vector(irq); - return; - } + /* Mask level triggered irqs. + * Use IRQ_DELAYED_DISABLE for edge triggered irqs. + */ + if (trigger) + entry.mask = 1; ioapic_register_intr(irq, trigger); if (irq < 16) @@ -1088,9 +944,6 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin, { struct IO_APIC_route_entry entry; - if (intr_remapping_enabled) - return; - memset(&entry, 0, sizeof(entry)); /* @@ -1117,8 +970,7 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin, ioapic_write_entry(apic, pin, entry); } - -__apicdebuginit(void) print_IO_APIC(void) +void __apicdebuginit print_IO_APIC(void) { int apic, i; union IO_APIC_reg_00 reg_00; @@ -1212,7 +1064,9 @@ __apicdebuginit(void) print_IO_APIC(void) return; } -__apicdebuginit(void) print_APIC_bitfield(int base) +#if 0 + +static __apicdebuginit void print_APIC_bitfield (int base) { unsigned int v; int i, j; @@ -1233,10 +1087,9 @@ __apicdebuginit(void) print_APIC_bitfield(int base) } } -__apicdebuginit(void) print_local_APIC(void *dummy) +void __apicdebuginit print_local_APIC(void * dummy) { unsigned int v, ver, maxlvt; - unsigned long icr; if (apic_verbosity == APIC_QUIET) return; @@ -1244,7 +1097,7 @@ __apicdebuginit(void) print_local_APIC(void *dummy) printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n", smp_processor_id(), hard_smp_processor_id()); v = apic_read(APIC_ID); - printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, read_apic_id()); + printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, GET_APIC_ID(read_apic_id())); v = apic_read(APIC_LVR); printk(KERN_INFO "... APIC VERSION: %08x\n", v); ver = GET_APIC_VERSION(v); @@ -1280,9 +1133,10 @@ __apicdebuginit(void) print_local_APIC(void *dummy) v = apic_read(APIC_ESR); printk(KERN_DEBUG "... APIC ESR: %08x\n", v); - icr = apic_icr_read(); - printk(KERN_DEBUG "... APIC ICR: %08x\n", icr); - printk(KERN_DEBUG "... APIC ICR2: %08x\n", icr >> 32); + v = apic_read(APIC_ICR); + printk(KERN_DEBUG "... APIC ICR: %08x\n", v); + v = apic_read(APIC_ICR2); + printk(KERN_DEBUG "... APIC ICR2: %08x\n", v); v = apic_read(APIC_LVTT); printk(KERN_DEBUG "... APIC LVTT: %08x\n", v); @@ -1310,12 +1164,12 @@ __apicdebuginit(void) print_local_APIC(void *dummy) printk("\n"); } -__apicdebuginit(void) print_all_local_APICs(void) +void print_all_local_APICs (void) { on_each_cpu(print_local_APIC, NULL, 1); } -__apicdebuginit(void) print_PIC(void) +void __apicdebuginit print_PIC(void) { unsigned int v; unsigned long flags; @@ -1347,17 +1201,7 @@ __apicdebuginit(void) print_PIC(void) printk(KERN_DEBUG "... PIC ELCR: %04x\n", v); } -__apicdebuginit(int) print_all_ICs(void) -{ - print_PIC(); - print_all_local_APICs(); - print_IO_APIC(); - - return 0; -} - -fs_initcall(print_all_ICs); - +#endif /* 0 */ void __init enable_IO_APIC(void) { @@ -1447,7 +1291,7 @@ void disable_IO_APIC(void) entry.dest_mode = 0; /* Physical */ entry.delivery_mode = dest_ExtINT; /* ExtInt */ entry.vector = 0; - entry.dest = read_apic_id(); + entry.dest = GET_APIC_ID(read_apic_id()); /* * Add it to the IO-APIC irq-routing table: @@ -1553,147 +1397,6 @@ static int ioapic_retrigger_irq(unsigned int irq) */ #ifdef CONFIG_SMP - -#ifdef CONFIG_INTR_REMAP -static void ir_irq_migration(struct work_struct *work); - -static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration); - -/* - * Migrate the IO-APIC irq in the presence of intr-remapping. - * - * For edge triggered, irq migration is a simple atomic update(of vector - * and cpu destination) of IRTE and flush the hardware cache. - * - * For level triggered, we need to modify the io-apic RTE aswell with the update - * vector information, along with modifying IRTE with vector and destination. - * So irq migration for level triggered is little bit more complex compared to - * edge triggered migration. But the good news is, we use the same algorithm - * for level triggered migration as we have today, only difference being, - * we now initiate the irq migration from process context instead of the - * interrupt context. - * - * In future, when we do a directed EOI (combined with cpu EOI broadcast - * suppression) to the IO-APIC, level triggered irq migration will also be - * as simple as edge triggered migration and we can do the irq migration - * with a simple atomic update to IO-APIC RTE. - */ -static void migrate_ioapic_irq(int irq, cpumask_t mask) -{ - struct irq_cfg *cfg = irq_cfg + irq; - struct irq_desc *desc = irq_desc + irq; - cpumask_t tmp, cleanup_mask; - struct irte irte; - int modify_ioapic_rte = desc->status & IRQ_LEVEL; - unsigned int dest; - unsigned long flags; - - cpus_and(tmp, mask, cpu_online_map); - if (cpus_empty(tmp)) - return; - - if (get_irte(irq, &irte)) - return; - - if (assign_irq_vector(irq, mask)) - return; - - cpus_and(tmp, cfg->domain, mask); - dest = cpu_mask_to_apicid(tmp); - - if (modify_ioapic_rte) { - spin_lock_irqsave(&ioapic_lock, flags); - __target_IO_APIC_irq(irq, dest, cfg->vector); - spin_unlock_irqrestore(&ioapic_lock, flags); - } - - irte.vector = cfg->vector; - irte.dest_id = IRTE_DEST(dest); - - /* - * Modified the IRTE and flushes the Interrupt entry cache. - */ - modify_irte(irq, &irte); - - if (cfg->move_in_progress) { - cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); - cfg->move_cleanup_count = cpus_weight(cleanup_mask); - send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); - cfg->move_in_progress = 0; - } - - irq_desc[irq].affinity = mask; -} - -static int migrate_irq_remapped_level(int irq) -{ - int ret = -1; - - mask_IO_APIC_irq(irq); - - if (io_apic_level_ack_pending(irq)) { - /* - * Interrupt in progress. Migrating irq now will change the - * vector information in the IO-APIC RTE and that will confuse - * the EOI broadcast performed by cpu. - * So, delay the irq migration to the next instance. - */ - schedule_delayed_work(&ir_migration_work, 1); - goto unmask; - } - - /* everthing is clear. we have right of way */ - migrate_ioapic_irq(irq, irq_desc[irq].pending_mask); - - ret = 0; - irq_desc[irq].status &= ~IRQ_MOVE_PENDING; - cpus_clear(irq_desc[irq].pending_mask); - -unmask: - unmask_IO_APIC_irq(irq); - return ret; -} - -static void ir_irq_migration(struct work_struct *work) -{ - int irq; - - for (irq = 0; irq < NR_IRQS; irq++) { - struct irq_desc *desc = irq_desc + irq; - if (desc->status & IRQ_MOVE_PENDING) { - unsigned long flags; - - spin_lock_irqsave(&desc->lock, flags); - if (!desc->chip->set_affinity || - !(desc->status & IRQ_MOVE_PENDING)) { - desc->status &= ~IRQ_MOVE_PENDING; - spin_unlock_irqrestore(&desc->lock, flags); - continue; - } - - desc->chip->set_affinity(irq, - irq_desc[irq].pending_mask); - spin_unlock_irqrestore(&desc->lock, flags); - } - } -} - -/* - * Migrates the IRQ destination in the process context. - */ -static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) -{ - if (irq_desc[irq].status & IRQ_LEVEL) { - irq_desc[irq].status |= IRQ_MOVE_PENDING; - irq_desc[irq].pending_mask = mask; - migrate_irq_remapped_level(irq); - return; - } - - migrate_ioapic_irq(irq, mask); -} -#endif - asmlinkage void smp_irq_move_cleanup_interrupt(void) { unsigned vector, me; @@ -1750,17 +1453,6 @@ static void irq_complete_move(unsigned int irq) #else static inline void irq_complete_move(unsigned int irq) {} #endif -#ifdef CONFIG_INTR_REMAP -static void ack_x2apic_level(unsigned int irq) -{ - ack_x2APIC_irq(); -} - -static void ack_x2apic_edge(unsigned int irq) -{ - ack_x2APIC_irq(); -} -#endif static void ack_apic_edge(unsigned int irq) { @@ -1835,21 +1527,6 @@ static struct irq_chip ioapic_chip __read_mostly = { .retrigger = ioapic_retrigger_irq, }; -#ifdef CONFIG_INTR_REMAP -static struct irq_chip ir_ioapic_chip __read_mostly = { - .name = "IR-IO-APIC", - .startup = startup_ioapic_irq, - .mask = mask_IO_APIC_irq, - .unmask = unmask_IO_APIC_irq, - .ack = ack_x2apic_edge, - .eoi = ack_x2apic_level, -#ifdef CONFIG_SMP - .set_affinity = set_ir_ioapic_affinity_irq, -#endif - .retrigger = ioapic_retrigger_irq, -}; -#endif - static inline void init_IO_APIC_traps(void) { int irq; @@ -2035,8 +1712,6 @@ static inline void __init check_timer(void) * 8259A. */ if (pin1 == -1) { - if (intr_remapping_enabled) - panic("BIOS bug: timer not connected to IO-APIC"); pin1 = pin2; apic1 = apic2; no_pin1 = 1; @@ -2063,8 +1738,6 @@ static inline void __init check_timer(void) clear_IO_APIC_pin(0, pin1); goto out; } - if (intr_remapping_enabled) - panic("timer doesn't work through Interrupt-remapped IO-APIC"); clear_IO_APIC_pin(apic1, pin1); if (!no_pin1) apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: " @@ -2181,6 +1854,8 @@ void __init setup_IO_APIC(void) setup_IO_APIC_irqs(); init_IO_APIC_traps(); check_timer(); + if (!acpi_ioapic) + print_IO_APIC(); } struct sysfs_ioapic_data { @@ -2302,9 +1977,6 @@ void destroy_irq(unsigned int irq) dynamic_irq_cleanup(irq); -#ifdef CONFIG_INTR_REMAP - free_irte(irq); -#endif spin_lock_irqsave(&vector_lock, flags); __clear_irq_vector(irq); spin_unlock_irqrestore(&vector_lock, flags); @@ -2323,41 +1995,10 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms tmp = TARGET_CPUS; err = assign_irq_vector(irq, tmp); - if (err) - return err; - - cpus_and(tmp, cfg->domain, tmp); - dest = cpu_mask_to_apicid(tmp); - -#ifdef CONFIG_INTR_REMAP - if (irq_remapped(irq)) { - struct irte irte; - int ir_index; - u16 sub_handle; - - ir_index = map_irq_to_irte_handle(irq, &sub_handle); - BUG_ON(ir_index == -1); - - memset (&irte, 0, sizeof(irte)); - - irte.present = 1; - irte.dst_mode = INT_DEST_MODE; - irte.trigger_mode = 0; /* edge */ - irte.dlvry_mode = INT_DELIVERY_MODE; - irte.vector = cfg->vector; - irte.dest_id = IRTE_DEST(dest); - - modify_irte(irq, &irte); + if (!err) { + cpus_and(tmp, cfg->domain, tmp); + dest = cpu_mask_to_apicid(tmp); - msg->address_hi = MSI_ADDR_BASE_HI; - msg->data = sub_handle; - msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_IR_EXT_INT | - MSI_ADDR_IR_SHV | - MSI_ADDR_IR_INDEX1(ir_index) | - MSI_ADDR_IR_INDEX2(ir_index); - } else -#endif - { msg->address_hi = MSI_ADDR_BASE_HI; msg->address_lo = MSI_ADDR_BASE_LO | @@ -2408,55 +2049,6 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask) write_msi_msg(irq, &msg); irq_desc[irq].affinity = mask; } - -#ifdef CONFIG_INTR_REMAP -/* - * Migrate the MSI irq to another cpumask. This migration is - * done in the process context using interrupt-remapping hardware. - */ -static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask) -{ - struct irq_cfg *cfg = irq_cfg + irq; - unsigned int dest; - cpumask_t tmp, cleanup_mask; - struct irte irte; - - cpus_and(tmp, mask, cpu_online_map); - if (cpus_empty(tmp)) - return; - - if (get_irte(irq, &irte)) - return; - - if (assign_irq_vector(irq, mask)) - return; - - cpus_and(tmp, cfg->domain, mask); - dest = cpu_mask_to_apicid(tmp); - - irte.vector = cfg->vector; - irte.dest_id = IRTE_DEST(dest); - - /* - * atomically update the IRTE with the new destination and vector. - */ - modify_irte(irq, &irte); - - /* - * After this point, all the interrupts will start arriving - * at the new destination. So, time to cleanup the previous - * vector allocation. - */ - if (cfg->move_in_progress) { - cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); - cfg->move_cleanup_count = cpus_weight(cleanup_mask); - send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); - cfg->move_in_progress = 0; - } - - irq_desc[irq].affinity = mask; -} -#endif #endif /* CONFIG_SMP */ /* @@ -2474,157 +2066,26 @@ static struct irq_chip msi_chip = { .retrigger = ioapic_retrigger_irq, }; -#ifdef CONFIG_INTR_REMAP -static struct irq_chip msi_ir_chip = { - .name = "IR-PCI-MSI", - .unmask = unmask_msi_irq, - .mask = mask_msi_irq, - .ack = ack_x2apic_edge, -#ifdef CONFIG_SMP - .set_affinity = ir_set_msi_irq_affinity, -#endif - .retrigger = ioapic_retrigger_irq, -}; - -/* - * Map the PCI dev to the corresponding remapping hardware unit - * and allocate 'nvec' consecutive interrupt-remapping table entries - * in it. - */ -static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec) -{ - struct intel_iommu *iommu; - int index; - - iommu = map_dev_to_ir(dev); - if (!iommu) { - printk(KERN_ERR - "Unable to map PCI %s to iommu\n", pci_name(dev)); - return -ENOENT; - } - - index = alloc_irte(iommu, irq, nvec); - if (index < 0) { - printk(KERN_ERR - "Unable to allocate %d IRTE for PCI %s\n", nvec, - pci_name(dev)); - return -ENOSPC; - } - return index; -} -#endif - -static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq) -{ - int ret; - struct msi_msg msg; - - ret = msi_compose_msg(dev, irq, &msg); - if (ret < 0) - return ret; - - set_irq_msi(irq, desc); - write_msi_msg(irq, &msg); - -#ifdef CONFIG_INTR_REMAP - if (irq_remapped(irq)) { - struct irq_desc *desc = irq_desc + irq; - /* - * irq migration in process context - */ - desc->status |= IRQ_MOVE_PCNTXT; - set_irq_chip_and_handler_name(irq, &msi_ir_chip, handle_edge_irq, "edge"); - } else -#endif - set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge"); - - return 0; -} - int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) { + struct msi_msg msg; int irq, ret; - irq = create_irq(); if (irq < 0) return irq; -#ifdef CONFIG_INTR_REMAP - if (!intr_remapping_enabled) - goto no_ir; - - ret = msi_alloc_irte(dev, irq, 1); - if (ret < 0) - goto error; -no_ir: -#endif - ret = setup_msi_irq(dev, desc, irq); + ret = msi_compose_msg(dev, irq, &msg); if (ret < 0) { destroy_irq(irq); return ret; } - return 0; - -#ifdef CONFIG_INTR_REMAP -error: - destroy_irq(irq); - return ret; -#endif -} -int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) -{ - int irq, ret, sub_handle; - struct msi_desc *desc; -#ifdef CONFIG_INTR_REMAP - struct intel_iommu *iommu = 0; - int index = 0; -#endif + set_irq_msi(irq, desc); + write_msi_msg(irq, &msg); - sub_handle = 0; - list_for_each_entry(desc, &dev->msi_list, list) { - irq = create_irq(); - if (irq < 0) - return irq; -#ifdef CONFIG_INTR_REMAP - if (!intr_remapping_enabled) - goto no_ir; + set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge"); - if (!sub_handle) { - /* - * allocate the consecutive block of IRTE's - * for 'nvec' - */ - index = msi_alloc_irte(dev, irq, nvec); - if (index < 0) { - ret = index; - goto error; - } - } else { - iommu = map_dev_to_ir(dev); - if (!iommu) { - ret = -ENOENT; - goto error; - } - /* - * setup the mapping between the irq and the IRTE - * base index, the sub_handle pointing to the - * appropriate interrupt remap table entry. - */ - set_irte_irq(irq, iommu, index, sub_handle); - } -no_ir: -#endif - ret = setup_msi_irq(dev, desc, irq); - if (ret < 0) - goto error; - sub_handle++; - } return 0; - -error: - destroy_irq(irq); - return ret; } void arch_teardown_msi_irq(unsigned int irq) @@ -2872,10 +2333,6 @@ void __init setup_ioapic_dest(void) setup_IO_APIC_irq(ioapic, pin, irq, irq_trigger(irq_entry), irq_polarity(irq_entry)); -#ifdef CONFIG_INTR_REMAP - else if (intr_remapping_enabled) - set_ir_ioapic_affinity_irq(irq, TARGET_CPUS); -#endif else set_ioapic_affinity_irq(irq, TARGET_CPUS); } diff --git a/trunk/arch/x86/kernel/irqinit_32.c b/trunk/arch/x86/kernel/irqinit_32.c index 9200a1e2752d..d66914287ee1 100644 --- a/trunk/arch/x86/kernel/irqinit_32.c +++ b/trunk/arch/x86/kernel/irqinit_32.c @@ -74,15 +74,6 @@ void __init init_ISA_irqs (void) } } -/* - * IRQ2 is cascade interrupt to second interrupt controller - */ -static struct irqaction irq2 = { - .handler = no_action, - .mask = CPU_MASK_NONE, - .name = "cascade", -}; - /* Overridden in paravirt.c */ void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ"))); @@ -107,46 +98,6 @@ void __init native_init_IRQ(void) set_intr_gate(vector, interrupt[i]); } -#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_SMP) - /* - * IRQ0 must be given a fixed assignment and initialized, - * because it's used before the IO-APIC is set up. - */ - set_intr_gate(FIRST_DEVICE_VECTOR, interrupt[0]); - - /* - * The reschedule interrupt is a CPU-to-CPU reschedule-helper - * IPI, driven by wakeup. - */ - alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt); - - /* IPI for invalidation */ - alloc_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt); - - /* IPI for generic function call */ - alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); - - /* IPI for single call function */ - set_intr_gate(CALL_FUNCTION_SINGLE_VECTOR, call_function_single_interrupt); -#endif - -#ifdef CONFIG_X86_LOCAL_APIC - /* self generated IPI for local APIC timer */ - alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt); - - /* IPI vectors for APIC spurious and error interrupts */ - alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); - alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt); -#endif - -#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_MCE_P4THERMAL) - /* thermal monitor LVT interrupt */ - alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); -#endif - - if (!acpi_ioapic) - setup_irq(2, &irq2); - /* setup after call gates are initialised (usually add in * the architecture specific gates) */ diff --git a/trunk/arch/x86/kernel/mpparse.c b/trunk/arch/x86/kernel/mpparse.c index f98f4e1dba09..b3fb430725cb 100644 --- a/trunk/arch/x86/kernel/mpparse.c +++ b/trunk/arch/x86/kernel/mpparse.c @@ -397,9 +397,7 @@ static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early) generic_bigsmp_probe(); #endif -#ifdef CONFIG_X86_32 setup_apic_routing(); -#endif if (!num_processors) printk(KERN_ERR "MPTABLE: no processors registered!\n"); return num_processors; diff --git a/trunk/arch/x86/kernel/numaq_32.c b/trunk/arch/x86/kernel/numaq_32.c index 4caff39078e0..eecc8c18f010 100644 --- a/trunk/arch/x86/kernel/numaq_32.c +++ b/trunk/arch/x86/kernel/numaq_32.c @@ -229,12 +229,6 @@ static void __init smp_read_mpc_oem(struct mp_config_oemtable *oemtable, } } -static int __init numaq_setup_ioapic_ids(void) -{ - /* so can skip it */ - return 1; -} - static struct x86_quirks numaq_x86_quirks __initdata = { .arch_pre_time_init = numaq_pre_time_init, .arch_time_init = NULL, @@ -249,7 +243,6 @@ static struct x86_quirks numaq_x86_quirks __initdata = { .mpc_oem_bus_info = mpc_oem_bus_info, .mpc_oem_pci_bus = mpc_oem_pci_bus, .smp_read_mpc_oem = smp_read_mpc_oem, - .setup_ioapic_ids = numaq_setup_ioapic_ids, }; void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem, diff --git a/trunk/arch/x86/kernel/paravirt.c b/trunk/arch/x86/kernel/paravirt.c index 6b0bb73998dd..e2f43768723a 100644 --- a/trunk/arch/x86/kernel/paravirt.c +++ b/trunk/arch/x86/kernel/paravirt.c @@ -374,6 +374,8 @@ struct pv_cpu_ops pv_cpu_ops = { struct pv_apic_ops pv_apic_ops = { #ifdef CONFIG_X86_LOCAL_APIC + .apic_write = native_apic_write, + .apic_read = native_apic_read, .setup_boot_clock = setup_boot_APIC_clock, .setup_secondary_clock = setup_secondary_APIC_clock, .startup_ipi_hook = paravirt_nop, diff --git a/trunk/arch/x86/kernel/process.c b/trunk/arch/x86/kernel/process.c index c622772744d8..ec7a2ba9bce8 100644 --- a/trunk/arch/x86/kernel/process.c +++ b/trunk/arch/x86/kernel/process.c @@ -15,6 +15,7 @@ unsigned long idle_nomwait; EXPORT_SYMBOL(idle_nomwait); struct kmem_cache *task_xstate_cachep; +static int force_mwait __cpuinitdata; int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) { diff --git a/trunk/arch/x86/kernel/setup.c b/trunk/arch/x86/kernel/setup.c index 46c98efbbf8d..141efab52400 100644 --- a/trunk/arch/x86/kernel/setup.c +++ b/trunk/arch/x86/kernel/setup.c @@ -758,8 +758,6 @@ void __init setup_arch(char **cmdline_p) #else num_physpages = max_pfn; - if (cpu_has_x2apic) - check_x2apic(); /* How many end-of-memory variables you have, grandma! */ /* need this before calling reserve_initrd */ diff --git a/trunk/arch/x86/kernel/sigframe.h b/trunk/arch/x86/kernel/sigframe.h index cc673aa55ce4..8b4956e800ac 100644 --- a/trunk/arch/x86/kernel/sigframe.h +++ b/trunk/arch/x86/kernel/sigframe.h @@ -3,18 +3,9 @@ struct sigframe { char __user *pretcode; int sig; struct sigcontext sc; - /* - * fpstate is unused. fpstate is moved/allocated after - * retcode[] below. This movement allows to have the FP state and the - * future state extensions (xsave) stay together. - * And at the same time retaining the unused fpstate, prevents changing - * the offset of extramask[] in the sigframe and thus prevent any - * legacy application accessing/modifying it. - */ - struct _fpstate fpstate_unused; + struct _fpstate fpstate; unsigned long extramask[_NSIG_WORDS-1]; char retcode[8]; - /* fp state follows here */ }; struct rt_sigframe { @@ -24,15 +15,14 @@ struct rt_sigframe { void __user *puc; struct siginfo info; struct ucontext uc; + struct _fpstate fpstate; char retcode[8]; - /* fp state follows here */ }; #else struct rt_sigframe { char __user *pretcode; struct ucontext uc; struct siginfo info; - /* fp state follows here */ }; int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, diff --git a/trunk/arch/x86/kernel/signal_32.c b/trunk/arch/x86/kernel/signal_32.c index b21070ea33a4..2a2435d3037d 100644 --- a/trunk/arch/x86/kernel/signal_32.c +++ b/trunk/arch/x86/kernel/signal_32.c @@ -161,14 +161,28 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, } { - void __user *buf; + struct _fpstate __user *buf; err |= __get_user(buf, &sc->fpstate); - err |= restore_i387_xstate(buf); + if (buf) { + if (!access_ok(VERIFY_READ, buf, sizeof(*buf))) + goto badframe; + err |= restore_i387(buf); + } else { + struct task_struct *me = current; + + if (used_math()) { + clear_fpu(me); + clear_used_math(); + } + } } err |= __get_user(*pax, &sc->ax); return err; + +badframe: + return 1; } asmlinkage unsigned long sys_sigreturn(unsigned long __unused) @@ -250,7 +264,7 @@ asmlinkage int sys_rt_sigreturn(unsigned long __unused) * Set up a signal frame. */ static int -setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate, +setup_sigcontext(struct sigcontext __user *sc, struct _fpstate __user *fpstate, struct pt_regs *regs, unsigned long mask) { int tmp, err = 0; @@ -277,7 +291,7 @@ setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate, err |= __put_user(regs->sp, &sc->sp_at_signal); err |= __put_user(regs->ss, (unsigned int __user *)&sc->ss); - tmp = save_i387_xstate(fpstate); + tmp = save_i387(fpstate); if (tmp < 0) err = 1; else @@ -294,8 +308,7 @@ setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate, * Determine which stack to use.. */ static inline void __user * -get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, - void **fpstate) +get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size) { unsigned long sp; @@ -321,11 +334,6 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, sp = (unsigned long) ka->sa.sa_restorer; } - if (used_math()) { - sp = sp - sig_xstate_size; - *fpstate = (struct _fpstate *) sp; - } - sp -= frame_size; /* * Align the stack pointer according to the i386 ABI, @@ -344,9 +352,8 @@ setup_frame(int sig, struct k_sigaction *ka, sigset_t *set, void __user *restorer; int err = 0; int usig; - void __user *fpstate = NULL; - frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate); + frame = get_sigframe(ka, regs, sizeof(*frame)); if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) goto give_sigsegv; @@ -361,7 +368,7 @@ setup_frame(int sig, struct k_sigaction *ka, sigset_t *set, if (err) goto give_sigsegv; - err = setup_sigcontext(&frame->sc, fpstate, regs, set->sig[0]); + err = setup_sigcontext(&frame->sc, &frame->fpstate, regs, set->sig[0]); if (err) goto give_sigsegv; @@ -422,9 +429,8 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, void __user *restorer; int err = 0; int usig; - void __user *fpstate = NULL; - frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate); + frame = get_sigframe(ka, regs, sizeof(*frame)); if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) goto give_sigsegv; @@ -443,16 +449,13 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, goto give_sigsegv; /* Create the ucontext. */ - if (cpu_has_xsave) - err |= __put_user(UC_FP_XSTATE, &frame->uc.uc_flags); - else - err |= __put_user(0, &frame->uc.uc_flags); + err |= __put_user(0, &frame->uc.uc_flags); err |= __put_user(0, &frame->uc.uc_link); err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); err |= __put_user(sas_ss_flags(regs->sp), &frame->uc.uc_stack.ss_flags); err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size); - err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate, + err |= setup_sigcontext(&frame->uc.uc_mcontext, &frame->fpstate, regs, set->sig[0]); err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); if (err) diff --git a/trunk/arch/x86/kernel/signal_64.c b/trunk/arch/x86/kernel/signal_64.c index 823a55bf8c39..694aa888bb19 100644 --- a/trunk/arch/x86/kernel/signal_64.c +++ b/trunk/arch/x86/kernel/signal_64.c @@ -52,6 +52,69 @@ sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss, return do_sigaltstack(uss, uoss, regs->sp); } +/* + * Signal frame handlers. + */ + +static inline int save_i387(struct _fpstate __user *buf) +{ + struct task_struct *tsk = current; + int err = 0; + + BUILD_BUG_ON(sizeof(struct user_i387_struct) != + sizeof(tsk->thread.xstate->fxsave)); + + if ((unsigned long)buf % 16) + printk("save_i387: bad fpstate %p\n", buf); + + if (!used_math()) + return 0; + clear_used_math(); /* trigger finit */ + if (task_thread_info(tsk)->status & TS_USEDFPU) { + err = save_i387_checking((struct i387_fxsave_struct __user *) + buf); + if (err) + return err; + task_thread_info(tsk)->status &= ~TS_USEDFPU; + stts(); + } else { + if (__copy_to_user(buf, &tsk->thread.xstate->fxsave, + sizeof(struct i387_fxsave_struct))) + return -1; + } + return 1; +} + +/* + * This restores directly out of user space. Exceptions are handled. + */ +static inline int restore_i387(struct _fpstate __user *buf) +{ + struct task_struct *tsk = current; + int err; + + if (!used_math()) { + err = init_fpu(tsk); + if (err) + return err; + } + + if (!(task_thread_info(current)->status & TS_USEDFPU)) { + clts(); + task_thread_info(current)->status |= TS_USEDFPU; + } + err = restore_fpu_checking((__force struct i387_fxsave_struct *)buf); + if (unlikely(err)) { + /* + * Encountered an error while doing the restore from the + * user buffer, clear the fpu state. + */ + clear_fpu(tsk); + clear_used_math(); + } + return err; +} + /* * Do a signal return; undo the signal stack. */ @@ -96,11 +159,25 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, { struct _fpstate __user *buf; err |= __get_user(buf, &sc->fpstate); - err |= restore_i387_xstate(buf); + + if (buf) { + if (!access_ok(VERIFY_READ, buf, sizeof(*buf))) + goto badframe; + err |= restore_i387(buf); + } else { + struct task_struct *me = current; + if (used_math()) { + clear_fpu(me); + clear_used_math(); + } + } } err |= __get_user(*pax, &sc->ax); return err; + +badframe: + return 1; } asmlinkage long sys_rt_sigreturn(struct pt_regs *regs) @@ -192,23 +269,26 @@ get_stack(struct k_sigaction *ka, struct pt_regs *regs, unsigned long size) sp = current->sas_ss_sp + current->sas_ss_size; } - return (void __user *)round_down(sp - size, 64); + return (void __user *)round_down(sp - size, 16); } static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *set, struct pt_regs *regs) { struct rt_sigframe __user *frame; - void __user *fp = NULL; + struct _fpstate __user *fp = NULL; int err = 0; struct task_struct *me = current; if (used_math()) { - fp = get_stack(ka, regs, sig_xstate_size); + fp = get_stack(ka, regs, sizeof(struct _fpstate)); frame = (void __user *)round_down( (unsigned long)fp - sizeof(struct rt_sigframe), 16) - 8; - if (save_i387_xstate(fp) < 0) + if (!access_ok(VERIFY_WRITE, fp, sizeof(struct _fpstate))) + goto give_sigsegv; + + if (save_i387(fp) < 0) err |= -1; } else frame = get_stack(ka, regs, sizeof(struct rt_sigframe)) - 8; @@ -223,10 +303,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, } /* Create the ucontext. */ - if (cpu_has_xsave) - err |= __put_user(UC_FP_XSTATE, &frame->uc.uc_flags); - else - err |= __put_user(0, &frame->uc.uc_flags); + err |= __put_user(0, &frame->uc.uc_flags); err |= __put_user(0, &frame->uc.uc_link); err |= __put_user(me->sas_ss_sp, &frame->uc.uc_stack.ss_sp); err |= __put_user(sas_ss_flags(regs->sp), diff --git a/trunk/arch/x86/kernel/smpboot.c b/trunk/arch/x86/kernel/smpboot.c index 9056f7e272c0..4e7ccb0e2a9b 100644 --- a/trunk/arch/x86/kernel/smpboot.c +++ b/trunk/arch/x86/kernel/smpboot.c @@ -123,6 +123,7 @@ EXPORT_PER_CPU_SYMBOL(cpu_info); static atomic_t init_deasserted; +static int boot_cpu_logical_apicid; /* representing cpus for which sibling maps can be computed */ static cpumask_t cpu_sibling_setup_map; @@ -164,8 +165,6 @@ static void unmap_cpu_to_node(int cpu) #endif #ifdef CONFIG_X86_32 -static int boot_cpu_logical_apicid; - u8 cpu_2_logical_apicid[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID }; @@ -211,7 +210,7 @@ static void __cpuinit smp_callin(void) /* * (This works even if the APIC is not enabled.) */ - phys_id = read_apic_id(); + phys_id = GET_APIC_ID(read_apic_id()); cpuid = smp_processor_id(); if (cpu_isset(cpuid, cpu_callin_map)) { panic("%s: phys CPU#%d, CPU#%d already present??\n", __func__, @@ -552,7 +551,8 @@ static inline void __inquire_remote_apic(int apicid) printk(KERN_CONT "a previous APIC delivery may have failed\n"); - apic_icr_write(APIC_DM_REMRD | regs[i], apicid); + apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(apicid)); + apic_write(APIC_ICR, APIC_DM_REMRD | regs[i]); timeout = 0; do { @@ -584,9 +584,11 @@ wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip) int maxlvt; /* Target chip */ + apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(logical_apicid)); + /* Boot on the stack */ /* Kick the second */ - apic_icr_write(APIC_DM_NMI | APIC_DEST_LOGICAL, logical_apicid); + apic_write(APIC_ICR, APIC_DM_NMI | APIC_DEST_LOGICAL); pr_debug("Waiting for send to finish...\n"); send_status = safe_apic_wait_icr_idle(); @@ -639,11 +641,13 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) /* * Turn INIT on target chip */ + apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); + /* * Send IPI */ - apic_icr_write(APIC_INT_LEVELTRIG | APIC_INT_ASSERT | APIC_DM_INIT, - phys_apicid); + apic_write(APIC_ICR, + APIC_INT_LEVELTRIG | APIC_INT_ASSERT | APIC_DM_INIT); pr_debug("Waiting for send to finish...\n"); send_status = safe_apic_wait_icr_idle(); @@ -653,8 +657,10 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) pr_debug("Deasserting INIT.\n"); /* Target chip */ + apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); + /* Send IPI */ - apic_icr_write(APIC_INT_LEVELTRIG | APIC_DM_INIT, phys_apicid); + apic_write(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT); pr_debug("Waiting for send to finish...\n"); send_status = safe_apic_wait_icr_idle(); @@ -697,10 +703,11 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) */ /* Target chip */ + apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); + /* Boot on the stack */ /* Kick the second */ - apic_icr_write(APIC_DM_STARTUP | (start_eip >> 12), - phys_apicid); + apic_write(APIC_ICR, APIC_DM_STARTUP | (start_eip >> 12)); /* * Give the other CPU some time to accept the IPI. @@ -1169,17 +1176,10 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) * Setup boot CPU information */ smp_store_cpu_info(0); /* Final full version of the data */ -#ifdef CONFIG_X86_32 boot_cpu_logical_apicid = logical_smp_processor_id(); -#endif current_thread_info()->cpu = 0; /* needed? */ set_cpu_sibling_map(0); -#ifdef CONFIG_X86_64 - enable_IR_x2apic(); - setup_apic_routing(); -#endif - if (smp_sanity_check(max_cpus) < 0) { printk(KERN_INFO "SMP disabled\n"); disable_smp(); @@ -1187,9 +1187,9 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) } preempt_disable(); - if (read_apic_id() != boot_cpu_physical_apicid) { + if (GET_APIC_ID(read_apic_id()) != boot_cpu_physical_apicid) { panic("Boot APIC ID in local APIC unexpected (%d vs %d)", - read_apic_id(), boot_cpu_physical_apicid); + GET_APIC_ID(read_apic_id()), boot_cpu_physical_apicid); /* Or can we switch back to PIC here? */ } preempt_enable(); diff --git a/trunk/arch/x86/kernel/summit_32.c b/trunk/arch/x86/kernel/summit_32.c index 7b987852e876..d67ce5f044ba 100644 --- a/trunk/arch/x86/kernel/summit_32.c +++ b/trunk/arch/x86/kernel/summit_32.c @@ -30,7 +30,7 @@ #include #include #include -#include +#include static struct rio_table_hdr *rio_table_hdr __initdata; static struct scal_detail *scal_devs[MAX_NUMNODES] __initdata; diff --git a/trunk/arch/x86/kernel/traps_32.c b/trunk/arch/x86/kernel/traps_32.c index da5a5964fccb..03df8e45e5a1 100644 --- a/trunk/arch/x86/kernel/traps_32.c +++ b/trunk/arch/x86/kernel/traps_32.c @@ -1228,6 +1228,7 @@ void __init trap_init(void) set_bit(SYSCALL_VECTOR, used_vectors); + init_thread_xstate(); /* * Should be a barrier for any external CPU state: */ diff --git a/trunk/arch/x86/kernel/traps_64.c b/trunk/arch/x86/kernel/traps_64.c index 2887a789e38f..7a31f104bef9 100644 --- a/trunk/arch/x86/kernel/traps_64.c +++ b/trunk/arch/x86/kernel/traps_64.c @@ -1138,7 +1138,7 @@ asmlinkage void math_state_restore(void) /* * Paranoid restore. send a SIGSEGV if we fail to restore the state. */ - if (unlikely(restore_fpu_checking(me))) { + if (unlikely(restore_fpu_checking(&me->thread.xstate->fxsave))) { stts(); force_sig(SIGSEGV, me); return; @@ -1178,6 +1178,10 @@ void __init trap_init(void) #ifdef CONFIG_IA32_EMULATION set_system_gate(IA32_SYSCALL_VECTOR, ia32_syscall); #endif + /* + * initialize the per thread extended state: + */ + init_thread_xstate(); /* * Should be a barrier for any external CPU state: */ diff --git a/trunk/arch/x86/kernel/vmi_32.c b/trunk/arch/x86/kernel/vmi_32.c index 8b6c393ab9fd..8c9ad02af5a2 100644 --- a/trunk/arch/x86/kernel/vmi_32.c +++ b/trunk/arch/x86/kernel/vmi_32.c @@ -905,8 +905,8 @@ static inline int __init activate_vmi(void) #endif #ifdef CONFIG_X86_LOCAL_APIC - para_fill(apic_ops->read, APICRead); - para_fill(apic_ops->write, APICWrite); + para_fill(pv_apic_ops.apic_read, APICRead); + para_fill(pv_apic_ops.apic_write, APICWrite); #endif /* diff --git a/trunk/arch/x86/kernel/vmlinux_32.lds.S b/trunk/arch/x86/kernel/vmlinux_32.lds.S index a9b8560adbc2..af5bdad84604 100644 --- a/trunk/arch/x86/kernel/vmlinux_32.lds.S +++ b/trunk/arch/x86/kernel/vmlinux_32.lds.S @@ -140,10 +140,10 @@ SECTIONS *(.con_initcall.init) __con_initcall_end = .; } - .x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) { - __x86_cpu_dev_start = .; - *(.x86_cpu_dev.init) - __x86_cpu_dev_end = .; + .x86cpuvendor.init : AT(ADDR(.x86cpuvendor.init) - LOAD_OFFSET) { + __x86cpuvendor_start = .; + *(.x86cpuvendor.init) + __x86cpuvendor_end = .; } SECURITY_INIT . = ALIGN(4); @@ -180,7 +180,6 @@ SECTIONS . = ALIGN(PAGE_SIZE); .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { __per_cpu_start = .; - *(.data.percpu.page_aligned) *(.data.percpu) *(.data.percpu.shared_aligned) __per_cpu_end = .; diff --git a/trunk/arch/x86/kernel/vmlinux_64.lds.S b/trunk/arch/x86/kernel/vmlinux_64.lds.S index 201e81a91a95..63e5c1a22e88 100644 --- a/trunk/arch/x86/kernel/vmlinux_64.lds.S +++ b/trunk/arch/x86/kernel/vmlinux_64.lds.S @@ -168,12 +168,13 @@ SECTIONS *(.con_initcall.init) } __con_initcall_end = .; - __x86_cpu_dev_start = .; - .x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) { - *(.x86_cpu_dev.init) + . = ALIGN(16); + __x86cpuvendor_start = .; + .x86cpuvendor.init : AT(ADDR(.x86cpuvendor.init) - LOAD_OFFSET) { + *(.x86cpuvendor.init) } + __x86cpuvendor_end = .; SECURITY_INIT - __x86_cpu_dev_end = .; . = ALIGN(8); .parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) { diff --git a/trunk/arch/x86/kernel/xsave.c b/trunk/arch/x86/kernel/xsave.c deleted file mode 100644 index 07713d64debe..000000000000 --- a/trunk/arch/x86/kernel/xsave.c +++ /dev/null @@ -1,316 +0,0 @@ -/* - * xsave/xrstor support. - * - * Author: Suresh Siddha - */ -#include -#include -#include -#ifdef CONFIG_IA32_EMULATION -#include -#endif -#include - -/* - * Supported feature mask by the CPU and the kernel. - */ -u64 pcntxt_mask; - -struct _fpx_sw_bytes fx_sw_reserved; -#ifdef CONFIG_IA32_EMULATION -struct _fpx_sw_bytes fx_sw_reserved_ia32; -#endif - -/* - * Check for the presence of extended state information in the - * user fpstate pointer in the sigcontext. - */ -int check_for_xstate(struct i387_fxsave_struct __user *buf, - void __user *fpstate, - struct _fpx_sw_bytes *fx_sw_user) -{ - int min_xstate_size = sizeof(struct i387_fxsave_struct) + - sizeof(struct xsave_hdr_struct); - unsigned int magic2; - int err; - - err = __copy_from_user(fx_sw_user, &buf->sw_reserved[0], - sizeof(struct _fpx_sw_bytes)); - - if (err) - return err; - - /* - * First Magic check failed. - */ - if (fx_sw_user->magic1 != FP_XSTATE_MAGIC1) - return -1; - - /* - * Check for error scenarios. - */ - if (fx_sw_user->xstate_size < min_xstate_size || - fx_sw_user->xstate_size > xstate_size || - fx_sw_user->xstate_size > fx_sw_user->extended_size) - return -1; - - err = __get_user(magic2, (__u32 *) (((void *)fpstate) + - fx_sw_user->extended_size - - FP_XSTATE_MAGIC2_SIZE)); - /* - * Check for the presence of second magic word at the end of memory - * layout. This detects the case where the user just copied the legacy - * fpstate layout with out copying the extended state information - * in the memory layout. - */ - if (err || magic2 != FP_XSTATE_MAGIC2) - return -1; - - return 0; -} - -#ifdef CONFIG_X86_64 -/* - * Signal frame handlers. - */ - -int save_i387_xstate(void __user *buf) -{ - struct task_struct *tsk = current; - int err = 0; - - if (!access_ok(VERIFY_WRITE, buf, sig_xstate_size)) - return -EACCES; - - BUG_ON(sig_xstate_size < xstate_size); - - if ((unsigned long)buf % 64) - printk("save_i387_xstate: bad fpstate %p\n", buf); - - if (!used_math()) - return 0; - clear_used_math(); /* trigger finit */ - if (task_thread_info(tsk)->status & TS_USEDFPU) { - /* - * Start with clearing the user buffer. This will present a - * clean context for the bytes not touched by the fxsave/xsave. - */ - __clear_user(buf, sig_xstate_size); - - if (task_thread_info(tsk)->status & TS_XSAVE) - err = xsave_user(buf); - else - err = fxsave_user(buf); - - if (err) - return err; - task_thread_info(tsk)->status &= ~TS_USEDFPU; - stts(); - } else { - if (__copy_to_user(buf, &tsk->thread.xstate->fxsave, - xstate_size)) - return -1; - } - - if (task_thread_info(tsk)->status & TS_XSAVE) { - struct _fpstate __user *fx = buf; - - err = __copy_to_user(&fx->sw_reserved, &fx_sw_reserved, - sizeof(struct _fpx_sw_bytes)); - - err |= __put_user(FP_XSTATE_MAGIC2, - (__u32 __user *) (buf + sig_xstate_size - - FP_XSTATE_MAGIC2_SIZE)); - } - - return 1; -} - -/* - * Restore the extended state if present. Otherwise, restore the FP/SSE - * state. - */ -int restore_user_xstate(void __user *buf) -{ - struct _fpx_sw_bytes fx_sw_user; - u64 mask; - int err; - - if (((unsigned long)buf % 64) || - check_for_xstate(buf, buf, &fx_sw_user)) - goto fx_only; - - mask = fx_sw_user.xstate_bv; - - /* - * restore the state passed by the user. - */ - err = xrestore_user(buf, mask); - if (err) - return err; - - /* - * init the state skipped by the user. - */ - mask = pcntxt_mask & ~mask; - - xrstor_state(init_xstate_buf, mask); - - return 0; - -fx_only: - /* - * couldn't find the extended state information in the - * memory layout. Restore just the FP/SSE and init all - * the other extended state. - */ - xrstor_state(init_xstate_buf, pcntxt_mask & ~XSTATE_FPSSE); - return fxrstor_checking((__force struct i387_fxsave_struct *)buf); -} - -/* - * This restores directly out of user space. Exceptions are handled. - */ -int restore_i387_xstate(void __user *buf) -{ - struct task_struct *tsk = current; - int err = 0; - - if (!buf) { - if (used_math()) - goto clear; - return 0; - } else - if (!access_ok(VERIFY_READ, buf, sig_xstate_size)) - return -EACCES; - - if (!used_math()) { - err = init_fpu(tsk); - if (err) - return err; - } - - if (!(task_thread_info(current)->status & TS_USEDFPU)) { - clts(); - task_thread_info(current)->status |= TS_USEDFPU; - } - if (task_thread_info(tsk)->status & TS_XSAVE) - err = restore_user_xstate(buf); - else - err = fxrstor_checking((__force struct i387_fxsave_struct *) - buf); - if (unlikely(err)) { - /* - * Encountered an error while doing the restore from the - * user buffer, clear the fpu state. - */ -clear: - clear_fpu(tsk); - clear_used_math(); - } - return err; -} -#endif - -/* - * Prepare the SW reserved portion of the fxsave memory layout, indicating - * the presence of the extended state information in the memory layout - * pointed by the fpstate pointer in the sigcontext. - * This will be saved when ever the FP and extended state context is - * saved on the user stack during the signal handler delivery to the user. - */ -void prepare_fx_sw_frame(void) -{ - int size_extended = (xstate_size - sizeof(struct i387_fxsave_struct)) + - FP_XSTATE_MAGIC2_SIZE; - - sig_xstate_size = sizeof(struct _fpstate) + size_extended; - -#ifdef CONFIG_IA32_EMULATION - sig_xstate_ia32_size = sizeof(struct _fpstate_ia32) + size_extended; -#endif - - memset(&fx_sw_reserved, 0, sizeof(fx_sw_reserved)); - - fx_sw_reserved.magic1 = FP_XSTATE_MAGIC1; - fx_sw_reserved.extended_size = sig_xstate_size; - fx_sw_reserved.xstate_bv = pcntxt_mask; - fx_sw_reserved.xstate_size = xstate_size; -#ifdef CONFIG_IA32_EMULATION - memcpy(&fx_sw_reserved_ia32, &fx_sw_reserved, - sizeof(struct _fpx_sw_bytes)); - fx_sw_reserved_ia32.extended_size = sig_xstate_ia32_size; -#endif -} - -/* - * Represents init state for the supported extended state. - */ -struct xsave_struct *init_xstate_buf; - -#ifdef CONFIG_X86_64 -unsigned int sig_xstate_size = sizeof(struct _fpstate); -#endif - -/* - * Enable the extended processor state save/restore feature - */ -void __cpuinit xsave_init(void) -{ - if (!cpu_has_xsave) - return; - - set_in_cr4(X86_CR4_OSXSAVE); - - /* - * Enable all the features that the HW is capable of - * and the Linux kernel is aware of. - */ - xsetbv(XCR_XFEATURE_ENABLED_MASK, pcntxt_mask); -} - -/* - * setup the xstate image representing the init state - */ -void setup_xstate_init(void) -{ - init_xstate_buf = alloc_bootmem(xstate_size); - init_xstate_buf->i387.mxcsr = MXCSR_DEFAULT; -} - -/* - * Enable and initialize the xsave feature. - */ -void __init xsave_cntxt_init(void) -{ - unsigned int eax, ebx, ecx, edx; - - cpuid_count(0xd, 0, &eax, &ebx, &ecx, &edx); - pcntxt_mask = eax + ((u64)edx << 32); - - if ((pcntxt_mask & XSTATE_FPSSE) != XSTATE_FPSSE) { - printk(KERN_ERR "FP/SSE not shown under xsave features 0x%llx\n", - pcntxt_mask); - BUG(); - } - - /* - * for now OS knows only about FP/SSE - */ - pcntxt_mask = pcntxt_mask & XCNTXT_MASK; - xsave_init(); - - /* - * Recompute the context size for enabled features - */ - cpuid_count(0xd, 0, &eax, &ebx, &ecx, &edx); - xstate_size = ebx; - - prepare_fx_sw_frame(); - - setup_xstate_init(); - - printk(KERN_INFO "xsave/xrstor: enabled xstate_bv 0x%llx, " - "cntxt size 0x%x\n", - pcntxt_mask, xstate_size); -} diff --git a/trunk/arch/x86/kvm/vmx.h b/trunk/arch/x86/kvm/vmx.h index 17e25995b65b..23e8373507ad 100644 --- a/trunk/arch/x86/kvm/vmx.h +++ b/trunk/arch/x86/kvm/vmx.h @@ -331,6 +331,21 @@ enum vmcs_field { #define AR_RESERVD_MASK 0xfffe0f00 +#define MSR_IA32_VMX_BASIC 0x480 +#define MSR_IA32_VMX_PINBASED_CTLS 0x481 +#define MSR_IA32_VMX_PROCBASED_CTLS 0x482 +#define MSR_IA32_VMX_EXIT_CTLS 0x483 +#define MSR_IA32_VMX_ENTRY_CTLS 0x484 +#define MSR_IA32_VMX_MISC 0x485 +#define MSR_IA32_VMX_CR0_FIXED0 0x486 +#define MSR_IA32_VMX_CR0_FIXED1 0x487 +#define MSR_IA32_VMX_CR4_FIXED0 0x488 +#define MSR_IA32_VMX_CR4_FIXED1 0x489 +#define MSR_IA32_VMX_VMCS_ENUM 0x48a +#define MSR_IA32_VMX_PROCBASED_CTLS2 0x48b +#define MSR_IA32_VMX_EPT_VPID_CAP 0x48c + +#define MSR_IA32_FEATURE_CONTROL 0x3a #define MSR_IA32_FEATURE_CONTROL_LOCKED 0x1 #define MSR_IA32_FEATURE_CONTROL_VMXON_ENABLED 0x4 diff --git a/trunk/arch/x86/lguest/boot.c b/trunk/arch/x86/lguest/boot.c index 65f0b8a47bed..d9249a882aa5 100644 --- a/trunk/arch/x86/lguest/boot.c +++ b/trunk/arch/x86/lguest/boot.c @@ -55,7 +55,6 @@ #include #include #include -#include #include #include #include @@ -784,44 +783,14 @@ static void lguest_wbinvd(void) * code qualifies for Advanced. It will also never interrupt anything. It * does, however, allow us to get through the Linux boot code. */ #ifdef CONFIG_X86_LOCAL_APIC -static void lguest_apic_write(u32 reg, u32 v) +static void lguest_apic_write(unsigned long reg, u32 v) { } -static u32 lguest_apic_read(u32 reg) +static u32 lguest_apic_read(unsigned long reg) { return 0; } - -static u64 lguest_apic_icr_read(void) -{ - return 0; -} - -static void lguest_apic_icr_write(u32 low, u32 id) -{ - /* Warn to see if there's any stray references */ - WARN_ON(1); -} - -static void lguest_apic_wait_icr_idle(void) -{ - return; -} - -static u32 lguest_apic_safe_wait_icr_idle(void) -{ - return 0; -} - -static struct apic_ops lguest_basic_apic_ops = { - .read = lguest_apic_read, - .write = lguest_apic_write, - .icr_read = lguest_apic_icr_read, - .icr_write = lguest_apic_icr_write, - .wait_icr_idle = lguest_apic_wait_icr_idle, - .safe_wait_icr_idle = lguest_apic_safe_wait_icr_idle, -}; #endif /* STOP! Until an interrupt comes in. */ @@ -1021,7 +990,8 @@ __init void lguest_init(void) #ifdef CONFIG_X86_LOCAL_APIC /* apic read/write intercepts */ - apic_ops = &lguest_basic_apic_ops; + pv_apic_ops.apic_write = lguest_apic_write; + pv_apic_ops.apic_read = lguest_apic_read; #endif /* time operations */ diff --git a/trunk/arch/x86/lib/Makefile b/trunk/arch/x86/lib/Makefile index 55e11aa6d66c..aa3fa4119424 100644 --- a/trunk/arch/x86/lib/Makefile +++ b/trunk/arch/x86/lib/Makefile @@ -17,6 +17,9 @@ ifeq ($(CONFIG_X86_32),y) lib-$(CONFIG_X86_USE_3DNOW) += mmx_32.o else obj-y += io_64.o iomap_copy_64.o + + CFLAGS_csum-partial_64.o := -funroll-loops + lib-y += csum-partial_64.o csum-copy_64.o csum-wrappers_64.o lib-y += thunk_64.o clear_page_64.o copy_page_64.o lib-y += memmove_64.o memset_64.o diff --git a/trunk/arch/x86/lib/usercopy_32.c b/trunk/arch/x86/lib/usercopy_32.c index 9e68075544f6..24e60944971a 100644 --- a/trunk/arch/x86/lib/usercopy_32.c +++ b/trunk/arch/x86/lib/usercopy_32.c @@ -14,13 +14,6 @@ #include #include -#ifdef CONFIG_X86_INTEL_USERCOPY -/* - * Alignment at which movsl is preferred for bulk memory copies. - */ -struct movsl_mask movsl_mask __read_mostly; -#endif - static inline int __movsl_is_ok(unsigned long a1, unsigned long a2, unsigned long n) { #ifdef CONFIG_X86_INTEL_USERCOPY diff --git a/trunk/arch/x86/mach-default/setup.c b/trunk/arch/x86/mach-default/setup.c index 37b9ae4d44c5..3f2cf11f201a 100644 --- a/trunk/arch/x86/mach-default/setup.c +++ b/trunk/arch/x86/mach-default/setup.c @@ -38,6 +38,15 @@ void __init pre_intr_init_hook(void) init_ISA_irqs(); } +/* + * IRQ2 is cascade interrupt to second interrupt controller + */ +static struct irqaction irq2 = { + .handler = no_action, + .mask = CPU_MASK_NONE, + .name = "cascade", +}; + /** * intr_init_hook - post gate setup interrupt initialisation * @@ -53,6 +62,12 @@ void __init intr_init_hook(void) if (x86_quirks->arch_intr_init()) return; } +#ifdef CONFIG_X86_LOCAL_APIC + apic_intr_init(); +#endif + + if (!acpi_ioapic) + setup_irq(2, &irq2); } /** diff --git a/trunk/arch/x86/mach-es7000/Makefile b/trunk/arch/x86/mach-es7000/Makefile new file mode 100644 index 000000000000..3ef8b43b62fc --- /dev/null +++ b/trunk/arch/x86/mach-es7000/Makefile @@ -0,0 +1,5 @@ +# +# Makefile for the linux kernel. +# + +obj-$(CONFIG_X86_ES7000) := es7000plat.o diff --git a/trunk/arch/x86/mach-es7000/es7000.h b/trunk/arch/x86/mach-es7000/es7000.h new file mode 100644 index 000000000000..c8d5aa132fa0 --- /dev/null +++ b/trunk/arch/x86/mach-es7000/es7000.h @@ -0,0 +1,114 @@ +/* + * Written by: Garry Forsgren, Unisys Corporation + * Natalie Protasevich, Unisys Corporation + * This file contains the code to configure and interface + * with Unisys ES7000 series hardware system manager. + * + * Copyright (c) 2003 Unisys Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Unisys Corporation, Township Line & Union Meeting + * Roads-A, Unisys Way, Blue Bell, Pennsylvania, 19424, or: + * + * http://www.unisys.com + */ + +/* + * ES7000 chipsets + */ + +#define NON_UNISYS 0 +#define ES7000_CLASSIC 1 +#define ES7000_ZORRO 2 + + +#define MIP_REG 1 +#define MIP_PSAI_REG 4 + +#define MIP_BUSY 1 +#define MIP_SPIN 0xf0000 +#define MIP_VALID 0x0100000000000000ULL +#define MIP_PORT(VALUE) ((VALUE >> 32) & 0xffff) + +#define MIP_RD_LO(VALUE) (VALUE & 0xffffffff) + +struct mip_reg_info { + unsigned long long mip_info; + unsigned long long delivery_info; + unsigned long long host_reg; + unsigned long long mip_reg; +}; + +struct part_info { + unsigned char type; + unsigned char length; + unsigned char part_id; + unsigned char apic_mode; + unsigned long snum; + char ptype[16]; + char sname[64]; + char pname[64]; +}; + +struct psai { + unsigned long long entry_type; + unsigned long long addr; + unsigned long long bep_addr; +}; + +struct es7000_mem_info { + unsigned char type; + unsigned char length; + unsigned char resv[6]; + unsigned long long start; + unsigned long long size; +}; + +struct es7000_oem_table { + unsigned long long hdr; + struct mip_reg_info mip; + struct part_info pif; + struct es7000_mem_info shm; + struct psai psai; +}; + +#ifdef CONFIG_ACPI + +struct oem_table { + struct acpi_table_header Header; + u32 OEMTableAddr; + u32 OEMTableSize; +}; + +extern int find_unisys_acpi_oem_table(unsigned long *oem_addr); +#endif + +struct mip_reg { + unsigned long long off_0; + unsigned long long off_8; + unsigned long long off_10; + unsigned long long off_18; + unsigned long long off_20; + unsigned long long off_28; + unsigned long long off_30; + unsigned long long off_38; +}; + +#define MIP_SW_APIC 0x1020b +#define MIP_FUNC(VALUE) (VALUE & 0xff) + +extern int parse_unisys_oem (char *oemptr); +extern void setup_unisys(void); +extern int es7000_start_cpu(int cpu, unsigned long eip); +extern void es7000_sw_apic(void); diff --git a/trunk/arch/x86/kernel/es7000_32.c b/trunk/arch/x86/mach-es7000/es7000plat.c similarity index 78% rename from trunk/arch/x86/kernel/es7000_32.c rename to trunk/arch/x86/mach-es7000/es7000plat.c index 849e5cd485b8..50189af14b85 100644 --- a/trunk/arch/x86/kernel/es7000_32.c +++ b/trunk/arch/x86/mach-es7000/es7000plat.c @@ -39,92 +39,9 @@ #include #include #include +#include "es7000.h" #include -/* - * ES7000 chipsets - */ - -#define NON_UNISYS 0 -#define ES7000_CLASSIC 1 -#define ES7000_ZORRO 2 - - -#define MIP_REG 1 -#define MIP_PSAI_REG 4 - -#define MIP_BUSY 1 -#define MIP_SPIN 0xf0000 -#define MIP_VALID 0x0100000000000000ULL -#define MIP_PORT(VALUE) ((VALUE >> 32) & 0xffff) - -#define MIP_RD_LO(VALUE) (VALUE & 0xffffffff) - -struct mip_reg_info { - unsigned long long mip_info; - unsigned long long delivery_info; - unsigned long long host_reg; - unsigned long long mip_reg; -}; - -struct part_info { - unsigned char type; - unsigned char length; - unsigned char part_id; - unsigned char apic_mode; - unsigned long snum; - char ptype[16]; - char sname[64]; - char pname[64]; -}; - -struct psai { - unsigned long long entry_type; - unsigned long long addr; - unsigned long long bep_addr; -}; - -struct es7000_mem_info { - unsigned char type; - unsigned char length; - unsigned char resv[6]; - unsigned long long start; - unsigned long long size; -}; - -struct es7000_oem_table { - unsigned long long hdr; - struct mip_reg_info mip; - struct part_info pif; - struct es7000_mem_info shm; - struct psai psai; -}; - -#ifdef CONFIG_ACPI - -struct oem_table { - struct acpi_table_header Header; - u32 OEMTableAddr; - u32 OEMTableSize; -}; - -extern int find_unisys_acpi_oem_table(unsigned long *oem_addr); -#endif - -struct mip_reg { - unsigned long long off_0; - unsigned long long off_8; - unsigned long long off_10; - unsigned long long off_18; - unsigned long long off_20; - unsigned long long off_28; - unsigned long long off_30; - unsigned long long off_38; -}; - -#define MIP_SW_APIC 0x1020b -#define MIP_FUNC(VALUE) (VALUE & 0xff) - /* * ES7000 Globals */ @@ -155,7 +72,7 @@ es7000_rename_gsi(int ioapic, int gsi) base += nr_ioapic_registers[i]; } - if (!ioapic && (gsi < 16)) + if (!ioapic && (gsi < 16)) gsi += base; return gsi; } diff --git a/trunk/arch/x86/mach-generic/Makefile b/trunk/arch/x86/mach-generic/Makefile index 6730f4e7c744..0dbd7803a1d5 100644 --- a/trunk/arch/x86/mach-generic/Makefile +++ b/trunk/arch/x86/mach-generic/Makefile @@ -9,3 +9,4 @@ obj-$(CONFIG_X86_NUMAQ) += numaq.o obj-$(CONFIG_X86_SUMMIT) += summit.o obj-$(CONFIG_X86_BIGSMP) += bigsmp.o obj-$(CONFIG_X86_ES7000) += es7000.o +obj-$(CONFIG_X86_ES7000) += ../../x86/mach-es7000/ diff --git a/trunk/arch/x86/mach-generic/bigsmp.c b/trunk/arch/x86/mach-generic/bigsmp.c index df37fc9d6a26..59d771714559 100644 --- a/trunk/arch/x86/mach-generic/bigsmp.c +++ b/trunk/arch/x86/mach-generic/bigsmp.c @@ -5,17 +5,18 @@ #define APIC_DEFINITION 1 #include #include +#include #include #include #include #include #include +#include #include #include -#include -#include -#include -#include +#include +#include +#include #include static int dmi_bigsmp; /* can be set by dmi scanners */ diff --git a/trunk/arch/x86/mach-generic/es7000.c b/trunk/arch/x86/mach-generic/es7000.c index 520cca0ee04e..4742626f08c4 100644 --- a/trunk/arch/x86/mach-generic/es7000.c +++ b/trunk/arch/x86/mach-generic/es7000.c @@ -4,19 +4,20 @@ #define APIC_DEFINITION 1 #include #include +#include #include #include #include #include #include #include -#include -#include #include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include static int probe_es7000(void) { diff --git a/trunk/arch/x86/mach-generic/numaq.c b/trunk/arch/x86/mach-generic/numaq.c index 8cf58394975e..8091e68764c4 100644 --- a/trunk/arch/x86/mach-generic/numaq.c +++ b/trunk/arch/x86/mach-generic/numaq.c @@ -4,6 +4,7 @@ #define APIC_DEFINITION 1 #include #include +#include #include #include #include @@ -11,12 +12,11 @@ #include #include #include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include #include static int mps_oem_check(struct mp_config_table *mpc, char *oem, diff --git a/trunk/arch/x86/mach-generic/summit.c b/trunk/arch/x86/mach-generic/summit.c index 6ad6b67a723d..a97ea0f35b1e 100644 --- a/trunk/arch/x86/mach-generic/summit.c +++ b/trunk/arch/x86/mach-generic/summit.c @@ -4,18 +4,19 @@ #define APIC_DEFINITION 1 #include #include +#include #include #include #include #include #include #include -#include -#include #include -#include -#include -#include +#include +#include +#include +#include +#include static int probe_summit(void) { diff --git a/trunk/arch/x86/mm/init_64.c b/trunk/arch/x86/mm/init_64.c index 83e13f2d53d2..fb30486c82f7 100644 --- a/trunk/arch/x86/mm/init_64.c +++ b/trunk/arch/x86/mm/init_64.c @@ -88,62 +88,6 @@ early_param("gbpages", parse_direct_gbpages_on); int after_bootmem; -unsigned long __supported_pte_mask __read_mostly = ~0UL; -EXPORT_SYMBOL_GPL(__supported_pte_mask); - -static int do_not_nx __cpuinitdata; - -/* - * noexec=on|off - * Control non-executable mappings for 64-bit processes. - * - * on Enable (default) - * off Disable - */ -static int __init nonx_setup(char *str) -{ - if (!str) - return -EINVAL; - if (!strncmp(str, "on", 2)) { - __supported_pte_mask |= _PAGE_NX; - do_not_nx = 0; - } else if (!strncmp(str, "off", 3)) { - do_not_nx = 1; - __supported_pte_mask &= ~_PAGE_NX; - } - return 0; -} -early_param("noexec", nonx_setup); - -void __cpuinit check_efer(void) -{ - unsigned long efer; - - rdmsrl(MSR_EFER, efer); - if (!(efer & EFER_NX) || do_not_nx) - __supported_pte_mask &= ~_PAGE_NX; -} - -int force_personality32; - -/* - * noexec32=on|off - * Control non executable heap for 32bit processes. - * To control the stack too use noexec=off - * - * on PROT_READ does not imply PROT_EXEC for 32-bit processes (default) - * off PROT_READ implies PROT_EXEC - */ -static int __init nonx32_setup(char *str) -{ - if (!strcmp(str, "on")) - force_personality32 &= ~READ_IMPLIES_EXEC; - else if (!strcmp(str, "off")) - force_personality32 |= READ_IMPLIES_EXEC; - return 1; -} -__setup("noexec32=", nonx32_setup); - /* * NOTE: This function is marked __ref because it calls __init function * (alloc_bootmem_pages). It's safe to do it ONLY when after_bootmem == 0. diff --git a/trunk/arch/x86/pci/acpi.c b/trunk/arch/x86/pci/acpi.c index 1d88d2b39771..19af06927fbc 100644 --- a/trunk/arch/x86/pci/acpi.c +++ b/trunk/arch/x86/pci/acpi.c @@ -250,5 +250,10 @@ int __init pci_acpi_init(void) acpi_pci_irq_enable(dev); } +#ifdef CONFIG_X86_IO_APIC + if (acpi_ioapic) + print_IO_APIC(); +#endif + return 0; } diff --git a/trunk/arch/x86/pci/i386.c b/trunk/arch/x86/pci/i386.c index 844df0cbbd3e..8791fc55e715 100644 --- a/trunk/arch/x86/pci/i386.c +++ b/trunk/arch/x86/pci/i386.c @@ -33,7 +33,6 @@ #include #include -#include #include "pci.h" @@ -228,8 +227,6 @@ void __init pcibios_resource_survey(void) pcibios_allocate_bus_resources(&pci_root_buses); pcibios_allocate_resources(0); pcibios_allocate_resources(1); - - e820_reserve_resources_late(); } /** diff --git a/trunk/arch/x86/pci/mmconfig-shared.c b/trunk/arch/x86/pci/mmconfig-shared.c index 654a2234f8f3..d9635764ce3d 100644 --- a/trunk/arch/x86/pci/mmconfig-shared.c +++ b/trunk/arch/x86/pci/mmconfig-shared.c @@ -209,7 +209,7 @@ static int __init pci_mmcfg_check_hostbridge(void) return name != NULL; } -static void __init pci_mmcfg_insert_resources(void) +static void __init pci_mmcfg_insert_resources(unsigned long resource_flags) { #define PCI_MMCFG_RESOURCE_NAME_LEN 19 int i; @@ -233,7 +233,7 @@ static void __init pci_mmcfg_insert_resources(void) cfg->pci_segment); res->start = cfg->address; res->end = res->start + (num_buses << 20) - 1; - res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; + res->flags = IORESOURCE_MEM | resource_flags; insert_resource(&iomem_resource, res); names += PCI_MMCFG_RESOURCE_NAME_LEN; } @@ -434,9 +434,11 @@ static void __init __pci_mmcfg_init(int early) (pci_mmcfg_config[0].address == 0)) return; - if (pci_mmcfg_arch_init()) + if (pci_mmcfg_arch_init()) { + if (known_bridge) + pci_mmcfg_insert_resources(IORESOURCE_BUSY); pci_probe = (pci_probe & ~PCI_PROBE_MASK) | PCI_PROBE_MMCONF; - else { + } else { /* * Signal not to attempt to insert mmcfg resources because * the architecture mmcfg setup could not initialize. @@ -473,7 +475,7 @@ static int __init pci_mmcfg_late_insert_resources(void) * marked so it won't cause request errors when __request_region is * called. */ - pci_mmcfg_insert_resources(); + pci_mmcfg_insert_resources(0); return 0; } diff --git a/trunk/arch/x86/power/cpu_32.c b/trunk/arch/x86/power/cpu_32.c index 274d06082f48..d3e083dea720 100644 --- a/trunk/arch/x86/power/cpu_32.c +++ b/trunk/arch/x86/power/cpu_32.c @@ -11,7 +11,6 @@ #include #include #include -#include static struct saved_context saved_context; @@ -127,12 +126,6 @@ static void __restore_processor_state(struct saved_context *ctxt) if (boot_cpu_has(X86_FEATURE_SEP)) enable_sep_cpu(); - /* - * restore XCR0 for xsave capable cpu's. - */ - if (cpu_has_xsave) - xsetbv(XCR_XFEATURE_ENABLED_MASK, pcntxt_mask); - fix_processor_context(); do_fpu_end(); mtrr_ap_init(); diff --git a/trunk/arch/x86/power/cpu_64.c b/trunk/arch/x86/power/cpu_64.c index e3b6cf70d62c..66bdfb591fd8 100644 --- a/trunk/arch/x86/power/cpu_64.c +++ b/trunk/arch/x86/power/cpu_64.c @@ -14,7 +14,6 @@ #include #include #include -#include static void fix_processor_context(void); @@ -123,12 +122,6 @@ static void __restore_processor_state(struct saved_context *ctxt) wrmsrl(MSR_GS_BASE, ctxt->gs_base); wrmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base); - /* - * restore XCR0 for xsave capable cpu's. - */ - if (cpu_has_xsave) - xsetbv(XCR_XFEATURE_ENABLED_MASK, pcntxt_mask); - fix_processor_context(); do_fpu_end(); diff --git a/trunk/arch/x86/xen/enlighten.c b/trunk/arch/x86/xen/enlighten.c index a27d562a9744..7dcd321a0508 100644 --- a/trunk/arch/x86/xen/enlighten.c +++ b/trunk/arch/x86/xen/enlighten.c @@ -36,7 +36,6 @@ #include #include -#include #include #include #include @@ -581,47 +580,16 @@ static void xen_io_delay(void) } #ifdef CONFIG_X86_LOCAL_APIC -static u32 xen_apic_read(u32 reg) +static u32 xen_apic_read(unsigned long reg) { return 0; } -static void xen_apic_write(u32 reg, u32 val) +static void xen_apic_write(unsigned long reg, u32 val) { /* Warn to see if there's any stray references */ WARN_ON(1); } - -static u64 xen_apic_icr_read(void) -{ - return 0; -} - -static void xen_apic_icr_write(u32 low, u32 id) -{ - /* Warn to see if there's any stray references */ - WARN_ON(1); -} - -static void xen_apic_wait_icr_idle(void) -{ - return; -} - -static u32 xen_safe_apic_wait_icr_idle(void) -{ - return 0; -} - -static struct apic_ops xen_basic_apic_ops = { - .read = xen_apic_read, - .write = xen_apic_write, - .icr_read = xen_apic_icr_read, - .icr_write = xen_apic_icr_write, - .wait_icr_idle = xen_apic_wait_icr_idle, - .safe_wait_icr_idle = xen_safe_apic_wait_icr_idle, -}; - #endif static void xen_flush_tlb(void) @@ -1305,6 +1273,8 @@ static const struct pv_irq_ops xen_irq_ops __initdata = { static const struct pv_apic_ops xen_apic_ops __initdata = { #ifdef CONFIG_X86_LOCAL_APIC + .apic_write = xen_apic_write, + .apic_read = xen_apic_read, .setup_boot_clock = paravirt_nop, .setup_secondary_clock = paravirt_nop, .startup_ipi_hook = paravirt_nop, @@ -1707,13 +1677,6 @@ asmlinkage void __init xen_start_kernel(void) pv_apic_ops = xen_apic_ops; pv_mmu_ops = xen_mmu_ops; -#ifdef CONFIG_X86_LOCAL_APIC - /* - * set up the basic apic ops. - */ - apic_ops = &xen_basic_apic_ops; -#endif - if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) { pv_mmu_ops.ptep_modify_prot_start = xen_ptep_modify_prot_start; pv_mmu_ops.ptep_modify_prot_commit = xen_ptep_modify_prot_commit; diff --git a/trunk/drivers/acpi/glue.c b/trunk/drivers/acpi/glue.c index 3c578ef78c48..8dd3336efd7e 100644 --- a/trunk/drivers/acpi/glue.c +++ b/trunk/drivers/acpi/glue.c @@ -369,6 +369,7 @@ static int __init acpi_rtc_init(void) DBG("RTC unavailable?\n"); return 0; } -module_init(acpi_rtc_init); +/* do this between RTC subsys_initcall() and rtc_cmos driver_initcall() */ +fs_initcall(acpi_rtc_init); #endif diff --git a/trunk/drivers/ata/libata-scsi.c b/trunk/drivers/ata/libata-scsi.c index 5d312dc9be9f..59fe051957ef 100644 --- a/trunk/drivers/ata/libata-scsi.c +++ b/trunk/drivers/ata/libata-scsi.c @@ -503,7 +503,7 @@ int ata_cmd_ioctl(struct scsi_device *scsidev, void __user *arg) scsi_cmd[0] = ATA_16; scsi_cmd[4] = args[2]; - if (args[0] == ATA_CMD_SMART) { /* hack -- ide driver does this too */ + if (args[0] == WIN_SMART) { /* hack -- ide driver does this too... */ scsi_cmd[6] = args[3]; scsi_cmd[8] = args[1]; scsi_cmd[10] = 0x4f; diff --git a/trunk/drivers/block/hd.c b/trunk/drivers/block/hd.c index 482c0c4b964f..682243bf2e46 100644 --- a/trunk/drivers/block/hd.c +++ b/trunk/drivers/block/hd.c @@ -39,7 +39,6 @@ #include #include #include -#include #include #define REALLY_SLOW_IO @@ -371,7 +370,7 @@ static void reset_hd(void) struct hd_i_struct *disk = &hd_info[i]; disk->special_op = disk->recalibrate = 1; hd_out(disk, disk->sect, disk->sect, disk->head-1, - disk->cyl, ATA_CMD_INIT_DEV_PARAMS, &reset_hd); + disk->cyl, WIN_SPECIFY, &reset_hd); if (reset) goto repeat; } else @@ -559,7 +558,7 @@ static int do_special_op(struct hd_i_struct *disk, struct request *req) { if (disk->recalibrate) { disk->recalibrate = 0; - hd_out(disk, disk->sect, 0, 0, 0, ATA_CMD_RESTORE, &recal_intr); + hd_out(disk, disk->sect, 0, 0, 0, WIN_RESTORE, &recal_intr); return reset; } if (disk->head > 16) { @@ -632,13 +631,13 @@ static void hd_request(void) if (blk_fs_request(req)) { switch (rq_data_dir(req)) { case READ: - hd_out(disk, nsect, sec, head, cyl, ATA_CMD_PIO_READ, + hd_out(disk, nsect, sec, head, cyl, WIN_READ, &read_intr); if (reset) goto repeat; break; case WRITE: - hd_out(disk, nsect, sec, head, cyl, ATA_CMD_PIO_WRITE, + hd_out(disk, nsect, sec, head, cyl, WIN_WRITE, &write_intr); if (reset) goto repeat; diff --git a/trunk/drivers/ide/Kconfig b/trunk/drivers/ide/Kconfig index b50b5dac95b0..052879a6f853 100644 --- a/trunk/drivers/ide/Kconfig +++ b/trunk/drivers/ide/Kconfig @@ -131,6 +131,29 @@ config BLK_DEV_IDEDISK If unsure, say Y. +config IDEDISK_MULTI_MODE + bool "Use multiple sector mode for Programmed Input/Output by default" + help + This setting is irrelevant for most IDE disks, with direct memory + access, to which multiple sector mode does not apply. Multiple sector + mode is a feature of most modern IDE hard drives, permitting the + transfer of multiple sectors per Programmed Input/Output interrupt, + rather than the usual one sector per interrupt. When this feature is + enabled, it can reduce operating system overhead for disk Programmed + Input/Output. On some systems, it also can increase the data + throughput of Programmed Input/Output. Some drives, however, seemed + to run slower with multiple sector mode enabled. Some drives claimed + to support multiple sector mode, but lost data at some settings. + Under rare circumstances, such failures could result in massive + filesystem corruption. + + If you get the following error, try to say Y here: + + hda: set_multmode: status=0x51 { DriveReady SeekComplete Error } + hda: set_multmode: error=0x04 { DriveStatusError } + + If in doubt, say N. + config BLK_DEV_IDECS tristate "PCMCIA IDE support" depends on PCMCIA diff --git a/trunk/drivers/ide/Makefile b/trunk/drivers/ide/Makefile index 308b8a12f314..64e0ecdc4ed5 100644 --- a/trunk/drivers/ide/Makefile +++ b/trunk/drivers/ide/Makefile @@ -4,8 +4,8 @@ EXTRA_CFLAGS += -Idrivers/ide -ide-core-y += ide.o ide-ioctls.o ide-io.o ide-iops.o ide-lib.o ide-probe.o \ - ide-taskfile.o ide-pio-blacklist.o +ide-core-y += ide.o ide-io.o ide-iops.o ide-lib.o ide-probe.o ide-taskfile.o \ + ide-pio-blacklist.o # core IDE code ide-core-$(CONFIG_IDE_TIMINGS) += ide-timings.o @@ -37,12 +37,11 @@ obj-$(CONFIG_IDE_GENERIC) += ide-generic.o obj-$(CONFIG_BLK_DEV_IDEPNP) += ide-pnp.o ide-cd_mod-y += ide-cd.o ide-cd_ioctl.o ide-cd_verbose.o -ide-floppy_mod-y += ide-floppy.o ide-floppy_ioctl.o obj-$(CONFIG_BLK_DEV_IDEDISK) += ide-disk.o obj-$(CONFIG_BLK_DEV_IDECD) += ide-cd_mod.o -obj-$(CONFIG_BLK_DEV_IDEFLOPPY) += ide-floppy_mod.o obj-$(CONFIG_BLK_DEV_IDETAPE) += ide-tape.o +obj-$(CONFIG_BLK_DEV_IDEFLOPPY) += ide-floppy.o ifeq ($(CONFIG_BLK_DEV_IDECS), y) ide-cs-core-y += legacy/ide-cs.o diff --git a/trunk/drivers/ide/arm/icside.c b/trunk/drivers/ide/arm/icside.c index 70f5b164828b..df4af4083954 100644 --- a/trunk/drivers/ide/arm/icside.c +++ b/trunk/drivers/ide/arm/icside.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -264,8 +265,8 @@ static void icside_set_dma_mode(ide_drive_t *drive, const u8 xfer_mode) * If we're going to be doing MW_DMA_1 or MW_DMA_2, we should * take care to note the values in the ID... */ - if (use_dma_info && drive->id[ATA_ID_EIDE_DMA_TIME] > cycle_time) - cycle_time = drive->id[ATA_ID_EIDE_DMA_TIME]; + if (use_dma_info && drive->id->eide_dma_time > cycle_time) + cycle_time = drive->id->eide_dma_time; drive->drive_data = cycle_time; diff --git a/trunk/drivers/ide/arm/palm_bk3710.c b/trunk/drivers/ide/arm/palm_bk3710.c index 122ed3c072fd..4fd91dcf1dc2 100644 --- a/trunk/drivers/ide/arm/palm_bk3710.c +++ b/trunk/drivers/ide/arm/palm_bk3710.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -179,7 +180,7 @@ static void palm_bk3710_setpiomode(void __iomem *base, ide_drive_t *mate, val32 |= (t2i << (dev ? 8 : 0)); writel(val32, base + BK3710_DATRCVR); - if (mate) { + if (mate && mate->present) { u8 mode2 = ide_get_best_pio_mode(mate, 255, 4); if (mode2 < mode) @@ -212,8 +213,7 @@ static void palm_bk3710_set_dma_mode(ide_drive_t *drive, u8 xferspeed) palm_bk3710_setudmamode(base, is_slave, xferspeed - XFER_UDMA_0); } else { - palm_bk3710_setdmamode(base, is_slave, - drive->id[ATA_ID_EIDE_DMA_MIN], + palm_bk3710_setdmamode(base, is_slave, drive->id->eide_dma_min, xferspeed); } } @@ -229,7 +229,7 @@ static void palm_bk3710_set_pio_mode(ide_drive_t *drive, u8 pio) * Obtain the drive PIO data for tuning the Palm Chip registers */ cycle_time = ide_pio_cycle_time(drive, pio); - mate = ide_get_pair_dev(drive); + mate = ide_get_paired_drive(drive); palm_bk3710_setpiomode(base, mate, is_slave, cycle_time, pio); } diff --git a/trunk/drivers/ide/ide-acpi.c b/trunk/drivers/ide/ide-acpi.c index 2427c380b3dc..6f704628c27d 100644 --- a/trunk/drivers/ide/ide-acpi.c +++ b/trunk/drivers/ide/ide-acpi.c @@ -584,7 +584,7 @@ void ide_acpi_get_timing(ide_hwif_t *hwif) * This function executes the _STM ACPI method for the target channel. * * _STM requires Identify Drive data, which has to passed as an argument. - * Unfortunately drive->id is a mangled version which we can't readily + * Unfortunately hd_driveid is a mangled version which we can't readily * use; hence we'll get the information afresh. */ void ide_acpi_push_timing(ide_hwif_t *hwif) @@ -614,10 +614,10 @@ void ide_acpi_push_timing(ide_hwif_t *hwif) in_params[0].buffer.length = sizeof(struct GTM_buffer); in_params[0].buffer.pointer = (u8 *)&hwif->acpidata->gtm; in_params[1].type = ACPI_TYPE_BUFFER; - in_params[1].buffer.length = sizeof(ATA_ID_WORDS * 2); + in_params[1].buffer.length = sizeof(struct hd_driveid); in_params[1].buffer.pointer = (u8 *)&master->idbuff; in_params[2].type = ACPI_TYPE_BUFFER; - in_params[2].buffer.length = sizeof(ATA_ID_WORDS * 2); + in_params[2].buffer.length = sizeof(struct hd_driveid); in_params[2].buffer.pointer = (u8 *)&slave->idbuff; /* Output buffer: _STM has no output */ diff --git a/trunk/drivers/ide/ide-atapi.c b/trunk/drivers/ide/ide-atapi.c index 608c5bade929..adf04f99cdeb 100644 --- a/trunk/drivers/ide/ide-atapi.c +++ b/trunk/drivers/ide/ide-atapi.c @@ -14,201 +14,12 @@ #define debug_log(fmt, args...) do {} while (0) #endif -/* - * Check whether we can support a device, - * based on the ATAPI IDENTIFY command results. - */ -int ide_check_atapi_device(ide_drive_t *drive, const char *s) -{ - u16 *id = drive->id; - u8 gcw[2], protocol, device_type, removable, drq_type, packet_size; - - *((u16 *)&gcw) = id[ATA_ID_CONFIG]; - - protocol = (gcw[1] & 0xC0) >> 6; - device_type = gcw[1] & 0x1F; - removable = (gcw[0] & 0x80) >> 7; - drq_type = (gcw[0] & 0x60) >> 5; - packet_size = gcw[0] & 0x03; - -#ifdef CONFIG_PPC - /* kludge for Apple PowerBook internal zip */ - if (drive->media == ide_floppy && device_type == 5 && - !strstr((char *)&id[ATA_ID_PROD], "CD-ROM") && - strstr((char *)&id[ATA_ID_PROD], "ZIP")) - device_type = 0; -#endif - - if (protocol != 2) - printk(KERN_ERR "%s: %s: protocol (0x%02x) is not ATAPI\n", - s, drive->name, protocol); - else if ((drive->media == ide_floppy && device_type != 0) || - (drive->media == ide_tape && device_type != 1)) - printk(KERN_ERR "%s: %s: invalid device type (0x%02x)\n", - s, drive->name, device_type); - else if (removable == 0) - printk(KERN_ERR "%s: %s: the removable flag is not set\n", - s, drive->name); - else if (drive->media == ide_floppy && drq_type == 3) - printk(KERN_ERR "%s: %s: sorry, DRQ type (0x%02x) not " - "supported\n", s, drive->name, drq_type); - else if (packet_size != 0) - printk(KERN_ERR "%s: %s: packet size (0x%02x) is not 12 " - "bytes\n", s, drive->name, packet_size); - else - return 1; - return 0; -} -EXPORT_SYMBOL_GPL(ide_check_atapi_device); - -/* PIO data transfer routine using the scatter gather table. */ -int ide_io_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc, - unsigned int bcount, int write) -{ - ide_hwif_t *hwif = drive->hwif; - const struct ide_tp_ops *tp_ops = hwif->tp_ops; - xfer_func_t *xf = write ? tp_ops->output_data : tp_ops->input_data; - struct scatterlist *sg = pc->sg; - char *buf; - int count, done = 0; - - while (bcount) { - count = min(sg->length - pc->b_count, bcount); - - if (PageHighMem(sg_page(sg))) { - unsigned long flags; - - local_irq_save(flags); - buf = kmap_atomic(sg_page(sg), KM_IRQ0) + sg->offset; - xf(drive, NULL, buf + pc->b_count, count); - kunmap_atomic(buf - sg->offset, KM_IRQ0); - local_irq_restore(flags); - } else { - buf = sg_virt(sg); - xf(drive, NULL, buf + pc->b_count, count); - } - - bcount -= count; - pc->b_count += count; - done += count; - - if (pc->b_count == sg->length) { - if (!--pc->sg_cnt) - break; - pc->sg = sg = sg_next(sg); - pc->b_count = 0; - } - } - - if (bcount) { - printk(KERN_ERR "%s: %d leftover bytes, %s\n", drive->name, - bcount, write ? "padding with zeros" - : "discarding data"); - ide_pad_transfer(drive, write, bcount); - } - - return done; -} -EXPORT_SYMBOL_GPL(ide_io_buffers); - -void ide_init_pc(struct ide_atapi_pc *pc) -{ - memset(pc, 0, sizeof(*pc)); - pc->buf = pc->pc_buf; - pc->buf_size = IDE_PC_BUFFER_SIZE; -} -EXPORT_SYMBOL_GPL(ide_init_pc); - -/* - * Generate a new packet command request in front of the request queue, before - * the current request, so that it will be processed immediately, on the next - * pass through the driver. - */ -void ide_queue_pc_head(ide_drive_t *drive, struct gendisk *disk, - struct ide_atapi_pc *pc, struct request *rq) -{ - blk_rq_init(NULL, rq); - rq->cmd_type = REQ_TYPE_SPECIAL; - rq->cmd_flags |= REQ_PREEMPT; - rq->buffer = (char *)pc; - rq->rq_disk = disk; - memcpy(rq->cmd, pc->c, 12); - if (drive->media == ide_tape) - rq->cmd[13] = REQ_IDETAPE_PC1; - ide_do_drive_cmd(drive, rq); -} -EXPORT_SYMBOL_GPL(ide_queue_pc_head); - -/* - * Add a special packet command request to the tail of the request queue, - * and wait for it to be serviced. - */ -int ide_queue_pc_tail(ide_drive_t *drive, struct gendisk *disk, - struct ide_atapi_pc *pc) -{ - struct request *rq; - int error; - - rq = blk_get_request(drive->queue, READ, __GFP_WAIT); - rq->cmd_type = REQ_TYPE_SPECIAL; - rq->buffer = (char *)pc; - memcpy(rq->cmd, pc->c, 12); - if (drive->media == ide_tape) - rq->cmd[13] = REQ_IDETAPE_PC1; - error = blk_execute_rq(drive->queue, disk, rq, 0); - blk_put_request(rq); - - return error; -} -EXPORT_SYMBOL_GPL(ide_queue_pc_tail); - -int ide_do_test_unit_ready(ide_drive_t *drive, struct gendisk *disk) -{ - struct ide_atapi_pc pc; - - ide_init_pc(&pc); - pc.c[0] = TEST_UNIT_READY; - - return ide_queue_pc_tail(drive, disk, &pc); -} -EXPORT_SYMBOL_GPL(ide_do_test_unit_ready); - -int ide_do_start_stop(ide_drive_t *drive, struct gendisk *disk, int start) -{ - struct ide_atapi_pc pc; - - ide_init_pc(&pc); - pc.c[0] = START_STOP; - pc.c[4] = start; - - if (drive->media == ide_tape) - pc.flags |= PC_FLAG_WAIT_FOR_DSC; - - return ide_queue_pc_tail(drive, disk, &pc); -} -EXPORT_SYMBOL_GPL(ide_do_start_stop); - -int ide_set_media_lock(ide_drive_t *drive, struct gendisk *disk, int on) -{ - struct ide_atapi_pc pc; - - if (drive->atapi_flags & IDE_AFLAG_NO_DOORLOCK) - return 0; - - ide_init_pc(&pc); - pc.c[0] = ALLOW_MEDIUM_REMOVAL; - pc.c[4] = on; - - return ide_queue_pc_tail(drive, disk, &pc); -} -EXPORT_SYMBOL_GPL(ide_set_media_lock); - /* TODO: unify the code thus making some arguments go away */ ide_startstop_t ide_pc_intr(ide_drive_t *drive, struct ide_atapi_pc *pc, ide_handler_t *handler, unsigned int timeout, ide_expiry_t *expiry, void (*update_buffers)(ide_drive_t *, struct ide_atapi_pc *), void (*retry_pc)(ide_drive_t *), void (*dsc_handle)(ide_drive_t *), - int (*io_buffers)(ide_drive_t *, struct ide_atapi_pc *, unsigned, int)) + void (*io_buffers)(ide_drive_t *, struct ide_atapi_pc *, unsigned, int)) { ide_hwif_t *hwif = drive->hwif; struct request *rq = hwif->hwgroup->rq; @@ -230,7 +41,7 @@ ide_startstop_t ide_pc_intr(ide_drive_t *drive, struct ide_atapi_pc *pc, if (pc->flags & PC_FLAG_DMA_IN_PROGRESS) { if (hwif->dma_ops->dma_end(drive) || - (drive->media == ide_tape && !scsi && (stat & ATA_ERR))) { + (drive->media == ide_tape && !scsi && (stat & ERR_STAT))) { if (drive->media == ide_floppy && !scsi) printk(KERN_ERR "%s: DMA %s error\n", drive->name, rq_data_dir(pc->rq) @@ -245,7 +56,7 @@ ide_startstop_t ide_pc_intr(ide_drive_t *drive, struct ide_atapi_pc *pc, } /* No more interrupts */ - if ((stat & ATA_DRQ) == 0) { + if ((stat & DRQ_STAT) == 0) { debug_log("Packet command completed, %d bytes transferred\n", pc->xferred); @@ -254,10 +65,10 @@ ide_startstop_t ide_pc_intr(ide_drive_t *drive, struct ide_atapi_pc *pc, local_irq_enable_in_hardirq(); if (drive->media == ide_tape && !scsi && - (stat & ATA_ERR) && rq->cmd[0] == REQUEST_SENSE) - stat &= ~ATA_ERR; + (stat & ERR_STAT) && rq->cmd[0] == REQUEST_SENSE) + stat &= ~ERR_STAT; - if ((stat & ATA_ERR) || (pc->flags & PC_FLAG_DMA_ERROR)) { + if ((stat & ERR_STAT) || (pc->flags & PC_FLAG_DMA_ERROR)) { /* Error detected */ debug_log("%s: I/O error\n", drive->name); @@ -284,7 +95,7 @@ ide_startstop_t ide_pc_intr(ide_drive_t *drive, struct ide_atapi_pc *pc, cmd_finished: pc->error = 0; if ((pc->flags & PC_FLAG_WAIT_FOR_DSC) && - (stat & ATA_DSC) == 0) { + (stat & SEEK_STAT) == 0) { dsc_handle(drive); return ide_stopped; } @@ -306,18 +117,17 @@ ide_startstop_t ide_pc_intr(ide_drive_t *drive, struct ide_atapi_pc *pc, /* Get the number of bytes to transfer on this interrupt. */ ide_read_bcount_and_ireason(drive, &bcount, &ireason); - if (ireason & ATAPI_COD) { + if (ireason & CD) { printk(KERN_ERR "%s: CoD != 0 in %s\n", drive->name, __func__); return ide_do_reset(drive); } - if (((ireason & ATAPI_IO) == ATAPI_IO) == - !!(pc->flags & PC_FLAG_WRITING)) { + if (((ireason & IO) == IO) == !!(pc->flags & PC_FLAG_WRITING)) { /* Hopefully, we will never get here */ printk(KERN_ERR "%s: We wanted to %s, but the device wants us " "to %s!\n", drive->name, - (ireason & ATAPI_IO) ? "Write" : "Read", - (ireason & ATAPI_IO) ? "Read" : "Write"); + (ireason & IO) ? "Write" : "Read", + (ireason & IO) ? "Read" : "Write"); return ide_do_reset(drive); } @@ -361,14 +171,9 @@ ide_startstop_t ide_pc_intr(ide_drive_t *drive, struct ide_atapi_pc *pc, if ((drive->media == ide_floppy && !scsi && !pc->buf) || (drive->media == ide_tape && !scsi && pc->bh) || - (scsi && pc->sg)) { - int done = io_buffers(drive, pc, bcount, - !!(pc->flags & PC_FLAG_WRITING)); - - /* FIXME: don't do partial completions */ - if (drive->media == ide_floppy && !scsi) - ide_end_request(drive, 1, done >> 9); - } else + (scsi && pc->sg)) + io_buffers(drive, pc, bcount, !!(pc->flags & PC_FLAG_WRITING)); + else xferfunc(drive, NULL, pc->cur_pos, bcount); /* Update the current position */ @@ -400,8 +205,7 @@ static u8 ide_wait_ireason(ide_drive_t *drive, u8 ireason) { int retries = 100; - while (retries-- && ((ireason & ATAPI_COD) == 0 || - (ireason & ATAPI_IO))) { + while (retries-- && ((ireason & CD) == 0 || (ireason & IO))) { printk(KERN_ERR "%s: (IO,CoD != (0,1) while issuing " "a packet command, retrying\n", drive->name); udelay(100); @@ -410,8 +214,8 @@ static u8 ide_wait_ireason(ide_drive_t *drive, u8 ireason) printk(KERN_ERR "%s: (IO,CoD != (0,1) while issuing " "a packet command, ignoring\n", drive->name); - ireason |= ATAPI_COD; - ireason &= ~ATAPI_IO; + ireason |= CD; + ireason &= ~IO; } } @@ -427,7 +231,7 @@ ide_startstop_t ide_transfer_pc(ide_drive_t *drive, struct ide_atapi_pc *pc, ide_startstop_t startstop; u8 ireason; - if (ide_wait_stat(&startstop, drive, ATA_DRQ, ATA_BUSY, WAIT_READY)) { + if (ide_wait_stat(&startstop, drive, DRQ_STAT, BUSY_STAT, WAIT_READY)) { printk(KERN_ERR "%s: Strange, packet command initiated yet " "DRQ isn't asserted\n", drive->name); return startstop; @@ -437,7 +241,7 @@ ide_startstop_t ide_transfer_pc(ide_drive_t *drive, struct ide_atapi_pc *pc, if (drive->media == ide_tape && !drive->scsi) ireason = ide_wait_ireason(drive, ireason); - if ((ireason & ATAPI_COD) == 0 || (ireason & ATAPI_IO)) { + if ((ireason & CD) == 0 || (ireason & IO)) { printk(KERN_ERR "%s: (IO,CoD) != (0,1) while issuing " "a packet command\n", drive->name); return ide_do_reset(drive); @@ -499,7 +303,7 @@ ide_startstop_t ide_issue_pc(ide_drive_t *drive, struct ide_atapi_pc *pc, /* Issue the packet command */ if (drive->atapi_flags & IDE_AFLAG_DRQ_INTERRUPT) { - ide_execute_command(drive, ATA_CMD_PACKET, handler, + ide_execute_command(drive, WIN_PACKETCMD, handler, timeout, NULL); return ide_started; } else { diff --git a/trunk/drivers/ide/ide-cd.c b/trunk/drivers/ide/ide-cd.c index 465a92ca0179..03c2cb6a58bc 100644 --- a/trunk/drivers/ide/ide-cd.c +++ b/trunk/drivers/ide/ide-cd.c @@ -436,7 +436,7 @@ static int cdrom_decode_status(ide_drive_t *drive, int good_stat, int *stat_ret) ide_dump_status_no_sense(drive, "media error (blank)", stat); do_end_request = 1; - } else if ((err & ~ATA_ABORTED) != 0) { + } else if ((err & ~ABRT_ERR) != 0) { /* go to the default handler for other errors */ ide_error(drive, "cdrom_decode_status", stat); return 1; @@ -457,7 +457,7 @@ static int cdrom_decode_status(ide_drive_t *drive, int good_stat, int *stat_ret) * If we got a CHECK_CONDITION status, queue * a request sense command. */ - if (stat & ATA_ERR) + if (stat & ERR_STAT) cdrom_queue_request_sense(drive, NULL, NULL); } else { blk_dump_rq_flags(rq, "ide-cd: bad rq"); @@ -468,7 +468,7 @@ static int cdrom_decode_status(ide_drive_t *drive, int good_stat, int *stat_ret) return 1; end_request: - if (stat & ATA_ERR) { + if (stat & ERR_STAT) { unsigned long flags; spin_lock_irqsave(&ide_lock, flags); @@ -541,7 +541,7 @@ static ide_startstop_t cdrom_start_packet_command(ide_drive_t *drive, drive->waiting_for_dma = 0; /* packet command */ - ide_execute_command(drive, ATA_CMD_PACKET, handler, + ide_execute_command(drive, WIN_PACKETCMD, handler, ATAPI_WAIT_PC, cdrom_timer_expiry); return ide_started; } else { @@ -574,7 +574,7 @@ static ide_startstop_t cdrom_transfer_packet_command(ide_drive_t *drive, */ /* check for errors */ - if (cdrom_decode_status(drive, ATA_DRQ, NULL)) + if (cdrom_decode_status(drive, DRQ_STAT, NULL)) return ide_stopped; /* ok, next interrupt will be DMA interrupt */ @@ -582,8 +582,8 @@ static ide_startstop_t cdrom_transfer_packet_command(ide_drive_t *drive, drive->waiting_for_dma = 1; } else { /* otherwise, we must wait for DRQ to get set */ - if (ide_wait_stat(&startstop, drive, ATA_DRQ, - ATA_BUSY, WAIT_READY)) + if (ide_wait_stat(&startstop, drive, DRQ_STAT, + BUSY_STAT, WAIT_READY)) return startstop; } @@ -938,7 +938,7 @@ static ide_startstop_t cdrom_newpc_intr(ide_drive_t *drive) thislen = len; /* If DRQ is clear, the command has completed. */ - if ((stat & ATA_DRQ) == 0) { + if ((stat & DRQ_STAT) == 0) { if (blk_fs_request(rq)) { /* * If we're not done reading/writing, complain. @@ -1164,12 +1164,13 @@ static void cdrom_do_block_pc(ide_drive_t *drive, struct request *rq) if (rq->bio || ((rq->cmd_type == REQ_TYPE_ATA_PC) && rq->data_len)) { struct request_queue *q = drive->queue; unsigned int alignment; - char *buf; + unsigned long addr; + unsigned long stack_mask = ~(THREAD_SIZE - 1); if (rq->bio) - buf = bio_data(rq->bio); + addr = (unsigned long)bio_data(rq->bio); else - buf = rq->data; + addr = (unsigned long)rq->data; info->dma = drive->using_dma; @@ -1180,8 +1181,11 @@ static void cdrom_do_block_pc(ide_drive_t *drive, struct request *rq) * separate masks. */ alignment = queue_dma_alignment(q) | q->dma_pad_mask; - if ((unsigned long)buf & alignment || rq->data_len & alignment - || object_is_on_stack(buf)) + if (addr & alignment || rq->data_len & alignment) + info->dma = 0; + + if (!((addr & stack_mask) ^ + ((unsigned long)current->stack & stack_mask))) info->dma = 0; } } @@ -1202,7 +1206,7 @@ static ide_startstop_t ide_cd_do_request(ide_drive_t *drive, struct request *rq, unsigned long elapsed = jiffies - info->start_seek; int stat = hwif->tp_ops->read_status(hwif); - if ((stat & ATA_DSC) != ATA_DSC) { + if ((stat & SEEK_STAT) != SEEK_STAT) { if (elapsed < IDECD_SEEK_TIMEOUT) { ide_stall_queue(drive, IDECD_SEEK_TIMER); @@ -1809,12 +1813,13 @@ static ide_proc_entry_t idecd_proc[] = { { NULL, 0, NULL, NULL } }; -ide_devset_rw_field(dsc_overlap, dsc_overlap); - -static const struct ide_proc_devset idecd_settings[] = { - IDE_PROC_DEVSET(dsc_overlap, 0, 1), - { 0 }, -}; +static void ide_cdrom_add_settings(ide_drive_t *drive) +{ + ide_add_setting(drive, "dsc_overlap", SETTING_RW, TYPE_BYTE, 0, 1, 1, 1, + &drive->dsc_overlap, NULL); +} +#else +static inline void ide_cdrom_add_settings(ide_drive_t *drive) { ; } #endif static const struct cd_list_entry ide_cd_quirks_list[] = { @@ -1861,14 +1866,14 @@ static const struct cd_list_entry ide_cd_quirks_list[] = { { NULL, NULL, 0 } }; -static unsigned int ide_cd_flags(u16 *id) +static unsigned int ide_cd_flags(struct hd_driveid *id) { const struct cd_list_entry *cle = ide_cd_quirks_list; while (cle->id_model) { - if (strcmp(cle->id_model, (char *)&id[ATA_ID_PROD]) == 0 && + if (strcmp(cle->id_model, id->model) == 0 && (cle->id_firmware == NULL || - strstr((char *)&id[ATA_ID_FW_REV], cle->id_firmware))) + strstr(id->fw_rev, cle->id_firmware))) return cle->cd_flags; cle++; } @@ -1880,8 +1885,7 @@ static int ide_cdrom_setup(ide_drive_t *drive) { struct cdrom_info *cd = drive->driver_data; struct cdrom_device_info *cdi = &cd->devinfo; - u16 *id = drive->id; - char *fw_rev = (char *)&id[ATA_ID_FW_REV]; + struct hd_driveid *id = drive->id; int nslots; blk_queue_prep_rq(drive->queue, ide_cdrom_prep_fn); @@ -1896,15 +1900,15 @@ static int ide_cdrom_setup(ide_drive_t *drive) drive->atapi_flags = IDE_AFLAG_MEDIA_CHANGED | IDE_AFLAG_NO_EJECT | ide_cd_flags(id); - if ((id[ATA_ID_CONFIG] & 0x0060) == 0x20) + if ((id->config & 0x0060) == 0x20) drive->atapi_flags |= IDE_AFLAG_DRQ_INTERRUPT; if ((drive->atapi_flags & IDE_AFLAG_VERTOS_300_SSD) && - fw_rev[4] == '1' && fw_rev[6] <= '2') + id->fw_rev[4] == '1' && id->fw_rev[6] <= '2') drive->atapi_flags |= (IDE_AFLAG_TOCTRACKS_AS_BCD | IDE_AFLAG_TOCADDR_AS_BCD); else if ((drive->atapi_flags & IDE_AFLAG_VERTOS_600_ESD) && - fw_rev[4] == '1' && fw_rev[6] <= '2') + id->fw_rev[4] == '1' && id->fw_rev[6] <= '2') drive->atapi_flags |= IDE_AFLAG_TOCTRACKS_AS_BCD; else if (drive->atapi_flags & IDE_AFLAG_SANYO_3CD) /* 3 => use CD in slot 0 */ @@ -1923,8 +1927,7 @@ static int ide_cdrom_setup(ide_drive_t *drive) cd->devinfo.handle = NULL; return 1; } - - ide_proc_register_driver(drive, cd->driver); + ide_cdrom_add_settings(drive); return 0; } @@ -1969,12 +1972,12 @@ static ide_driver_t ide_cdrom_driver = { .remove = ide_cd_remove, .version = IDECD_VERSION, .media = ide_cdrom, + .supports_dsc_overlap = 1, .do_request = ide_cd_do_request, .end_request = ide_end_request, .error = __ide_error, #ifdef CONFIG_IDE_PROC_FS .proc = idecd_proc, - .settings = idecd_settings, #endif }; @@ -2109,10 +2112,10 @@ static int ide_cd_probe(ide_drive_t *drive) if (!strstr("ide-cdrom", drive->driver_req)) goto failed; - + if (!drive->present) + goto failed; if (drive->media != ide_cdrom && drive->media != ide_optical) goto failed; - /* skip drives that we were told to ignore */ if (ignore != NULL) { if (strstr(ignore, drive->name)) { @@ -2134,6 +2137,8 @@ static int ide_cd_probe(ide_drive_t *drive) ide_init_disk(g, drive); + ide_proc_register_driver(drive, &ide_cdrom_driver); + kref_init(&info->kref); info->drive = drive; @@ -2148,6 +2153,7 @@ static int ide_cd_probe(ide_drive_t *drive) g->driverfs_dev = &drive->gendev; g->flags = GENHD_FL_CD | GENHD_FL_REMOVABLE; if (ide_cdrom_setup(drive)) { + ide_proc_unregister_driver(drive, &ide_cdrom_driver); ide_cd_release(&info->kref); goto failed; } diff --git a/trunk/drivers/ide/ide-disk.c b/trunk/drivers/ide/ide-disk.c index 01846f244b40..33ea8c048717 100644 --- a/trunk/drivers/ide/ide-disk.c +++ b/trunk/drivers/ide/ide-disk.c @@ -30,8 +30,10 @@ #include #include #include + +#define _IDE_DISK + #include -#include #include #include @@ -88,19 +90,68 @@ static void ide_disk_put(struct ide_disk_obj *idkp) mutex_unlock(&idedisk_ref_mutex); } +/* + * lba_capacity_is_ok() performs a sanity check on the claimed "lba_capacity" + * value for this drive (from its reported identification information). + * + * Returns: 1 if lba_capacity looks sensible + * 0 otherwise + * + * It is called only once for each drive. + */ +static int lba_capacity_is_ok(struct hd_driveid *id) +{ + unsigned long lba_sects, chs_sects, head, tail; + + /* No non-LBA info .. so valid! */ + if (id->cyls == 0) + return 1; + + /* + * The ATA spec tells large drives to return + * C/H/S = 16383/16/63 independent of their size. + * Some drives can be jumpered to use 15 heads instead of 16. + * Some drives can be jumpered to use 4092 cyls instead of 16383. + */ + if ((id->cyls == 16383 + || (id->cyls == 4092 && id->cur_cyls == 16383)) && + id->sectors == 63 && + (id->heads == 15 || id->heads == 16) && + (id->lba_capacity >= 16383*63*id->heads)) + return 1; + + lba_sects = id->lba_capacity; + chs_sects = id->cyls * id->heads * id->sectors; + + /* perform a rough sanity check on lba_sects: within 10% is OK */ + if ((lba_sects - chs_sects) < chs_sects/10) + return 1; + + /* some drives have the word order reversed */ + head = ((lba_sects >> 16) & 0xffff); + tail = (lba_sects & 0xffff); + lba_sects = (head | (tail << 16)); + if ((lba_sects - chs_sects) < chs_sects/10) { + id->lba_capacity = lba_sects; + return 1; /* lba_capacity is (now) good */ + } + + return 0; /* lba_capacity value may be bad */ +} + static const u8 ide_rw_cmds[] = { - ATA_CMD_READ_MULTI, - ATA_CMD_WRITE_MULTI, - ATA_CMD_READ_MULTI_EXT, - ATA_CMD_WRITE_MULTI_EXT, - ATA_CMD_PIO_READ, - ATA_CMD_PIO_WRITE, - ATA_CMD_PIO_READ_EXT, - ATA_CMD_PIO_WRITE_EXT, - ATA_CMD_READ, - ATA_CMD_WRITE, - ATA_CMD_READ_EXT, - ATA_CMD_WRITE_EXT, + WIN_MULTREAD, + WIN_MULTWRITE, + WIN_MULTREAD_EXT, + WIN_MULTWRITE_EXT, + WIN_READ, + WIN_WRITE, + WIN_READ_EXT, + WIN_WRITE_EXT, + WIN_READDMA, + WIN_WRITEDMA, + WIN_READDMA_EXT, + WIN_WRITEDMA_EXT, }; static const u8 ide_data_phases[] = { @@ -271,9 +322,9 @@ static u64 idedisk_read_native_max_address(ide_drive_t *drive, int lba48) /* Create IDE/ATA command request structure */ memset(&args, 0, sizeof(ide_task_t)); if (lba48) - tf->command = ATA_CMD_READ_NATIVE_MAX_EXT; + tf->command = WIN_READ_NATIVE_MAX_EXT; else - tf->command = ATA_CMD_READ_NATIVE_MAX; + tf->command = WIN_READ_NATIVE_MAX; tf->device = ATA_LBA; args.tf_flags = IDE_TFLAG_TF | IDE_TFLAG_DEVICE; if (lba48) @@ -308,10 +359,10 @@ static u64 idedisk_set_max_address(ide_drive_t *drive, u64 addr_req, int lba48) tf->hob_lbal = (addr_req >>= 8) & 0xff; tf->hob_lbam = (addr_req >>= 8) & 0xff; tf->hob_lbah = (addr_req >>= 8) & 0xff; - tf->command = ATA_CMD_SET_MAX_EXT; + tf->command = WIN_SET_MAX_EXT; } else { tf->device = (addr_req >>= 8) & 0x0f; - tf->command = ATA_CMD_SET_MAX; + tf->command = WIN_SET_MAX; } tf->device |= ATA_LBA; args.tf_flags = IDE_TFLAG_TF | IDE_TFLAG_DEVICE; @@ -333,6 +384,25 @@ static unsigned long long sectors_to_MB(unsigned long long n) return n; } +/* + * Bits 10 of command_set_1 and cfs_enable_1 must be equal, + * so on non-buggy drives we need test only one. + * However, we should also check whether these fields are valid. + */ +static inline int idedisk_supports_hpa(const struct hd_driveid *id) +{ + return (id->command_set_1 & 0x0400) && (id->cfs_enable_1 & 0x0400); +} + +/* + * The same here. + */ +static inline int idedisk_supports_lba48(const struct hd_driveid *id) +{ + return (id->command_set_2 & 0x0400) && (id->cfs_enable_2 & 0x0400) + && id->lba_capacity_2; +} + /* * Some disks report total number of sectors instead of * maximum sector address. We list them here. @@ -347,7 +417,7 @@ static const struct drive_list_entry hpa_list[] = { static void idedisk_check_hpa(ide_drive_t *drive) { unsigned long long capacity, set_max; - int lba48 = ata_id_lba48_enabled(drive->id); + int lba48 = idedisk_supports_lba48(drive->id); capacity = drive->capacity64; @@ -383,23 +453,23 @@ static void idedisk_check_hpa(ide_drive_t *drive) static void init_idedisk_capacity(ide_drive_t *drive) { - u16 *id = drive->id; + struct hd_driveid *id = drive->id; /* * If this drive supports the Host Protected Area feature set, * then we may need to change our opinion about the drive's capacity. */ - int hpa = ata_id_hpa_enabled(id); + int hpa = idedisk_supports_hpa(id); - if (ata_id_lba48_enabled(id)) { + if (idedisk_supports_lba48(id)) { /* drive speaks 48-bit LBA */ drive->select.b.lba = 1; - drive->capacity64 = ata_id_u64(id, ATA_ID_LBA_CAPACITY_2); + drive->capacity64 = id->lba_capacity_2; if (hpa) idedisk_check_hpa(drive); - } else if (ata_id_has_lba(id) && ata_id_is_lba_capacity_ok(id)) { + } else if ((id->capability & 2) && lba_capacity_is_ok(id)) { /* drive speaks 28-bit LBA */ drive->select.b.lba = 1; - drive->capacity64 = ata_id_u32(id, ATA_ID_LBA_CAPACITY); + drive->capacity64 = id->lba_capacity; if (hpa) idedisk_check_hpa(drive); } else { @@ -410,7 +480,7 @@ static void init_idedisk_capacity(ide_drive_t *drive) static sector_t idedisk_capacity(ide_drive_t *drive) { - return drive->capacity64; + return drive->capacity64 - drive->sect0; } #ifdef CONFIG_IDE_PROC_FS @@ -420,10 +490,10 @@ static int smart_enable(ide_drive_t *drive) struct ide_taskfile *tf = &args.tf; memset(&args, 0, sizeof(ide_task_t)); - tf->feature = ATA_SMART_ENABLE; - tf->lbam = ATA_SMART_LBAM_PASS; - tf->lbah = ATA_SMART_LBAH_PASS; - tf->command = ATA_CMD_SMART; + tf->feature = SMART_ENABLE; + tf->lbam = SMART_LCYL_PASS; + tf->lbah = SMART_HCYL_PASS; + tf->command = WIN_SMART; args.tf_flags = IDE_TFLAG_TF | IDE_TFLAG_DEVICE; return ide_no_data_taskfile(drive, &args); } @@ -436,9 +506,9 @@ static int get_smart_data(ide_drive_t *drive, u8 *buf, u8 sub_cmd) memset(&args, 0, sizeof(ide_task_t)); tf->feature = sub_cmd; tf->nsect = 0x01; - tf->lbam = ATA_SMART_LBAM_PASS; - tf->lbah = ATA_SMART_LBAH_PASS; - tf->command = ATA_CMD_SMART; + tf->lbam = SMART_LCYL_PASS; + tf->lbah = SMART_HCYL_PASS; + tf->command = WIN_SMART; args.tf_flags = IDE_TFLAG_TF | IDE_TFLAG_DEVICE; args.data_phase = TASKFILE_IN; (void) smart_enable(drive); @@ -453,7 +523,7 @@ static int proc_idedisk_read_cache int len; if (drive->id_read) - len = sprintf(out, "%i\n", drive->id[ATA_ID_BUF_SIZE] / 2); + len = sprintf(out, "%i\n", drive->id->buf_size / 2); else len = sprintf(out, "(none)\n"); @@ -479,14 +549,13 @@ static int proc_idedisk_read_smart(char *page, char **start, off_t off, if (get_smart_data(drive, page, sub_cmd) == 0) { unsigned short *val = (unsigned short *) page; - char *out = (char *)val + SECTOR_SIZE; - + char *out = ((char *)val) + (SECTOR_WORDS * 4); page = out; do { out += sprintf(out, "%04x%c", le16_to_cpu(*val), (++i & 7) ? ' ' : '\n'); val += 1; - } while (i < SECTOR_SIZE / 2); + } while (i < (SECTOR_WORDS * 2)); len = out - page; } @@ -497,14 +566,14 @@ static int proc_idedisk_read_sv (char *page, char **start, off_t off, int count, int *eof, void *data) { return proc_idedisk_read_smart(page, start, off, count, eof, data, - ATA_SMART_READ_VALUES); + SMART_READ_VALUES); } static int proc_idedisk_read_st (char *page, char **start, off_t off, int count, int *eof, void *data) { return proc_idedisk_read_smart(page, start, off, count, eof, data, - ATA_SMART_READ_THRESHOLDS); + SMART_READ_THRESHOLDS); } static ide_proc_entry_t idedisk_proc[] = { @@ -526,11 +595,11 @@ static void idedisk_prepare_flush(struct request_queue *q, struct request *rq) BUG_ON(task == NULL); memset(task, 0, sizeof(*task)); - if (ata_id_flush_ext_enabled(drive->id) && + if (ide_id_has_flush_cache_ext(drive->id) && (drive->capacity64 >= (1UL << 28))) - task->tf.command = ATA_CMD_FLUSH_EXT; + task->tf.command = WIN_FLUSH_CACHE_EXT; else - task->tf.command = ATA_CMD_FLUSH; + task->tf.command = WIN_FLUSH_CACHE; task->tf_flags = IDE_TFLAG_OUT_TF | IDE_TFLAG_OUT_DEVICE | IDE_TFLAG_DYN; task->data_phase = TASKFILE_NO_DATA; @@ -540,8 +609,6 @@ static void idedisk_prepare_flush(struct request_queue *q, struct request *rq) rq->special = task; } -ide_devset_get(multcount, mult_count); - /* * This is tightly woven into the driver->do_special can not touch. * DON'T do it again until a total personality rewrite is committed. @@ -551,7 +618,7 @@ static int set_multcount(ide_drive_t *drive, int arg) struct request *rq; int error; - if (arg < 0 || arg > (drive->id[ATA_ID_MAX_MULTSECT] & 0xff)) + if (arg < 0 || arg > drive->id->max_multsect) return -EINVAL; if (drive->special.b.set_multmode) @@ -568,21 +635,22 @@ static int set_multcount(ide_drive_t *drive, int arg) return (drive->mult_count == arg) ? 0 : -EIO; } -ide_devset_get(nowerr, nowerr); - static int set_nowerr(ide_drive_t *drive, int arg) { if (arg < 0 || arg > 1) return -EINVAL; + if (ide_spin_wait_hwgroup(drive)) + return -EBUSY; drive->nowerr = arg; drive->bad_wstat = arg ? BAD_R_STAT : BAD_W_STAT; + spin_unlock_irq(&ide_lock); return 0; } static void update_ordered(ide_drive_t *drive) { - u16 *id = drive->id; + struct hd_driveid *id = drive->id; unsigned ordered = QUEUE_ORDERED_NONE; prepare_flush_fn *prep_fn = NULL; @@ -598,9 +666,9 @@ static void update_ordered(ide_drive_t *drive) * not available so we don't need to recheck that. */ capacity = idedisk_capacity(drive); - barrier = ata_id_flush_enabled(id) && !drive->noflush && + barrier = ide_id_has_flush_cache(id) && !drive->noflush && (drive->addressing == 0 || capacity <= (1ULL << 28) || - ata_id_flush_ext_enabled(id)); + ide_id_has_flush_cache_ext(id)); printk(KERN_INFO "%s: cache flushes %ssupported\n", drive->name, barrier ? "" : "not "); @@ -615,9 +683,7 @@ static void update_ordered(ide_drive_t *drive) blk_queue_ordered(drive->queue, ordered, prep_fn); } -ide_devset_get(wcache, wcache); - -static int set_wcache(ide_drive_t *drive, int arg) +static int write_cache(ide_drive_t *drive, int arg) { ide_task_t args; int err = 1; @@ -625,11 +691,11 @@ static int set_wcache(ide_drive_t *drive, int arg) if (arg < 0 || arg > 1) return -EINVAL; - if (ata_id_flush_enabled(drive->id)) { + if (ide_id_has_flush_cache(drive->id)) { memset(&args, 0, sizeof(ide_task_t)); args.tf.feature = arg ? - SETFEATURES_WC_ON : SETFEATURES_WC_OFF; - args.tf.command = ATA_CMD_SET_FEATURES; + SETFEATURES_EN_WCACHE : SETFEATURES_DIS_WCACHE; + args.tf.command = WIN_SETFEATURES; args.tf_flags = IDE_TFLAG_TF | IDE_TFLAG_DEVICE; err = ide_no_data_taskfile(drive, &args); if (err == 0) @@ -646,16 +712,14 @@ static int do_idedisk_flushcache(ide_drive_t *drive) ide_task_t args; memset(&args, 0, sizeof(ide_task_t)); - if (ata_id_flush_ext_enabled(drive->id)) - args.tf.command = ATA_CMD_FLUSH_EXT; + if (ide_id_has_flush_cache_ext(drive->id)) + args.tf.command = WIN_FLUSH_CACHE_EXT; else - args.tf.command = ATA_CMD_FLUSH; + args.tf.command = WIN_FLUSH_CACHE; args.tf_flags = IDE_TFLAG_TF | IDE_TFLAG_DEVICE; return ide_no_data_taskfile(drive, &args); } -ide_devset_get(acoustic, acoustic); - static int set_acoustic(ide_drive_t *drive, int arg) { ide_task_t args; @@ -664,24 +728,22 @@ static int set_acoustic(ide_drive_t *drive, int arg) return -EINVAL; memset(&args, 0, sizeof(ide_task_t)); - args.tf.feature = arg ? SETFEATURES_AAM_ON : SETFEATURES_AAM_OFF; + args.tf.feature = arg ? SETFEATURES_EN_AAM : SETFEATURES_DIS_AAM; args.tf.nsect = arg; - args.tf.command = ATA_CMD_SET_FEATURES; + args.tf.command = WIN_SETFEATURES; args.tf_flags = IDE_TFLAG_TF | IDE_TFLAG_DEVICE; ide_no_data_taskfile(drive, &args); drive->acoustic = arg; return 0; } -ide_devset_get(addressing, addressing); - /* * drive->addressing: * 0: 28-bit * 1: 48-bit * 2: 48-bit capable doing 28-bit */ -static int set_addressing(ide_drive_t *drive, int arg) +static int set_lba_addressing(ide_drive_t *drive, int arg) { if (arg < 0 || arg > 2) return -EINVAL; @@ -691,54 +753,52 @@ static int set_addressing(ide_drive_t *drive, int arg) if (drive->hwif->host_flags & IDE_HFLAG_NO_LBA48) return 0; - if (ata_id_lba48_enabled(drive->id) == 0) + if (!idedisk_supports_lba48(drive->id)) return -EIO; - drive->addressing = arg; - return 0; } -ide_devset_rw(acoustic, acoustic); -ide_devset_rw(address, addressing); -ide_devset_rw(multcount, multcount); -ide_devset_rw(wcache, wcache); - -ide_devset_rw_sync(nowerr, nowerr); - #ifdef CONFIG_IDE_PROC_FS -ide_devset_rw_field(bios_cyl, bios_cyl); -ide_devset_rw_field(bios_head, bios_head); -ide_devset_rw_field(bios_sect, bios_sect); -ide_devset_rw_field(failures, failures); -ide_devset_rw_field(lun, lun); -ide_devset_rw_field(max_failures, max_failures); - -static const struct ide_proc_devset idedisk_settings[] = { - IDE_PROC_DEVSET(acoustic, 0, 254), - IDE_PROC_DEVSET(address, 0, 2), - IDE_PROC_DEVSET(bios_cyl, 0, 65535), - IDE_PROC_DEVSET(bios_head, 0, 255), - IDE_PROC_DEVSET(bios_sect, 0, 63), - IDE_PROC_DEVSET(failures, 0, 65535), - IDE_PROC_DEVSET(lun, 0, 7), - IDE_PROC_DEVSET(max_failures, 0, 65535), - IDE_PROC_DEVSET(multcount, 0, 16), - IDE_PROC_DEVSET(nowerr, 0, 1), - IDE_PROC_DEVSET(wcache, 0, 1), - { 0 }, -}; +static void idedisk_add_settings(ide_drive_t *drive) +{ + struct hd_driveid *id = drive->id; + + ide_add_setting(drive, "bios_cyl", SETTING_RW, TYPE_INT, 0, 65535, 1, 1, + &drive->bios_cyl, NULL); + ide_add_setting(drive, "bios_head", SETTING_RW, TYPE_BYTE, 0, 255, 1, 1, + &drive->bios_head, NULL); + ide_add_setting(drive, "bios_sect", SETTING_RW, TYPE_BYTE, 0, 63, 1, 1, + &drive->bios_sect, NULL); + ide_add_setting(drive, "address", SETTING_RW, TYPE_BYTE, 0, 2, 1, 1, + &drive->addressing, set_lba_addressing); + ide_add_setting(drive, "multcount", SETTING_RW, TYPE_BYTE, 0, + id->max_multsect, 1, 1, &drive->mult_count, + set_multcount); + ide_add_setting(drive, "nowerr", SETTING_RW, TYPE_BYTE, 0, 1, 1, 1, + &drive->nowerr, set_nowerr); + ide_add_setting(drive, "lun", SETTING_RW, TYPE_INT, 0, 7, 1, 1, + &drive->lun, NULL); + ide_add_setting(drive, "wcache", SETTING_RW, TYPE_BYTE, 0, 1, 1, 1, + &drive->wcache, write_cache); + ide_add_setting(drive, "acoustic", SETTING_RW, TYPE_BYTE, 0, 254, 1, 1, + &drive->acoustic, set_acoustic); + ide_add_setting(drive, "failures", SETTING_RW, TYPE_INT, 0, 65535, 1, 1, + &drive->failures, NULL); + ide_add_setting(drive, "max_failures", SETTING_RW, TYPE_INT, 0, 65535, + 1, 1, &drive->max_failures, NULL); +} +#else +static inline void idedisk_add_settings(ide_drive_t *drive) { ; } #endif static void idedisk_setup(ide_drive_t *drive) { - struct ide_disk_obj *idkp = drive->driver_data; ide_hwif_t *hwif = drive->hwif; - u16 *id = drive->id; - char *m = (char *)&id[ATA_ID_PROD]; + struct hd_driveid *id = drive->id; unsigned long long capacity; - ide_proc_register_driver(drive, idkp->driver); + idedisk_add_settings(drive); if (drive->id_read == 0) return; @@ -747,11 +807,11 @@ static void idedisk_setup(ide_drive_t *drive) /* * Removable disks (eg. SYQUEST); ignore 'WD' drives */ - if (m[0] != 'W' || m[1] != 'D') + if (id->model[0] != 'W' || id->model[1] != 'D') drive->doorlocking = 1; } - (void)set_addressing(drive, 1); + (void)set_lba_addressing(drive, 1); if (drive->addressing == 1) { int max_s = 2048; @@ -793,7 +853,8 @@ static void idedisk_setup(ide_drive_t *drive) capacity = idedisk_capacity(drive); if (!drive->forced_geom) { - if (ata_id_lba48_enabled(drive->id)) { + + if (idedisk_supports_lba48(drive->id)) { /* compatibility */ drive->bios_sect = 63; drive->bios_head = 255; @@ -819,22 +880,22 @@ static void idedisk_setup(ide_drive_t *drive) drive->name, capacity, sectors_to_MB(capacity)); /* Only print cache size when it was specified */ - if (id[ATA_ID_BUF_SIZE]) - printk(KERN_CONT " w/%dKiB Cache", id[ATA_ID_BUF_SIZE] / 2); + if (id->buf_size) + printk(KERN_CONT " w/%dKiB Cache", id->buf_size / 2); printk(KERN_CONT ", CHS=%d/%d/%d\n", drive->bios_cyl, drive->bios_head, drive->bios_sect); /* write cache enabled? */ - if ((id[ATA_ID_CSFO] & 1) || ata_id_wcache_enabled(id)) + if ((id->csfo & 1) || (id->cfs_enable_1 & (1 << 5))) drive->wcache = 1; - set_wcache(drive, 1); + write_cache(drive, 1); } static void ide_cacheflush_p(ide_drive_t *drive) { - if (!drive->wcache || ata_id_flush_enabled(drive->id) == 0) + if (!drive->wcache || !ide_id_has_flush_cache(drive->id)) return; if (do_idedisk_flushcache(drive)) @@ -876,7 +937,7 @@ static int ide_disk_probe(ide_drive_t *drive); */ static void ide_disk_resume(ide_drive_t *drive) { - if (ata_id_hpa_enabled(drive->id)) + if (idedisk_supports_hpa(drive->id)) init_idedisk_capacity(drive); } @@ -919,12 +980,12 @@ static ide_driver_t idedisk_driver = { .shutdown = ide_device_shutdown, .version = IDEDISK_VERSION, .media = ide_disk, + .supports_dsc_overlap = 0, .do_request = ide_do_rw_disk, .end_request = ide_end_request, .error = __ide_error, #ifdef CONFIG_IDE_PROC_FS .proc = idedisk_proc, - .settings = idedisk_settings, #endif }; @@ -933,7 +994,7 @@ static int idedisk_set_doorlock(ide_drive_t *drive, int on) ide_task_t task; memset(&task, 0, sizeof(task)); - task.tf.command = on ? ATA_CMD_MEDIA_LOCK : ATA_CMD_MEDIA_UNLOCK; + task.tf.command = on ? WIN_DOORLOCK : WIN_DOORUNLOCK; task.tf_flags = IDE_TFLAG_TF | IDE_TFLAG_DEVICE; return ide_no_data_taskfile(drive, &task); @@ -998,28 +1059,52 @@ static int idedisk_getgeo(struct block_device *bdev, struct hd_geometry *geo) return 0; } -static const struct ide_ioctl_devset ide_disk_ioctl_settings[] = { -{ HDIO_GET_ADDRESS, HDIO_SET_ADDRESS, &ide_devset_address }, -{ HDIO_GET_MULTCOUNT, HDIO_SET_MULTCOUNT, &ide_devset_multcount }, -{ HDIO_GET_NOWERR, HDIO_SET_NOWERR, &ide_devset_nowerr }, -{ HDIO_GET_WCACHE, HDIO_SET_WCACHE, &ide_devset_wcache }, -{ HDIO_GET_ACOUSTIC, HDIO_SET_ACOUSTIC, &ide_devset_acoustic }, -{ 0 } -}; - static int idedisk_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg) { + unsigned long flags; struct block_device *bdev = inode->i_bdev; struct ide_disk_obj *idkp = ide_disk_g(bdev->bd_disk); ide_drive_t *drive = idkp->drive; - int err; - - err = ide_setting_ioctl(drive, bdev, cmd, arg, ide_disk_ioctl_settings); - if (err != -EOPNOTSUPP) - return err; + int err, (*setfunc)(ide_drive_t *, int); + u8 *val; + + switch (cmd) { + case HDIO_GET_ADDRESS: val = &drive->addressing; goto read_val; + case HDIO_GET_MULTCOUNT: val = &drive->mult_count; goto read_val; + case HDIO_GET_NOWERR: val = &drive->nowerr; goto read_val; + case HDIO_GET_WCACHE: val = &drive->wcache; goto read_val; + case HDIO_GET_ACOUSTIC: val = &drive->acoustic; goto read_val; + case HDIO_SET_ADDRESS: setfunc = set_lba_addressing; goto set_val; + case HDIO_SET_MULTCOUNT: setfunc = set_multcount; goto set_val; + case HDIO_SET_NOWERR: setfunc = set_nowerr; goto set_val; + case HDIO_SET_WCACHE: setfunc = write_cache; goto set_val; + case HDIO_SET_ACOUSTIC: setfunc = set_acoustic; goto set_val; + } return generic_ide_ioctl(drive, file, bdev, cmd, arg); + +read_val: + mutex_lock(&ide_setting_mtx); + spin_lock_irqsave(&ide_lock, flags); + err = *val; + spin_unlock_irqrestore(&ide_lock, flags); + mutex_unlock(&ide_setting_mtx); + return err >= 0 ? put_user(err, (long __user *)arg) : err; + +set_val: + if (bdev != bdev->bd_contains) + err = -EINVAL; + else { + if (!capable(CAP_SYS_ADMIN)) + err = -EACCES; + else { + mutex_lock(&ide_setting_mtx); + err = setfunc(drive, arg); + mutex_unlock(&ide_setting_mtx); + } + } + return err; } static int idedisk_media_changed(struct gendisk *disk) @@ -1063,7 +1148,8 @@ static int ide_disk_probe(ide_drive_t *drive) /* strstr("foo", "") is non-NULL */ if (!strstr("ide-disk", drive->driver_req)) goto failed; - + if (!drive->present) + goto failed; if (drive->media != ide_disk) goto failed; @@ -1077,6 +1163,8 @@ static int ide_disk_probe(ide_drive_t *drive) ide_init_disk(g, drive); + ide_proc_register_driver(drive, &idedisk_driver); + kref_init(&idkp->kref); idkp->drive = drive; diff --git a/trunk/drivers/ide/ide-dma.c b/trunk/drivers/ide/ide-dma.c index ef2f1504c0d5..3fa07c0aeaa4 100644 --- a/trunk/drivers/ide/ide-dma.c +++ b/trunk/drivers/ide/ide-dma.c @@ -106,7 +106,7 @@ ide_startstop_t ide_dma_intr (ide_drive_t *drive) dma_stat = hwif->dma_ops->dma_end(drive); stat = hwif->tp_ops->read_status(hwif); - if (OK_STAT(stat, DRIVE_READY, drive->bad_wstat | ATA_DRQ)) { + if (OK_STAT(stat,DRIVE_READY,drive->bad_wstat|DRQ_STAT)) { if (!dma_stat) { struct request *rq = HWGROUP(drive)->rq; @@ -288,7 +288,7 @@ EXPORT_SYMBOL_GPL(ide_destroy_dmatable); static int config_drive_for_dma (ide_drive_t *drive) { ide_hwif_t *hwif = drive->hwif; - u16 *id = drive->id; + struct hd_driveid *id = drive->id; if (drive->media != ide_disk) { if (hwif->host_flags & IDE_HFLAG_NO_ATAPI_DMA) @@ -299,17 +299,16 @@ static int config_drive_for_dma (ide_drive_t *drive) * Enable DMA on any drive that has * UltraDMA (mode 0/1/2/3/4/5/6) enabled */ - if ((id[ATA_ID_FIELD_VALID] & 4) && - ((id[ATA_ID_UDMA_MODES] >> 8) & 0x7f)) + if ((id->field_valid & 4) && ((id->dma_ultra >> 8) & 0x7f)) return 1; /* * Enable DMA on any drive that has mode2 DMA * (multi or single) enabled */ - if (id[ATA_ID_FIELD_VALID] & 2) /* regular DMA */ - if ((id[ATA_ID_MWDMA_MODES] & 0x404) == 0x404 || - (id[ATA_ID_SWDMA_MODES] & 0x404) == 0x404) + if (id->field_valid & 2) /* regular DMA */ + if ((id->dma_mword & 0x404) == 0x404 || + (id->dma_1word & 0x404) == 0x404) return 1; /* Consult the list of known "good" drives */ @@ -592,12 +591,12 @@ static inline int config_drive_for_dma(ide_drive_t *drive) { return 0; } int __ide_dma_bad_drive (ide_drive_t *drive) { - u16 *id = drive->id; + struct hd_driveid *id = drive->id; int blacklist = ide_in_drive_list(id, drive_blacklist); if (blacklist) { printk(KERN_WARNING "%s: Disabling (U)DMA for %s (blacklisted)\n", - drive->name, (char *)&id[ATA_ID_PROD]); + drive->name, id->model); return blacklist; } return 0; @@ -613,21 +612,21 @@ static const u8 xfer_mode_bases[] = { static unsigned int ide_get_mode_mask(ide_drive_t *drive, u8 base, u8 req_mode) { - u16 *id = drive->id; + struct hd_driveid *id = drive->id; ide_hwif_t *hwif = drive->hwif; const struct ide_port_ops *port_ops = hwif->port_ops; unsigned int mask = 0; switch(base) { case XFER_UDMA_0: - if ((id[ATA_ID_FIELD_VALID] & 4) == 0) + if ((id->field_valid & 4) == 0) break; if (port_ops && port_ops->udma_filter) mask = port_ops->udma_filter(drive); else mask = hwif->ultra_mask; - mask &= id[ATA_ID_UDMA_MODES]; + mask &= id->dma_ultra; /* * avoid false cable warning from eighty_ninty_three() @@ -638,19 +637,19 @@ static unsigned int ide_get_mode_mask(ide_drive_t *drive, u8 base, u8 req_mode) } break; case XFER_MW_DMA_0: - if ((id[ATA_ID_FIELD_VALID] & 2) == 0) + if ((id->field_valid & 2) == 0) break; if (port_ops && port_ops->mdma_filter) mask = port_ops->mdma_filter(drive); else mask = hwif->mwdma_mask; - mask &= id[ATA_ID_MWDMA_MODES]; + mask &= id->dma_mword; break; case XFER_SW_DMA_0: - if (id[ATA_ID_FIELD_VALID] & 2) { - mask = id[ATA_ID_SWDMA_MODES] & hwif->swdma_mask; - } else if (id[ATA_ID_OLD_DMA_MODES] >> 8) { - u8 mode = id[ATA_ID_OLD_DMA_MODES] >> 8; + if (id->field_valid & 2) { + mask = id->dma_1word & hwif->swdma_mask; + } else if (id->tDMA) { + u8 mode = id->tDMA; /* * if the mode is valid convert it to the mask @@ -707,8 +706,7 @@ u8 ide_find_dma_mode(ide_drive_t *drive, u8 req_mode) /* * is this correct? */ - if (ide_dma_good_drive(drive) && - drive->id[ATA_ID_EIDE_DMA_TIME] < 150) + if (ide_dma_good_drive(drive) && drive->id->eide_dma_time < 150) mode = XFER_MW_DMA_1; } @@ -727,7 +725,7 @@ static int ide_tune_dma(ide_drive_t *drive) ide_hwif_t *hwif = drive->hwif; u8 speed; - if (drive->nodma || ata_id_has_dma(drive->id) == 0) + if (drive->nodma || (drive->id->capability & 1) == 0) return 0; /* consult the list of known "bad" drives */ @@ -769,15 +767,13 @@ static int ide_dma_check(ide_drive_t *drive) int ide_id_dma_bug(ide_drive_t *drive) { - u16 *id = drive->id; + struct hd_driveid *id = drive->id; - if (id[ATA_ID_FIELD_VALID] & 4) { - if ((id[ATA_ID_UDMA_MODES] >> 8) && - (id[ATA_ID_MWDMA_MODES] >> 8)) + if (id->field_valid & 4) { + if ((id->dma_ultra >> 8) && (id->dma_mword >> 8)) goto err_out; - } else if (id[ATA_ID_FIELD_VALID] & 2) { - if ((id[ATA_ID_MWDMA_MODES] >> 8) && - (id[ATA_ID_SWDMA_MODES] >> 8)) + } else if (id->field_valid & 2) { + if ((id->dma_mword >> 8) && (id->dma_1word >> 8)) goto err_out; } return 0; diff --git a/trunk/drivers/ide/ide-floppy.c b/trunk/drivers/ide/ide-floppy.c index d36f155470a4..e9034c0125f3 100644 --- a/trunk/drivers/ide/ide-floppy.c +++ b/trunk/drivers/ide/ide-floppy.c @@ -15,8 +15,6 @@ * Documentation/ide/ChangeLog.ide-floppy.1996-2002 */ -#define DRV_NAME "ide-floppy" - #define IDEFLOPPY_VERSION "1.00" #include @@ -33,10 +31,8 @@ #include #include #include -#include #include #include -#include #include @@ -46,8 +42,6 @@ #include #include -#include "ide-floppy.h" - /* define to see debug info */ #define IDEFLOPPY_DEBUG_LOG 0 @@ -61,23 +55,102 @@ #define debug_log(fmt, args...) do {} while (0) #endif + +/* Some drives require a longer irq timeout. */ +#define IDEFLOPPY_WAIT_CMD (5 * WAIT_CMD) + /* * After each failed packet command we issue a request sense command and retry * the packet command IDEFLOPPY_MAX_PC_RETRIES times. */ #define IDEFLOPPY_MAX_PC_RETRIES 3 +/* + * With each packet command, we allocate a buffer of IDEFLOPPY_PC_BUFFER_SIZE + * bytes. + */ +#define IDEFLOPPY_PC_BUFFER_SIZE 256 + +/* + * In various places in the driver, we need to allocate storage for packet + * commands and requests, which will remain valid while we leave the driver to + * wait for an interrupt or a timeout event. + */ +#define IDEFLOPPY_PC_STACK (10 + IDEFLOPPY_MAX_PC_RETRIES) + /* format capacities descriptor codes */ #define CAPACITY_INVALID 0x00 #define CAPACITY_UNFORMATTED 0x01 #define CAPACITY_CURRENT 0x02 #define CAPACITY_NO_CARTRIDGE 0x03 +/* + * Most of our global data which we need to save even as we leave the driver + * due to an interrupt or a timer event is stored in a variable of type + * idefloppy_floppy_t, defined below. + */ +typedef struct ide_floppy_obj { + ide_drive_t *drive; + ide_driver_t *driver; + struct gendisk *disk; + struct kref kref; + unsigned int openers; /* protected by BKL for now */ + + /* Current packet command */ + struct ide_atapi_pc *pc; + /* Last failed packet command */ + struct ide_atapi_pc *failed_pc; + /* Packet command stack */ + struct ide_atapi_pc pc_stack[IDEFLOPPY_PC_STACK]; + /* Next free packet command storage space */ + int pc_stack_index; + struct request rq_stack[IDEFLOPPY_PC_STACK]; + /* We implement a circular array */ + int rq_stack_index; + + /* Last error information */ + u8 sense_key, asc, ascq; + /* delay this long before sending packet command */ + u8 ticks; + int progress_indication; + + /* Device information */ + /* Current format */ + int blocks, block_size, bs_factor; + /* Last format capacity descriptor */ + u8 cap_desc[8]; + /* Copy of the flexible disk page */ + u8 flexible_disk_page[32]; + /* Write protect */ + int wp; + /* Supports format progress report */ + int srfp; +} idefloppy_floppy_t; + #define IDEFLOPPY_TICKS_DELAY HZ/20 /* default delay for ZIP 100 (50ms) */ +/* Defines for the MODE SENSE command */ +#define MODE_SENSE_CURRENT 0x00 +#define MODE_SENSE_CHANGEABLE 0x01 +#define MODE_SENSE_DEFAULT 0x02 +#define MODE_SENSE_SAVED 0x03 + +/* IOCTLs used in low-level formatting. */ +#define IDEFLOPPY_IOCTL_FORMAT_SUPPORTED 0x4600 +#define IDEFLOPPY_IOCTL_FORMAT_GET_CAPACITY 0x4601 +#define IDEFLOPPY_IOCTL_FORMAT_START 0x4602 +#define IDEFLOPPY_IOCTL_FORMAT_GET_PROGRESS 0x4603 + /* Error code returned in rq->errors to the higher part of the driver. */ #define IDEFLOPPY_ERROR_GENERAL 101 +/* + * Pages of the SELECT SENSE / MODE SENSE packet commands. + * See SFF-8070i spec. + */ +#define IDEFLOPPY_CAPABILITIES_PAGE 0x1b +#define IDEFLOPPY_FLEXIBLE_DISK_PAGE 0x05 + static DEFINE_MUTEX(idefloppy_ref_mutex); #define to_ide_floppy(obj) container_of(obj, struct ide_floppy_obj, kref) @@ -146,6 +219,44 @@ static int idefloppy_end_request(ide_drive_t *drive, int uptodate, int nsecs) return 0; } +static void ide_floppy_io_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc, + unsigned int bcount, int direction) +{ + ide_hwif_t *hwif = drive->hwif; + struct request *rq = pc->rq; + struct req_iterator iter; + struct bio_vec *bvec; + unsigned long flags; + int count, done = 0; + char *data; + + rq_for_each_segment(bvec, rq, iter) { + if (!bcount) + break; + + count = min(bvec->bv_len, bcount); + + data = bvec_kmap_irq(bvec, &flags); + if (direction) + hwif->tp_ops->output_data(drive, NULL, data, count); + else + hwif->tp_ops->input_data(drive, NULL, data, count); + bvec_kunmap_irq(data, &flags); + + bcount -= count; + pc->b_count += count; + done += count; + } + + idefloppy_end_request(drive, 1, done >> 9); + + if (bcount) { + printk(KERN_ERR "%s: leftover data in %s, bcount == %d\n", + drive->name, __func__, bcount); + ide_pad_transfer(drive, direction, bcount); + } +} + static void idefloppy_update_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc) { @@ -156,6 +267,43 @@ static void idefloppy_update_buffers(ide_drive_t *drive, idefloppy_end_request(drive, 1, 0); } +/* + * Generate a new packet command request in front of the request queue, before + * the current request so that it will be processed immediately, on the next + * pass through the driver. + */ +static void idefloppy_queue_pc_head(ide_drive_t *drive, struct ide_atapi_pc *pc, + struct request *rq) +{ + struct ide_floppy_obj *floppy = drive->driver_data; + + blk_rq_init(NULL, rq); + rq->buffer = (char *) pc; + rq->cmd_type = REQ_TYPE_SPECIAL; + rq->cmd_flags |= REQ_PREEMPT; + rq->rq_disk = floppy->disk; + memcpy(rq->cmd, pc->c, 12); + ide_do_drive_cmd(drive, rq); +} + +static struct ide_atapi_pc *idefloppy_next_pc_storage(ide_drive_t *drive) +{ + idefloppy_floppy_t *floppy = drive->driver_data; + + if (floppy->pc_stack_index == IDEFLOPPY_PC_STACK) + floppy->pc_stack_index = 0; + return (&floppy->pc_stack[floppy->pc_stack_index++]); +} + +static struct request *idefloppy_next_rq_storage(ide_drive_t *drive) +{ + idefloppy_floppy_t *floppy = drive->driver_data; + + if (floppy->rq_stack_index == IDEFLOPPY_PC_STACK) + floppy->rq_stack_index = 0; + return (&floppy->rq_stack[floppy->rq_stack_index++]); +} + static void ide_floppy_callback(ide_drive_t *drive) { idefloppy_floppy_t *floppy = drive->driver_data; @@ -193,9 +341,16 @@ static void ide_floppy_callback(ide_drive_t *drive) idefloppy_end_request(drive, uptodate, 0); } -void ide_floppy_create_request_sense_cmd(struct ide_atapi_pc *pc) +static void idefloppy_init_pc(struct ide_atapi_pc *pc) +{ + memset(pc, 0, sizeof(*pc)); + pc->buf = pc->pc_buf; + pc->buf_size = IDEFLOPPY_PC_BUFFER_SIZE; +} + +static void idefloppy_create_request_sense_cmd(struct ide_atapi_pc *pc) { - ide_init_pc(pc); + idefloppy_init_pc(pc); pc->c[0] = GPCMD_REQUEST_SENSE; pc->c[4] = 255; pc->req_xfer = 18; @@ -207,13 +362,14 @@ void ide_floppy_create_request_sense_cmd(struct ide_atapi_pc *pc) */ static void idefloppy_retry_pc(ide_drive_t *drive) { - struct ide_floppy_obj *floppy = drive->driver_data; - struct request *rq = &floppy->request_sense_rq; - struct ide_atapi_pc *pc = &floppy->request_sense_pc; + struct ide_atapi_pc *pc; + struct request *rq; (void)ide_read_error(drive); - ide_floppy_create_request_sense_cmd(pc); - ide_queue_pc_head(drive, floppy->disk, pc, rq); + pc = idefloppy_next_pc_storage(drive); + rq = idefloppy_next_rq_storage(drive); + idefloppy_create_request_sense_cmd(pc); + idefloppy_queue_pc_head(drive, pc, rq); } /* The usual interrupt handler called during a packet command. */ @@ -222,8 +378,8 @@ static ide_startstop_t idefloppy_pc_intr(ide_drive_t *drive) idefloppy_floppy_t *floppy = drive->driver_data; return ide_pc_intr(drive, floppy->pc, idefloppy_pc_intr, - WAIT_FLOPPY_CMD, NULL, idefloppy_update_buffers, - idefloppy_retry_pc, NULL, ide_io_buffers); + IDEFLOPPY_WAIT_CMD, NULL, idefloppy_update_buffers, + idefloppy_retry_pc, NULL, ide_floppy_io_buffers); } /* @@ -240,9 +396,10 @@ static int idefloppy_transfer_pc(ide_drive_t *drive) drive->hwif->tp_ops->output_data(drive, NULL, floppy->pc->c, 12); /* Timeout for the packet command */ - return WAIT_FLOPPY_CMD; + return IDEFLOPPY_WAIT_CMD; } + /* * Called as an interrupt (or directly). When the device says it's ready for a * packet, we schedule the packet transfer to occur about 2-3 ticks later in @@ -267,7 +424,7 @@ static ide_startstop_t idefloppy_start_pc_transfer(ide_drive_t *drive) timeout = floppy->ticks; expiry = &idefloppy_transfer_pc; } else { - timeout = WAIT_FLOPPY_CMD; + timeout = IDEFLOPPY_WAIT_CMD; expiry = NULL; } @@ -317,27 +474,58 @@ static ide_startstop_t idefloppy_issue_pc(ide_drive_t *drive, pc->retries++; return ide_issue_pc(drive, pc, idefloppy_start_pc_transfer, - WAIT_FLOPPY_CMD, NULL); + IDEFLOPPY_WAIT_CMD, NULL); +} + +static void idefloppy_create_prevent_cmd(struct ide_atapi_pc *pc, int prevent) +{ + debug_log("creating prevent removal command, prevent = %d\n", prevent); + + idefloppy_init_pc(pc); + pc->c[0] = GPCMD_PREVENT_ALLOW_MEDIUM_REMOVAL; + pc->c[4] = prevent; } -void ide_floppy_create_read_capacity_cmd(struct ide_atapi_pc *pc) +static void idefloppy_create_read_capacity_cmd(struct ide_atapi_pc *pc) { - ide_init_pc(pc); + idefloppy_init_pc(pc); pc->c[0] = GPCMD_READ_FORMAT_CAPACITIES; pc->c[7] = 255; pc->c[8] = 255; pc->req_xfer = 255; } +static void idefloppy_create_format_unit_cmd(struct ide_atapi_pc *pc, int b, + int l, int flags) +{ + idefloppy_init_pc(pc); + pc->c[0] = GPCMD_FORMAT_UNIT; + pc->c[1] = 0x17; + + memset(pc->buf, 0, 12); + pc->buf[1] = 0xA2; + /* Default format list header, u8 1: FOV/DCRT/IMM bits set */ + + if (flags & 1) /* Verify bit on... */ + pc->buf[1] ^= 0x20; /* ... turn off DCRT bit */ + pc->buf[3] = 8; + + put_unaligned(cpu_to_be32(b), (unsigned int *)(&pc->buf[4])); + put_unaligned(cpu_to_be32(l), (unsigned int *)(&pc->buf[8])); + pc->buf_size = 12; + pc->flags |= PC_FLAG_WRITING; +} + /* A mode sense command is used to "sense" floppy parameters. */ -void ide_floppy_create_mode_sense_cmd(struct ide_atapi_pc *pc, u8 page_code) +static void idefloppy_create_mode_sense_cmd(struct ide_atapi_pc *pc, + u8 page_code, u8 type) { u16 length = 8; /* sizeof(Mode Parameter Header) = 8 Bytes */ - ide_init_pc(pc); + idefloppy_init_pc(pc); pc->c[0] = GPCMD_MODE_SENSE_10; pc->c[1] = 0; - pc->c[2] = page_code; + pc->c[2] = page_code + (type << 6); switch (page_code) { case IDEFLOPPY_CAPABILITIES_PAGE: @@ -354,6 +542,13 @@ void ide_floppy_create_mode_sense_cmd(struct ide_atapi_pc *pc, u8 page_code) pc->req_xfer = length; } +static void idefloppy_create_start_stop_cmd(struct ide_atapi_pc *pc, int start) +{ + idefloppy_init_pc(pc); + pc->c[0] = GPCMD_START_STOP_UNIT; + pc->c[4] = start; +} + static void idefloppy_create_rw_cmd(idefloppy_floppy_t *floppy, struct ide_atapi_pc *pc, struct request *rq, unsigned long sector) @@ -365,7 +560,7 @@ static void idefloppy_create_rw_cmd(idefloppy_floppy_t *floppy, debug_log("create_rw10_cmd: block == %d, blocks == %d\n", block, blocks); - ide_init_pc(pc); + idefloppy_init_pc(pc); pc->c[0] = cmd == READ ? GPCMD_READ_10 : GPCMD_WRITE_10; put_unaligned(cpu_to_be16(blocks), (unsigned short *)&pc->c[7]); put_unaligned(cpu_to_be32(block), (unsigned int *) &pc->c[2]); @@ -373,7 +568,7 @@ static void idefloppy_create_rw_cmd(idefloppy_floppy_t *floppy, memcpy(rq->cmd, pc->c, 12); pc->rq = rq; - pc->b_count = 0; + pc->b_count = cmd == READ ? 0 : rq->bio->bi_size; if (rq->cmd_flags & REQ_RW) pc->flags |= PC_FLAG_WRITING; pc->buf = NULL; @@ -384,10 +579,10 @@ static void idefloppy_create_rw_cmd(idefloppy_floppy_t *floppy, static void idefloppy_blockpc_cmd(idefloppy_floppy_t *floppy, struct ide_atapi_pc *pc, struct request *rq) { - ide_init_pc(pc); + idefloppy_init_pc(pc); memcpy(pc->c, rq->cmd, sizeof(pc->c)); pc->rq = rq; - pc->b_count = 0; + pc->b_count = rq->data_len; if (rq->data_len && rq_data_dir(rq) == WRITE) pc->flags |= PC_FLAG_WRITING; pc->buf = rq->data; @@ -404,17 +599,15 @@ static ide_startstop_t idefloppy_do_request(ide_drive_t *drive, struct request *rq, sector_t block_s) { idefloppy_floppy_t *floppy = drive->driver_data; - ide_hwif_t *hwif = drive->hwif; struct ide_atapi_pc *pc; unsigned long block = (unsigned long)block_s; - debug_log("%s: dev: %s, cmd: 0x%x, cmd_type: %x, errors: %d\n", - __func__, rq->rq_disk ? rq->rq_disk->disk_name : "?", - rq->cmd[0], rq->cmd_type, rq->errors); - - debug_log("%s: sector: %ld, nr_sectors: %ld, current_nr_sectors: %d\n", - __func__, (long)rq->sector, rq->nr_sectors, - rq->current_nr_sectors); + debug_log("dev: %s, cmd_type: %x, errors: %d\n", + rq->rq_disk ? rq->rq_disk->disk_name : "?", + rq->cmd_type, rq->errors); + debug_log("sector: %ld, nr_sectors: %ld, " + "current_nr_sectors: %d\n", (long)rq->sector, + rq->nr_sectors, rq->current_nr_sectors); if (rq->errors >= ERROR_MAX) { if (floppy->failed_pc) @@ -433,12 +626,12 @@ static ide_startstop_t idefloppy_do_request(ide_drive_t *drive, idefloppy_end_request(drive, 0, 0); return ide_stopped; } - pc = &floppy->queued_pc; + pc = idefloppy_next_pc_storage(drive); idefloppy_create_rw_cmd(floppy, pc, rq, block); } else if (blk_special_request(rq)) { pc = (struct ide_atapi_pc *) rq->buffer; } else if (blk_pc_request(rq)) { - pc = &floppy->queued_pc; + pc = idefloppy_next_pc_storage(drive); idefloppy_blockpc_cmd(floppy, pc, rq); } else { blk_dump_rq_flags(rq, @@ -447,17 +640,31 @@ static ide_startstop_t idefloppy_do_request(ide_drive_t *drive, return ide_stopped; } - ide_init_sg_cmd(drive, rq); - ide_map_sg(drive, rq); - - pc->sg = hwif->sg_table; - pc->sg_cnt = hwif->sg_nents; - pc->rq = rq; return idefloppy_issue_pc(drive, pc); } +/* + * Add a special packet command request to the tail of the request queue, + * and wait for it to be serviced. + */ +static int idefloppy_queue_pc_tail(ide_drive_t *drive, struct ide_atapi_pc *pc) +{ + struct ide_floppy_obj *floppy = drive->driver_data; + struct request *rq; + int error; + + rq = blk_get_request(drive->queue, READ, __GFP_WAIT); + rq->buffer = (char *) pc; + rq->cmd_type = REQ_TYPE_SPECIAL; + memcpy(rq->cmd, pc->c, 12); + error = blk_execute_rq(drive->queue, floppy->disk, rq, 0); + blk_put_request(rq); + + return error; +} + /* * Look at the flexible disk page parameters. We ignore the CHS capacity * parameters and use the LBA parameters instead. @@ -465,28 +672,22 @@ static ide_startstop_t idefloppy_do_request(ide_drive_t *drive, static int ide_floppy_get_flexible_disk_page(ide_drive_t *drive) { idefloppy_floppy_t *floppy = drive->driver_data; - struct gendisk *disk = floppy->disk; struct ide_atapi_pc pc; u8 *page; int capacity, lba_capacity; u16 transfer_rate, sector_size, cyls, rpm; u8 heads, sectors; - ide_floppy_create_mode_sense_cmd(&pc, IDEFLOPPY_FLEXIBLE_DISK_PAGE); + idefloppy_create_mode_sense_cmd(&pc, IDEFLOPPY_FLEXIBLE_DISK_PAGE, + MODE_SENSE_CURRENT); - if (ide_queue_pc_tail(drive, disk, &pc)) { + if (idefloppy_queue_pc_tail(drive, &pc)) { printk(KERN_ERR "ide-floppy: Can't get flexible disk page" " parameters\n"); return 1; } - - if (pc.buf[3] & 0x80) - drive->atapi_flags |= IDE_AFLAG_WP; - else - drive->atapi_flags &= ~IDE_AFLAG_WP; - - set_disk_ro(disk, !!(drive->atapi_flags & IDE_AFLAG_WP)); - + floppy->wp = !!(pc.buf[3] & 0x80); + set_disk_ro(floppy->disk, floppy->wp); page = &pc.buf[8]; transfer_rate = be16_to_cpup((__be16 *)&pc.buf[8 + 2]); @@ -520,6 +721,23 @@ static int ide_floppy_get_flexible_disk_page(ide_drive_t *drive) return 0; } +static int idefloppy_get_sfrp_bit(ide_drive_t *drive) +{ + idefloppy_floppy_t *floppy = drive->driver_data; + struct ide_atapi_pc pc; + + floppy->srfp = 0; + idefloppy_create_mode_sense_cmd(&pc, IDEFLOPPY_CAPABILITIES_PAGE, + MODE_SENSE_CURRENT); + + pc.flags |= PC_FLAG_SUPPRESS_ERROR; + if (idefloppy_queue_pc_tail(drive, &pc)) + return 1; + + floppy->srfp = pc.buf[8 + 2] & 0x40; + return (0); +} + /* * Determine if a media is present in the floppy drive, and if so, its LBA * capacity. @@ -527,7 +745,6 @@ static int ide_floppy_get_flexible_disk_page(ide_drive_t *drive) static int ide_floppy_get_capacity(ide_drive_t *drive) { idefloppy_floppy_t *floppy = drive->driver_data; - struct gendisk *disk = floppy->disk; struct ide_atapi_pc pc; u8 *cap_desc; u8 header_len, desc_cnt; @@ -539,8 +756,8 @@ static int ide_floppy_get_capacity(ide_drive_t *drive) floppy->bs_factor = 1; set_capacity(floppy->disk, 0); - ide_floppy_create_read_capacity_cmd(&pc); - if (ide_queue_pc_tail(drive, disk, &pc)) { + idefloppy_create_read_capacity_cmd(&pc); + if (idefloppy_queue_pc_tail(drive, &pc)) { printk(KERN_ERR "ide-floppy: Can't get floppy parameters\n"); return 1; } @@ -615,55 +832,202 @@ static int ide_floppy_get_capacity(ide_drive_t *drive) if (!(drive->atapi_flags & IDE_AFLAG_CLIK_DRIVE)) (void) ide_floppy_get_flexible_disk_page(drive); - set_capacity(disk, floppy->blocks * floppy->bs_factor); - + set_capacity(floppy->disk, floppy->blocks * floppy->bs_factor); return rc; } -static sector_t idefloppy_capacity(ide_drive_t *drive) +/* + * Obtain the list of formattable capacities. + * Very similar to ide_floppy_get_capacity, except that we push the capacity + * descriptors to userland, instead of our own structures. + * + * Userland gives us the following structure: + * + * struct idefloppy_format_capacities { + * int nformats; + * struct { + * int nblocks; + * int blocksize; + * } formats[]; + * }; + * + * userland initializes nformats to the number of allocated formats[] records. + * On exit we set nformats to the number of records we've actually initialized. + */ + +static int ide_floppy_get_format_capacities(ide_drive_t *drive, int __user *arg) { - idefloppy_floppy_t *floppy = drive->driver_data; - unsigned long capacity = floppy->blocks * floppy->bs_factor; + struct ide_atapi_pc pc; + u8 header_len, desc_cnt; + int i, blocks, length, u_array_size, u_index; + int __user *argp; - return capacity; + if (get_user(u_array_size, arg)) + return (-EFAULT); + + if (u_array_size <= 0) + return (-EINVAL); + + idefloppy_create_read_capacity_cmd(&pc); + if (idefloppy_queue_pc_tail(drive, &pc)) { + printk(KERN_ERR "ide-floppy: Can't get floppy parameters\n"); + return (-EIO); + } + header_len = pc.buf[3]; + desc_cnt = header_len / 8; /* capacity descriptor of 8 bytes */ + + u_index = 0; + argp = arg + 1; + + /* + * We always skip the first capacity descriptor. That's the current + * capacity. We are interested in the remaining descriptors, the + * formattable capacities. + */ + for (i = 1; i < desc_cnt; i++) { + unsigned int desc_start = 4 + i*8; + + if (u_index >= u_array_size) + break; /* User-supplied buffer too small */ + + blocks = be32_to_cpup((__be32 *)&pc.buf[desc_start]); + length = be16_to_cpup((__be16 *)&pc.buf[desc_start + 6]); + + if (put_user(blocks, argp)) + return(-EFAULT); + ++argp; + + if (put_user(length, argp)) + return (-EFAULT); + ++argp; + + ++u_index; + } + + if (put_user(u_index, arg)) + return (-EFAULT); + return (0); } -#ifdef CONFIG_IDE_PROC_FS -ide_devset_rw_field(bios_cyl, bios_cyl); -ide_devset_rw_field(bios_head, bios_head); -ide_devset_rw_field(bios_sect, bios_sect); +/* + * Get ATAPI_FORMAT_UNIT progress indication. + * + * Userland gives a pointer to an int. The int is set to a progress + * indicator 0-65536, with 65536=100%. + * + * If the drive does not support format progress indication, we just check + * the dsc bit, and return either 0 or 65536. + */ -static int get_ticks(ide_drive_t *drive) +static int idefloppy_get_format_progress(ide_drive_t *drive, int __user *arg) { idefloppy_floppy_t *floppy = drive->driver_data; - return floppy->ticks; + struct ide_atapi_pc pc; + int progress_indication = 0x10000; + + if (floppy->srfp) { + idefloppy_create_request_sense_cmd(&pc); + if (idefloppy_queue_pc_tail(drive, &pc)) + return (-EIO); + + if (floppy->sense_key == 2 && + floppy->asc == 4 && + floppy->ascq == 4) + progress_indication = floppy->progress_indication; + + /* Else assume format_unit has finished, and we're at 0x10000 */ + } else { + ide_hwif_t *hwif = drive->hwif; + unsigned long flags; + u8 stat; + + local_irq_save(flags); + stat = hwif->tp_ops->read_status(hwif); + local_irq_restore(flags); + + progress_indication = ((stat & SEEK_STAT) == 0) ? 0 : 0x10000; + } + if (put_user(progress_indication, arg)) + return (-EFAULT); + + return (0); } -static int set_ticks(ide_drive_t *drive, int arg) +static sector_t idefloppy_capacity(ide_drive_t *drive) { idefloppy_floppy_t *floppy = drive->driver_data; - floppy->ticks = arg; + unsigned long capacity = floppy->blocks * floppy->bs_factor; + + return capacity; +} + +/* + * Check whether we can support a drive, based on the ATAPI IDENTIFY command + * results. + */ +static int idefloppy_identify_device(ide_drive_t *drive, struct hd_driveid *id) +{ + u8 gcw[2]; + u8 device_type, protocol, removable, drq_type, packet_size; + + *((u16 *) &gcw) = id->config; + + device_type = gcw[1] & 0x1F; + removable = (gcw[0] & 0x80) >> 7; + protocol = (gcw[1] & 0xC0) >> 6; + drq_type = (gcw[0] & 0x60) >> 5; + packet_size = gcw[0] & 0x03; + +#ifdef CONFIG_PPC + /* kludge for Apple PowerBook internal zip */ + if (device_type == 5 && + !strstr(id->model, "CD-ROM") && strstr(id->model, "ZIP")) + device_type = 0; +#endif + + if (protocol != 2) + printk(KERN_ERR "ide-floppy: Protocol (0x%02x) is not ATAPI\n", + protocol); + else if (device_type != 0) + printk(KERN_ERR "ide-floppy: Device type (0x%02x) is not set " + "to floppy\n", device_type); + else if (!removable) + printk(KERN_ERR "ide-floppy: The removable flag is not set\n"); + else if (drq_type == 3) + printk(KERN_ERR "ide-floppy: Sorry, DRQ type (0x%02x) not " + "supported\n", drq_type); + else if (packet_size != 0) + printk(KERN_ERR "ide-floppy: Packet size (0x%02x) is not 12 " + "bytes\n", packet_size); + else + return 1; return 0; } -IDE_DEVSET(ticks, DS_SYNC, get_ticks, set_ticks); +#ifdef CONFIG_IDE_PROC_FS +static void idefloppy_add_settings(ide_drive_t *drive) +{ + idefloppy_floppy_t *floppy = drive->driver_data; -static const struct ide_proc_devset idefloppy_settings[] = { - IDE_PROC_DEVSET(bios_cyl, 0, 1023), - IDE_PROC_DEVSET(bios_head, 0, 255), - IDE_PROC_DEVSET(bios_sect, 0, 63), - IDE_PROC_DEVSET(ticks, 0, 255), - { 0 }, -}; + ide_add_setting(drive, "bios_cyl", SETTING_RW, TYPE_INT, 0, 1023, 1, 1, + &drive->bios_cyl, NULL); + ide_add_setting(drive, "bios_head", SETTING_RW, TYPE_BYTE, 0, 255, 1, 1, + &drive->bios_head, NULL); + ide_add_setting(drive, "bios_sect", SETTING_RW, TYPE_BYTE, 0, 63, 1, 1, + &drive->bios_sect, NULL); + ide_add_setting(drive, "ticks", SETTING_RW, TYPE_BYTE, 0, 255, 1, 1, + &floppy->ticks, NULL); +} +#else +static inline void idefloppy_add_settings(ide_drive_t *drive) { ; } #endif static void idefloppy_setup(ide_drive_t *drive, idefloppy_floppy_t *floppy) { - u16 *id = drive->id; u8 gcw[2]; - *((u16 *)&gcw) = id[ATA_ID_CONFIG]; - + *((u16 *) &gcw) = drive->id->config; + floppy->pc = floppy->pc_stack; drive->pc_callback = ide_floppy_callback; if (((gcw[0] & 0x60) >> 5) == 1) @@ -677,7 +1041,7 @@ static void idefloppy_setup(ide_drive_t *drive, idefloppy_floppy_t *floppy) * it. It should be fixed as of version 1.9, but to be on the safe side * we'll leave the limitation below for the 2.2.x tree. */ - if (!strncmp((char *)&id[ATA_ID_PROD], "IOMEGA ZIP 100 ATAPI", 20)) { + if (!strncmp(drive->id->model, "IOMEGA ZIP 100 ATAPI", 20)) { drive->atapi_flags |= IDE_AFLAG_ZIP_DRIVE; /* This value will be visible in the /proc/ide/hdx/settings */ floppy->ticks = IDEFLOPPY_TICKS_DELAY; @@ -688,16 +1052,13 @@ static void idefloppy_setup(ide_drive_t *drive, idefloppy_floppy_t *floppy) * Guess what? The IOMEGA Clik! drive also needs the above fix. It makes * nasty clicking noises without it, so please don't remove this. */ - if (strncmp((char *)&id[ATA_ID_PROD], "IOMEGA Clik!", 11) == 0) { + if (strncmp(drive->id->model, "IOMEGA Clik!", 11) == 0) { blk_queue_max_sectors(drive->queue, 64); drive->atapi_flags |= IDE_AFLAG_CLIK_DRIVE; - /* IOMEGA Clik! drives do not support lock/unlock commands */ - drive->atapi_flags |= IDE_AFLAG_NO_DOORLOCK; } (void) ide_floppy_get_capacity(drive); - - ide_proc_register_driver(drive, floppy->driver); + idefloppy_add_settings(drive); } static void ide_floppy_remove(ide_drive_t *drive) @@ -754,12 +1115,12 @@ static ide_driver_t idefloppy_driver = { .remove = ide_floppy_remove, .version = IDEFLOPPY_VERSION, .media = ide_floppy, + .supports_dsc_overlap = 0, .do_request = idefloppy_do_request, .end_request = idefloppy_end_request, .error = __ide_error, #ifdef CONFIG_IDE_PROC_FS .proc = idefloppy_proc, - .settings = idefloppy_settings, #endif }; @@ -768,6 +1129,7 @@ static int idefloppy_open(struct inode *inode, struct file *filp) struct gendisk *disk = inode->i_bdev->bd_disk; struct ide_floppy_obj *floppy; ide_drive_t *drive; + struct ide_atapi_pc pc; int ret = 0; debug_log("Reached %s\n", __func__); @@ -784,8 +1146,13 @@ static int idefloppy_open(struct inode *inode, struct file *filp) drive->atapi_flags &= ~IDE_AFLAG_FORMAT_IN_PROGRESS; /* Just in case */ - if (ide_do_test_unit_ready(drive, disk)) - ide_do_start_stop(drive, disk, 1); + idefloppy_init_pc(&pc); + pc.c[0] = GPCMD_TEST_UNIT_READY; + + if (idefloppy_queue_pc_tail(drive, &pc)) { + idefloppy_create_start_stop_cmd(&pc, 1); + (void) idefloppy_queue_pc_tail(drive, &pc); + } if (ide_floppy_get_capacity(drive) && (filp->f_flags & O_NDELAY) == 0 @@ -799,13 +1166,16 @@ static int idefloppy_open(struct inode *inode, struct file *filp) goto out_put_floppy; } - if ((drive->atapi_flags & IDE_AFLAG_WP) && (filp->f_mode & 2)) { + if (floppy->wp && (filp->f_mode & 2)) { ret = -EROFS; goto out_put_floppy; } - drive->atapi_flags |= IDE_AFLAG_MEDIA_CHANGED; - ide_set_media_lock(drive, disk, 1); + /* IOMEGA Clik! drives do not support lock/unlock commands */ + if (!(drive->atapi_flags & IDE_AFLAG_CLIK_DRIVE)) { + idefloppy_create_prevent_cmd(&pc, 1); + (void) idefloppy_queue_pc_tail(drive, &pc); + } check_disk_change(inode->i_bdev); } else if (drive->atapi_flags & IDE_AFLAG_FORMAT_IN_PROGRESS) { ret = -EBUSY; @@ -824,11 +1194,17 @@ static int idefloppy_release(struct inode *inode, struct file *filp) struct gendisk *disk = inode->i_bdev->bd_disk; struct ide_floppy_obj *floppy = ide_floppy_g(disk); ide_drive_t *drive = floppy->drive; + struct ide_atapi_pc pc; debug_log("Reached %s\n", __func__); if (floppy->openers == 1) { - ide_set_media_lock(drive, disk, 0); + /* IOMEGA Clik! drives do not support lock/unlock commands */ + if (!(drive->atapi_flags & IDE_AFLAG_CLIK_DRIVE)) { + idefloppy_create_prevent_cmd(&pc, 0); + (void) idefloppy_queue_pc_tail(drive, &pc); + } + drive->atapi_flags &= ~IDE_AFLAG_FORMAT_IN_PROGRESS; } @@ -854,20 +1230,80 @@ static int ide_floppy_lockdoor(ide_drive_t *drive, struct ide_atapi_pc *pc, unsigned long arg, unsigned int cmd) { idefloppy_floppy_t *floppy = drive->driver_data; - struct gendisk *disk = floppy->disk; - int prevent = (arg && cmd != CDROMEJECT) ? 1 : 0; if (floppy->openers > 1) return -EBUSY; - ide_set_media_lock(drive, disk, prevent); + /* The IOMEGA Clik! Drive doesn't support this command - + * no room for an eject mechanism */ + if (!(drive->atapi_flags & IDE_AFLAG_CLIK_DRIVE)) { + int prevent = arg ? 1 : 0; + + if (cmd == CDROMEJECT) + prevent = 0; - if (cmd == CDROMEJECT) - ide_do_start_stop(drive, disk, 2); + idefloppy_create_prevent_cmd(pc, prevent); + (void) idefloppy_queue_pc_tail(floppy->drive, pc); + } + + if (cmd == CDROMEJECT) { + idefloppy_create_start_stop_cmd(pc, 2); + (void) idefloppy_queue_pc_tail(floppy->drive, pc); + } return 0; } +static int ide_floppy_format_unit(idefloppy_floppy_t *floppy, + int __user *arg) +{ + struct ide_atapi_pc pc; + ide_drive_t *drive = floppy->drive; + int blocks, length, flags, err = 0; + + if (floppy->openers > 1) { + /* Don't format if someone is using the disk */ + drive->atapi_flags &= ~IDE_AFLAG_FORMAT_IN_PROGRESS; + return -EBUSY; + } + + drive->atapi_flags |= IDE_AFLAG_FORMAT_IN_PROGRESS; + + /* + * Send ATAPI_FORMAT_UNIT to the drive. + * + * Userland gives us the following structure: + * + * struct idefloppy_format_command { + * int nblocks; + * int blocksize; + * int flags; + * } ; + * + * flags is a bitmask, currently, the only defined flag is: + * + * 0x01 - verify media after format. + */ + if (get_user(blocks, arg) || + get_user(length, arg+1) || + get_user(flags, arg+2)) { + err = -EFAULT; + goto out; + } + + (void) idefloppy_get_sfrp_bit(drive); + idefloppy_create_format_unit_cmd(&pc, blocks, length, flags); + + if (idefloppy_queue_pc_tail(drive, &pc)) + err = -EIO; + +out: + if (err) + drive->atapi_flags &= ~IDE_AFLAG_FORMAT_IN_PROGRESS; + return err; +} + + static int idefloppy_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg) { @@ -878,12 +1314,23 @@ static int idefloppy_ioctl(struct inode *inode, struct file *file, void __user *argp = (void __user *)arg; int err; - if (cmd == CDROMEJECT || cmd == CDROM_LOCKDOOR) + switch (cmd) { + case CDROMEJECT: + /* fall through */ + case CDROM_LOCKDOOR: return ide_floppy_lockdoor(drive, &pc, arg, cmd); - - err = ide_floppy_format_ioctl(drive, file, cmd, argp); - if (err != -ENOTTY) - return err; + case IDEFLOPPY_IOCTL_FORMAT_SUPPORTED: + return 0; + case IDEFLOPPY_IOCTL_FORMAT_GET_CAPACITY: + return ide_floppy_get_format_capacities(drive, argp); + case IDEFLOPPY_IOCTL_FORMAT_START: + if (!(file->f_mode & 2)) + return -EPERM; + + return ide_floppy_format_unit(floppy, (int __user *)arg); + case IDEFLOPPY_IOCTL_FORMAT_GET_PROGRESS: + return idefloppy_get_format_progress(drive, argp); + } /* * skip SCSI_IOCTL_SEND_COMMAND (deprecated) @@ -892,6 +1339,8 @@ static int idefloppy_ioctl(struct inode *inode, struct file *file, if (cmd != CDROM_SEND_PACKET && cmd != SCSI_IOCTL_SEND_COMMAND) err = scsi_cmd_ioctl(file, bdev->bd_disk->queue, bdev->bd_disk, cmd, argp); + else + err = -ENOTTY; if (err == -ENOTTY) err = generic_ide_ioctl(drive, file, bdev, cmd, arg); @@ -939,11 +1388,11 @@ static int ide_floppy_probe(ide_drive_t *drive) if (!strstr("ide-floppy", drive->driver_req)) goto failed; - + if (!drive->present) + goto failed; if (drive->media != ide_floppy) goto failed; - - if (!ide_check_atapi_device(drive, DRV_NAME)) { + if (!idefloppy_identify_device(drive, drive->id)) { printk(KERN_ERR "ide-floppy: %s: not supported by this version" " of ide-floppy\n", drive->name); goto failed; @@ -961,6 +1410,8 @@ static int ide_floppy_probe(ide_drive_t *drive) ide_init_disk(g, drive); + ide_proc_register_driver(drive, &idefloppy_driver); + kref_init(&floppy->kref); floppy->drive = drive; @@ -999,7 +1450,6 @@ static int __init idefloppy_init(void) } MODULE_ALIAS("ide:*m-floppy*"); -MODULE_ALIAS("ide-floppy"); module_init(idefloppy_init); module_exit(idefloppy_exit); MODULE_LICENSE("GPL"); diff --git a/trunk/drivers/ide/ide-floppy.h b/trunk/drivers/ide/ide-floppy.h deleted file mode 100644 index ecadc2bc322d..000000000000 --- a/trunk/drivers/ide/ide-floppy.h +++ /dev/null @@ -1,63 +0,0 @@ -#ifndef __IDE_FLOPPY_H -#define __IDE_FLOPPY_H - -/* - * Most of our global data which we need to save even as we leave the driver - * due to an interrupt or a timer event is stored in a variable of type - * idefloppy_floppy_t, defined below. - */ -typedef struct ide_floppy_obj { - ide_drive_t *drive; - ide_driver_t *driver; - struct gendisk *disk; - struct kref kref; - unsigned int openers; /* protected by BKL for now */ - - /* Current packet command */ - struct ide_atapi_pc *pc; - /* Last failed packet command */ - struct ide_atapi_pc *failed_pc; - /* used for blk_{fs,pc}_request() requests */ - struct ide_atapi_pc queued_pc; - - struct ide_atapi_pc request_sense_pc; - struct request request_sense_rq; - - /* Last error information */ - u8 sense_key, asc, ascq; - /* delay this long before sending packet command */ - u8 ticks; - int progress_indication; - - /* Device information */ - /* Current format */ - int blocks, block_size, bs_factor; - /* Last format capacity descriptor */ - u8 cap_desc[8]; - /* Copy of the flexible disk page */ - u8 flexible_disk_page[32]; -} idefloppy_floppy_t; - -/* - * Pages of the SELECT SENSE / MODE SENSE packet commands. - * See SFF-8070i spec. - */ -#define IDEFLOPPY_CAPABILITIES_PAGE 0x1b -#define IDEFLOPPY_FLEXIBLE_DISK_PAGE 0x05 - -/* IOCTLs used in low-level formatting. */ -#define IDEFLOPPY_IOCTL_FORMAT_SUPPORTED 0x4600 -#define IDEFLOPPY_IOCTL_FORMAT_GET_CAPACITY 0x4601 -#define IDEFLOPPY_IOCTL_FORMAT_START 0x4602 -#define IDEFLOPPY_IOCTL_FORMAT_GET_PROGRESS 0x4603 - -/* ide-floppy.c */ -void ide_floppy_create_mode_sense_cmd(struct ide_atapi_pc *, u8); -void ide_floppy_create_read_capacity_cmd(struct ide_atapi_pc *); -void ide_floppy_create_request_sense_cmd(struct ide_atapi_pc *); - -/* ide-floppy_ioctl.c */ -int ide_floppy_format_ioctl(ide_drive_t *, struct file *, unsigned int, - void __user *); - -#endif /*__IDE_FLOPPY_H */ diff --git a/trunk/drivers/ide/ide-floppy_ioctl.c b/trunk/drivers/ide/ide-floppy_ioctl.c deleted file mode 100644 index 5ffc4512d14b..000000000000 --- a/trunk/drivers/ide/ide-floppy_ioctl.c +++ /dev/null @@ -1,243 +0,0 @@ -/* - * ide-floppy IOCTLs handling. - */ - -#include -#include -#include - -#include - -#include - -#include "ide-floppy.h" - -/* - * Obtain the list of formattable capacities. - * Very similar to ide_floppy_get_capacity, except that we push the capacity - * descriptors to userland, instead of our own structures. - * - * Userland gives us the following structure: - * - * struct idefloppy_format_capacities { - * int nformats; - * struct { - * int nblocks; - * int blocksize; - * } formats[]; - * }; - * - * userland initializes nformats to the number of allocated formats[] records. - * On exit we set nformats to the number of records we've actually initialized. - */ - -static int ide_floppy_get_format_capacities(ide_drive_t *drive, int __user *arg) -{ - struct ide_floppy_obj *floppy = drive->driver_data; - struct ide_atapi_pc pc; - u8 header_len, desc_cnt; - int i, blocks, length, u_array_size, u_index; - int __user *argp; - - if (get_user(u_array_size, arg)) - return -EFAULT; - - if (u_array_size <= 0) - return -EINVAL; - - ide_floppy_create_read_capacity_cmd(&pc); - if (ide_queue_pc_tail(drive, floppy->disk, &pc)) { - printk(KERN_ERR "ide-floppy: Can't get floppy parameters\n"); - return -EIO; - } - - header_len = pc.buf[3]; - desc_cnt = header_len / 8; /* capacity descriptor of 8 bytes */ - - u_index = 0; - argp = arg + 1; - - /* - * We always skip the first capacity descriptor. That's the current - * capacity. We are interested in the remaining descriptors, the - * formattable capacities. - */ - for (i = 1; i < desc_cnt; i++) { - unsigned int desc_start = 4 + i*8; - - if (u_index >= u_array_size) - break; /* User-supplied buffer too small */ - - blocks = be32_to_cpup((__be32 *)&pc.buf[desc_start]); - length = be16_to_cpup((__be16 *)&pc.buf[desc_start + 6]); - - if (put_user(blocks, argp)) - return -EFAULT; - - ++argp; - - if (put_user(length, argp)) - return -EFAULT; - - ++argp; - - ++u_index; - } - - if (put_user(u_index, arg)) - return -EFAULT; - - return 0; -} - -static void ide_floppy_create_format_unit_cmd(struct ide_atapi_pc *pc, int b, - int l, int flags) -{ - ide_init_pc(pc); - pc->c[0] = GPCMD_FORMAT_UNIT; - pc->c[1] = 0x17; - - memset(pc->buf, 0, 12); - pc->buf[1] = 0xA2; - /* Default format list header, u8 1: FOV/DCRT/IMM bits set */ - - if (flags & 1) /* Verify bit on... */ - pc->buf[1] ^= 0x20; /* ... turn off DCRT bit */ - pc->buf[3] = 8; - - put_unaligned(cpu_to_be32(b), (unsigned int *)(&pc->buf[4])); - put_unaligned(cpu_to_be32(l), (unsigned int *)(&pc->buf[8])); - pc->buf_size = 12; - pc->flags |= PC_FLAG_WRITING; -} - -static int ide_floppy_get_sfrp_bit(ide_drive_t *drive) -{ - idefloppy_floppy_t *floppy = drive->driver_data; - struct ide_atapi_pc pc; - - drive->atapi_flags &= ~IDE_AFLAG_SRFP; - - ide_floppy_create_mode_sense_cmd(&pc, IDEFLOPPY_CAPABILITIES_PAGE); - pc.flags |= PC_FLAG_SUPPRESS_ERROR; - - if (ide_queue_pc_tail(drive, floppy->disk, &pc)) - return 1; - - if (pc.buf[8 + 2] & 0x40) - drive->atapi_flags |= IDE_AFLAG_SRFP; - - return 0; -} - -static int ide_floppy_format_unit(ide_drive_t *drive, int __user *arg) -{ - idefloppy_floppy_t *floppy = drive->driver_data; - struct ide_atapi_pc pc; - int blocks, length, flags, err = 0; - - if (floppy->openers > 1) { - /* Don't format if someone is using the disk */ - drive->atapi_flags &= ~IDE_AFLAG_FORMAT_IN_PROGRESS; - return -EBUSY; - } - - drive->atapi_flags |= IDE_AFLAG_FORMAT_IN_PROGRESS; - - /* - * Send ATAPI_FORMAT_UNIT to the drive. - * - * Userland gives us the following structure: - * - * struct idefloppy_format_command { - * int nblocks; - * int blocksize; - * int flags; - * } ; - * - * flags is a bitmask, currently, the only defined flag is: - * - * 0x01 - verify media after format. - */ - if (get_user(blocks, arg) || - get_user(length, arg+1) || - get_user(flags, arg+2)) { - err = -EFAULT; - goto out; - } - - (void)ide_floppy_get_sfrp_bit(drive); - ide_floppy_create_format_unit_cmd(&pc, blocks, length, flags); - - if (ide_queue_pc_tail(drive, floppy->disk, &pc)) - err = -EIO; - -out: - if (err) - drive->atapi_flags &= ~IDE_AFLAG_FORMAT_IN_PROGRESS; - return err; -} - -/* - * Get ATAPI_FORMAT_UNIT progress indication. - * - * Userland gives a pointer to an int. The int is set to a progress - * indicator 0-65536, with 65536=100%. - * - * If the drive does not support format progress indication, we just check - * the dsc bit, and return either 0 or 65536. - */ - -static int ide_floppy_get_format_progress(ide_drive_t *drive, int __user *arg) -{ - idefloppy_floppy_t *floppy = drive->driver_data; - struct ide_atapi_pc pc; - int progress_indication = 0x10000; - - if (drive->atapi_flags & IDE_AFLAG_SRFP) { - ide_floppy_create_request_sense_cmd(&pc); - if (ide_queue_pc_tail(drive, floppy->disk, &pc)) - return -EIO; - - if (floppy->sense_key == 2 && - floppy->asc == 4 && - floppy->ascq == 4) - progress_indication = floppy->progress_indication; - - /* Else assume format_unit has finished, and we're at 0x10000 */ - } else { - ide_hwif_t *hwif = drive->hwif; - unsigned long flags; - u8 stat; - - local_irq_save(flags); - stat = hwif->tp_ops->read_status(hwif); - local_irq_restore(flags); - - progress_indication = ((stat & ATA_DSC) == 0) ? 0 : 0x10000; - } - - if (put_user(progress_indication, arg)) - return -EFAULT; - - return 0; -} - -int ide_floppy_format_ioctl(ide_drive_t *drive, struct file *file, - unsigned int cmd, void __user *argp) -{ - switch (cmd) { - case IDEFLOPPY_IOCTL_FORMAT_SUPPORTED: - return 0; - case IDEFLOPPY_IOCTL_FORMAT_GET_CAPACITY: - return ide_floppy_get_format_capacities(drive, argp); - case IDEFLOPPY_IOCTL_FORMAT_START: - if (!(file->f_mode & 2)) - return -EPERM; - return ide_floppy_format_unit(drive, (int __user *)argp); - case IDEFLOPPY_IOCTL_FORMAT_GET_PROGRESS: - return ide_floppy_get_format_progress(drive, argp); - default: - return -ENOTTY; - } -} diff --git a/trunk/drivers/ide/ide-generic.c b/trunk/drivers/ide/ide-generic.c index 0a3cb0c33ae5..8fe8b5b9cf7d 100644 --- a/trunk/drivers/ide/ide-generic.c +++ b/trunk/drivers/ide/ide-generic.c @@ -19,7 +19,6 @@ #include #include #include -#include /* FIXME: convert m32r to use ide_platform host driver */ #ifdef CONFIG_M32R @@ -28,7 +27,7 @@ #define DRV_NAME "ide_generic" -static int probe_mask; +static int probe_mask = 0x03; module_param(probe_mask, int, 0); MODULE_PARM_DESC(probe_mask, "probe mask for legacy ISA IDE ports"); @@ -101,65 +100,19 @@ static const u16 legacy_bases[] = { 0x1f0, 0x170, 0x1e8, 0x168, 0x1e0, 0x160 }; static const int legacy_irqs[] = { 14, 15, 11, 10, 8, 12 }; #endif -static void ide_generic_check_pci_legacy_iobases(int *primary, int *secondary) -{ - struct pci_dev *p = NULL; - u16 val; - - for_each_pci_dev(p) { - - if (pci_resource_start(p, 0) == 0x1f0) - *primary = 1; - if (pci_resource_start(p, 2) == 0x170) - *secondary = 1; - - /* Cyrix CS55{1,2}0 pre SFF MWDMA ATA on the bridge */ - if (p->vendor == PCI_VENDOR_ID_CYRIX && - (p->device == PCI_DEVICE_ID_CYRIX_5510 || - p->device == PCI_DEVICE_ID_CYRIX_5520)) - *primary = *secondary = 1; - - /* Intel MPIIX - PIO ATA on non PCI side of bridge */ - if (p->vendor == PCI_VENDOR_ID_INTEL && - p->device == PCI_DEVICE_ID_INTEL_82371MX) { - - pci_read_config_word(p, 0x6C, &val); - if (val & 0x8000) { - /* ATA port enabled */ - if (val & 0x4000) - *secondary = 1; - else - *primary = 1; - } - } - } -} - static int __init ide_generic_init(void) { hw_regs_t hw[MAX_HWIFS], *hws[MAX_HWIFS]; struct ide_host *host; unsigned long io_addr; - int i, rc, primary = 0, secondary = 0; + int i, rc; #ifdef CONFIG_MIPS if (!ide_probe_legacy()) return -ENODEV; #endif - ide_generic_check_pci_legacy_iobases(&primary, &secondary); - - if (!probe_mask) { - printk(KERN_INFO DRV_NAME ": please use \"probe_mask=0x3f\" " - "module parameter for probing all legacy ISA IDE ports\n"); - - if (primary == 0) - probe_mask |= 0x1; - - if (secondary == 0) - probe_mask |= 0x2; - } else - printk(KERN_INFO DRV_NAME ": enforcing probing of I/O ports " - "upon user request\n"); + printk(KERN_INFO DRV_NAME ": please use \"probe_mask=0x3f\" module " + "parameter for probing all legacy ISA IDE ports\n"); memset(hws, 0, sizeof(hw_regs_t *) * MAX_HWIFS); diff --git a/trunk/drivers/ide/ide-io.c b/trunk/drivers/ide/ide-io.c index 1c51949833be..a896a283f27f 100644 --- a/trunk/drivers/ide/ide-io.c +++ b/trunk/drivers/ide/ide-io.c @@ -40,7 +40,6 @@ #include #include #include -#include #include #include #include @@ -184,18 +183,18 @@ static ide_startstop_t ide_start_power_step(ide_drive_t *drive, struct request * if (drive->media != ide_disk) break; /* Not supported? Switch to next step now. */ - if (!drive->wcache || ata_id_flush_enabled(drive->id) == 0) { + if (!drive->wcache || !ide_id_has_flush_cache(drive->id)) { ide_complete_power_step(drive, rq, 0, 0); return ide_stopped; } - if (ata_id_flush_ext_enabled(drive->id)) - args->tf.command = ATA_CMD_FLUSH_EXT; + if (ide_id_has_flush_cache_ext(drive->id)) + args->tf.command = WIN_FLUSH_CACHE_EXT; else - args->tf.command = ATA_CMD_FLUSH; + args->tf.command = WIN_FLUSH_CACHE; goto out_do_tf; case idedisk_pm_standby: /* Suspend step 2 (standby) */ - args->tf.command = ATA_CMD_STANDBYNOW1; + args->tf.command = WIN_STANDBYNOW1; goto out_do_tf; case idedisk_pm_restore_pio: /* Resume step 1 (restore PIO) */ @@ -210,7 +209,7 @@ static ide_startstop_t ide_start_power_step(ide_drive_t *drive, struct request * return ide_stopped; case idedisk_pm_idle: /* Resume step 2 (idle) */ - args->tf.command = ATA_CMD_IDLEIMMEDIATE; + args->tf.command = WIN_IDLEIMMEDIATE; goto out_do_tf; case ide_pm_restore_dma: /* Resume step 3 (restore DMA) */ @@ -323,7 +322,7 @@ void ide_end_drive_cmd (ide_drive_t *drive, u8 stat, u8 err) ide_task_t *task = (ide_task_t *)rq->special; if (rq->errors == 0) - rq->errors = !OK_STAT(stat, ATA_DRDY, BAD_STAT); + rq->errors = !OK_STAT(stat, READY_STAT, BAD_STAT); if (task) { struct ide_taskfile *tf = &task->tf; @@ -374,29 +373,29 @@ static ide_startstop_t ide_ata_error(ide_drive_t *drive, struct request *rq, u8 { ide_hwif_t *hwif = drive->hwif; - if ((stat & ATA_BUSY) || ((stat & ATA_DF) && !drive->nowerr)) { + if (stat & BUSY_STAT || ((stat & WRERR_STAT) && !drive->nowerr)) { /* other bits are useless when BUSY */ rq->errors |= ERROR_RESET; - } else if (stat & ATA_ERR) { + } else if (stat & ERR_STAT) { /* err has different meaning on cdrom and tape */ - if (err == ATA_ABORTED) { + if (err == ABRT_ERR) { if (drive->select.b.lba && - /* some newer drives don't support ATA_CMD_INIT_DEV_PARAMS */ - hwif->tp_ops->read_status(hwif) == ATA_CMD_INIT_DEV_PARAMS) + /* some newer drives don't support WIN_SPECIFY */ + hwif->tp_ops->read_status(hwif) == WIN_SPECIFY) return ide_stopped; } else if ((err & BAD_CRC) == BAD_CRC) { /* UDMA crc error, just retry the operation */ drive->crc_count++; - } else if (err & (ATA_BBK | ATA_UNC)) { + } else if (err & (BBD_ERR | ECC_ERR)) { /* retries won't help these */ rq->errors = ERROR_MAX; - } else if (err & ATA_TRK0NF) { + } else if (err & TRK0_ERR) { /* help it find track zero */ rq->errors |= ERROR_RECAL; } } - if ((stat & ATA_DRQ) && rq_data_dir(rq) == READ && + if ((stat & DRQ_STAT) && rq_data_dir(rq) == READ && (hwif->host_flags & IDE_HFLAG_ERROR_STOPS_FIFO) == 0) { int nsect = drive->mult_count ? drive->mult_count : 1; @@ -408,7 +407,7 @@ static ide_startstop_t ide_ata_error(ide_drive_t *drive, struct request *rq, u8 return ide_stopped; } - if (hwif->tp_ops->read_status(hwif) & (ATA_BUSY | ATA_DRQ)) + if (hwif->tp_ops->read_status(hwif) & (BUSY_STAT | DRQ_STAT)) rq->errors |= ERROR_RESET; if ((rq->errors & ERROR_RESET) == ERROR_RESET) { @@ -428,16 +427,16 @@ static ide_startstop_t ide_atapi_error(ide_drive_t *drive, struct request *rq, u { ide_hwif_t *hwif = drive->hwif; - if ((stat & ATA_BUSY) || ((stat & ATA_DF) && !drive->nowerr)) { + if (stat & BUSY_STAT || ((stat & WRERR_STAT) && !drive->nowerr)) { /* other bits are useless when BUSY */ rq->errors |= ERROR_RESET; } else { /* add decoding error stuff */ } - if (hwif->tp_ops->read_status(hwif) & (ATA_BUSY | ATA_DRQ)) + if (hwif->tp_ops->read_status(hwif) & (BUSY_STAT | DRQ_STAT)) /* force an abort */ - hwif->tp_ops->exec_command(hwif, ATA_CMD_IDLEIMMEDIATE); + hwif->tp_ops->exec_command(hwif, WIN_IDLEIMMEDIATE); if (rq->errors >= ERROR_MAX) { ide_kill_rq(drive, rq); @@ -510,19 +509,19 @@ static void ide_tf_set_specify_cmd(ide_drive_t *drive, struct ide_taskfile *tf) tf->lbam = drive->cyl; tf->lbah = drive->cyl >> 8; tf->device = ((drive->head - 1) | drive->select.all) & ~ATA_LBA; - tf->command = ATA_CMD_INIT_DEV_PARAMS; + tf->command = WIN_SPECIFY; } static void ide_tf_set_restore_cmd(ide_drive_t *drive, struct ide_taskfile *tf) { tf->nsect = drive->sect; - tf->command = ATA_CMD_RESTORE; + tf->command = WIN_RESTORE; } static void ide_tf_set_setmult_cmd(ide_drive_t *drive, struct ide_taskfile *tf) { tf->nsect = drive->mult_req; - tf->command = ATA_CMD_SET_MULTI; + tf->command = WIN_SETMULT; } static ide_startstop_t ide_disk_special(ide_drive_t *drive) @@ -541,6 +540,8 @@ static ide_startstop_t ide_disk_special(ide_drive_t *drive) ide_tf_set_restore_cmd(drive, &args.tf); } else if (s->b.set_multmode) { s->b.set_multmode = 0; + if (drive->mult_req > drive->id->max_multsect) + drive->mult_req = drive->id->max_multsect; ide_tf_set_setmult_cmd(drive, &args.tf); } else if (s->all) { int special = s->all; @@ -585,10 +586,9 @@ static int set_pio_mode_abuse(ide_hwif_t *hwif, u8 req_pio) * do_special - issue some special commands * @drive: drive the command is for * - * do_special() is used to issue ATA_CMD_INIT_DEV_PARAMS, - * ATA_CMD_RESTORE and ATA_CMD_SET_MULTI commands to a drive. - * - * It used to do much more, but has been scaled back. + * do_special() is used to issue WIN_SPECIFY, WIN_RESTORE, and WIN_SETMULT + * commands to a drive. It used to do much more, but has been scaled + * back. */ static ide_startstop_t do_special (ide_drive_t *drive) @@ -716,49 +716,9 @@ static ide_startstop_t execute_drive_cmd (ide_drive_t *drive, return ide_stopped; } -int ide_devset_execute(ide_drive_t *drive, const struct ide_devset *setting, - int arg) -{ - struct request_queue *q = drive->queue; - struct request *rq; - int ret = 0; - - if (!(setting->flags & DS_SYNC)) - return setting->set(drive, arg); - - rq = blk_get_request(q, READ, GFP_KERNEL); - if (!rq) - return -ENOMEM; - - rq->cmd_type = REQ_TYPE_SPECIAL; - rq->cmd_len = 5; - rq->cmd[0] = REQ_DEVSET_EXEC; - *(int *)&rq->cmd[1] = arg; - rq->special = setting->set; - - if (blk_execute_rq(q, NULL, rq, 0)) - ret = rq->errors; - blk_put_request(rq); - - return ret; -} -EXPORT_SYMBOL_GPL(ide_devset_execute); - static ide_startstop_t ide_special_rq(ide_drive_t *drive, struct request *rq) { switch (rq->cmd[0]) { - case REQ_DEVSET_EXEC: - { - int err, (*setfunc)(ide_drive_t *, int) = rq->special; - - err = setfunc(drive, *(int *)&rq->cmd[1]); - if (err) - rq->errors = err; - else - err = 1; - ide_end_request(drive, err, 0); - return ide_stopped; - } case REQ_DRIVE_RESET: return ide_do_reset(drive); default: @@ -806,7 +766,9 @@ static void ide_check_pm_state(ide_drive_t *drive, struct request *rq) * start_request - start of I/O and command issuing for IDE * * start_request() initiates handling of a new I/O request. It - * accepts commands and I/O (read/write) requests. + * accepts commands and I/O (read/write) requests. It also does + * the final remapping for weird stuff like EZDrive. Once + * device mapper can work sector level the EZDrive stuff can go away * * FIXME: this function needs a rename */ @@ -814,6 +776,7 @@ static void ide_check_pm_state(ide_drive_t *drive, struct request *rq) static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq) { ide_startstop_t startstop; + sector_t block; BUG_ON(!blk_rq_started(rq)); @@ -828,12 +791,21 @@ static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq) goto kill_rq; } + block = rq->sector; + if (blk_fs_request(rq) && + (drive->media == ide_disk || drive->media == ide_floppy)) { + block += drive->sect0; + } + /* Yecch - this will shift the entire interval, + possibly killing some innocent following sector */ + if (block == 0 && drive->remap_0_to_1 == 1) + block = 1; /* redirect MBR access to EZ-Drive partn table */ + if (blk_pm_request(rq)) ide_check_pm_state(drive, rq); SELECT_DRIVE(drive); - if (ide_wait_stat(&startstop, drive, drive->ready_stat, - ATA_BUSY | ATA_DRQ, WAIT_READY)) { + if (ide_wait_stat(&startstop, drive, drive->ready_stat, BUSY_STAT|DRQ_STAT, WAIT_READY)) { printk(KERN_ERR "%s: drive not ready for command\n", drive->name); return startstop; } @@ -872,8 +844,7 @@ static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq) return ide_special_rq(drive, rq); drv = *(ide_driver_t **)rq->rq_disk->private_data; - - return drv->do_request(drive, rq, rq->sector); + return drv->do_request(drive, rq, block); } return do_special(drive); kill_rq: @@ -1354,7 +1325,7 @@ static void unexpected_intr (int irq, ide_hwgroup_t *hwgroup) if (hwif->irq == irq) { stat = hwif->tp_ops->read_status(hwif); - if (!OK_STAT(stat, ATA_DRDY, BAD_STAT)) { + if (!OK_STAT(stat, READY_STAT, BAD_STAT)) { /* Try to not flood the console with msgs */ static unsigned long last_msgtime, count; ++count; diff --git a/trunk/drivers/ide/ide-ioctls.c b/trunk/drivers/ide/ide-ioctls.c deleted file mode 100644 index cf01564901af..000000000000 --- a/trunk/drivers/ide/ide-ioctls.c +++ /dev/null @@ -1,290 +0,0 @@ -/* - * IDE ioctls handling. - */ - -#include -#include - -static const struct ide_ioctl_devset ide_ioctl_settings[] = { -{ HDIO_GET_32BIT, HDIO_SET_32BIT, &ide_devset_io_32bit }, -{ HDIO_GET_KEEPSETTINGS, HDIO_SET_KEEPSETTINGS, &ide_devset_keepsettings }, -{ HDIO_GET_UNMASKINTR, HDIO_SET_UNMASKINTR, &ide_devset_unmaskirq }, -{ HDIO_GET_DMA, HDIO_SET_DMA, &ide_devset_using_dma }, -{ -1, HDIO_SET_PIO_MODE, &ide_devset_pio_mode }, -{ 0 } -}; - -int ide_setting_ioctl(ide_drive_t *drive, struct block_device *bdev, - unsigned int cmd, unsigned long arg, - const struct ide_ioctl_devset *s) -{ - const struct ide_devset *ds; - unsigned long flags; - int err = -EOPNOTSUPP; - - for (; (ds = s->setting); s++) { - if (ds->get && s->get_ioctl == cmd) - goto read_val; - else if (ds->set && s->set_ioctl == cmd) - goto set_val; - } - - return err; - -read_val: - mutex_lock(&ide_setting_mtx); - spin_lock_irqsave(&ide_lock, flags); - err = ds->get(drive); - spin_unlock_irqrestore(&ide_lock, flags); - mutex_unlock(&ide_setting_mtx); - return err >= 0 ? put_user(err, (long __user *)arg) : err; - -set_val: - if (bdev != bdev->bd_contains) - err = -EINVAL; - else { - if (!capable(CAP_SYS_ADMIN)) - err = -EACCES; - else { - mutex_lock(&ide_setting_mtx); - err = ide_devset_execute(drive, ds, arg); - mutex_unlock(&ide_setting_mtx); - } - } - return err; -} -EXPORT_SYMBOL_GPL(ide_setting_ioctl); - -static int ide_get_identity_ioctl(ide_drive_t *drive, unsigned int cmd, - unsigned long arg) -{ - u16 *id = NULL; - int size = (cmd == HDIO_GET_IDENTITY) ? (ATA_ID_WORDS * 2) : 142; - int rc = 0; - - if (drive->id_read == 0) { - rc = -ENOMSG; - goto out; - } - - id = kmalloc(size, GFP_KERNEL); - if (id == NULL) { - rc = -ENOMEM; - goto out; - } - - memcpy(id, drive->id, size); - ata_id_to_hd_driveid(id); - - if (copy_to_user((void __user *)arg, id, size)) - rc = -EFAULT; - - kfree(id); -out: - return rc; -} - -static int ide_get_nice_ioctl(ide_drive_t *drive, unsigned long arg) -{ - return put_user((drive->dsc_overlap << IDE_NICE_DSC_OVERLAP) | - (drive->nice1 << IDE_NICE_1), (long __user *)arg); -} - -static int ide_set_nice_ioctl(ide_drive_t *drive, unsigned long arg) -{ - if (arg != (arg & ((1 << IDE_NICE_DSC_OVERLAP) | (1 << IDE_NICE_1)))) - return -EPERM; - - if (((arg >> IDE_NICE_DSC_OVERLAP) & 1) && - (drive->media == ide_disk || drive->media == ide_floppy || - drive->scsi)) - return -EPERM; - - drive->dsc_overlap = (arg >> IDE_NICE_DSC_OVERLAP) & 1; - drive->nice1 = (arg >> IDE_NICE_1) & 1; - - return 0; -} - -static int ide_cmd_ioctl(ide_drive_t *drive, unsigned cmd, unsigned long arg) -{ - u8 *buf = NULL; - int bufsize = 0, err = 0; - u8 args[4], xfer_rate = 0; - ide_task_t tfargs; - struct ide_taskfile *tf = &tfargs.tf; - u16 *id = drive->id; - - if (NULL == (void *) arg) { - struct request *rq; - - rq = blk_get_request(drive->queue, READ, __GFP_WAIT); - rq->cmd_type = REQ_TYPE_ATA_TASKFILE; - err = blk_execute_rq(drive->queue, NULL, rq, 0); - blk_put_request(rq); - - return err; - } - - if (copy_from_user(args, (void __user *)arg, 4)) - return -EFAULT; - - memset(&tfargs, 0, sizeof(ide_task_t)); - tf->feature = args[2]; - if (args[0] == ATA_CMD_SMART) { - tf->nsect = args[3]; - tf->lbal = args[1]; - tf->lbam = 0x4f; - tf->lbah = 0xc2; - tfargs.tf_flags = IDE_TFLAG_OUT_TF | IDE_TFLAG_IN_NSECT; - } else { - tf->nsect = args[1]; - tfargs.tf_flags = IDE_TFLAG_OUT_FEATURE | - IDE_TFLAG_OUT_NSECT | IDE_TFLAG_IN_NSECT; - } - tf->command = args[0]; - tfargs.data_phase = args[3] ? TASKFILE_IN : TASKFILE_NO_DATA; - - if (args[3]) { - tfargs.tf_flags |= IDE_TFLAG_IO_16BIT; - bufsize = SECTOR_SIZE * args[3]; - buf = kzalloc(bufsize, GFP_KERNEL); - if (buf == NULL) - return -ENOMEM; - } - - if (tf->command == ATA_CMD_SET_FEATURES && - tf->feature == SETFEATURES_XFER && - tf->nsect >= XFER_SW_DMA_0 && - (id[ATA_ID_UDMA_MODES] || - id[ATA_ID_MWDMA_MODES] || - id[ATA_ID_SWDMA_MODES])) { - xfer_rate = args[1]; - if (tf->nsect > XFER_UDMA_2 && !eighty_ninty_three(drive)) { - printk(KERN_WARNING "%s: UDMA speeds >UDMA33 cannot " - "be set\n", drive->name); - goto abort; - } - } - - err = ide_raw_taskfile(drive, &tfargs, buf, args[3]); - - args[0] = tf->status; - args[1] = tf->error; - args[2] = tf->nsect; - - if (!err && xfer_rate) { - /* active-retuning-calls future */ - ide_set_xfer_rate(drive, xfer_rate); - ide_driveid_update(drive); - } -abort: - if (copy_to_user((void __user *)arg, &args, 4)) - err = -EFAULT; - if (buf) { - if (copy_to_user((void __user *)(arg + 4), buf, bufsize)) - err = -EFAULT; - kfree(buf); - } - return err; -} - -static int ide_task_ioctl(ide_drive_t *drive, unsigned cmd, unsigned long arg) -{ - void __user *p = (void __user *)arg; - int err = 0; - u8 args[7]; - ide_task_t task; - - if (copy_from_user(args, p, 7)) - return -EFAULT; - - memset(&task, 0, sizeof(task)); - memcpy(&task.tf_array[7], &args[1], 6); - task.tf.command = args[0]; - task.tf_flags = IDE_TFLAG_TF | IDE_TFLAG_DEVICE; - - err = ide_no_data_taskfile(drive, &task); - - args[0] = task.tf.command; - memcpy(&args[1], &task.tf_array[7], 6); - - if (copy_to_user(p, args, 7)) - err = -EFAULT; - - return err; -} - -static int generic_drive_reset(ide_drive_t *drive) -{ - struct request *rq; - int ret = 0; - - rq = blk_get_request(drive->queue, READ, __GFP_WAIT); - rq->cmd_type = REQ_TYPE_SPECIAL; - rq->cmd_len = 1; - rq->cmd[0] = REQ_DRIVE_RESET; - rq->cmd_flags |= REQ_SOFTBARRIER; - if (blk_execute_rq(drive->queue, NULL, rq, 1)) - ret = rq->errors; - blk_put_request(rq); - return ret; -} - -int generic_ide_ioctl(ide_drive_t *drive, struct file *file, - struct block_device *bdev, - unsigned int cmd, unsigned long arg) -{ - int err; - - err = ide_setting_ioctl(drive, bdev, cmd, arg, ide_ioctl_settings); - if (err != -EOPNOTSUPP) - return err; - - switch (cmd) { - case HDIO_OBSOLETE_IDENTITY: - case HDIO_GET_IDENTITY: - if (bdev != bdev->bd_contains) - return -EINVAL; - return ide_get_identity_ioctl(drive, cmd, arg); - case HDIO_GET_NICE: - return ide_get_nice_ioctl(drive, arg); - case HDIO_SET_NICE: - if (!capable(CAP_SYS_ADMIN)) - return -EACCES; - return ide_set_nice_ioctl(drive, arg); -#ifdef CONFIG_IDE_TASK_IOCTL - case HDIO_DRIVE_TASKFILE: - if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO)) - return -EACCES; - if (drive->media == ide_disk) - return ide_taskfile_ioctl(drive, cmd, arg); - return -ENOMSG; -#endif - case HDIO_DRIVE_CMD: - if (!capable(CAP_SYS_RAWIO)) - return -EACCES; - return ide_cmd_ioctl(drive, cmd, arg); - case HDIO_DRIVE_TASK: - if (!capable(CAP_SYS_RAWIO)) - return -EACCES; - return ide_task_ioctl(drive, cmd, arg); - case HDIO_DRIVE_RESET: - if (!capable(CAP_SYS_ADMIN)) - return -EACCES; - return generic_drive_reset(drive); - case HDIO_GET_BUSSTATE: - if (!capable(CAP_SYS_ADMIN)) - return -EACCES; - if (put_user(BUSSTATE_ON, (long __user *)arg)) - return -EFAULT; - return 0; - case HDIO_SET_BUSSTATE: - if (!capable(CAP_SYS_ADMIN)) - return -EACCES; - return -EOPNOTSUPP; - default: - return -EINVAL; - } -} -EXPORT_SYMBOL(generic_ide_ioctl); diff --git a/trunk/drivers/ide/ide-iops.c b/trunk/drivers/ide/ide-iops.c index 0a2fd3b37ac4..2cbadffe922e 100644 --- a/trunk/drivers/ide/ide-iops.c +++ b/trunk/drivers/ide/ide-iops.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -399,14 +400,97 @@ const struct ide_tp_ops default_tp_ops = { .output_data = ide_output_data, }; -void ide_fix_driveid(u16 *id) +void ide_fix_driveid (struct hd_driveid *id) { #ifndef __LITTLE_ENDIAN # ifdef __BIG_ENDIAN int i; - - for (i = 0; i < 256; i++) - id[i] = __le16_to_cpu(id[i]); + u16 *stringcast; + + id->config = __le16_to_cpu(id->config); + id->cyls = __le16_to_cpu(id->cyls); + id->reserved2 = __le16_to_cpu(id->reserved2); + id->heads = __le16_to_cpu(id->heads); + id->track_bytes = __le16_to_cpu(id->track_bytes); + id->sector_bytes = __le16_to_cpu(id->sector_bytes); + id->sectors = __le16_to_cpu(id->sectors); + id->vendor0 = __le16_to_cpu(id->vendor0); + id->vendor1 = __le16_to_cpu(id->vendor1); + id->vendor2 = __le16_to_cpu(id->vendor2); + stringcast = (u16 *)&id->serial_no[0]; + for (i = 0; i < (20/2); i++) + stringcast[i] = __le16_to_cpu(stringcast[i]); + id->buf_type = __le16_to_cpu(id->buf_type); + id->buf_size = __le16_to_cpu(id->buf_size); + id->ecc_bytes = __le16_to_cpu(id->ecc_bytes); + stringcast = (u16 *)&id->fw_rev[0]; + for (i = 0; i < (8/2); i++) + stringcast[i] = __le16_to_cpu(stringcast[i]); + stringcast = (u16 *)&id->model[0]; + for (i = 0; i < (40/2); i++) + stringcast[i] = __le16_to_cpu(stringcast[i]); + id->dword_io = __le16_to_cpu(id->dword_io); + id->reserved50 = __le16_to_cpu(id->reserved50); + id->field_valid = __le16_to_cpu(id->field_valid); + id->cur_cyls = __le16_to_cpu(id->cur_cyls); + id->cur_heads = __le16_to_cpu(id->cur_heads); + id->cur_sectors = __le16_to_cpu(id->cur_sectors); + id->cur_capacity0 = __le16_to_cpu(id->cur_capacity0); + id->cur_capacity1 = __le16_to_cpu(id->cur_capacity1); + id->lba_capacity = __le32_to_cpu(id->lba_capacity); + id->dma_1word = __le16_to_cpu(id->dma_1word); + id->dma_mword = __le16_to_cpu(id->dma_mword); + id->eide_pio_modes = __le16_to_cpu(id->eide_pio_modes); + id->eide_dma_min = __le16_to_cpu(id->eide_dma_min); + id->eide_dma_time = __le16_to_cpu(id->eide_dma_time); + id->eide_pio = __le16_to_cpu(id->eide_pio); + id->eide_pio_iordy = __le16_to_cpu(id->eide_pio_iordy); + for (i = 0; i < 2; ++i) + id->words69_70[i] = __le16_to_cpu(id->words69_70[i]); + for (i = 0; i < 4; ++i) + id->words71_74[i] = __le16_to_cpu(id->words71_74[i]); + id->queue_depth = __le16_to_cpu(id->queue_depth); + for (i = 0; i < 4; ++i) + id->words76_79[i] = __le16_to_cpu(id->words76_79[i]); + id->major_rev_num = __le16_to_cpu(id->major_rev_num); + id->minor_rev_num = __le16_to_cpu(id->minor_rev_num); + id->command_set_1 = __le16_to_cpu(id->command_set_1); + id->command_set_2 = __le16_to_cpu(id->command_set_2); + id->cfsse = __le16_to_cpu(id->cfsse); + id->cfs_enable_1 = __le16_to_cpu(id->cfs_enable_1); + id->cfs_enable_2 = __le16_to_cpu(id->cfs_enable_2); + id->csf_default = __le16_to_cpu(id->csf_default); + id->dma_ultra = __le16_to_cpu(id->dma_ultra); + id->trseuc = __le16_to_cpu(id->trseuc); + id->trsEuc = __le16_to_cpu(id->trsEuc); + id->CurAPMvalues = __le16_to_cpu(id->CurAPMvalues); + id->mprc = __le16_to_cpu(id->mprc); + id->hw_config = __le16_to_cpu(id->hw_config); + id->acoustic = __le16_to_cpu(id->acoustic); + id->msrqs = __le16_to_cpu(id->msrqs); + id->sxfert = __le16_to_cpu(id->sxfert); + id->sal = __le16_to_cpu(id->sal); + id->spg = __le32_to_cpu(id->spg); + id->lba_capacity_2 = __le64_to_cpu(id->lba_capacity_2); + for (i = 0; i < 22; i++) + id->words104_125[i] = __le16_to_cpu(id->words104_125[i]); + id->last_lun = __le16_to_cpu(id->last_lun); + id->word127 = __le16_to_cpu(id->word127); + id->dlf = __le16_to_cpu(id->dlf); + id->csfo = __le16_to_cpu(id->csfo); + for (i = 0; i < 26; i++) + id->words130_155[i] = __le16_to_cpu(id->words130_155[i]); + id->word156 = __le16_to_cpu(id->word156); + for (i = 0; i < 3; i++) + id->words157_159[i] = __le16_to_cpu(id->words157_159[i]); + id->cfa_power = __le16_to_cpu(id->cfa_power); + for (i = 0; i < 15; i++) + id->words161_175[i] = __le16_to_cpu(id->words161_175[i]); + for (i = 0; i < 30; i++) + id->words176_205[i] = __le16_to_cpu(id->words176_205[i]); + for (i = 0; i < 49; i++) + id->words206_254[i] = __le16_to_cpu(id->words206_254[i]); + id->integrity_word = __le16_to_cpu(id->integrity_word); # else # error "Please fix " # endif @@ -417,21 +501,19 @@ void ide_fix_driveid(u16 *id) * ide_fixstring() cleans up and (optionally) byte-swaps a text string, * removing leading/trailing blanks and compressing internal blanks. * It is primarily used to tidy up the model name/number fields as - * returned by the ATA_CMD_ID_ATA[PI] commands. + * returned by the WIN_[P]IDENTIFY commands. */ void ide_fixstring (u8 *s, const int bytecount, const int byteswap) { - u8 *p, *end = &s[bytecount & ~1]; /* bytecount must be even */ + u8 *p = s, *end = &s[bytecount & ~1]; /* bytecount must be even */ if (byteswap) { /* convert from big-endian to host byte order */ - for (p = s ; p != end ; p += 2) - be16_to_cpus((u16 *) p); + for (p = end ; p != s;) + be16_to_cpus((u16 *)(p -= 2)); } - /* strip leading blanks */ - p = s; while (s != end && *s == ' ') ++s; /* compress internal blanks and strip trailing blanks */ @@ -474,7 +556,7 @@ int drive_is_ready (ide_drive_t *drive) /* Note: this may clear a pending IRQ!! */ stat = hwif->tp_ops->read_status(hwif); - if (stat & ATA_BUSY) + if (stat & BUSY_STAT) /* drive busy: definitely not interrupting */ return 0; @@ -506,10 +588,10 @@ static int __ide_wait_stat(ide_drive_t *drive, u8 good, u8 bad, unsigned long ti udelay(1); /* spec allows drive 400ns to assert "BUSY" */ stat = tp_ops->read_status(hwif); - if (stat & ATA_BUSY) { + if (stat & BUSY_STAT) { local_irq_set(flags); timeout += jiffies; - while ((stat = tp_ops->read_status(hwif)) & ATA_BUSY) { + while ((stat = tp_ops->read_status(hwif)) & BUSY_STAT) { if (time_after(jiffies, timeout)) { /* * One last read after the timeout in case @@ -517,7 +599,7 @@ static int __ide_wait_stat(ide_drive_t *drive, u8 good, u8 bad, unsigned long ti * progress during the timeout.. */ stat = tp_ops->read_status(hwif); - if ((stat & ATA_BUSY) == 0) + if (!(stat & BUSY_STAT)) break; local_irq_restore(flags); @@ -578,18 +660,18 @@ EXPORT_SYMBOL(ide_wait_stat); /** * ide_in_drive_list - look for drive in black/white list * @id: drive identifier - * @table: list to inspect + * @drive_table: list to inspect * * Look for a drive in the blacklist and the whitelist tables * Returns 1 if the drive is found in the table. */ -int ide_in_drive_list(u16 *id, const struct drive_list_entry *table) +int ide_in_drive_list(struct hd_driveid *id, const struct drive_list_entry *drive_table) { - for ( ; table->id_model; table++) - if ((!strcmp(table->id_model, (char *)&id[ATA_ID_PROD])) && - (!table->id_firmware || - strstr((char *)&id[ATA_ID_FW_REV], table->id_firmware))) + for ( ; drive_table->id_model; drive_table++) + if ((!strcmp(drive_table->id_model, id->model)) && + (!drive_table->id_firmware || + strstr(id->fw_rev, drive_table->id_firmware))) return 1; return 0; } @@ -620,7 +702,7 @@ static const struct drive_list_entry ivb_list[] = { u8 eighty_ninty_three (ide_drive_t *drive) { ide_hwif_t *hwif = drive->hwif; - u16 *id = drive->id; + struct hd_driveid *id = drive->id; int ivb = ide_in_drive_list(id, ivb_list); if (hwif->cbl == ATA_CBL_PATA40_SHORT) @@ -630,7 +712,7 @@ u8 eighty_ninty_three (ide_drive_t *drive) printk(KERN_DEBUG "%s: skipping word 93 validity check\n", drive->name); - if (ata_id_is_sata(id) && !ivb) + if (ide_dev_is_sata(id) && !ivb) return 1; if (hwif->cbl != ATA_CBL_PATA80 && !ivb) @@ -642,8 +724,7 @@ u8 eighty_ninty_three (ide_drive_t *drive) * - force bit13 (80c cable present) check also for !ivb devices * (unless the slave device is pre-ATA3) */ - if ((id[ATA_ID_HW_CONFIG] & 0x4000) || - (ivb && (id[ATA_ID_HW_CONFIG] & 0x2000))) + if ((id->hw_config & 0x4000) || (ivb && (id->hw_config & 0x2000))) return 1; no_80w: @@ -664,8 +745,8 @@ int ide_driveid_update(ide_drive_t *drive) { ide_hwif_t *hwif = drive->hwif; const struct ide_tp_ops *tp_ops = hwif->tp_ops; - u16 *id; - unsigned long flags; + struct hd_driveid *id; + unsigned long timeout, flags; u8 stat; /* @@ -676,24 +757,29 @@ int ide_driveid_update(ide_drive_t *drive) SELECT_MASK(drive, 1); tp_ops->set_irq(hwif, 0); msleep(50); - tp_ops->exec_command(hwif, ATA_CMD_ID_ATA); + tp_ops->exec_command(hwif, WIN_IDENTIFY); + timeout = jiffies + WAIT_WORSTCASE; + do { + if (time_after(jiffies, timeout)) { + SELECT_MASK(drive, 0); + return 0; /* drive timed-out */ + } - if (ide_busy_sleep(hwif, WAIT_WORSTCASE, 1)) { - SELECT_MASK(drive, 0); - return 0; - } + msleep(50); /* give drive a breather */ + stat = tp_ops->read_altstatus(hwif); + } while (stat & BUSY_STAT); - msleep(50); /* wait for IRQ and ATA_DRQ */ + msleep(50); /* wait for IRQ and DRQ_STAT */ stat = tp_ops->read_status(hwif); - if (!OK_STAT(stat, ATA_DRQ, BAD_R_STAT)) { + if (!OK_STAT(stat, DRQ_STAT, BAD_R_STAT)) { SELECT_MASK(drive, 0); printk("%s: CHECK for good STATUS\n", drive->name); return 0; } local_irq_save(flags); SELECT_MASK(drive, 0); - id = kmalloc(SECTOR_SIZE, GFP_ATOMIC); + id = kmalloc(SECTOR_WORDS*4, GFP_ATOMIC); if (!id) { local_irq_restore(flags); return 0; @@ -703,16 +789,16 @@ int ide_driveid_update(ide_drive_t *drive) local_irq_enable(); local_irq_restore(flags); ide_fix_driveid(id); - - drive->id[ATA_ID_UDMA_MODES] = id[ATA_ID_UDMA_MODES]; - drive->id[ATA_ID_MWDMA_MODES] = id[ATA_ID_MWDMA_MODES]; - drive->id[ATA_ID_SWDMA_MODES] = id[ATA_ID_SWDMA_MODES]; - /* anything more ? */ - - kfree(id); - - if (drive->using_dma && ide_id_dma_bug(drive)) - ide_dma_off(drive); + if (id) { + drive->id->dma_ultra = id->dma_ultra; + drive->id->dma_mword = id->dma_mword; + drive->id->dma_1word = id->dma_1word; + /* anything more ? */ + kfree(id); + + if (drive->using_dma && ide_id_dma_bug(drive)) + ide_dma_off(drive); + } return 1; } @@ -721,7 +807,6 @@ int ide_config_drive_speed(ide_drive_t *drive, u8 speed) { ide_hwif_t *hwif = drive->hwif; const struct ide_tp_ops *tp_ops = hwif->tp_ops; - u16 *id = drive->id, i; int error = 0; u8 stat; ide_task_t task; @@ -732,7 +817,7 @@ int ide_config_drive_speed(ide_drive_t *drive, u8 speed) #endif /* Skip setting PIO flow-control modes on pre-EIDE drives */ - if ((speed & 0xf8) == XFER_PIO_0 && ata_id_has_iordy(drive->id) == 0) + if ((speed & 0xf8) == XFER_PIO_0 && !(drive->id->capability & 0x08)) goto skip; /* @@ -766,13 +851,13 @@ int ide_config_drive_speed(ide_drive_t *drive, u8 speed) tp_ops->tf_load(drive, &task); - tp_ops->exec_command(hwif, ATA_CMD_SET_FEATURES); + tp_ops->exec_command(hwif, WIN_SETFEATURES); if (drive->quirk_list == 2) tp_ops->set_irq(hwif, 1); error = __ide_wait_stat(drive, drive->ready_stat, - ATA_BUSY | ATA_DRQ | ATA_ERR, + BUSY_STAT|DRQ_STAT|ERR_STAT, WAIT_CMD, &stat); SELECT_MASK(drive, 0); @@ -784,9 +869,9 @@ int ide_config_drive_speed(ide_drive_t *drive, u8 speed) return error; } - id[ATA_ID_UDMA_MODES] &= ~0xFF00; - id[ATA_ID_MWDMA_MODES] &= ~0x0F00; - id[ATA_ID_SWDMA_MODES] &= ~0x0F00; + drive->id->dma_ultra &= ~0xFF00; + drive->id->dma_mword &= ~0x0F00; + drive->id->dma_1word &= ~0x0F00; skip: #ifdef CONFIG_BLK_DEV_IDEDMA @@ -796,17 +881,23 @@ int ide_config_drive_speed(ide_drive_t *drive, u8 speed) ide_dma_off_quietly(drive); #endif - if (speed >= XFER_UDMA_0) { - i = 1 << (speed - XFER_UDMA_0); - id[ATA_ID_UDMA_MODES] |= (i << 8 | i); - } else if (speed >= XFER_MW_DMA_0) { - i = 1 << (speed - XFER_MW_DMA_0); - id[ATA_ID_MWDMA_MODES] |= (i << 8 | i); - } else if (speed >= XFER_SW_DMA_0) { - i = 1 << (speed - XFER_SW_DMA_0); - id[ATA_ID_SWDMA_MODES] |= (i << 8 | i); + switch(speed) { + case XFER_UDMA_7: drive->id->dma_ultra |= 0x8080; break; + case XFER_UDMA_6: drive->id->dma_ultra |= 0x4040; break; + case XFER_UDMA_5: drive->id->dma_ultra |= 0x2020; break; + case XFER_UDMA_4: drive->id->dma_ultra |= 0x1010; break; + case XFER_UDMA_3: drive->id->dma_ultra |= 0x0808; break; + case XFER_UDMA_2: drive->id->dma_ultra |= 0x0404; break; + case XFER_UDMA_1: drive->id->dma_ultra |= 0x0202; break; + case XFER_UDMA_0: drive->id->dma_ultra |= 0x0101; break; + case XFER_MW_DMA_2: drive->id->dma_mword |= 0x0404; break; + case XFER_MW_DMA_1: drive->id->dma_mword |= 0x0202; break; + case XFER_MW_DMA_0: drive->id->dma_mword |= 0x0101; break; + case XFER_SW_DMA_2: drive->id->dma_1word |= 0x0404; break; + case XFER_SW_DMA_1: drive->id->dma_1word |= 0x0202; break; + case XFER_SW_DMA_0: drive->id->dma_1word |= 0x0101; break; + default: break; } - if (!drive->init_speed) drive->init_speed = speed; drive->current_speed = speed; @@ -886,7 +977,7 @@ void ide_execute_pkt_cmd(ide_drive_t *drive) unsigned long flags; spin_lock_irqsave(&ide_lock, flags); - hwif->tp_ops->exec_command(hwif, ATA_CMD_PACKET); + hwif->tp_ops->exec_command(hwif, WIN_PACKETCMD); ndelay(400); spin_unlock_irqrestore(&ide_lock, flags); } @@ -919,7 +1010,7 @@ static ide_startstop_t atapi_reset_pollfunc (ide_drive_t *drive) udelay (10); stat = hwif->tp_ops->read_status(hwif); - if (OK_STAT(stat, 0, ATA_BUSY)) + if (OK_STAT(stat, 0, BUSY_STAT)) printk("%s: ATAPI reset complete\n", drive->name); else { if (time_before(jiffies, hwgroup->poll_timeout)) { @@ -965,7 +1056,7 @@ static ide_startstop_t reset_pollfunc (ide_drive_t *drive) tmp = hwif->tp_ops->read_status(hwif); - if (!OK_STAT(tmp, 0, ATA_BUSY)) { + if (!OK_STAT(tmp, 0, BUSY_STAT)) { if (time_before(jiffies, hwgroup->poll_timeout)) { ide_set_handler(drive, &reset_pollfunc, HZ/20, NULL); /* continue polling */ @@ -1011,7 +1102,7 @@ static ide_startstop_t reset_pollfunc (ide_drive_t *drive) static void ide_disk_pre_reset(ide_drive_t *drive) { - int legacy = (drive->id[ATA_ID_CFS_ENABLE_2] & 0x0400) ? 0 : 1; + int legacy = (drive->id->cfs_enable_2 & 0x0400) ? 0 : 1; drive->special.all = 0; drive->special.b.set_geometry = legacy; @@ -1096,7 +1187,7 @@ static ide_startstop_t do_reset1 (ide_drive_t *drive, int do_not_try_atapi) pre_reset(drive); SELECT_DRIVE(drive); udelay (20); - tp_ops->exec_command(hwif, ATA_CMD_DEV_RESET); + tp_ops->exec_command(hwif, WIN_SRST); ndelay(400); hwgroup->poll_timeout = jiffies + WAIT_WORSTCASE; hwgroup->polling = 1; @@ -1179,7 +1270,7 @@ int ide_wait_not_busy(ide_hwif_t *hwif, unsigned long timeout) */ mdelay(1); stat = hwif->tp_ops->read_status(hwif); - if ((stat & ATA_BUSY) == 0) + if ((stat & BUSY_STAT) == 0) return 0; /* * Assume a value of 0xff means nothing is connected to diff --git a/trunk/drivers/ide/ide-lib.c b/trunk/drivers/ide/ide-lib.c index ed426dd0fdd8..97fefabea8b8 100644 --- a/trunk/drivers/ide/ide-lib.c +++ b/trunk/drivers/ide/ide-lib.c @@ -2,6 +2,7 @@ #include #include #include +#include #include #include @@ -89,31 +90,29 @@ static u8 ide_rate_filter(ide_drive_t *drive, u8 speed) u8 ide_get_best_pio_mode (ide_drive_t *drive, u8 mode_wanted, u8 max_mode) { - u16 *id = drive->id; - int pio_mode = -1, overridden = 0; + int pio_mode; + struct hd_driveid* id = drive->id; + int overridden = 0; if (mode_wanted != 255) return min_t(u8, mode_wanted, max_mode); - if ((drive->hwif->host_flags & IDE_HFLAG_PIO_NO_BLACKLIST) == 0) - pio_mode = ide_scan_pio_blacklist((char *)&id[ATA_ID_PROD]); - - if (pio_mode != -1) { + if ((drive->hwif->host_flags & IDE_HFLAG_PIO_NO_BLACKLIST) == 0 && + (pio_mode = ide_scan_pio_blacklist(id->model)) != -1) { printk(KERN_INFO "%s: is on PIO blacklist\n", drive->name); } else { - pio_mode = id[ATA_ID_OLD_PIO_MODES] >> 8; + pio_mode = id->tPIO; if (pio_mode > 2) { /* 2 is maximum allowed tPIO value */ pio_mode = 2; overridden = 1; } - - if (id[ATA_ID_FIELD_VALID] & 2) { /* ATA2? */ - if (ata_id_has_iordy(id)) { - if (id[ATA_ID_PIO_MODES] & 7) { + if (id->field_valid & 2) { /* drive implements ATA2? */ + if (id->capability & 8) { /* IORDY supported? */ + if (id->eide_pio_modes & 7) { overridden = 0; - if (id[ATA_ID_PIO_MODES] & 4) + if (id->eide_pio_modes & 4) pio_mode = 5; - else if (id[ATA_ID_PIO_MODES] & 2) + else if (id->eide_pio_modes & 2) pio_mode = 4; else pio_mode = 3; @@ -339,16 +338,16 @@ static void ide_dump_sector(ide_drive_t *drive) static void ide_dump_ata_error(ide_drive_t *drive, u8 err) { printk("{ "); - if (err & ATA_ABORTED) printk("DriveStatusError "); - if (err & ATA_ICRC) - printk((err & ATA_ABORTED) ? "BadCRC " : "BadSector "); - if (err & ATA_UNC) printk("UncorrectableError "); - if (err & ATA_IDNF) printk("SectorIdNotFound "); - if (err & ATA_TRK0NF) printk("TrackZeroNotFound "); - if (err & ATA_AMNF) printk("AddrMarkNotFound "); + if (err & ABRT_ERR) printk("DriveStatusError "); + if (err & ICRC_ERR) + printk((err & ABRT_ERR) ? "BadCRC " : "BadSector "); + if (err & ECC_ERR) printk("UncorrectableError "); + if (err & ID_ERR) printk("SectorIdNotFound "); + if (err & TRK0_ERR) printk("TrackZeroNotFound "); + if (err & MARK_ERR) printk("AddrMarkNotFound "); printk("}"); - if ((err & (ATA_BBK | ATA_ABORTED)) == ATA_BBK || - (err & (ATA_UNC | ATA_IDNF | ATA_AMNF))) { + if ((err & (BBD_ERR | ABRT_ERR)) == BBD_ERR || + (err & (ECC_ERR|ID_ERR|MARK_ERR))) { ide_dump_sector(drive); if (HWGROUP(drive) && HWGROUP(drive)->rq) printk(", sector=%llu", @@ -360,12 +359,12 @@ static void ide_dump_ata_error(ide_drive_t *drive, u8 err) static void ide_dump_atapi_error(ide_drive_t *drive, u8 err) { printk("{ "); - if (err & ATAPI_ILI) printk("IllegalLengthIndication "); - if (err & ATAPI_EOM) printk("EndOfMedia "); - if (err & ATA_ABORTED) printk("AbortedCommand "); - if (err & ATA_MCR) printk("MediaChangeRequested "); - if (err & ATAPI_LFS) printk("LastFailedSense=0x%02x ", - (err & ATAPI_LFS) >> 4); + if (err & ILI_ERR) printk("IllegalLengthIndication "); + if (err & EOM_ERR) printk("EndOfMedia "); + if (err & ABRT_ERR) printk("AbortedCommand "); + if (err & MCR_ERR) printk("MediaChangeRequested "); + if (err & LFS_ERR) printk("LastFailedSense=0x%02x ", + (err & LFS_ERR) >> 4); printk("}\n"); } @@ -387,19 +386,19 @@ u8 ide_dump_status(ide_drive_t *drive, const char *msg, u8 stat) local_irq_save(flags); printk("%s: %s: status=0x%02x { ", drive->name, msg, stat); - if (stat & ATA_BUSY) + if (stat & BUSY_STAT) printk("Busy "); else { - if (stat & ATA_DRDY) printk("DriveReady "); - if (stat & ATA_DF) printk("DeviceFault "); - if (stat & ATA_DSC) printk("SeekComplete "); - if (stat & ATA_DRQ) printk("DataRequest "); - if (stat & ATA_CORR) printk("CorrectedError "); - if (stat & ATA_IDX) printk("Index "); - if (stat & ATA_ERR) printk("Error "); + if (stat & READY_STAT) printk("DriveReady "); + if (stat & WRERR_STAT) printk("DeviceFault "); + if (stat & SEEK_STAT) printk("SeekComplete "); + if (stat & DRQ_STAT) printk("DataRequest "); + if (stat & ECC_STAT) printk("CorrectedError "); + if (stat & INDEX_STAT) printk("Index "); + if (stat & ERR_STAT) printk("Error "); } printk("}\n"); - if ((stat & (ATA_BUSY | ATA_ERR)) == ATA_ERR) { + if ((stat & (BUSY_STAT|ERR_STAT)) == ERR_STAT) { err = ide_read_error(drive); printk("%s: %s: error=0x%02x ", drive->name, msg, err); if (drive->media == ide_disk) diff --git a/trunk/drivers/ide/ide-probe.c b/trunk/drivers/ide/ide-probe.c index 06575a12b635..70aa86c8807e 100644 --- a/trunk/drivers/ide/ide-probe.c +++ b/trunk/drivers/ide/ide-probe.c @@ -50,54 +50,59 @@ static void generic_id(ide_drive_t *drive) { - u16 *id = drive->id; - - id[ATA_ID_CUR_CYLS] = id[ATA_ID_CYLS] = drive->cyl; - id[ATA_ID_CUR_HEADS] = id[ATA_ID_HEADS] = drive->head; - id[ATA_ID_CUR_SECTORS] = id[ATA_ID_SECTORS] = drive->sect; + drive->id->cyls = drive->cyl; + drive->id->heads = drive->head; + drive->id->sectors = drive->sect; + drive->id->cur_cyls = drive->cyl; + drive->id->cur_heads = drive->head; + drive->id->cur_sectors = drive->sect; } static void ide_disk_init_chs(ide_drive_t *drive) { - u16 *id = drive->id; + struct hd_driveid *id = drive->id; /* Extract geometry if we did not already have one for the drive */ if (!drive->cyl || !drive->head || !drive->sect) { - drive->cyl = drive->bios_cyl = id[ATA_ID_CYLS]; - drive->head = drive->bios_head = id[ATA_ID_HEADS]; - drive->sect = drive->bios_sect = id[ATA_ID_SECTORS]; + drive->cyl = drive->bios_cyl = id->cyls; + drive->head = drive->bios_head = id->heads; + drive->sect = drive->bios_sect = id->sectors; } /* Handle logical geometry translation by the drive */ - if (ata_id_current_chs_valid(id)) { - drive->cyl = id[ATA_ID_CUR_CYLS]; - drive->head = id[ATA_ID_CUR_HEADS]; - drive->sect = id[ATA_ID_CUR_SECTORS]; + if ((id->field_valid & 1) && id->cur_cyls && + id->cur_heads && (id->cur_heads <= 16) && id->cur_sectors) { + drive->cyl = id->cur_cyls; + drive->head = id->cur_heads; + drive->sect = id->cur_sectors; } /* Use physical geometry if what we have still makes no sense */ - if (drive->head > 16 && id[ATA_ID_HEADS] && id[ATA_ID_HEADS] <= 16) { - drive->cyl = id[ATA_ID_CYLS]; - drive->head = id[ATA_ID_HEADS]; - drive->sect = id[ATA_ID_SECTORS]; + if (drive->head > 16 && id->heads && id->heads <= 16) { + drive->cyl = id->cyls; + drive->head = id->heads; + drive->sect = id->sectors; } } static void ide_disk_init_mult_count(ide_drive_t *drive) { - u16 *id = drive->id; - u8 max_multsect = id[ATA_ID_MAX_MULTSECT] & 0xff; - - if (max_multsect) { - if ((max_multsect / 2) > 1) - id[ATA_ID_MULTSECT] = max_multsect | 0x100; - else - id[ATA_ID_MULTSECT] &= ~0x1ff; - - drive->mult_req = id[ATA_ID_MULTSECT] & 0xff; - - if (drive->mult_req) + struct hd_driveid *id = drive->id; + + drive->mult_count = 0; + if (id->max_multsect) { +#ifdef CONFIG_IDEDISK_MULTI_MODE + id->multsect = ((id->max_multsect/2) > 1) ? id->max_multsect : 0; + id->multsect_valid = id->multsect ? 1 : 0; + drive->mult_req = id->multsect_valid ? id->max_multsect : 0; + drive->special.b.set_multmode = drive->mult_req ? 1 : 0; +#else /* original, pre IDE-NFG, per request of AC */ + drive->mult_req = 0; + if (drive->mult_req > id->max_multsect) + drive->mult_req = id->max_multsect; + if (drive->mult_req || ((id->multsect_valid & 1) && id->multsect)) drive->special.b.set_multmode = 1; +#endif } } @@ -114,10 +119,10 @@ static void ide_disk_init_mult_count(ide_drive_t *drive) static inline void do_identify (ide_drive_t *drive, u8 cmd) { ide_hwif_t *hwif = HWIF(drive); - u16 *id = drive->id; - char *m = (char *)&id[ATA_ID_PROD]; - int bswap = 1, is_cfa; + int bswap = 1; + struct hd_driveid *id; + id = drive->id; /* read 512 bytes of id info */ hwif->tp_ops->input_data(drive, NULL, id, SECTOR_SIZE); @@ -130,28 +135,27 @@ static inline void do_identify (ide_drive_t *drive, u8 cmd) ide_fix_driveid(id); /* - * ATA_CMD_ID_ATA returns little-endian info, - * ATA_CMD_ID_ATAPI *usually* returns little-endian info. + * WIN_IDENTIFY returns little-endian info, + * WIN_PIDENTIFY *usually* returns little-endian info. */ - if (cmd == ATA_CMD_ID_ATAPI) { - if ((m[0] == 'N' && m[1] == 'E') || /* NEC */ - (m[0] == 'F' && m[1] == 'X') || /* Mitsumi */ - (m[0] == 'P' && m[1] == 'i')) /* Pioneer */ + if (cmd == WIN_PIDENTIFY) { + if ((id->model[0] == 'N' && id->model[1] == 'E') /* NEC */ + || (id->model[0] == 'F' && id->model[1] == 'X') /* Mitsumi */ + || (id->model[0] == 'P' && id->model[1] == 'i'))/* Pioneer */ /* Vertos drives may still be weird */ - bswap ^= 1; + bswap ^= 1; } - - ide_fixstring(m, ATA_ID_PROD_LEN, bswap); - ide_fixstring((char *)&id[ATA_ID_FW_REV], ATA_ID_FW_REV_LEN, bswap); - ide_fixstring((char *)&id[ATA_ID_SERNO], ATA_ID_SERNO_LEN, bswap); + ide_fixstring(id->model, sizeof(id->model), bswap); + ide_fixstring(id->fw_rev, sizeof(id->fw_rev), bswap); + ide_fixstring(id->serial_no, sizeof(id->serial_no), bswap); /* we depend on this a lot! */ - m[ATA_ID_PROD_LEN - 1] = '\0'; + id->model[sizeof(id->model)-1] = '\0'; - if (strstr(m, "E X A B Y T E N E S T")) + if (strstr(id->model, "E X A B Y T E N E S T")) goto err_misc; - printk(KERN_INFO "%s: %s, ", drive->name, m); + printk(KERN_INFO "%s: %s, ", drive->name, id->model); drive->present = 1; drive->dead = 0; @@ -159,16 +163,16 @@ static inline void do_identify (ide_drive_t *drive, u8 cmd) /* * Check for an ATAPI device */ - if (cmd == ATA_CMD_ID_ATAPI) { - u8 type = (id[ATA_ID_CONFIG] >> 8) & 0x1f; + if (cmd == WIN_PIDENTIFY) { + u8 type = (id->config >> 8) & 0x1f; printk(KERN_CONT "ATAPI "); switch (type) { case ide_floppy: - if (!strstr(m, "CD-ROM")) { - if (!strstr(m, "oppy") && - !strstr(m, "poyp") && - !strstr(m, "ZIP")) + if (!strstr(id->model, "CD-ROM")) { + if (!strstr(id->model, "oppy") && + !strstr(id->model, "poyp") && + !strstr(id->model, "ZIP")) printk(KERN_CONT "cdrom or floppy?, assuming "); if (drive->media != ide_cdrom) { printk(KERN_CONT "FLOPPY"); @@ -182,7 +186,8 @@ static inline void do_identify (ide_drive_t *drive, u8 cmd) drive->removable = 1; #ifdef CONFIG_PPC /* kludge for Apple PowerBook internal zip */ - if (!strstr(m, "CD-ROM") && strstr(m, "ZIP")) { + if (!strstr(id->model, "CD-ROM") && + strstr(id->model, "ZIP")) { printk(KERN_CONT "FLOPPY"); type = ide_floppy; break; @@ -212,15 +217,18 @@ static inline void do_identify (ide_drive_t *drive, u8 cmd) * Not an ATAPI device: looks like a "regular" hard disk */ - is_cfa = ata_id_is_cfa(id); + /* + * 0x848a = CompactFlash device + * These are *not* removable in Linux definition of the term + */ - /* CF devices are *not* removable in Linux definition of the term */ - if (is_cfa == 0 && (id[ATA_ID_CONFIG] & (1 << 7))) + if ((id->config != 0x848a) && (id->config & (1<<7))) drive->removable = 1; drive->media = ide_disk; - printk(KERN_CONT "%s DISK drive\n", is_cfa ? "CFA" : "ATA"); + printk(KERN_CONT "%s DISK drive\n", + (id->config == 0x848a) ? "CFA" : "ATA"); return; @@ -260,7 +268,7 @@ static int actual_try_to_identify (ide_drive_t *drive, u8 cmd) if (io_ports->ctl_addr) { a = tp_ops->read_altstatus(hwif); s = tp_ops->read_status(hwif); - if ((a ^ s) & ~ATA_IDX) + if ((a ^ s) & ~INDEX_STAT) /* ancient Seagate drives, broken interfaces */ printk(KERN_INFO "%s: probing with STATUS(0x%02x) " "instead of ALTSTATUS(0x%02x)\n", @@ -273,7 +281,7 @@ static int actual_try_to_identify (ide_drive_t *drive, u8 cmd) /* set features register for atapi * identify command to be sure of reply */ - if (cmd == ATA_CMD_ID_ATAPI) { + if (cmd == WIN_PIDENTIFY) { ide_task_t task; memset(&task, 0, sizeof(task)); @@ -286,16 +294,24 @@ static int actual_try_to_identify (ide_drive_t *drive, u8 cmd) /* ask drive for ID */ tp_ops->exec_command(hwif, cmd); - timeout = ((cmd == ATA_CMD_ID_ATA) ? WAIT_WORSTCASE : WAIT_PIDENTIFY) / 2; - - if (ide_busy_sleep(hwif, timeout, use_altstatus)) - return 1; + timeout = ((cmd == WIN_IDENTIFY) ? WAIT_WORSTCASE : WAIT_PIDENTIFY) / 2; + timeout += jiffies; + do { + if (time_after(jiffies, timeout)) { + /* drive timed-out */ + return 1; + } + /* give drive a breather */ + msleep(50); + s = use_altstatus ? tp_ops->read_altstatus(hwif) + : tp_ops->read_status(hwif); + } while (s & BUSY_STAT); - /* wait for IRQ and ATA_DRQ */ + /* wait for IRQ and DRQ_STAT */ msleep(50); s = tp_ops->read_status(hwif); - if (OK_STAT(s, ATA_DRQ, BAD_R_STAT)) { + if (OK_STAT(s, DRQ_STAT, BAD_R_STAT)) { unsigned long flags; /* local CPU only; some systems need this */ @@ -371,21 +387,19 @@ static int try_to_identify (ide_drive_t *drive, u8 cmd) return retval; } -int ide_busy_sleep(ide_hwif_t *hwif, unsigned long timeout, int altstatus) +static int ide_busy_sleep(ide_hwif_t *hwif) { + unsigned long timeout = jiffies + WAIT_WORSTCASE; u8 stat; - timeout += jiffies; - do { - msleep(50); /* give drive a breather */ - stat = altstatus ? hwif->tp_ops->read_altstatus(hwif) - : hwif->tp_ops->read_status(hwif); - if ((stat & ATA_BUSY) == 0) + msleep(50); + stat = hwif->tp_ops->read_status(hwif); + if ((stat & BUSY_STAT) == 0) return 0; } while (time_before(jiffies, timeout)); - return 1; /* drive timed-out */ + return 1; } static u8 ide_read_device(ide_drive_t *drive) @@ -430,13 +444,13 @@ static int do_probe (ide_drive_t *drive, u8 cmd) if (drive->present) { /* avoid waiting for inappropriate probes */ - if (drive->media != ide_disk && cmd == ATA_CMD_ID_ATA) + if ((drive->media != ide_disk) && (cmd == WIN_IDENTIFY)) return 4; } #ifdef DEBUG printk(KERN_INFO "probing for %s: present=%d, media=%d, probetype=%s\n", drive->name, drive->present, drive->media, - (cmd == ATA_CMD_ID_ATA) ? "ATA" : "ATAPI"); + (cmd == WIN_IDENTIFY) ? "ATA" : "ATAPI"); #endif /* needed for some systems @@ -450,7 +464,7 @@ static int do_probe (ide_drive_t *drive, u8 cmd) if (drive->select.b.unit != 0) { /* exit with drive0 selected */ SELECT_DRIVE(&hwif->drives[0]); - /* allow ATA_BUSY to assert & clear */ + /* allow BUSY_STAT to assert & clear */ msleep(50); } /* no i/f present: mmm.. this should be a 4 -ml */ @@ -459,8 +473,8 @@ static int do_probe (ide_drive_t *drive, u8 cmd) stat = tp_ops->read_status(hwif); - if (OK_STAT(stat, ATA_DRDY, ATA_BUSY) || - drive->present || cmd == ATA_CMD_ID_ATAPI) { + if (OK_STAT(stat, READY_STAT, BUSY_STAT) || + drive->present || cmd == WIN_PIDENTIFY) { /* send cmd and wait */ if ((rc = try_to_identify(drive, cmd))) { /* failed: try again */ @@ -469,17 +483,17 @@ static int do_probe (ide_drive_t *drive, u8 cmd) stat = tp_ops->read_status(hwif); - if (stat == (ATA_BUSY | ATA_DRDY)) + if (stat == (BUSY_STAT | READY_STAT)) return 4; - if (rc == 1 && cmd == ATA_CMD_ID_ATAPI) { + if (rc == 1 && cmd == WIN_PIDENTIFY) { printk(KERN_ERR "%s: no response (status = 0x%02x), " "resetting drive\n", drive->name, stat); msleep(50); SELECT_DRIVE(drive); msleep(50); - tp_ops->exec_command(hwif, ATA_CMD_DEV_RESET); - (void)ide_busy_sleep(hwif, WAIT_WORSTCASE, 0); + tp_ops->exec_command(hwif, WIN_SRST); + (void)ide_busy_sleep(hwif); rc = try_to_identify(drive, cmd); } @@ -512,14 +526,13 @@ static void enable_nest (ide_drive_t *drive) const struct ide_tp_ops *tp_ops = hwif->tp_ops; u8 stat; - printk(KERN_INFO "%s: enabling %s -- ", - hwif->name, (char *)&drive->id[ATA_ID_PROD]); + printk(KERN_INFO "%s: enabling %s -- ", hwif->name, drive->id->model); SELECT_DRIVE(drive); msleep(50); - tp_ops->exec_command(hwif, ATA_EXABYTE_ENABLE_NEST); + tp_ops->exec_command(hwif, EXABYTE_ENABLE_NEST); - if (ide_busy_sleep(hwif, WAIT_WORSTCASE, 0)) { + if (ide_busy_sleep(hwif)) { printk(KERN_CONT "failed (timeout)\n"); return; } @@ -532,6 +545,12 @@ static void enable_nest (ide_drive_t *drive) printk(KERN_CONT "failed (status = 0x%02x)\n", stat); else printk(KERN_CONT "success\n"); + + /* if !(success||timed-out) */ + if (do_probe(drive, WIN_IDENTIFY) >= 2) { + /* look for ATAPI device */ + (void) do_probe(drive, WIN_PIDENTIFY); + } } /** @@ -548,8 +567,6 @@ static void enable_nest (ide_drive_t *drive) static inline u8 probe_for_drive (ide_drive_t *drive) { - char *m; - /* * In order to keep things simple we have an id * block for all drives at all times. If the device @@ -559,34 +576,29 @@ static inline u8 probe_for_drive (ide_drive_t *drive) * Also note that 0 everywhere means "can't do X" */ - drive->id = kzalloc(SECTOR_SIZE, GFP_KERNEL); + drive->id = kzalloc(SECTOR_WORDS *4, GFP_KERNEL); drive->id_read = 0; if(drive->id == NULL) { printk(KERN_ERR "ide: out of memory for id data.\n"); return 0; } - - m = (char *)&drive->id[ATA_ID_PROD]; - strcpy(m, "UNKNOWN"); - + strcpy(drive->id->model, "UNKNOWN"); + /* skip probing? */ - if (!drive->noprobe) { -retry: + if (!drive->noprobe) + { /* if !(success||timed-out) */ - if (do_probe(drive, ATA_CMD_ID_ATA) >= 2) + if (do_probe(drive, WIN_IDENTIFY) >= 2) { /* look for ATAPI device */ - (void)do_probe(drive, ATA_CMD_ID_ATAPI); - + (void) do_probe(drive, WIN_PIDENTIFY); + } if (!drive->present) /* drive not found */ return 0; - - if (strstr(m, "E X A B Y T E N E S T")) { + if (strstr(drive->id->model, "E X A B Y T E N E S T")) enable_nest(drive); - goto retry; - } - + /* identification failed? */ if (!drive->id_read) { if (drive->media == ide_disk) { @@ -728,38 +740,36 @@ static int ide_port_wait_ready(ide_hwif_t *hwif) /** * ide_undecoded_slave - look for bad CF adapters - * @dev1: slave device + * @drive1: drive * * Analyse the drives on the interface and attempt to decide if we * have the same drive viewed twice. This occurs with crap CF adapters * and PCMCIA sometimes. */ -void ide_undecoded_slave(ide_drive_t *dev1) +void ide_undecoded_slave(ide_drive_t *drive1) { - ide_drive_t *dev0 = &dev1->hwif->drives[0]; + ide_drive_t *drive0 = &drive1->hwif->drives[0]; - if ((dev1->dn & 1) == 0 || dev0->present == 0) + if ((drive1->dn & 1) == 0 || drive0->present == 0) return; /* If the models don't match they are not the same product */ - if (strcmp((char *)&dev0->id[ATA_ID_PROD], - (char *)&dev1->id[ATA_ID_PROD])) + if (strcmp(drive0->id->model, drive1->id->model)) return; /* Serial numbers do not match */ - if (strncmp((char *)&dev0->id[ATA_ID_SERNO], - (char *)&dev1->id[ATA_ID_SERNO], ATA_ID_SERNO_LEN)) + if (strncmp(drive0->id->serial_no, drive1->id->serial_no, 20)) return; /* No serial number, thankfully very rare for CF */ - if (*(char *)&dev0->id[ATA_ID_SERNO] == 0) + if (drive0->id->serial_no[0] == 0) return; /* Appears to be an IDE flash adapter with decode bugs */ printk(KERN_WARNING "ide-probe: ignoring undecoded slave\n"); - dev1->present = 0; + drive1->present = 0; } EXPORT_SYMBOL_GPL(ide_undecoded_slave); @@ -843,7 +853,7 @@ static void ide_port_tune_devices(ide_hwif_t *hwif) if (hwif->host_flags & IDE_HFLAG_NO_IO_32BIT) drive->no_io_32bit = 1; else - drive->no_io_32bit = drive->id[ATA_ID_DWORD_IO] ? 1 : 0; + drive->no_io_32bit = drive->id->dword_io ? 1 : 0; } } @@ -1027,6 +1037,11 @@ static int init_irq (ide_hwif_t *hwif) ide_hwgroup_t *hwgroup; ide_hwif_t *match = NULL; + + BUG_ON(in_interrupt()); + BUG_ON(irqs_disabled()); + BUG_ON(hwif == NULL); + mutex_lock(&ide_cfg_mtx); hwif->hwgroup = NULL; #if MAX_HWIFS > 1 @@ -1101,8 +1116,7 @@ static int init_irq (ide_hwif_t *hwif) sa = IRQF_SHARED; #endif /* __mc68000__ */ - if (hwif->chipset == ide_pci || hwif->chipset == ide_cmd646 || - hwif->chipset == ide_ali14xx) + if (IDE_CHIPSET_IS_PCI(hwif->chipset)) sa = IRQF_SHARED; if (io_ports->ctl_addr) @@ -1330,6 +1344,8 @@ static void hwif_register_devices(ide_hwif_t *hwif) if (!drive->present) continue; + ide_add_generic_settings(drive); + snprintf(dev->bus_id, BUS_ID_SIZE, "%u.%u", hwif->index, i); dev->parent = &hwif->gendev; dev->bus = &ide_bus_type; @@ -1586,10 +1602,8 @@ struct ide_host *ide_host_alloc_all(const struct ide_port_info *d, if (hws[0]) host->dev[0] = hws[0]->dev; - if (d) { - host->init_chipset = d->init_chipset; + if (d) host->host_flags = d->host_flags; - } return host; } diff --git a/trunk/drivers/ide/ide-proc.c b/trunk/drivers/ide/ide-proc.c index e7030a491463..f66c9c3f6fc6 100644 --- a/trunk/drivers/ide/ide-proc.c +++ b/trunk/drivers/ide/ide-proc.c @@ -12,6 +12,14 @@ * "settings" files. e.g. "cat /proc/ide0/hda/settings" * To write a new value "val" into a specific setting "name", use: * echo "name:val" >/proc/ide/ide0/hda/settings + * + * Also useful, "cat /proc/ide0/hda/[identify, smart_values, + * smart_thresholds, capabilities]" will issue an IDENTIFY / + * PACKET_IDENTIFY / SMART_READ_VALUES / SMART_READ_THRESHOLDS / + * SENSE CAPABILITIES command to /dev/hda, and then dump out the + * returned data as 256 16-bit words. The "hdparm" utility will + * be updated someday soon to use this mechanism. + * */ #include @@ -23,6 +31,7 @@ #include #include #include +#include #include #include @@ -100,14 +109,13 @@ static int proc_ide_read_identify err = taskfile_lib_get_identify(drive, page); if (!err) { - char *out = (char *)page + SECTOR_SIZE; - + char *out = ((char *)page) + (SECTOR_WORDS * 4); page = out; do { out += sprintf(out, "%04x%c", le16_to_cpup(val), (++i & 7) ? ' ' : '\n'); val += 1; - } while (i < SECTOR_SIZE / 2); + } while (i < (SECTOR_WORDS * 2)); len = out - page; } } @@ -115,25 +123,140 @@ static int proc_ide_read_identify } /** - * ide_find_setting - find a specific setting - * @st: setting table pointer + * __ide_add_setting - add an ide setting option + * @drive: drive to use + * @name: setting name + * @rw: true if the function is read write + * @data_type: type of data + * @min: range minimum + * @max: range maximum + * @mul_factor: multiplication scale + * @div_factor: divison scale + * @data: private data field + * @set: setting + * @auto_remove: setting auto removal flag + * + * Removes the setting named from the device if it is present. + * The function takes the settings_lock to protect against + * parallel changes. This function must not be called from IRQ + * context. Returns 0 on success or -1 on failure. + * + * BUGS: This code is seriously over-engineered. There is also + * magic about how the driver specific features are setup. If + * a driver is attached we assume the driver settings are auto + * remove. + */ + +static int __ide_add_setting(ide_drive_t *drive, const char *name, int rw, int data_type, int min, int max, int mul_factor, int div_factor, void *data, ide_procset_t *set, int auto_remove) +{ + ide_settings_t **p = (ide_settings_t **) &drive->settings, *setting = NULL; + + mutex_lock(&ide_setting_mtx); + while ((*p) && strcmp((*p)->name, name) < 0) + p = &((*p)->next); + if ((setting = kzalloc(sizeof(*setting), GFP_KERNEL)) == NULL) + goto abort; + if ((setting->name = kmalloc(strlen(name) + 1, GFP_KERNEL)) == NULL) + goto abort; + strcpy(setting->name, name); + setting->rw = rw; + setting->data_type = data_type; + setting->min = min; + setting->max = max; + setting->mul_factor = mul_factor; + setting->div_factor = div_factor; + setting->data = data; + setting->set = set; + + setting->next = *p; + if (auto_remove) + setting->auto_remove = 1; + *p = setting; + mutex_unlock(&ide_setting_mtx); + return 0; +abort: + mutex_unlock(&ide_setting_mtx); + kfree(setting); + return -1; +} + +int ide_add_setting(ide_drive_t *drive, const char *name, int rw, int data_type, int min, int max, int mul_factor, int div_factor, void *data, ide_procset_t *set) +{ + return __ide_add_setting(drive, name, rw, data_type, min, max, mul_factor, div_factor, data, set, 1); +} + +EXPORT_SYMBOL(ide_add_setting); + +/** + * __ide_remove_setting - remove an ide setting option + * @drive: drive to use + * @name: setting name + * + * Removes the setting named from the device if it is present. + * The caller must hold the setting semaphore. + */ + +static void __ide_remove_setting(ide_drive_t *drive, char *name) +{ + ide_settings_t **p, *setting; + + p = (ide_settings_t **) &drive->settings; + + while ((*p) && strcmp((*p)->name, name)) + p = &((*p)->next); + setting = (*p); + if (setting == NULL) + return; + + (*p) = setting->next; + + kfree(setting->name); + kfree(setting); +} + +/** + * auto_remove_settings - remove driver specific settings + * @drive: drive + * + * Automatically remove all the driver specific settings for this + * drive. This function may not be called from IRQ context. The + * caller must hold ide_setting_mtx. + */ + +static void auto_remove_settings(ide_drive_t *drive) +{ + ide_settings_t *setting; +repeat: + setting = drive->settings; + while (setting) { + if (setting->auto_remove) { + __ide_remove_setting(drive, setting->name); + goto repeat; + } + setting = setting->next; + } +} + +/** + * ide_find_setting_by_name - find a drive specific setting + * @drive: drive to scan * @name: setting name * - * Scan's the setting table for a matching entry and returns + * Scan's the device setting table for a matching entry and returns * this or NULL if no entry is found. The caller must hold the * setting semaphore */ -static -const struct ide_proc_devset *ide_find_setting(const struct ide_proc_devset *st, - char *name) +static ide_settings_t *ide_find_setting_by_name(ide_drive_t *drive, char *name) { - while (st->name) { - if (strcmp(st->name, name) == 0) + ide_settings_t *setting = drive->settings; + + while (setting) { + if (strcmp(setting->name, name) == 0) break; - st++; + setting = setting->next; } - return st->name ? st : NULL; + return setting; } /** @@ -149,20 +272,26 @@ const struct ide_proc_devset *ide_find_setting(const struct ide_proc_devset *st, * be told apart */ -static int ide_read_setting(ide_drive_t *drive, - const struct ide_proc_devset *setting) +static int ide_read_setting(ide_drive_t *drive, ide_settings_t *setting) { - const struct ide_devset *ds = setting->setting; - int val = -EINVAL; - - if (ds->get) { - unsigned long flags; + int val = -EINVAL; + unsigned long flags; + if ((setting->rw & SETTING_READ)) { spin_lock_irqsave(&ide_lock, flags); - val = ds->get(drive); + switch (setting->data_type) { + case TYPE_BYTE: + val = *((u8 *) setting->data); + break; + case TYPE_SHORT: + val = *((u16 *) setting->data); + break; + case TYPE_INT: + val = *((u32 *) setting->data); + break; + } spin_unlock_irqrestore(&ide_lock, flags); } - return val; } @@ -184,23 +313,33 @@ static int ide_read_setting(ide_drive_t *drive, * The current scheme of polling is kludgy, though safe enough. */ -static int ide_write_setting(ide_drive_t *drive, - const struct ide_proc_devset *setting, int val) +static int ide_write_setting(ide_drive_t *drive, ide_settings_t *setting, int val) { - const struct ide_devset *ds = setting->setting; - if (!capable(CAP_SYS_ADMIN)) return -EACCES; - if (!ds->set) + if (setting->set) + return setting->set(drive, val); + if (!(setting->rw & SETTING_WRITE)) return -EPERM; - if ((ds->flags & DS_SYNC) - && (val < setting->min || val > setting->max)) + if (val < setting->min || val > setting->max) return -EINVAL; - return ide_devset_execute(drive, ds, val); + if (ide_spin_wait_hwgroup(drive)) + return -EBUSY; + switch (setting->data_type) { + case TYPE_BYTE: + *((u8 *) setting->data) = val; + break; + case TYPE_SHORT: + *((u16 *) setting->data) = val; + break; + case TYPE_INT: + *((u32 *) setting->data) = val; + break; + } + spin_unlock_irq(&ide_lock); + return 0; } -ide_devset_get(xfer_rate, current_speed); - static int set_xfer_rate (ide_drive_t *drive, int arg) { ide_task_t task; @@ -210,7 +349,7 @@ static int set_xfer_rate (ide_drive_t *drive, int arg) return -EINVAL; memset(&task, 0, sizeof(task)); - task.tf.command = ATA_CMD_SET_FEATURES; + task.tf.command = WIN_SETFEATURES; task.tf.feature = SETFEATURES_XFER; task.tf.nsect = (u8)arg; task.tf_flags = IDE_TFLAG_OUT_FEATURE | IDE_TFLAG_OUT_NSECT | @@ -225,23 +364,29 @@ static int set_xfer_rate (ide_drive_t *drive, int arg) return err; } -ide_devset_rw(current_speed, xfer_rate); -ide_devset_rw_field(init_speed, init_speed); -ide_devset_rw_field(nice1, nice1); -ide_devset_rw_field(number, dn); - -static const struct ide_proc_devset ide_generic_settings[] = { - IDE_PROC_DEVSET(current_speed, 0, 70), - IDE_PROC_DEVSET(init_speed, 0, 70), - IDE_PROC_DEVSET(io_32bit, 0, 1 + (SUPPORT_VLB_SYNC << 1)), - IDE_PROC_DEVSET(keepsettings, 0, 1), - IDE_PROC_DEVSET(nice1, 0, 1), - IDE_PROC_DEVSET(number, 0, 3), - IDE_PROC_DEVSET(pio_mode, 0, 255), - IDE_PROC_DEVSET(unmaskirq, 0, 1), - IDE_PROC_DEVSET(using_dma, 0, 1), - { 0 }, -}; +/** + * ide_add_generic_settings - generic ide settings + * @drive: drive being configured + * + * Add the generic parts of the system settings to the /proc files. + * The caller must not be holding the ide_setting_mtx. + */ + +void ide_add_generic_settings (ide_drive_t *drive) +{ +/* + * drive setting name read/write access data type min max mul_factor div_factor data pointer set function + */ + __ide_add_setting(drive, "io_32bit", drive->no_io_32bit ? SETTING_READ : SETTING_RW, TYPE_BYTE, 0, 1 + (SUPPORT_VLB_SYNC << 1), 1, 1, &drive->io_32bit, set_io_32bit, 0); + __ide_add_setting(drive, "keepsettings", SETTING_RW, TYPE_BYTE, 0, 1, 1, 1, &drive->keep_settings, NULL, 0); + __ide_add_setting(drive, "nice1", SETTING_RW, TYPE_BYTE, 0, 1, 1, 1, &drive->nice1, NULL, 0); + __ide_add_setting(drive, "pio_mode", SETTING_WRITE, TYPE_BYTE, 0, 255, 1, 1, NULL, set_pio_mode, 0); + __ide_add_setting(drive, "unmaskirq", drive->no_unmask ? SETTING_READ : SETTING_RW, TYPE_BYTE, 0, 1, 1, 1, &drive->unmask, NULL, 0); + __ide_add_setting(drive, "using_dma", SETTING_RW, TYPE_BYTE, 0, 1, 1, 1, &drive->using_dma, set_using_dma, 0); + __ide_add_setting(drive, "init_speed", SETTING_RW, TYPE_BYTE, 0, 70, 1, 1, &drive->init_speed, NULL, 0); + __ide_add_setting(drive, "current_speed", SETTING_RW, TYPE_BYTE, 0, 70, 1, 1, &drive->current_speed, set_xfer_rate, 0); + __ide_add_setting(drive, "number", SETTING_RW, TYPE_BYTE, 0, 3, 1, 1, &drive->dn, NULL, 0); +} static void proc_ide_settings_warn(void) { @@ -258,32 +403,19 @@ static void proc_ide_settings_warn(void) static int proc_ide_read_settings (char *page, char **start, off_t off, int count, int *eof, void *data) { - const struct ide_proc_devset *setting, *g, *d; - const struct ide_devset *ds; ide_drive_t *drive = (ide_drive_t *) data; + ide_settings_t *setting = (ide_settings_t *) drive->settings; char *out = page; int len, rc, mul_factor, div_factor; proc_ide_settings_warn(); mutex_lock(&ide_setting_mtx); - g = ide_generic_settings; - d = drive->settings; out += sprintf(out, "name\t\t\tvalue\t\tmin\t\tmax\t\tmode\n"); out += sprintf(out, "----\t\t\t-----\t\t---\t\t---\t\t----\n"); - while (g->name || (d && d->name)) { - /* read settings in the alphabetical order */ - if (g->name && d && d->name) { - if (strcmp(d->name, g->name) < 0) - setting = d++; - else - setting = g++; - } else if (d && d->name) { - setting = d++; - } else - setting = g++; - mul_factor = setting->mulf ? setting->mulf(drive) : 1; - div_factor = setting->divf ? setting->divf(drive) : 1; + while (setting) { + mul_factor = setting->mul_factor; + div_factor = setting->div_factor; out += sprintf(out, "%-24s", setting->name); rc = ide_read_setting(drive, setting); if (rc >= 0) @@ -291,12 +423,12 @@ static int proc_ide_read_settings else out += sprintf(out, "%-16s", "write-only"); out += sprintf(out, "%-16d%-16d", (setting->min * mul_factor + div_factor - 1) / div_factor, setting->max * mul_factor / div_factor); - ds = setting->setting; - if (ds->get) + if (setting->rw & SETTING_READ) out += sprintf(out, "r"); - if (ds->set) + if (setting->rw & SETTING_WRITE) out += sprintf(out, "w"); out += sprintf(out, "\n"); + setting = setting->next; } len = out - page; mutex_unlock(&ide_setting_mtx); @@ -310,10 +442,9 @@ static int proc_ide_write_settings(struct file *file, const char __user *buffer, { ide_drive_t *drive = (ide_drive_t *) data; char name[MAX_LEN + 1]; - int for_real = 0, mul_factor, div_factor; + int for_real = 0; unsigned long n; - - const struct ide_proc_devset *setting; + ide_settings_t *setting; char *buf, *s; if (!capable(CAP_SYS_ADMIN)) @@ -381,21 +512,13 @@ static int proc_ide_write_settings(struct file *file, const char __user *buffer, } mutex_lock(&ide_setting_mtx); - /* generic settings first, then driver specific ones */ - setting = ide_find_setting(ide_generic_settings, name); + setting = ide_find_setting_by_name(drive, name); if (!setting) { - if (drive->settings) - setting = ide_find_setting(drive->settings, name); - if (!setting) { - mutex_unlock(&ide_setting_mtx); - goto parse_error; - } - } - if (for_real) { - mul_factor = setting->mulf ? setting->mulf(drive) : 1; - div_factor = setting->divf ? setting->divf(drive) : 1; - ide_write_setting(drive, setting, val * div_factor / mul_factor); + mutex_unlock(&ide_setting_mtx); + goto parse_error; } + if (for_real) + ide_write_setting(drive, setting, val * setting->div_factor / setting->mul_factor); mutex_unlock(&ide_setting_mtx); } } while (!for_real++); @@ -438,10 +561,11 @@ static int proc_ide_read_dmodel (char *page, char **start, off_t off, int count, int *eof, void *data) { ide_drive_t *drive = (ide_drive_t *) data; - char *m = (char *)&drive->id[ATA_ID_PROD]; + struct hd_driveid *id = drive->id; int len; - len = sprintf(page, "%.40s\n", m[0] ? m : "(none)"); + len = sprintf(page, "%.40s\n", + (id && id->model[0]) ? (char *)id->model : "(none)"); PROC_IDE_READ_RETURN(page, start, off, count, eof, len); } @@ -566,10 +690,6 @@ static void ide_remove_proc_entries(struct proc_dir_entry *dir, ide_proc_entry_t void ide_proc_register_driver(ide_drive_t *drive, ide_driver_t *driver) { - mutex_lock(&ide_setting_mtx); - drive->settings = driver->settings; - mutex_unlock(&ide_setting_mtx); - ide_add_proc_entries(drive->proc, driver->proc, drive); } @@ -606,7 +726,7 @@ void ide_proc_unregister_driver(ide_drive_t *drive, ide_driver_t *driver) * OTOH both ide_{read,write}_setting are only ever used under * ide_setting_mtx. */ - drive->settings = NULL; + auto_remove_settings(drive); spin_unlock_irqrestore(&ide_lock, flags); mutex_unlock(&ide_setting_mtx); } diff --git a/trunk/drivers/ide/ide-tape.c b/trunk/drivers/ide/ide-tape.c index f8c84df4a0bc..3833189144ed 100644 --- a/trunk/drivers/ide/ide-tape.c +++ b/trunk/drivers/ide/ide-tape.c @@ -15,8 +15,6 @@ * Documentation/ide/ChangeLog.ide-tape.1995-2002 */ -#define DRV_NAME "ide-tape" - #define IDETAPE_VERSION "1.20" #include @@ -56,6 +54,8 @@ enum { DBG_CHRDEV = (1 << 2), /* all remaining procedures */ DBG_PROCS = (1 << 3), + /* buffer alloc info (pc_stack & rq_stack) */ + DBG_PCRQ_STACK = (1 << 4), }; /* define to see debug info */ @@ -80,6 +80,26 @@ enum { */ #define IDETAPE_MAX_PC_RETRIES 3 +/* + * With each packet command, we allocate a buffer of IDETAPE_PC_BUFFER_SIZE + * bytes. This is used for several packet commands (Not for READ/WRITE commands) + */ +#define IDETAPE_PC_BUFFER_SIZE 256 + +/* + * In various places in the driver, we need to allocate storage + * for packet commands and requests, which will remain valid while + * we leave the driver to wait for an interrupt or a timeout event. + */ +#define IDETAPE_PC_STACK (10 + IDETAPE_MAX_PC_RETRIES) + +/* + * Some drives (for example, Seagate STT3401A Travan) require a very long + * timeout, because they don't return an interrupt or clear their busy bit + * until after the command completes (even retension commands). + */ +#define IDETAPE_WAIT_CMD (900*HZ) + /* * The following parameter is used to select the point in the internal tape fifo * in which we will start to refill the buffer. Decreasing the following @@ -152,6 +172,20 @@ struct idetape_bh { #define IDETAPE_LU_RETENSION_MASK 2 #define IDETAPE_LU_EOT_MASK 4 +/* + * Special requests for our block device strategy routine. + * + * In order to service a character device command, we add special requests to + * the tail of our block device request queue and wait for their completion. + */ + +enum { + REQ_IDETAPE_PC1 = (1 << 0), /* packet command (first stage) */ + REQ_IDETAPE_PC2 = (1 << 1), /* packet command (second stage) */ + REQ_IDETAPE_READ = (1 << 2), + REQ_IDETAPE_WRITE = (1 << 3), +}; + /* Error codes returned in rq->errors to the higher part of the driver. */ #define IDETAPE_ERROR_GENERAL 101 #define IDETAPE_ERROR_FILEMARK 102 @@ -172,6 +206,13 @@ typedef struct ide_tape_obj { struct kref kref; /* + * Since a typical character device operation requires more + * than one packet command, we provide here enough memory + * for the maximum of interconnected packet commands. + * The packet commands are stored in the circular array pc_stack. + * pc_stack_index points to the last used entry, and warps around + * to the start when we get to the last array entry. + * * pc points to the current processed packet command. * * failed_pc points to the last failed packet command, or contains @@ -183,11 +224,13 @@ typedef struct ide_tape_obj { struct ide_atapi_pc *pc; /* Last failed packet command */ struct ide_atapi_pc *failed_pc; - /* used by REQ_IDETAPE_{READ,WRITE} requests */ - struct ide_atapi_pc queued_pc; - - struct ide_atapi_pc request_sense_pc; - struct request request_sense_rq; + /* Packet command stack */ + struct ide_atapi_pc pc_stack[IDETAPE_PC_STACK]; + /* Next free packet command storage space */ + int pc_stack_index; + struct request rq_stack[IDETAPE_PC_STACK]; + /* We implement a circular array */ + int rq_stack_index; /* * DSC polling variables. @@ -407,6 +450,47 @@ static void idetape_update_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc) pc->bh = bh; } +/* + * idetape_next_pc_storage returns a pointer to a place in which we can + * safely store a packet command, even though we intend to leave the + * driver. A storage space for a maximum of IDETAPE_PC_STACK packet + * commands is allocated at initialization time. + */ +static struct ide_atapi_pc *idetape_next_pc_storage(ide_drive_t *drive) +{ + idetape_tape_t *tape = drive->driver_data; + + debug_log(DBG_PCRQ_STACK, "pc_stack_index=%d\n", tape->pc_stack_index); + + if (tape->pc_stack_index == IDETAPE_PC_STACK) + tape->pc_stack_index = 0; + return (&tape->pc_stack[tape->pc_stack_index++]); +} + +/* + * idetape_next_rq_storage is used along with idetape_next_pc_storage. + * Since we queue packet commands in the request queue, we need to + * allocate a request, along with the allocation of a packet command. + */ + +/************************************************************** + * * + * This should get fixed to use kmalloc(.., GFP_ATOMIC) * + * followed later on by kfree(). -ml * + * * + **************************************************************/ + +static struct request *idetape_next_rq_storage(ide_drive_t *drive) +{ + idetape_tape_t *tape = drive->driver_data; + + debug_log(DBG_PCRQ_STACK, "rq_stack_index=%d\n", tape->rq_stack_index); + + if (tape->rq_stack_index == IDETAPE_PC_STACK) + tape->rq_stack_index = 0; + return (&tape->rq_stack[tape->rq_stack_index++]); +} + /* * called on each failed packet command retry to analyze the request sense. We * currently do not utilize this information. @@ -583,14 +667,61 @@ static void ide_tape_callback(ide_drive_t *drive) idetape_end_request(drive, uptodate, 0); } +static void idetape_init_pc(struct ide_atapi_pc *pc) +{ + memset(pc->c, 0, 12); + pc->retries = 0; + pc->flags = 0; + pc->req_xfer = 0; + pc->buf = pc->pc_buf; + pc->buf_size = IDETAPE_PC_BUFFER_SIZE; + pc->bh = NULL; + pc->b_data = NULL; +} + static void idetape_create_request_sense_cmd(struct ide_atapi_pc *pc) { - ide_init_pc(pc); + idetape_init_pc(pc); pc->c[0] = REQUEST_SENSE; pc->c[4] = 20; pc->req_xfer = 20; } +static void idetape_init_rq(struct request *rq, u8 cmd) +{ + blk_rq_init(NULL, rq); + rq->cmd_type = REQ_TYPE_SPECIAL; + rq->cmd[13] = cmd; +} + +/* + * Generate a new packet command request in front of the request queue, before + * the current request, so that it will be processed immediately, on the next + * pass through the driver. The function below is called from the request + * handling part of the driver (the "bottom" part). Safe storage for the request + * should be allocated with ide_tape_next_{pc,rq}_storage() prior to that. + * + * Memory for those requests is pre-allocated at initialization time, and is + * limited to IDETAPE_PC_STACK requests. We assume that we have enough space for + * the maximum possible number of inter-dependent packet commands. + * + * The higher level of the driver - The ioctl handler and the character device + * handling functions should queue request to the lower level part and wait for + * their completion using idetape_queue_pc_tail or idetape_queue_rw_tail. + */ +static void idetape_queue_pc_head(ide_drive_t *drive, struct ide_atapi_pc *pc, + struct request *rq) +{ + struct ide_tape_obj *tape = drive->driver_data; + + idetape_init_rq(rq, REQ_IDETAPE_PC1); + rq->cmd_flags |= REQ_PREEMPT; + rq->buffer = (char *) pc; + rq->rq_disk = tape->disk; + memcpy(rq->cmd, pc->c, 12); + ide_do_drive_cmd(drive, rq); +} + /* * idetape_retry_pc is called when an error was detected during the * last packet command. We queue a request sense packet command in @@ -598,14 +729,15 @@ static void idetape_create_request_sense_cmd(struct ide_atapi_pc *pc) */ static void idetape_retry_pc(ide_drive_t *drive) { - struct ide_tape_obj *tape = drive->driver_data; - struct request *rq = &tape->request_sense_rq; - struct ide_atapi_pc *pc = &tape->request_sense_pc; + struct ide_atapi_pc *pc; + struct request *rq; (void)ide_read_error(drive); + pc = idetape_next_pc_storage(drive); + rq = idetape_next_rq_storage(drive); idetape_create_request_sense_cmd(pc); set_bit(IDE_AFLAG_IGNORE_DSC, &drive->atapi_flags); - ide_queue_pc_head(drive, tape->disk, pc, rq); + idetape_queue_pc_head(drive, pc, rq); } /* @@ -634,15 +766,13 @@ static void ide_tape_handle_dsc(ide_drive_t *drive) idetape_postpone_request(drive); } -static int ide_tape_io_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc, +static void ide_tape_io_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc, unsigned int bcount, int write) { if (write) idetape_output_buffers(drive, pc, bcount); else idetape_input_buffers(drive, pc, bcount); - - return bcount; } /* @@ -656,7 +786,7 @@ static ide_startstop_t idetape_pc_intr(ide_drive_t *drive) { idetape_tape_t *tape = drive->driver_data; - return ide_pc_intr(drive, tape->pc, idetape_pc_intr, WAIT_TAPE_CMD, + return ide_pc_intr(drive, tape->pc, idetape_pc_intr, IDETAPE_WAIT_CMD, NULL, idetape_update_buffers, idetape_retry_pc, ide_tape_handle_dsc, ide_tape_io_buffers); } @@ -702,7 +832,7 @@ static ide_startstop_t idetape_transfer_pc(ide_drive_t *drive) idetape_tape_t *tape = drive->driver_data; return ide_transfer_pc(drive, tape->pc, idetape_pc_intr, - WAIT_TAPE_CMD, NULL); + IDETAPE_WAIT_CMD, NULL); } static ide_startstop_t idetape_issue_pc(ide_drive_t *drive, @@ -751,13 +881,13 @@ static ide_startstop_t idetape_issue_pc(ide_drive_t *drive, pc->retries++; return ide_issue_pc(drive, pc, idetape_transfer_pc, - WAIT_TAPE_CMD, NULL); + IDETAPE_WAIT_CMD, NULL); } /* A mode sense command is used to "sense" tape parameters. */ static void idetape_create_mode_sense_cmd(struct ide_atapi_pc *pc, u8 page_code) { - ide_init_pc(pc); + idetape_init_pc(pc); pc->c[0] = MODE_SENSE; if (page_code != IDETAPE_BLOCK_DESCRIPTOR) /* DBD = 1 - Don't return block descriptors */ @@ -790,8 +920,8 @@ static ide_startstop_t idetape_media_access_finished(ide_drive_t *drive) stat = hwif->tp_ops->read_status(hwif); - if (stat & ATA_DSC) { - if (stat & ATA_ERR) { + if (stat & SEEK_STAT) { + if (stat & ERR_STAT) { /* Error detected */ if (pc->c[0] != TEST_UNIT_READY) printk(KERN_ERR "ide-tape: %s: I/O error, ", @@ -816,7 +946,7 @@ static void ide_tape_create_rw_cmd(idetape_tape_t *tape, struct idetape_bh *bh = (struct idetape_bh *)rq->special; unsigned int length = rq->current_nr_sectors; - ide_init_pc(pc); + idetape_init_pc(pc); put_unaligned(cpu_to_be32(length), (unsigned int *) &pc->c[1]); pc->c[1] = 1; pc->bh = bh; @@ -848,10 +978,9 @@ static ide_startstop_t idetape_do_request(ide_drive_t *drive, struct request *postponed_rq = tape->postponed_rq; u8 stat; - debug_log(DBG_SENSE, "sector: %llu, nr_sectors: %lu," - " current_nr_sectors: %u\n", - (unsigned long long)rq->sector, rq->nr_sectors, - rq->current_nr_sectors); + debug_log(DBG_SENSE, "sector: %ld, nr_sectors: %ld," + " current_nr_sectors: %d\n", + rq->sector, rq->nr_sectors, rq->current_nr_sectors); if (!blk_special_request(rq)) { /* We do not support buffer cache originated requests. */ @@ -892,7 +1021,7 @@ static ide_startstop_t idetape_do_request(ide_drive_t *drive, } if (!test_and_clear_bit(IDE_AFLAG_IGNORE_DSC, &drive->atapi_flags) && - (stat & ATA_DSC) == 0) { + (stat & SEEK_STAT) == 0) { if (postponed_rq == NULL) { tape->dsc_polling_start = jiffies; tape->dsc_poll_freq = tape->best_dsc_rw_freq; @@ -914,12 +1043,12 @@ static ide_startstop_t idetape_do_request(ide_drive_t *drive, return ide_stopped; } if (rq->cmd[13] & REQ_IDETAPE_READ) { - pc = &tape->queued_pc; + pc = idetape_next_pc_storage(drive); ide_tape_create_rw_cmd(tape, pc, rq, READ_6); goto out; } if (rq->cmd[13] & REQ_IDETAPE_WRITE) { - pc = &tape->queued_pc; + pc = idetape_next_pc_storage(drive); ide_tape_create_rw_cmd(tape, pc, rq, WRITE_6); goto out; } @@ -1106,30 +1235,77 @@ static void idetape_init_merge_buffer(idetape_tape_t *tape) static void idetape_create_write_filemark_cmd(ide_drive_t *drive, struct ide_atapi_pc *pc, int write_filemark) { - ide_init_pc(pc); + idetape_init_pc(pc); pc->c[0] = WRITE_FILEMARKS; pc->c[4] = write_filemark; pc->flags |= PC_FLAG_WAIT_FOR_DSC; } +static void idetape_create_test_unit_ready_cmd(struct ide_atapi_pc *pc) +{ + idetape_init_pc(pc); + pc->c[0] = TEST_UNIT_READY; +} + +/* + * We add a special packet command request to the tail of the request queue, and + * wait for it to be serviced. This is not to be called from within the request + * handling part of the driver! We allocate here data on the stack and it is + * valid until the request is finished. This is not the case for the bottom part + * of the driver, where we are always leaving the functions to wait for an + * interrupt or a timer event. + * + * From the bottom part of the driver, we should allocate safe memory using + * idetape_next_pc_storage() and ide_tape_next_rq_storage(), and add the request + * to the request list without waiting for it to be serviced! In that case, we + * usually use idetape_queue_pc_head(). + */ +static int idetape_queue_pc_tail(ide_drive_t *drive, struct ide_atapi_pc *pc) +{ + struct ide_tape_obj *tape = drive->driver_data; + struct request *rq; + int error; + + rq = blk_get_request(drive->queue, READ, __GFP_WAIT); + rq->cmd_type = REQ_TYPE_SPECIAL; + rq->cmd[13] = REQ_IDETAPE_PC1; + rq->buffer = (char *)pc; + memcpy(rq->cmd, pc->c, 12); + error = blk_execute_rq(drive->queue, tape->disk, rq, 0); + blk_put_request(rq); + return error; +} + +static void idetape_create_load_unload_cmd(ide_drive_t *drive, + struct ide_atapi_pc *pc, int cmd) +{ + idetape_init_pc(pc); + pc->c[0] = START_STOP; + pc->c[4] = cmd; + pc->flags |= PC_FLAG_WAIT_FOR_DSC; +} + static int idetape_wait_ready(ide_drive_t *drive, unsigned long timeout) { idetape_tape_t *tape = drive->driver_data; - struct gendisk *disk = tape->disk; + struct ide_atapi_pc pc; int load_attempted = 0; /* Wait for the tape to become ready */ set_bit(IDE_AFLAG_MEDIUM_PRESENT, &drive->atapi_flags); timeout += jiffies; while (time_before(jiffies, timeout)) { - if (ide_do_test_unit_ready(drive, disk) == 0) + idetape_create_test_unit_ready_cmd(&pc); + if (!idetape_queue_pc_tail(drive, &pc)) return 0; if ((tape->sense_key == 2 && tape->asc == 4 && tape->ascq == 2) || (tape->asc == 0x3A)) { /* no media */ if (load_attempted) return -ENOMEDIUM; - ide_do_start_stop(drive, disk, IDETAPE_LU_LOAD_MASK); + idetape_create_load_unload_cmd(drive, &pc, + IDETAPE_LU_LOAD_MASK); + idetape_queue_pc_tail(drive, &pc); load_attempted = 1; /* not about to be ready */ } else if (!(tape->sense_key == 2 && tape->asc == 4 && @@ -1142,12 +1318,11 @@ static int idetape_wait_ready(ide_drive_t *drive, unsigned long timeout) static int idetape_flush_tape_buffers(ide_drive_t *drive) { - struct ide_tape_obj *tape = drive->driver_data; struct ide_atapi_pc pc; int rc; idetape_create_write_filemark_cmd(drive, &pc, 0); - rc = ide_queue_pc_tail(drive, tape->disk, &pc); + rc = idetape_queue_pc_tail(drive, &pc); if (rc) return rc; idetape_wait_ready(drive, 60 * 5 * HZ); @@ -1156,7 +1331,7 @@ static int idetape_flush_tape_buffers(ide_drive_t *drive) static void idetape_create_read_position_cmd(struct ide_atapi_pc *pc) { - ide_init_pc(pc); + idetape_init_pc(pc); pc->c[0] = READ_POSITION; pc->req_xfer = 20; } @@ -1170,7 +1345,7 @@ static int idetape_read_position(ide_drive_t *drive) debug_log(DBG_PROCS, "Enter %s\n", __func__); idetape_create_read_position_cmd(&pc); - if (ide_queue_pc_tail(drive, tape->disk, &pc)) + if (idetape_queue_pc_tail(drive, &pc)) return -1; position = tape->first_frame; return position; @@ -1180,7 +1355,7 @@ static void idetape_create_locate_cmd(ide_drive_t *drive, struct ide_atapi_pc *pc, unsigned int block, u8 partition, int skip) { - ide_init_pc(pc); + idetape_init_pc(pc); pc->c[0] = POSITION_TO_ELEMENT; pc->c[1] = 2; put_unaligned(cpu_to_be32(block), (unsigned int *) &pc->c[3]); @@ -1188,6 +1363,21 @@ static void idetape_create_locate_cmd(ide_drive_t *drive, pc->flags |= PC_FLAG_WAIT_FOR_DSC; } +static int idetape_create_prevent_cmd(ide_drive_t *drive, + struct ide_atapi_pc *pc, int prevent) +{ + idetape_tape_t *tape = drive->driver_data; + + /* device supports locking according to capabilities page */ + if (!(tape->caps[6] & 0x01)) + return 0; + + idetape_init_pc(pc); + pc->c[0] = ALLOW_MEDIUM_REMOVAL; + pc->c[4] = prevent; + return 1; +} + static void __ide_tape_discard_merge_buffer(ide_drive_t *drive) { idetape_tape_t *tape = drive->driver_data; @@ -1215,7 +1405,6 @@ static int idetape_position_tape(ide_drive_t *drive, unsigned int block, u8 partition, int skip) { idetape_tape_t *tape = drive->driver_data; - struct gendisk *disk = tape->disk; int retval; struct ide_atapi_pc pc; @@ -1223,12 +1412,12 @@ static int idetape_position_tape(ide_drive_t *drive, unsigned int block, __ide_tape_discard_merge_buffer(drive); idetape_wait_ready(drive, 60 * 5 * HZ); idetape_create_locate_cmd(drive, &pc, block, partition, skip); - retval = ide_queue_pc_tail(drive, disk, &pc); + retval = idetape_queue_pc_tail(drive, &pc); if (retval) return (retval); idetape_create_read_position_cmd(&pc); - return ide_queue_pc_tail(drive, disk, &pc); + return (idetape_queue_pc_tail(drive, &pc)); } static void ide_tape_discard_merge_buffer(ide_drive_t *drive, @@ -1288,7 +1477,7 @@ static int idetape_queue_rw_tail(ide_drive_t *drive, int cmd, int blocks, static void idetape_create_inquiry_cmd(struct ide_atapi_pc *pc) { - ide_init_pc(pc); + idetape_init_pc(pc); pc->c[0] = INQUIRY; pc->c[4] = 254; pc->req_xfer = 254; @@ -1297,14 +1486,14 @@ static void idetape_create_inquiry_cmd(struct ide_atapi_pc *pc) static void idetape_create_rewind_cmd(ide_drive_t *drive, struct ide_atapi_pc *pc) { - ide_init_pc(pc); + idetape_init_pc(pc); pc->c[0] = REZERO_UNIT; pc->flags |= PC_FLAG_WAIT_FOR_DSC; } static void idetape_create_erase_cmd(struct ide_atapi_pc *pc) { - ide_init_pc(pc); + idetape_init_pc(pc); pc->c[0] = ERASE; pc->c[1] = 1; pc->flags |= PC_FLAG_WAIT_FOR_DSC; @@ -1312,7 +1501,7 @@ static void idetape_create_erase_cmd(struct ide_atapi_pc *pc) static void idetape_create_space_cmd(struct ide_atapi_pc *pc, int count, u8 cmd) { - ide_init_pc(pc); + idetape_init_pc(pc); pc->c[0] = SPACE; put_unaligned(cpu_to_be32(count), (unsigned int *) &pc->c[1]); pc->c[1] = cmd; @@ -1475,20 +1664,20 @@ static void idetape_pad_zeros(ide_drive_t *drive, int bcount) */ static int idetape_rewind_tape(ide_drive_t *drive) { - struct ide_tape_obj *tape = drive->driver_data; - struct gendisk *disk = tape->disk; int retval; struct ide_atapi_pc pc; + idetape_tape_t *tape; + tape = drive->driver_data; debug_log(DBG_SENSE, "Enter %s\n", __func__); idetape_create_rewind_cmd(drive, &pc); - retval = ide_queue_pc_tail(drive, disk, &pc); + retval = idetape_queue_pc_tail(drive, &pc); if (retval) return retval; idetape_create_read_position_cmd(&pc); - retval = ide_queue_pc_tail(drive, disk, &pc); + retval = idetape_queue_pc_tail(drive, &pc); if (retval) return retval; return 0; @@ -1531,7 +1720,6 @@ static int idetape_space_over_filemarks(ide_drive_t *drive, short mt_op, int mt_count) { idetape_tape_t *tape = drive->driver_data; - struct gendisk *disk = tape->disk; struct ide_atapi_pc pc; int retval, count = 0; int sprev = !!(tape->caps[4] & 0x20); @@ -1556,7 +1744,7 @@ static int idetape_space_over_filemarks(ide_drive_t *drive, short mt_op, case MTBSF: idetape_create_space_cmd(&pc, mt_count - count, IDETAPE_SPACE_OVER_FILEMARK); - return ide_queue_pc_tail(drive, disk, &pc); + return idetape_queue_pc_tail(drive, &pc); case MTFSFM: case MTBSFM: if (!sprev) @@ -1745,12 +1933,11 @@ static ssize_t idetape_chrdev_write(struct file *file, const char __user *buf, static int idetape_write_filemark(ide_drive_t *drive) { - struct ide_tape_obj *tape = drive->driver_data; struct ide_atapi_pc pc; /* Write a filemark */ idetape_create_write_filemark_cmd(drive, &pc, 1); - if (ide_queue_pc_tail(drive, tape->disk, &pc)) { + if (idetape_queue_pc_tail(drive, &pc)) { printk(KERN_ERR "ide-tape: Couldn't write a filemark\n"); return -EIO; } @@ -1773,7 +1960,6 @@ static int idetape_write_filemark(ide_drive_t *drive) static int idetape_mtioctop(ide_drive_t *drive, short mt_op, int mt_count) { idetape_tape_t *tape = drive->driver_data; - struct gendisk *disk = tape->disk; struct ide_atapi_pc pc; int i, retval; @@ -1810,7 +1996,9 @@ static int idetape_mtioctop(ide_drive_t *drive, short mt_op, int mt_count) return 0; case MTLOAD: ide_tape_discard_merge_buffer(drive, 0); - return ide_do_start_stop(drive, disk, IDETAPE_LU_LOAD_MASK); + idetape_create_load_unload_cmd(drive, &pc, + IDETAPE_LU_LOAD_MASK); + return idetape_queue_pc_tail(drive, &pc); case MTUNLOAD: case MTOFFL: /* @@ -1818,11 +2006,14 @@ static int idetape_mtioctop(ide_drive_t *drive, short mt_op, int mt_count) * attempting to eject. */ if (tape->door_locked) { - if (!ide_set_media_lock(drive, disk, 0)) - tape->door_locked = DOOR_UNLOCKED; + if (idetape_create_prevent_cmd(drive, &pc, 0)) + if (!idetape_queue_pc_tail(drive, &pc)) + tape->door_locked = DOOR_UNLOCKED; } ide_tape_discard_merge_buffer(drive, 0); - retval = ide_do_start_stop(drive, disk, !IDETAPE_LU_LOAD_MASK); + idetape_create_load_unload_cmd(drive, &pc, + !IDETAPE_LU_LOAD_MASK); + retval = idetape_queue_pc_tail(drive, &pc); if (!retval) clear_bit(IDE_AFLAG_MEDIUM_PRESENT, &drive->atapi_flags); return retval; @@ -1831,15 +2022,16 @@ static int idetape_mtioctop(ide_drive_t *drive, short mt_op, int mt_count) return idetape_flush_tape_buffers(drive); case MTRETEN: ide_tape_discard_merge_buffer(drive, 0); - return ide_do_start_stop(drive, disk, + idetape_create_load_unload_cmd(drive, &pc, IDETAPE_LU_RETENSION_MASK | IDETAPE_LU_LOAD_MASK); + return idetape_queue_pc_tail(drive, &pc); case MTEOM: idetape_create_space_cmd(&pc, 0, IDETAPE_SPACE_TO_EOD); - return ide_queue_pc_tail(drive, disk, &pc); + return idetape_queue_pc_tail(drive, &pc); case MTERASE: (void)idetape_rewind_tape(drive); idetape_create_erase_cmd(&pc); - return ide_queue_pc_tail(drive, disk, &pc); + return idetape_queue_pc_tail(drive, &pc); case MTSETBLK: if (mt_count) { if (mt_count < tape->blk_size || @@ -1860,13 +2052,17 @@ static int idetape_mtioctop(ide_drive_t *drive, short mt_op, int mt_count) case MTFSR: case MTBSR: case MTLOCK: - retval = ide_set_media_lock(drive, disk, 1); + if (!idetape_create_prevent_cmd(drive, &pc, 1)) + return 0; + retval = idetape_queue_pc_tail(drive, &pc); if (retval) return retval; tape->door_locked = DOOR_EXPLICITLY_LOCKED; return 0; case MTUNLOCK: - retval = ide_set_media_lock(drive, disk, 0); + if (!idetape_create_prevent_cmd(drive, &pc, 0)) + return 0; + retval = idetape_queue_pc_tail(drive, &pc); if (retval) return retval; tape->door_locked = DOOR_UNLOCKED; @@ -1948,7 +2144,7 @@ static void ide_tape_get_bsize_from_bdesc(ide_drive_t *drive) struct ide_atapi_pc pc; idetape_create_mode_sense_cmd(&pc, IDETAPE_BLOCK_DESCRIPTOR); - if (ide_queue_pc_tail(drive, tape->disk, &pc)) { + if (idetape_queue_pc_tail(drive, &pc)) { printk(KERN_ERR "ide-tape: Can't get block descriptor\n"); if (tape->blk_size == 0) { printk(KERN_WARNING "ide-tape: Cannot deal with zero " @@ -1968,6 +2164,7 @@ static int idetape_chrdev_open(struct inode *inode, struct file *filp) unsigned int minor = iminor(inode), i = minor & ~0xc0; ide_drive_t *drive; idetape_tape_t *tape; + struct ide_atapi_pc pc; int retval; if (i >= MAX_HWIFS * MAX_DRIVES) @@ -2030,9 +2227,11 @@ static int idetape_chrdev_open(struct inode *inode, struct file *filp) /* Lock the tape drive door so user can't eject. */ if (tape->chrdev_dir == IDETAPE_DIR_NONE) { - if (!ide_set_media_lock(drive, tape->disk, 1)) { - if (tape->door_locked != DOOR_EXPLICITLY_LOCKED) - tape->door_locked = DOOR_LOCKED; + if (idetape_create_prevent_cmd(drive, &pc, 1)) { + if (!idetape_queue_pc_tail(drive, &pc)) { + if (tape->door_locked != DOOR_EXPLICITLY_LOCKED) + tape->door_locked = DOOR_LOCKED; + } } } unlock_kernel(); @@ -2065,6 +2264,7 @@ static int idetape_chrdev_release(struct inode *inode, struct file *filp) { struct ide_tape_obj *tape = ide_tape_f(filp); ide_drive_t *drive = tape->drive; + struct ide_atapi_pc pc; unsigned int minor = iminor(inode); lock_kernel(); @@ -2083,8 +2283,10 @@ static int idetape_chrdev_release(struct inode *inode, struct file *filp) (void) idetape_rewind_tape(drive); if (tape->chrdev_dir == IDETAPE_DIR_NONE) { if (tape->door_locked == DOOR_LOCKED) { - if (!ide_set_media_lock(drive, tape->disk, 0)) - tape->door_locked = DOOR_UNLOCKED; + if (idetape_create_prevent_cmd(drive, &pc, 0)) { + if (!idetape_queue_pc_tail(drive, &pc)) + tape->door_locked = DOOR_UNLOCKED; + } } } clear_bit(IDE_AFLAG_BUSY, &drive->atapi_flags); @@ -2093,6 +2295,45 @@ static int idetape_chrdev_release(struct inode *inode, struct file *filp) return 0; } +/* + * check the contents of the ATAPI IDENTIFY command results. We return: + * + * 1 - If the tape can be supported by us, based on the information we have so + * far. + * + * 0 - If this tape driver is not currently supported by us. + */ +static int idetape_identify_device(ide_drive_t *drive) +{ + u8 gcw[2], protocol, device_type, removable, packet_size; + + if (drive->id_read == 0) + return 1; + + *((unsigned short *) &gcw) = drive->id->config; + + protocol = (gcw[1] & 0xC0) >> 6; + device_type = gcw[1] & 0x1F; + removable = !!(gcw[0] & 0x80); + packet_size = gcw[0] & 0x3; + + /* Check that we can support this device */ + if (protocol != 2) + printk(KERN_ERR "ide-tape: Protocol (0x%02x) is not ATAPI\n", + protocol); + else if (device_type != 1) + printk(KERN_ERR "ide-tape: Device type (0x%02x) is not set " + "to tape\n", device_type); + else if (!removable) + printk(KERN_ERR "ide-tape: The removable flag is not set\n"); + else if (packet_size != 0) { + printk(KERN_ERR "ide-tape: Packet size (0x%02x) is not 12" + " bytes\n", packet_size); + } else + return 1; + return 0; +} + static void idetape_get_inquiry_results(ide_drive_t *drive) { idetape_tape_t *tape = drive->driver_data; @@ -2100,7 +2341,7 @@ static void idetape_get_inquiry_results(ide_drive_t *drive) char fw_rev[4], vendor_id[8], product_id[16]; idetape_create_inquiry_cmd(&pc); - if (ide_queue_pc_tail(drive, tape->disk, &pc)) { + if (idetape_queue_pc_tail(drive, &pc)) { printk(KERN_ERR "ide-tape: %s: can't get INQUIRY results\n", tape->name); return; @@ -2129,7 +2370,7 @@ static void idetape_get_mode_sense_results(ide_drive_t *drive) u8 speed, max_speed; idetape_create_mode_sense_cmd(&pc, IDETAPE_CAPABILITIES_PAGE); - if (ide_queue_pc_tail(drive, tape->disk, &pc)) { + if (idetape_queue_pc_tail(drive, &pc)) { printk(KERN_ERR "ide-tape: Can't get tape parameters - assuming" " some default values\n"); tape->blk_size = 512; @@ -2161,11 +2402,6 @@ static void idetape_get_mode_sense_results(ide_drive_t *drive) } memcpy(&tape->caps, caps, 20); - - /* device lacks locking support according to capabilities page */ - if ((caps[6] & 1) == 0) - drive->atapi_flags |= IDE_AFLAG_NO_DOORLOCK; - if (caps[7] & 0x02) tape->blk_size = 512; else if (caps[7] & 0x04) @@ -2173,56 +2409,28 @@ static void idetape_get_mode_sense_results(ide_drive_t *drive) } #ifdef CONFIG_IDE_PROC_FS -#define ide_tape_devset_get(name, field) \ -static int get_##name(ide_drive_t *drive) \ -{ \ - idetape_tape_t *tape = drive->driver_data; \ - return tape->field; \ -} - -#define ide_tape_devset_set(name, field) \ -static int set_##name(ide_drive_t *drive, int arg) \ -{ \ - idetape_tape_t *tape = drive->driver_data; \ - tape->field = arg; \ - return 0; \ -} - -#define ide_tape_devset_rw_field(_name, _field) \ -ide_tape_devset_get(_name, _field) \ -ide_tape_devset_set(_name, _field) \ -IDE_DEVSET(_name, DS_SYNC, get_##_name, set_##_name) - -#define ide_tape_devset_r_field(_name, _field) \ -ide_tape_devset_get(_name, _field) \ -IDE_DEVSET(_name, 0, get_##_name, NULL) - -static int mulf_tdsc(ide_drive_t *drive) { return 1000; } -static int divf_tdsc(ide_drive_t *drive) { return HZ; } -static int divf_buffer(ide_drive_t *drive) { return 2; } -static int divf_buffer_size(ide_drive_t *drive) { return 1024; } - -ide_devset_rw_field(dsc_overlap, dsc_overlap); - -ide_tape_devset_rw_field(debug_mask, debug_mask); -ide_tape_devset_rw_field(tdsc, best_dsc_rw_freq); - -ide_tape_devset_r_field(avg_speed, avg_speed); -ide_tape_devset_r_field(speed, caps[14]); -ide_tape_devset_r_field(buffer, caps[16]); -ide_tape_devset_r_field(buffer_size, buffer_size); - -static const struct ide_proc_devset idetape_settings[] = { - __IDE_PROC_DEVSET(avg_speed, 0, 0xffff, NULL, NULL), - __IDE_PROC_DEVSET(buffer, 0, 0xffff, NULL, divf_buffer), - __IDE_PROC_DEVSET(buffer_size, 0, 0xffff, NULL, divf_buffer_size), - __IDE_PROC_DEVSET(debug_mask, 0, 0xffff, NULL, NULL), - __IDE_PROC_DEVSET(dsc_overlap, 0, 1, NULL, NULL), - __IDE_PROC_DEVSET(speed, 0, 0xffff, NULL, NULL), - __IDE_PROC_DEVSET(tdsc, IDETAPE_DSC_RW_MIN, IDETAPE_DSC_RW_MAX, - mulf_tdsc, divf_tdsc), - { 0 }, -}; +static void idetape_add_settings(ide_drive_t *drive) +{ + idetape_tape_t *tape = drive->driver_data; + + ide_add_setting(drive, "buffer", SETTING_READ, TYPE_SHORT, 0, 0xffff, + 1, 2, (u16 *)&tape->caps[16], NULL); + ide_add_setting(drive, "speed", SETTING_READ, TYPE_SHORT, 0, 0xffff, + 1, 1, (u16 *)&tape->caps[14], NULL); + ide_add_setting(drive, "buffer_size", SETTING_READ, TYPE_INT, 0, 0xffff, + 1, 1024, &tape->buffer_size, NULL); + ide_add_setting(drive, "tdsc", SETTING_RW, TYPE_INT, IDETAPE_DSC_RW_MIN, + IDETAPE_DSC_RW_MAX, 1000, HZ, &tape->best_dsc_rw_freq, + NULL); + ide_add_setting(drive, "dsc_overlap", SETTING_RW, TYPE_BYTE, 0, 1, 1, + 1, &drive->dsc_overlap, NULL); + ide_add_setting(drive, "avg_speed", SETTING_READ, TYPE_INT, 0, 0xffff, + 1, 1, &tape->avg_speed, NULL); + ide_add_setting(drive, "debug_mask", SETTING_RW, TYPE_INT, 0, 0xffff, 1, + 1, &tape->debug_mask, NULL); +} +#else +static inline void idetape_add_settings(ide_drive_t *drive) { ; } #endif /* @@ -2254,15 +2462,15 @@ static void idetape_setup(ide_drive_t *drive, idetape_tape_t *tape, int minor) drive->dsc_overlap = 0; } /* Seagate Travan drives do not support DSC overlap. */ - if (strstr((char *)&drive->id[ATA_ID_PROD], "Seagate STT3401")) + if (strstr(drive->id->model, "Seagate STT3401")) drive->dsc_overlap = 0; tape->minor = minor; tape->name[0] = 'h'; tape->name[1] = 't'; tape->name[2] = '0' + minor; tape->chrdev_dir = IDETAPE_DIR_NONE; - - *((u16 *)&gcw) = drive->id[ATA_ID_CONFIG]; + tape->pc = tape->pc_stack; + *((unsigned short *) &gcw) = drive->id->config; /* Command packet DRQ type */ if (((gcw[0] & 0x60) >> 5) == 1) @@ -2304,7 +2512,7 @@ static void idetape_setup(ide_drive_t *drive, idetape_tape_t *tape, int minor) tape->best_dsc_rw_freq * 1000 / HZ, drive->using_dma ? ", DMA":""); - ide_proc_register_driver(drive, tape->driver); + idetape_add_settings(drive); } static void ide_tape_remove(ide_drive_t *drive) @@ -2369,12 +2577,12 @@ static ide_driver_t idetape_driver = { .remove = ide_tape_remove, .version = IDETAPE_VERSION, .media = ide_tape, + .supports_dsc_overlap = 1, .do_request = idetape_do_request, .end_request = idetape_end_request, .error = __ide_error, #ifdef CONFIG_IDE_PROC_FS .proc = idetape_proc, - .settings = idetape_settings, #endif }; @@ -2437,11 +2645,11 @@ static int ide_tape_probe(ide_drive_t *drive) if (!strstr("ide-tape", drive->driver_req)) goto failed; - + if (!drive->present) + goto failed; if (drive->media != ide_tape) goto failed; - - if (drive->id_read == 1 && !ide_check_atapi_device(drive, DRV_NAME)) { + if (!idetape_identify_device(drive)) { printk(KERN_ERR "ide-tape: %s: not supported by this version of" " the driver\n", drive->name); goto failed; @@ -2459,6 +2667,8 @@ static int ide_tape_probe(ide_drive_t *drive) ide_init_disk(g, drive); + ide_proc_register_driver(drive, &idetape_driver); + kref_init(&tape->kref); tape->drive = drive; diff --git a/trunk/drivers/ide/ide-taskfile.c b/trunk/drivers/ide/ide-taskfile.c index 487b18b3ebae..7fb6f1c86272 100644 --- a/trunk/drivers/ide/ide-taskfile.c +++ b/trunk/drivers/ide/ide-taskfile.c @@ -44,9 +44,9 @@ int taskfile_lib_get_identify (ide_drive_t *drive, u8 *buf) memset(&args, 0, sizeof(ide_task_t)); args.tf.nsect = 0x01; if (drive->media == ide_disk) - args.tf.command = ATA_CMD_ID_ATA; + args.tf.command = WIN_IDENTIFY; else - args.tf.command = ATA_CMD_ID_ATAPI; + args.tf.command = WIN_PIDENTIFY; args.tf_flags = IDE_TFLAG_TF | IDE_TFLAG_DEVICE; args.data_phase = TASKFILE_IN; return ide_raw_taskfile(drive, &args, buf, 1); @@ -99,17 +99,12 @@ ide_startstop_t do_rw_taskfile (ide_drive_t *drive, ide_task_t *task) case TASKFILE_NO_DATA: if (handler == NULL) handler = task_no_data_intr; + /* WIN_{SPECIFY,RESTORE,SETMULT} use custom handlers */ if (task->tf_flags & IDE_TFLAG_CUSTOM_HANDLER) { switch (tf->command) { - case ATA_CMD_INIT_DEV_PARAMS: - handler = set_geometry_intr; - break; - case ATA_CMD_RESTORE: - handler = recal_intr; - break; - case ATA_CMD_SET_MULTI: - handler = set_multmode_intr; - break; + case WIN_SPECIFY: handler = set_geometry_intr; break; + case WIN_RESTORE: handler = recal_intr; break; + case WIN_SETMULT: handler = set_multmode_intr; break; } } ide_execute_command(drive, tf->command, handler, @@ -126,7 +121,7 @@ ide_startstop_t do_rw_taskfile (ide_drive_t *drive, ide_task_t *task) EXPORT_SYMBOL_GPL(do_rw_taskfile); /* - * set_multmode_intr() is invoked on completion of a ATA_CMD_SET_MULTI cmd. + * set_multmode_intr() is invoked on completion of a WIN_SETMULT cmd. */ static ide_startstop_t set_multmode_intr(ide_drive_t *drive) { @@ -136,7 +131,7 @@ static ide_startstop_t set_multmode_intr(ide_drive_t *drive) local_irq_enable_in_hardirq(); stat = hwif->tp_ops->read_status(hwif); - if (OK_STAT(stat, ATA_DRDY, BAD_STAT)) + if (OK_STAT(stat, READY_STAT, BAD_STAT)) drive->mult_count = drive->mult_req; else { drive->mult_req = drive->mult_count = 0; @@ -147,7 +142,7 @@ static ide_startstop_t set_multmode_intr(ide_drive_t *drive) } /* - * set_geometry_intr() is invoked on completion of a ATA_CMD_INIT_DEV_PARAMS cmd. + * set_geometry_intr() is invoked on completion of a WIN_SPECIFY cmd. */ static ide_startstop_t set_geometry_intr(ide_drive_t *drive) { @@ -159,15 +154,15 @@ static ide_startstop_t set_geometry_intr(ide_drive_t *drive) while (1) { stat = hwif->tp_ops->read_status(hwif); - if ((stat & ATA_BUSY) == 0 || retries-- == 0) + if ((stat & BUSY_STAT) == 0 || retries-- == 0) break; udelay(10); }; - if (OK_STAT(stat, ATA_DRDY, BAD_STAT)) + if (OK_STAT(stat, READY_STAT, BAD_STAT)) return ide_stopped; - if (stat & (ATA_ERR | ATA_DRQ)) + if (stat & (ERR_STAT|DRQ_STAT)) return ide_error(drive, "set_geometry_intr", stat); ide_set_handler(drive, &set_geometry_intr, WAIT_WORSTCASE, NULL); @@ -175,7 +170,7 @@ static ide_startstop_t set_geometry_intr(ide_drive_t *drive) } /* - * recal_intr() is invoked on completion of a ATA_CMD_RESTORE (recalibrate) cmd. + * recal_intr() is invoked on completion of a WIN_RESTORE (recalibrate) cmd. */ static ide_startstop_t recal_intr(ide_drive_t *drive) { @@ -185,7 +180,7 @@ static ide_startstop_t recal_intr(ide_drive_t *drive) local_irq_enable_in_hardirq(); stat = hwif->tp_ops->read_status(hwif); - if (!OK_STAT(stat, ATA_DRDY, BAD_STAT)) + if (!OK_STAT(stat, READY_STAT, BAD_STAT)) return ide_error(drive, "recal_intr", stat); return ide_stopped; } @@ -202,7 +197,7 @@ static ide_startstop_t task_no_data_intr(ide_drive_t *drive) local_irq_enable_in_hardirq(); stat = hwif->tp_ops->read_status(hwif); - if (!OK_STAT(stat, ATA_DRDY, BAD_STAT)) + if (!OK_STAT(stat, READY_STAT, BAD_STAT)) return ide_error(drive, "task_no_data_intr", stat); /* calls ide_end_drive_cmd */ @@ -225,13 +220,13 @@ static u8 wait_drive_not_busy(ide_drive_t *drive) for (retries = 0; retries < 1000; retries++) { stat = hwif->tp_ops->read_status(hwif); - if (stat & ATA_BUSY) + if (stat & BUSY_STAT) udelay(10); else break; } - if (stat & ATA_BUSY) + if (stat & BUSY_STAT) printk(KERN_ERR "%s: drive still BUSY!\n", drive->name); return stat; @@ -390,7 +385,7 @@ void task_end_request(ide_drive_t *drive, struct request *rq, u8 stat) static ide_startstop_t task_in_unexpected(ide_drive_t *drive, struct request *rq, u8 stat) { /* Command all done? */ - if (OK_STAT(stat, ATA_DRDY, ATA_BUSY)) { + if (OK_STAT(stat, READY_STAT, BUSY_STAT)) { task_end_request(drive, rq, stat); return ide_stopped; } @@ -410,11 +405,11 @@ static ide_startstop_t task_in_intr(ide_drive_t *drive) u8 stat = hwif->tp_ops->read_status(hwif); /* Error? */ - if (stat & ATA_ERR) + if (stat & ERR_STAT) return task_error(drive, rq, __func__, stat); /* Didn't want any data? Odd. */ - if ((stat & ATA_DRQ) == 0) + if (!(stat & DRQ_STAT)) return task_in_unexpected(drive, rq, stat); ide_pio_datablock(drive, rq, 0); @@ -447,7 +442,7 @@ static ide_startstop_t task_out_intr (ide_drive_t *drive) return task_error(drive, rq, __func__, stat); /* Deal with unexpected ATA data phase. */ - if (((stat & ATA_DRQ) == 0) ^ !hwif->nleft) + if (((stat & DRQ_STAT) == 0) ^ !hwif->nleft) return task_error(drive, rq, __func__, stat); if (!hwif->nleft) { @@ -466,7 +461,7 @@ static ide_startstop_t pre_task_out_intr(ide_drive_t *drive, struct request *rq) { ide_startstop_t startstop; - if (ide_wait_stat(&startstop, drive, ATA_DRQ, + if (ide_wait_stat(&startstop, drive, DRQ_STAT, drive->bad_wstat, WAIT_DRQ)) { printk(KERN_ERR "%s: no DRQ after issuing %sWRITE%s\n", drive->name, @@ -726,3 +721,110 @@ int ide_taskfile_ioctl (ide_drive_t *drive, unsigned int cmd, unsigned long arg) return err; } #endif + +int ide_cmd_ioctl (ide_drive_t *drive, unsigned int cmd, unsigned long arg) +{ + u8 *buf = NULL; + int bufsize = 0, err = 0; + u8 args[4], xfer_rate = 0; + ide_task_t tfargs; + struct ide_taskfile *tf = &tfargs.tf; + struct hd_driveid *id = drive->id; + + if (NULL == (void *) arg) { + struct request *rq; + + rq = blk_get_request(drive->queue, READ, __GFP_WAIT); + rq->cmd_type = REQ_TYPE_ATA_TASKFILE; + err = blk_execute_rq(drive->queue, NULL, rq, 0); + blk_put_request(rq); + + return err; + } + + if (copy_from_user(args, (void __user *)arg, 4)) + return -EFAULT; + + memset(&tfargs, 0, sizeof(ide_task_t)); + tf->feature = args[2]; + if (args[0] == WIN_SMART) { + tf->nsect = args[3]; + tf->lbal = args[1]; + tf->lbam = 0x4f; + tf->lbah = 0xc2; + tfargs.tf_flags = IDE_TFLAG_OUT_TF | IDE_TFLAG_IN_NSECT; + } else { + tf->nsect = args[1]; + tfargs.tf_flags = IDE_TFLAG_OUT_FEATURE | + IDE_TFLAG_OUT_NSECT | IDE_TFLAG_IN_NSECT; + } + tf->command = args[0]; + tfargs.data_phase = args[3] ? TASKFILE_IN : TASKFILE_NO_DATA; + + if (args[3]) { + tfargs.tf_flags |= IDE_TFLAG_IO_16BIT; + bufsize = SECTOR_WORDS * 4 * args[3]; + buf = kzalloc(bufsize, GFP_KERNEL); + if (buf == NULL) + return -ENOMEM; + } + + if (tf->command == WIN_SETFEATURES && + tf->feature == SETFEATURES_XFER && + tf->nsect >= XFER_SW_DMA_0 && + (id->dma_ultra || id->dma_mword || id->dma_1word)) { + xfer_rate = args[1]; + if (tf->nsect > XFER_UDMA_2 && !eighty_ninty_three(drive)) { + printk(KERN_WARNING "%s: UDMA speeds >UDMA33 cannot " + "be set\n", drive->name); + goto abort; + } + } + + err = ide_raw_taskfile(drive, &tfargs, buf, args[3]); + + args[0] = tf->status; + args[1] = tf->error; + args[2] = tf->nsect; + + if (!err && xfer_rate) { + /* active-retuning-calls future */ + ide_set_xfer_rate(drive, xfer_rate); + ide_driveid_update(drive); + } +abort: + if (copy_to_user((void __user *)arg, &args, 4)) + err = -EFAULT; + if (buf) { + if (copy_to_user((void __user *)(arg + 4), buf, bufsize)) + err = -EFAULT; + kfree(buf); + } + return err; +} + +int ide_task_ioctl (ide_drive_t *drive, unsigned int cmd, unsigned long arg) +{ + void __user *p = (void __user *)arg; + int err = 0; + u8 args[7]; + ide_task_t task; + + if (copy_from_user(args, p, 7)) + return -EFAULT; + + memset(&task, 0, sizeof(task)); + memcpy(&task.tf_array[7], &args[1], 6); + task.tf.command = args[0]; + task.tf_flags = IDE_TFLAG_TF | IDE_TFLAG_DEVICE; + + err = ide_no_data_taskfile(drive, &task); + + args[0] = task.tf.command; + memcpy(&args[1], &task.tf_array[7], 6); + + if (copy_to_user(p, args, 7)) + err = -EFAULT; + + return err; +} diff --git a/trunk/drivers/ide/ide-timings.c b/trunk/drivers/ide/ide-timings.c index 81f527af8fae..8c2f8327f487 100644 --- a/trunk/drivers/ide/ide-timings.c +++ b/trunk/drivers/ide/ide-timings.c @@ -22,6 +22,7 @@ */ #include +#include #include #include @@ -77,15 +78,15 @@ EXPORT_SYMBOL_GPL(ide_timing_find_mode); u16 ide_pio_cycle_time(ide_drive_t *drive, u8 pio) { - u16 *id = drive->id; + struct hd_driveid *id = drive->id; struct ide_timing *t = ide_timing_find_mode(XFER_PIO_0 + pio); u16 cycle = 0; - if (id[ATA_ID_FIELD_VALID] & 2) { - if (ata_id_has_iordy(drive->id)) - cycle = id[ATA_ID_EIDE_PIO_IORDY]; + if (id->field_valid & 2) { + if (id->capability & 8) + cycle = id->eide_pio_iordy; else - cycle = id[ATA_ID_EIDE_PIO]; + cycle = id->eide_pio; /* conservative "downgrade" for all pre-ATA2 drives */ if (pio < 3 && cycle < t->cycle) @@ -137,7 +138,7 @@ EXPORT_SYMBOL_GPL(ide_timing_merge); int ide_timing_compute(ide_drive_t *drive, u8 speed, struct ide_timing *t, int T, int UT) { - u16 *id = drive->id; + struct hd_driveid *id = drive->id; struct ide_timing *s, p; /* @@ -156,15 +157,16 @@ int ide_timing_compute(ide_drive_t *drive, u8 speed, * If the drive is an EIDE drive, it can tell us it needs extended * PIO/MWDMA cycle timing. */ - if (id[ATA_ID_FIELD_VALID] & 2) { /* EIDE drive */ + if (id && id->field_valid & 2) { /* EIDE drive */ + memset(&p, 0, sizeof(p)); if (speed <= XFER_PIO_2) - p.cycle = p.cyc8b = id[ATA_ID_EIDE_PIO]; + p.cycle = p.cyc8b = id->eide_pio; else if (speed <= XFER_PIO_5) - p.cycle = p.cyc8b = id[ATA_ID_EIDE_PIO_IORDY]; + p.cycle = p.cyc8b = id->eide_pio_iordy; else if (speed >= XFER_MW_DMA_0 && speed <= XFER_MW_DMA_2) - p.cycle = id[ATA_ID_EIDE_DMA_MIN]; + p.cycle = id->eide_dma_min; ide_timing_merge(&p, t, t, IDE_TIMING_CYCLE | IDE_TIMING_CYC8B); } diff --git a/trunk/drivers/ide/ide.c b/trunk/drivers/ide/ide.c index 9dcf5aed92cb..772451600e4d 100644 --- a/trunk/drivers/ide/ide.c +++ b/trunk/drivers/ide/ide.c @@ -44,6 +44,8 @@ * inspiration from lots of linux users, esp. hamish@zot.apana.org.au */ +#define _IDE_C /* Tell ide.h it's really us */ + #include #include #include @@ -56,7 +58,6 @@ #include #include #include -#include #include #include @@ -96,6 +97,8 @@ void ide_init_port_data(ide_hwif_t *hwif, unsigned int index) hwif->name[2] = 'e'; hwif->name[3] = '0' + index; + hwif->bus_state = BUSSTATE_ON; + init_completion(&hwif->gendev_rel_comp); hwif->tp_ops = &default_tp_ops; @@ -116,7 +119,7 @@ static void ide_port_init_devices_data(ide_hwif_t *hwif) drive->media = ide_disk; drive->select.all = (unit<<4)|0xa0; drive->hwif = hwif; - drive->ready_stat = ATA_DRDY; + drive->ready_stat = READY_STAT; drive->bad_wstat = BAD_W_STAT; drive->special.b.recalibrate = 1; drive->special.b.set_geometry = 1; @@ -250,9 +253,42 @@ void ide_init_port_hw(ide_hwif_t *hwif, hw_regs_t *hw) DEFINE_MUTEX(ide_setting_mtx); -ide_devset_get(io_32bit, io_32bit); +EXPORT_SYMBOL_GPL(ide_setting_mtx); + +/** + * ide_spin_wait_hwgroup - wait for group + * @drive: drive in the group + * + * Wait for an IDE device group to go non busy and then return + * holding the ide_lock which guards the hwgroup->busy status + * and right to use it. + */ -static int set_io_32bit(ide_drive_t *drive, int arg) +int ide_spin_wait_hwgroup (ide_drive_t *drive) +{ + ide_hwgroup_t *hwgroup = HWGROUP(drive); + unsigned long timeout = jiffies + (3 * HZ); + + spin_lock_irq(&ide_lock); + + while (hwgroup->busy) { + unsigned long lflags; + spin_unlock_irq(&ide_lock); + local_irq_set(lflags); + if (time_after(jiffies, timeout)) { + local_irq_restore(lflags); + printk(KERN_ERR "%s: channel busy\n", drive->name); + return -EBUSY; + } + local_irq_restore(lflags); + spin_lock_irq(&ide_lock); + } + return 0; +} + +EXPORT_SYMBOL(ide_spin_wait_hwgroup); + +int set_io_32bit(ide_drive_t *drive, int arg) { if (drive->no_io_32bit) return -EPERM; @@ -260,39 +296,53 @@ static int set_io_32bit(ide_drive_t *drive, int arg) if (arg < 0 || arg > 1 + (SUPPORT_VLB_SYNC << 1)) return -EINVAL; + if (ide_spin_wait_hwgroup(drive)) + return -EBUSY; + drive->io_32bit = arg; + spin_unlock_irq(&ide_lock); + return 0; } -ide_devset_get(ksettings, keep_settings); - static int set_ksettings(ide_drive_t *drive, int arg) { if (arg < 0 || arg > 1) return -EINVAL; + if (ide_spin_wait_hwgroup(drive)) + return -EBUSY; drive->keep_settings = arg; + spin_unlock_irq(&ide_lock); return 0; } -ide_devset_get(using_dma, using_dma); - -static int set_using_dma(ide_drive_t *drive, int arg) +int set_using_dma(ide_drive_t *drive, int arg) { #ifdef CONFIG_BLK_DEV_IDEDMA + ide_hwif_t *hwif = drive->hwif; int err = -EPERM; if (arg < 0 || arg > 1) return -EINVAL; - if (ata_id_has_dma(drive->id) == 0) + if (!drive->id || !(drive->id->capability & 1)) goto out; - if (drive->hwif->dma_ops == NULL) + if (hwif->dma_ops == NULL) goto out; + err = -EBUSY; + if (ide_spin_wait_hwgroup(drive)) + goto out; + /* + * set ->busy flag, unlock and let it ride + */ + hwif->hwgroup->busy = 1; + spin_unlock_irq(&ide_lock); + err = 0; if (arg) { @@ -301,6 +351,12 @@ static int set_using_dma(ide_drive_t *drive, int arg) } else ide_dma_off(drive); + /* + * lock, clear ->busy flag and unlock before leaving + */ + spin_lock_irq(&ide_lock); + hwif->hwgroup->busy = 0; + spin_unlock_irq(&ide_lock); out: return err; #else @@ -311,7 +367,7 @@ static int set_using_dma(ide_drive_t *drive, int arg) #endif } -static int set_pio_mode(ide_drive_t *drive, int arg) +int set_pio_mode(ide_drive_t *drive, int arg) { struct request *rq; ide_hwif_t *hwif = drive->hwif; @@ -339,8 +395,6 @@ static int set_pio_mode(ide_drive_t *drive, int arg) return 0; } -ide_devset_get(unmaskirq, unmask); - static int set_unmaskirq(ide_drive_t *drive, int arg) { if (drive->no_unmask) @@ -349,20 +403,14 @@ static int set_unmaskirq(ide_drive_t *drive, int arg) if (arg < 0 || arg > 1) return -EINVAL; + if (ide_spin_wait_hwgroup(drive)) + return -EBUSY; drive->unmask = arg; + spin_unlock_irq(&ide_lock); return 0; } -#define ide_gen_devset_rw(_name, _func) \ -__IDE_DEVSET(_name, DS_SYNC, get_##_func, set_##_func) - -ide_gen_devset_rw(io_32bit, io_32bit); -ide_gen_devset_rw(keepsettings, ksettings); -ide_gen_devset_rw(unmaskirq, unmaskirq); -ide_gen_devset_rw(using_dma, using_dma); -__IDE_DEVSET(pio_mode, 0, NULL, set_pio_mode); - static int generic_ide_suspend(struct device *dev, pm_message_t mesg) { ide_drive_t *drive = dev->driver_data; @@ -438,6 +486,138 @@ static int generic_ide_resume(struct device *dev) return err; } +static int generic_drive_reset(ide_drive_t *drive) +{ + struct request *rq; + int ret = 0; + + rq = blk_get_request(drive->queue, READ, __GFP_WAIT); + rq->cmd_type = REQ_TYPE_SPECIAL; + rq->cmd_len = 1; + rq->cmd[0] = REQ_DRIVE_RESET; + rq->cmd_flags |= REQ_SOFTBARRIER; + if (blk_execute_rq(drive->queue, NULL, rq, 1)) + ret = rq->errors; + blk_put_request(rq); + return ret; +} + +int generic_ide_ioctl(ide_drive_t *drive, struct file *file, struct block_device *bdev, + unsigned int cmd, unsigned long arg) +{ + unsigned long flags; + ide_driver_t *drv; + void __user *p = (void __user *)arg; + int err = 0, (*setfunc)(ide_drive_t *, int); + u8 *val; + + switch (cmd) { + case HDIO_GET_32BIT: val = &drive->io_32bit; goto read_val; + case HDIO_GET_KEEPSETTINGS: val = &drive->keep_settings; goto read_val; + case HDIO_GET_UNMASKINTR: val = &drive->unmask; goto read_val; + case HDIO_GET_DMA: val = &drive->using_dma; goto read_val; + case HDIO_SET_32BIT: setfunc = set_io_32bit; goto set_val; + case HDIO_SET_KEEPSETTINGS: setfunc = set_ksettings; goto set_val; + case HDIO_SET_PIO_MODE: setfunc = set_pio_mode; goto set_val; + case HDIO_SET_UNMASKINTR: setfunc = set_unmaskirq; goto set_val; + case HDIO_SET_DMA: setfunc = set_using_dma; goto set_val; + } + + switch (cmd) { + case HDIO_OBSOLETE_IDENTITY: + case HDIO_GET_IDENTITY: + if (bdev != bdev->bd_contains) + return -EINVAL; + if (drive->id_read == 0) + return -ENOMSG; + if (copy_to_user(p, drive->id, (cmd == HDIO_GET_IDENTITY) ? sizeof(*drive->id) : 142)) + return -EFAULT; + return 0; + + case HDIO_GET_NICE: + return put_user(drive->dsc_overlap << IDE_NICE_DSC_OVERLAP | + drive->atapi_overlap << IDE_NICE_ATAPI_OVERLAP | + drive->nice1 << IDE_NICE_1, + (long __user *) arg); +#ifdef CONFIG_IDE_TASK_IOCTL + case HDIO_DRIVE_TASKFILE: + if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO)) + return -EACCES; + switch(drive->media) { + case ide_disk: + return ide_taskfile_ioctl(drive, cmd, arg); + default: + return -ENOMSG; + } +#endif /* CONFIG_IDE_TASK_IOCTL */ + + case HDIO_DRIVE_CMD: + if (!capable(CAP_SYS_RAWIO)) + return -EACCES; + return ide_cmd_ioctl(drive, cmd, arg); + + case HDIO_DRIVE_TASK: + if (!capable(CAP_SYS_RAWIO)) + return -EACCES; + return ide_task_ioctl(drive, cmd, arg); + case HDIO_SET_NICE: + if (!capable(CAP_SYS_ADMIN)) return -EACCES; + if (arg != (arg & ((1 << IDE_NICE_DSC_OVERLAP) | (1 << IDE_NICE_1)))) + return -EPERM; + drive->dsc_overlap = (arg >> IDE_NICE_DSC_OVERLAP) & 1; + drv = *(ide_driver_t **)bdev->bd_disk->private_data; + if (drive->dsc_overlap && !drv->supports_dsc_overlap) { + drive->dsc_overlap = 0; + return -EPERM; + } + drive->nice1 = (arg >> IDE_NICE_1) & 1; + return 0; + case HDIO_DRIVE_RESET: + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + + return generic_drive_reset(drive); + + case HDIO_GET_BUSSTATE: + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + if (put_user(HWIF(drive)->bus_state, (long __user *)arg)) + return -EFAULT; + return 0; + + case HDIO_SET_BUSSTATE: + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + return -EOPNOTSUPP; + default: + return -EINVAL; + } + +read_val: + mutex_lock(&ide_setting_mtx); + spin_lock_irqsave(&ide_lock, flags); + err = *val; + spin_unlock_irqrestore(&ide_lock, flags); + mutex_unlock(&ide_setting_mtx); + return err >= 0 ? put_user(err, (long __user *)arg) : err; + +set_val: + if (bdev != bdev->bd_contains) + err = -EINVAL; + else { + if (!capable(CAP_SYS_ADMIN)) + err = -EACCES; + else { + mutex_lock(&ide_setting_mtx); + err = setfunc(drive, arg); + mutex_unlock(&ide_setting_mtx); + } + } + return err; +} + +EXPORT_SYMBOL(generic_ide_ioctl); + /** * ide_device_get - get an additional reference to a ide_drive_t * @drive: device to get a reference to @@ -530,21 +710,21 @@ static ssize_t model_show(struct device *dev, struct device_attribute *attr, char *buf) { ide_drive_t *drive = to_ide_device(dev); - return sprintf(buf, "%s\n", (char *)&drive->id[ATA_ID_PROD]); + return sprintf(buf, "%s\n", drive->id->model); } static ssize_t firmware_show(struct device *dev, struct device_attribute *attr, char *buf) { ide_drive_t *drive = to_ide_device(dev); - return sprintf(buf, "%s\n", (char *)&drive->id[ATA_ID_FW_REV]); + return sprintf(buf, "%s\n", drive->id->fw_rev); } static ssize_t serial_show(struct device *dev, struct device_attribute *attr, char *buf) { ide_drive_t *drive = to_ide_device(dev); - return sprintf(buf, "%s\n", (char *)&drive->id[ATA_ID_SERNO]); + return sprintf(buf, "%s\n", drive->id->serial_no); } static struct device_attribute ide_dev_attrs[] = { @@ -661,7 +841,7 @@ MODULE_PARM_DESC(noprobe, "skip probing for a device"); static unsigned int ide_nowerr; module_param_call(nowerr, ide_set_dev_param_mask, NULL, &ide_nowerr, 0); -MODULE_PARM_DESC(nowerr, "ignore the ATA_DF bit for a device"); +MODULE_PARM_DESC(nowerr, "ignore the WRERR_STAT bit for a device"); static unsigned int ide_cdroms; @@ -726,7 +906,7 @@ static void ide_dev_apply_params(ide_drive_t *drive) drive->noprobe = 1; } if (ide_nowerr & (1 << i)) { - printk(KERN_INFO "ide: ignoring the ATA_DF bit for %s\n", + printk(KERN_INFO "ide: ignoring the WRERR_STAT bit for %s\n", drive->name); drive->bad_wstat = BAD_R_STAT; } @@ -747,7 +927,7 @@ static void ide_dev_apply_params(ide_drive_t *drive) drive->cyl, drive->head, drive->sect); drive->present = 1; drive->media = ide_disk; - drive->ready_stat = ATA_DRDY; + drive->ready_stat = READY_STAT; } } diff --git a/trunk/drivers/ide/legacy/ali14xx.c b/trunk/drivers/ide/legacy/ali14xx.c index 7276c96aaa2a..4ec19737f3c5 100644 --- a/trunk/drivers/ide/legacy/ali14xx.c +++ b/trunk/drivers/ide/legacy/ali14xx.c @@ -43,6 +43,7 @@ #include #include #include +#include #include #include diff --git a/trunk/drivers/ide/legacy/buddha.c b/trunk/drivers/ide/legacy/buddha.c index c5a3c9ef6a5d..7c2afa97f417 100644 --- a/trunk/drivers/ide/legacy/buddha.c +++ b/trunk/drivers/ide/legacy/buddha.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include diff --git a/trunk/drivers/ide/legacy/dtc2278.c b/trunk/drivers/ide/legacy/dtc2278.c index 689b2e493413..af791a02a120 100644 --- a/trunk/drivers/ide/legacy/dtc2278.c +++ b/trunk/drivers/ide/legacy/dtc2278.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include diff --git a/trunk/drivers/ide/legacy/falconide.c b/trunk/drivers/ide/legacy/falconide.c index 39d500d84b07..724f95073d80 100644 --- a/trunk/drivers/ide/legacy/falconide.c +++ b/trunk/drivers/ide/legacy/falconide.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include diff --git a/trunk/drivers/ide/legacy/gayle.c b/trunk/drivers/ide/legacy/gayle.c index 691506886561..51ba085d7aa8 100644 --- a/trunk/drivers/ide/legacy/gayle.c +++ b/trunk/drivers/ide/legacy/gayle.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/trunk/drivers/ide/legacy/ht6560b.c b/trunk/drivers/ide/legacy/ht6560b.c index 5123ea291d07..98f7c95e39ed 100644 --- a/trunk/drivers/ide/legacy/ht6560b.c +++ b/trunk/drivers/ide/legacy/ht6560b.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include diff --git a/trunk/drivers/ide/legacy/ide-cs.c b/trunk/drivers/ide/legacy/ide-cs.c index ee6fc30d5e2b..21bfac137844 100644 --- a/trunk/drivers/ide/legacy/ide-cs.c +++ b/trunk/drivers/ide/legacy/ide-cs.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include diff --git a/trunk/drivers/ide/legacy/macide.c b/trunk/drivers/ide/legacy/macide.c index 43f97cc1d30e..a0bb167980e7 100644 --- a/trunk/drivers/ide/legacy/macide.c +++ b/trunk/drivers/ide/legacy/macide.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include diff --git a/trunk/drivers/ide/legacy/q40ide.c b/trunk/drivers/ide/legacy/q40ide.c index 4af4a8ce4cdf..4abd8fc78197 100644 --- a/trunk/drivers/ide/legacy/q40ide.c +++ b/trunk/drivers/ide/legacy/q40ide.c @@ -14,6 +14,8 @@ #include #include #include +#include + #include /* diff --git a/trunk/drivers/ide/legacy/qd65xx.c b/trunk/drivers/ide/legacy/qd65xx.c index ec408b3a7100..2338f344ea24 100644 --- a/trunk/drivers/ide/legacy/qd65xx.c +++ b/trunk/drivers/ide/legacy/qd65xx.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -150,14 +151,12 @@ static int qd_find_disk_type (ide_drive_t *drive, int *active_time, int *recovery_time) { struct qd65xx_timing_s *p; - char *m = (char *)&drive->id[ATA_ID_PROD]; - char model[ATA_ID_PROD_LEN]; + char model[40]; - if (*m == 0) - return 0; + if (!*drive->id->model) return 0; - strncpy(model, m, ATA_ID_PROD_LEN); - ide_fixstring(model, ATA_ID_PROD_LEN, 1); /* byte-swap */ + strncpy(model,drive->id->model,40); + ide_fixstring(model,40,1); /* byte-swap */ for (p = qd65xx_timing ; p->offset != -1 ; p++) { if (!strncmp(p->model, model+p->offset, 4)) { @@ -186,20 +185,20 @@ static void qd_set_timing (ide_drive_t *drive, u8 timing) static void qd6500_set_pio_mode(ide_drive_t *drive, const u8 pio) { - u16 *id = drive->id; int active_time = 175; int recovery_time = 415; /* worst case values from the dos driver */ /* * FIXME: use "pio" value */ - if (!qd_find_disk_type(drive, &active_time, &recovery_time) && - (id[ATA_ID_OLD_PIO_MODES] & 0xff) && (id[ATA_ID_FIELD_VALID] & 2) && - id[ATA_ID_EIDE_PIO] >= 240) { + if (drive->id && !qd_find_disk_type(drive, &active_time, &recovery_time) + && drive->id->tPIO && (drive->id->field_valid & 0x02) + && drive->id->eide_pio >= 240) { + printk(KERN_INFO "%s: PIO mode%d\n", drive->name, - id[ATA_ID_OLD_PIO_MODES] & 0xff); + drive->id->tPIO); active_time = 110; - recovery_time = drive->id[ATA_ID_EIDE_PIO] - 120; + recovery_time = drive->id->eide_pio - 120; } qd_set_timing(drive, qd6500_compute_timing(HWIF(drive), active_time, recovery_time)); diff --git a/trunk/drivers/ide/legacy/umc8672.c b/trunk/drivers/ide/legacy/umc8672.c index 1da076e0c917..b54a14a57755 100644 --- a/trunk/drivers/ide/legacy/umc8672.c +++ b/trunk/drivers/ide/legacy/umc8672.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #include diff --git a/trunk/drivers/ide/pci/aec62xx.c b/trunk/drivers/ide/pci/aec62xx.c index e7475ba559c7..3187215e8f89 100644 --- a/trunk/drivers/ide/pci/aec62xx.c +++ b/trunk/drivers/ide/pci/aec62xx.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -139,7 +140,7 @@ static void aec_set_pio_mode(ide_drive_t *drive, const u8 pio) drive->hwif->port_ops->set_dma_mode(drive, pio + XFER_PIO_0); } -static unsigned int init_chipset_aec62xx(struct pci_dev *dev) +static unsigned int __devinit init_chipset_aec62xx(struct pci_dev *dev) { /* These are necessary to get AEC6280 Macintosh cards to work */ if ((dev->device == PCI_DEVICE_ID_ARTOP_ATP865) || @@ -307,8 +308,6 @@ static struct pci_driver driver = { .id_table = aec62xx_pci_tbl, .probe = aec62xx_init_one, .remove = __devexit_p(aec62xx_remove), - .suspend = ide_pci_suspend, - .resume = ide_pci_resume, }; static int __init aec62xx_ide_init(void) diff --git a/trunk/drivers/ide/pci/alim15x3.c b/trunk/drivers/ide/pci/alim15x3.c index 053c75263918..d647526af557 100644 --- a/trunk/drivers/ide/pci/alim15x3.c +++ b/trunk/drivers/ide/pci/alim15x3.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -133,8 +134,8 @@ static u8 ali_udma_filter(ide_drive_t *drive) if (m5229_revision > 0x20 && m5229_revision < 0xC2) { if (drive->media != ide_disk) return 0; - if (wdc_udma == 0 && chip_is_1543c_e && - strstr((char *)&drive->id[ATA_ID_PROD], "WDC ")) + if (chip_is_1543c_e && strstr(drive->id->model, "WDC ") && + wdc_udma == 0) return 0; } @@ -213,7 +214,7 @@ static int ali15x3_dma_setup(ide_drive_t *drive) * appropriate also sets up the 1533 southbridge. */ -static unsigned int init_chipset_ali15x3(struct pci_dev *dev) +static unsigned int __devinit init_chipset_ali15x3(struct pci_dev *dev) { unsigned long flags; u8 tmpbyte; @@ -581,8 +582,6 @@ static struct pci_driver driver = { .id_table = alim15x3_pci_tbl, .probe = alim15x3_init_one, .remove = ide_pci_remove, - .suspend = ide_pci_suspend, - .resume = ide_pci_resume, }; static int __init ali15x3_ide_init(void) diff --git a/trunk/drivers/ide/pci/amd74xx.c b/trunk/drivers/ide/pci/amd74xx.c index 824471f91bf5..1e66a960a96a 100644 --- a/trunk/drivers/ide/pci/amd74xx.c +++ b/trunk/drivers/ide/pci/amd74xx.c @@ -112,13 +112,13 @@ static void amd_set_pio_mode(ide_drive_t *drive, const u8 pio) amd_set_drive(drive, XFER_PIO_0 + pio); } -static void amd7409_cable_detect(struct pci_dev *dev) +static void __devinit amd7409_cable_detect(struct pci_dev *dev) { /* no host side cable detection */ amd_80w = 0x03; } -static void amd7411_cable_detect(struct pci_dev *dev) +static void __devinit amd7411_cable_detect(struct pci_dev *dev) { int i; u32 u = 0; @@ -140,7 +140,7 @@ static void amd7411_cable_detect(struct pci_dev *dev) * The initialization callback. Initialize drive independent registers. */ -static unsigned int init_chipset_amd74xx(struct pci_dev *dev) +static unsigned int __devinit init_chipset_amd74xx(struct pci_dev *dev) { u8 t = 0, offset = amd_offset(dev); @@ -324,8 +324,6 @@ static struct pci_driver driver = { .id_table = amd74xx_pci_tbl, .probe = amd74xx_probe, .remove = ide_pci_remove, - .suspend = ide_pci_suspend, - .resume = ide_pci_resume, }; static int __init amd74xx_ide_init(void) diff --git a/trunk/drivers/ide/pci/atiixp.c b/trunk/drivers/ide/pci/atiixp.c index e4437034dd08..41f6cb6c163a 100644 --- a/trunk/drivers/ide/pci/atiixp.c +++ b/trunk/drivers/ide/pci/atiixp.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -187,8 +188,6 @@ static struct pci_driver driver = { .id_table = atiixp_pci_tbl, .probe = atiixp_init_one, .remove = ide_pci_remove, - .suspend = ide_pci_suspend, - .resume = ide_pci_resume, }; static int __init atiixp_ide_init(void) diff --git a/trunk/drivers/ide/pci/cmd640.c b/trunk/drivers/ide/pci/cmd640.c index 7f39cdb41410..e6c62006ca1a 100644 --- a/trunk/drivers/ide/pci/cmd640.c +++ b/trunk/drivers/ide/pci/cmd640.c @@ -103,6 +103,7 @@ #include #include #include +#include #include #include @@ -374,21 +375,6 @@ static void cmd640_dump_regs(void) } #endif -static void __set_prefetch_mode(ide_drive_t *drive, int mode) -{ - if (mode) { /* want prefetch on? */ -#if CMD640_PREFETCH_MASKS - drive->no_unmask = 1; - drive->unmask = 0; -#endif - drive->no_io_32bit = 0; - } else { - drive->no_unmask = 0; - drive->no_io_32bit = 1; - drive->io_32bit = 0; - } -} - #ifndef CONFIG_BLK_DEV_CMD640_ENHANCED /* * Check whether prefetch is on for a drive, @@ -398,10 +384,19 @@ static void __init check_prefetch(ide_drive_t *drive, unsigned int index) { u8 b = get_cmd640_reg(prefetch_regs[index]); - __set_prefetch_mode(drive, (b & prefetch_masks[index]) ? 0 : 1); + if (b & prefetch_masks[index]) { /* is prefetch off? */ + drive->no_unmask = 0; + drive->no_io_32bit = 1; + drive->io_32bit = 0; + } else { +#if CMD640_PREFETCH_MASKS + drive->no_unmask = 1; + drive->unmask = 0; +#endif + drive->no_io_32bit = 0; + } } #else - /* * Sets prefetch mode for a drive. */ @@ -413,11 +408,19 @@ static void set_prefetch_mode(ide_drive_t *drive, unsigned int index, int mode) spin_lock_irqsave(&cmd640_lock, flags); b = __get_cmd640_reg(reg); - __set_prefetch_mode(drive, mode); - if (mode) + if (mode) { /* want prefetch on? */ +#if CMD640_PREFETCH_MASKS + drive->no_unmask = 1; + drive->unmask = 0; +#endif + drive->no_io_32bit = 0; b &= ~prefetch_masks[index]; /* enable prefetch */ - else + } else { + drive->no_unmask = 0; + drive->no_io_32bit = 1; + drive->io_32bit = 0; b |= prefetch_masks[index]; /* disable prefetch */ + } __put_cmd640_reg(reg, b); spin_unlock_irqrestore(&cmd640_lock, flags); } diff --git a/trunk/drivers/ide/pci/cmd64x.c b/trunk/drivers/ide/pci/cmd64x.c index 456dee18b660..e064398e03b4 100644 --- a/trunk/drivers/ide/pci/cmd64x.c +++ b/trunk/drivers/ide/pci/cmd64x.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -331,7 +332,7 @@ static int cmd646_1_dma_end(ide_drive_t *drive) return (dma_stat & 7) != 4; } -static unsigned int init_chipset_cmd64x(struct pci_dev *dev) +static unsigned int __devinit init_chipset_cmd64x(struct pci_dev *dev) { u8 mrdmode = 0; @@ -510,8 +511,6 @@ static struct pci_driver driver = { .id_table = cmd64x_pci_tbl, .probe = cmd64x_init_one, .remove = ide_pci_remove, - .suspend = ide_pci_suspend, - .resume = ide_pci_resume, }; static int __init cmd64x_ide_init(void) diff --git a/trunk/drivers/ide/pci/cs5520.c b/trunk/drivers/ide/pci/cs5520.c index d6341f7c4144..151844fcbb07 100644 --- a/trunk/drivers/ide/pci/cs5520.c +++ b/trunk/drivers/ide/pci/cs5520.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include @@ -149,8 +150,6 @@ static struct pci_driver driver = { .name = "Cyrix_IDE", .id_table = cs5520_pci_tbl, .probe = cs5520_init_one, - .suspend = ide_pci_suspend, - .resume = ide_pci_resume, }; static int __init cs5520_ide_init(void) diff --git a/trunk/drivers/ide/pci/cs5530.c b/trunk/drivers/ide/pci/cs5530.c index da42fa7e9f97..f235db8c678b 100644 --- a/trunk/drivers/ide/pci/cs5530.c +++ b/trunk/drivers/ide/pci/cs5530.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -80,19 +81,17 @@ static void cs5530_set_pio_mode(ide_drive_t *drive, const u8 pio) static u8 cs5530_udma_filter(ide_drive_t *drive) { ide_hwif_t *hwif = drive->hwif; - ide_drive_t *mate = ide_get_pair_dev(drive); - u16 *mateid = mate->id; + ide_drive_t *mate = &hwif->drives[(drive->dn & 1) ^ 1]; + struct hd_driveid *mateid = mate->id; u8 mask = hwif->ultra_mask; - if (mate == NULL) + if (mate->present == 0) goto out; - if (ata_id_has_dma(mateid) && __ide_dma_bad_drive(mate) == 0) { - if ((mateid[ATA_ID_FIELD_VALID] & 4) && - (mateid[ATA_ID_UDMA_MODES] & 7)) + if ((mateid->capability & 1) && __ide_dma_bad_drive(mate) == 0) { + if ((mateid->field_valid & 4) && (mateid->dma_ultra & 7)) goto out; - if ((mateid[ATA_ID_FIELD_VALID] & 2) && - (mateid[ATA_ID_MWDMA_MODES] & 7)) + if ((mateid->field_valid & 2) && (mateid->dma_mword & 7)) mask = 0; } out: @@ -134,7 +133,7 @@ static void cs5530_set_dma_mode(ide_drive_t *drive, const u8 mode) * Initialize the cs5530 bridge for reliable IDE DMA operation. */ -static unsigned int init_chipset_cs5530(struct pci_dev *dev) +static unsigned int __devinit init_chipset_cs5530(struct pci_dev *dev) { struct pci_dev *master_0 = NULL, *cs5530_0 = NULL; @@ -272,8 +271,6 @@ static struct pci_driver driver = { .id_table = cs5530_pci_tbl, .probe = cs5530_init_one, .remove = ide_pci_remove, - .suspend = ide_pci_suspend, - .resume = ide_pci_resume, }; static int __init cs5530_ide_init(void) diff --git a/trunk/drivers/ide/pci/cs5535.c b/trunk/drivers/ide/pci/cs5535.c index 1e5bc59ea2fb..dd3dc23af995 100644 --- a/trunk/drivers/ide/pci/cs5535.c +++ b/trunk/drivers/ide/pci/cs5535.c @@ -80,12 +80,12 @@ static void cs5535_set_speed(ide_drive_t *drive, const u8 speed) /* Set the PIO timings */ if (speed < XFER_SW_DMA_0) { - ide_drive_t *pair = ide_get_pair_dev(drive); + ide_drive_t *pair = ide_get_paired_drive(drive); u8 cmd, pioa; cmd = pioa = speed - XFER_PIO_0; - if (pair) { + if (pair->present) { u8 piob = ide_get_best_pio_mode(pair, 255, 4); if (piob < cmd) @@ -193,12 +193,10 @@ static const struct pci_device_id cs5535_pci_tbl[] = { MODULE_DEVICE_TABLE(pci, cs5535_pci_tbl); static struct pci_driver driver = { - .name = "CS5535_IDE", - .id_table = cs5535_pci_tbl, - .probe = cs5535_init_one, - .remove = ide_pci_remove, - .suspend = ide_pci_suspend, - .resume = ide_pci_resume, + .name = "CS5535_IDE", + .id_table = cs5535_pci_tbl, + .probe = cs5535_init_one, + .remove = ide_pci_remove, }; static int __init cs5535_ide_init(void) diff --git a/trunk/drivers/ide/pci/cy82c693.c b/trunk/drivers/ide/pci/cy82c693.c index 69820e9224d1..e6d8ee88d56d 100644 --- a/trunk/drivers/ide/pci/cy82c693.c +++ b/trunk/drivers/ide/pci/cy82c693.c @@ -332,7 +332,7 @@ static void cy82c693_set_pio_mode(ide_drive_t *drive, const u8 pio) /* * this function is called during init and is used to setup the cy82c693 chip */ -static unsigned int init_chipset_cy82c693(struct pci_dev *dev) +static unsigned int __devinit init_chipset_cy82c693(struct pci_dev *dev) { if (PCI_FUNC(dev->devfn) != 1) return 0; @@ -448,8 +448,6 @@ static struct pci_driver driver = { .id_table = cy82c693_pci_tbl, .probe = cy82c693_init_one, .remove = __devexit_p(cy82c693_remove), - .suspend = ide_pci_suspend, - .resume = ide_pci_resume, }; static int __init cy82c693_ide_init(void) diff --git a/trunk/drivers/ide/pci/delkin_cb.c b/trunk/drivers/ide/pci/delkin_cb.c index 83b63b365e51..f84bfb4f600f 100644 --- a/trunk/drivers/ide/pci/delkin_cb.c +++ b/trunk/drivers/ide/pci/delkin_cb.c @@ -19,6 +19,7 @@ #include #include +#include #include #include #include diff --git a/trunk/drivers/ide/pci/generic.c b/trunk/drivers/ide/pci/generic.c index 092b238cb250..b07d4f4273b3 100644 --- a/trunk/drivers/ide/pci/generic.c +++ b/trunk/drivers/ide/pci/generic.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -171,8 +172,6 @@ static struct pci_driver driver = { .id_table = generic_pci_tbl, .probe = generic_init_one, .remove = ide_pci_remove, - .suspend = ide_pci_suspend, - .resume = ide_pci_resume, }; static int __init generic_ide_init(void) diff --git a/trunk/drivers/ide/pci/hpt34x.c b/trunk/drivers/ide/pci/hpt34x.c index 644de29f8fe4..6009b0b9655d 100644 --- a/trunk/drivers/ide/pci/hpt34x.c +++ b/trunk/drivers/ide/pci/hpt34x.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -78,7 +79,7 @@ static void hpt34x_set_pio_mode(ide_drive_t *drive, const u8 pio) */ #define HPT34X_PCI_INIT_REG 0x80 -static unsigned int init_chipset_hpt34x(struct pci_dev *dev) +static unsigned int __devinit init_chipset_hpt34x(struct pci_dev *dev) { int i = 0; unsigned long hpt34xIoBase = pci_resource_start(dev, 4); @@ -171,8 +172,6 @@ static struct pci_driver driver = { .id_table = hpt34x_pci_tbl, .probe = hpt34x_init_one, .remove = ide_pci_remove, - .suspend = ide_pci_suspend, - .resume = ide_pci_resume, }; static int __init hpt34x_ide_init(void) diff --git a/trunk/drivers/ide/pci/hpt366.c b/trunk/drivers/ide/pci/hpt366.c index a194022b6a61..c37ab1743819 100644 --- a/trunk/drivers/ide/pci/hpt366.c +++ b/trunk/drivers/ide/pci/hpt366.c @@ -122,6 +122,7 @@ #include #include #include +#include #include #include #include @@ -604,10 +605,10 @@ static const struct hpt_info hpt371n __devinitdata = { static int check_in_drive_list(ide_drive_t *drive, const char **list) { - char *m = (char *)&drive->id[ATA_ID_PROD]; + struct hd_driveid *id = drive->id; while (*list) - if (!strcmp(*list++, m)) + if (!strcmp(*list++,id->model)) return 1; return 0; } @@ -654,7 +655,7 @@ static u8 hpt3xx_udma_filter(ide_drive_t *drive) case HPT372A: case HPT372N: case HPT374 : - if (ata_id_is_sata(drive->id)) + if (ide_dev_is_sata(drive->id)) mask &= ~0x0e; /* Fall thru */ default: @@ -674,7 +675,7 @@ static u8 hpt3xx_mdma_filter(ide_drive_t *drive) case HPT372A: case HPT372N: case HPT374 : - if (ata_id_is_sata(drive->id)) + if (ide_dev_is_sata(drive->id)) return 0x00; /* Fall thru */ default: @@ -730,11 +731,11 @@ static void hpt3xx_set_pio_mode(ide_drive_t *drive, const u8 pio) static void hpt3xx_quirkproc(ide_drive_t *drive) { - char *m = (char *)&drive->id[ATA_ID_PROD]; + struct hd_driveid *id = drive->id; const char **list = quirk_drives; while (*list) - if (strstr(m, *list++)) { + if (strstr(id->model, *list++)) { drive->quirk_list = 1; return; } @@ -943,7 +944,7 @@ static void hpt3xxn_rw_disk(ide_drive_t *drive, struct request *rq) * Perform a calibration cycle on the DPLL. * Returns 1 if this succeeds */ -static int hpt37x_calibrate_dpll(struct pci_dev *dev, u16 f_low, u16 f_high) +static int __devinit hpt37x_calibrate_dpll(struct pci_dev *dev, u16 f_low, u16 f_high) { u32 dpll = (f_high << 16) | f_low | 0x100; u8 scr2; @@ -971,37 +972,7 @@ static int hpt37x_calibrate_dpll(struct pci_dev *dev, u16 f_low, u16 f_high) return 1; } -static void hpt3xx_disable_fast_irq(struct pci_dev *dev, u8 mcr_addr) -{ - struct ide_host *host = pci_get_drvdata(dev); - struct hpt_info *info = host->host_priv + (&dev->dev == host->dev[1]); - u8 chip_type = info->chip_type; - u8 new_mcr, old_mcr = 0; - - /* - * Disable the "fast interrupt" prediction. Don't hold off - * on interrupts. (== 0x01 despite what the docs say) - */ - pci_read_config_byte(dev, mcr_addr + 1, &old_mcr); - - if (chip_type >= HPT374) - new_mcr = old_mcr & ~0x07; - else if (chip_type >= HPT370) { - new_mcr = old_mcr; - new_mcr &= ~0x02; -#ifdef HPT_DELAY_INTERRUPT - new_mcr &= ~0x01; -#else - new_mcr |= 0x01; -#endif - } else /* HPT366 and HPT368 */ - new_mcr = old_mcr & ~0x80; - - if (new_mcr != old_mcr) - pci_write_config_byte(dev, mcr_addr + 1, new_mcr); -} - -static unsigned int init_chipset_hpt366(struct pci_dev *dev) +static unsigned int __devinit init_chipset_hpt366(struct pci_dev *dev) { unsigned long io_base = pci_resource_start(dev, 4); struct hpt_info *info = hpt3xx_get_info(&dev->dev); @@ -1238,10 +1209,8 @@ static unsigned int init_chipset_hpt366(struct pci_dev *dev) * NOTE: This register is only writeable via I/O space. */ if (chip_type == HPT371N && clock == ATA_CLOCK_66MHZ) - outb(inb(io_base + 0x9c) | 0x04, io_base + 0x9c); - hpt3xx_disable_fast_irq(dev, 0x50); - hpt3xx_disable_fast_irq(dev, 0x54); + outb(inb(io_base + 0x9c) | 0x04, io_base + 0x9c); return dev->irq; } @@ -1296,6 +1265,7 @@ static void __devinit init_hwif_hpt366(ide_hwif_t *hwif) struct hpt_info *info = hpt3xx_get_info(hwif->dev); int serialize = HPT_SERIALIZE_IO; u8 chip_type = info->chip_type; + u8 new_mcr, old_mcr = 0; /* Cache the channel's MISC. control registers' offset */ hwif->select_data = hwif->channel ? 0x54 : 0x50; @@ -1318,6 +1288,29 @@ static void __devinit init_hwif_hpt366(ide_hwif_t *hwif) /* Serialize access to this device if needed */ if (serialize && hwif->mate) hwif->serialized = hwif->mate->serialized = 1; + + /* + * Disable the "fast interrupt" prediction. Don't hold off + * on interrupts. (== 0x01 despite what the docs say) + */ + pci_read_config_byte(dev, hwif->select_data + 1, &old_mcr); + + if (info->chip_type >= HPT374) + new_mcr = old_mcr & ~0x07; + else if (info->chip_type >= HPT370) { + new_mcr = old_mcr; + new_mcr &= ~0x02; + +#ifdef HPT_DELAY_INTERRUPT + new_mcr &= ~0x01; +#else + new_mcr |= 0x01; +#endif + } else /* HPT366 and HPT368 */ + new_mcr = old_mcr & ~0x80; + + if (new_mcr != old_mcr) + pci_write_config_byte(dev, hwif->select_data + 1, new_mcr); } static int __devinit init_dma_hpt366(ide_hwif_t *hwif, @@ -1627,8 +1620,6 @@ static struct pci_driver driver = { .id_table = hpt366_pci_tbl, .probe = hpt366_init_one, .remove = __devexit_p(hpt366_remove), - .suspend = ide_pci_suspend, - .resume = ide_pci_resume, }; static int __init hpt366_ide_init(void) diff --git a/trunk/drivers/ide/pci/it8213.c b/trunk/drivers/ide/pci/it8213.c index 0954ccd08d6f..652e47dd7e89 100644 --- a/trunk/drivers/ide/pci/it8213.c +++ b/trunk/drivers/ide/pci/it8213.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -194,8 +195,6 @@ static struct pci_driver driver = { .id_table = it8213_pci_tbl, .probe = it8213_init_one, .remove = ide_pci_remove, - .suspend = ide_pci_suspend, - .resume = ide_pci_resume, }; static int __init it8213_ide_init(void) diff --git a/trunk/drivers/ide/pci/it821x.c b/trunk/drivers/ide/pci/it821x.c index 46edd083b348..4a1508a707cc 100644 --- a/trunk/drivers/ide/pci/it821x.c +++ b/trunk/drivers/ide/pci/it821x.c @@ -63,6 +63,7 @@ #include #include #include +#include #include #include @@ -445,7 +446,8 @@ static u8 it821x_cable_detect(ide_hwif_t *hwif) static void it821x_quirkproc(ide_drive_t *drive) { struct it821x_dev *itdev = ide_get_hwifdata(drive->hwif); - u16 *id = drive->id; + struct hd_driveid *id = drive->id; + u16 *idbits = (u16 *)drive->id; if (!itdev->smart) { /* @@ -464,36 +466,36 @@ static void it821x_quirkproc(ide_drive_t *drive) */ /* Check for RAID v native */ - if (strstr((char *)&id[ATA_ID_PROD], - "Integrated Technology Express")) { + if(strstr(id->model, "Integrated Technology Express")) { /* In raid mode the ident block is slightly buggy We need to set the bits so that the IDE layer knows LBA28. LBA48 and DMA ar valid */ - id[ATA_ID_CAPABILITY] |= (3 << 8); /* LBA28, DMA */ - id[ATA_ID_COMMAND_SET_2] |= 0x0400; /* LBA48 valid */ - id[ATA_ID_CFS_ENABLE_2] |= 0x0400; /* LBA48 on */ + id->capability |= 3; /* LBA28, DMA */ + id->command_set_2 |= 0x0400; /* LBA48 valid */ + id->cfs_enable_2 |= 0x0400; /* LBA48 on */ /* Reporting logic */ printk(KERN_INFO "%s: IT8212 %sRAID %d volume", - drive->name, id[147] ? "Bootable " : "", - id[ATA_ID_CSFO]); - if (id[ATA_ID_CSFO] != 1) - printk(KERN_CONT "(%dK stripe)", id[146]); - printk(KERN_CONT ".\n"); + drive->name, + idbits[147] ? "Bootable ":"", + idbits[129]); + if(idbits[129] != 1) + printk("(%dK stripe)", idbits[146]); + printk(".\n"); } else { /* Non RAID volume. Fixups to stop the core code doing unsupported things */ - id[ATA_ID_FIELD_VALID] &= 3; - id[ATA_ID_QUEUE_DEPTH] = 0; - id[ATA_ID_COMMAND_SET_1] = 0; - id[ATA_ID_COMMAND_SET_2] &= 0xC400; - id[ATA_ID_CFSSE] &= 0xC000; - id[ATA_ID_CFS_ENABLE_1] = 0; - id[ATA_ID_CFS_ENABLE_2] &= 0xC400; - id[ATA_ID_CSF_DEFAULT] &= 0xC000; - id[127] = 0; - id[ATA_ID_DLF] = 0; - id[ATA_ID_CSFO] = 0; - id[ATA_ID_CFA_POWER] = 0; + id->field_valid &= 3; + id->queue_depth = 0; + id->command_set_1 = 0; + id->command_set_2 &= 0xC400; + id->cfsse &= 0xC000; + id->cfs_enable_1 = 0; + id->cfs_enable_2 &= 0xC400; + id->csf_default &= 0xC000; + id->word127 = 0; + id->dlf = 0; + id->csfo = 0; + id->cfa_power = 0; printk(KERN_INFO "%s: Performing identify fixups.\n", drive->name); } @@ -503,8 +505,8 @@ static void it821x_quirkproc(ide_drive_t *drive) * IDE core that DMA is supported (it821x hardware * takes care of DMA mode programming). */ - if (ata_id_has_dma(id)) { - id[ATA_ID_MWDMA_MODES] |= 0x0101; + if (id->capability & 1) { + id->dma_mword |= 0x0101; drive->current_speed = XFER_MW_DMA_0; } } @@ -586,7 +588,7 @@ static void __devinit init_hwif_it821x(ide_hwif_t *hwif) hwif->mwdma_mask = ATA_MWDMA2; } -static void it8212_disable_raid(struct pci_dev *dev) +static void __devinit it8212_disable_raid(struct pci_dev *dev) { /* Reset local CPU, and set BIOS not ready */ pci_write_config_byte(dev, 0x5E, 0x01); @@ -603,7 +605,7 @@ static void it8212_disable_raid(struct pci_dev *dev) pci_write_config_byte(dev, PCI_LATENCY_TIMER, 0x20); } -static unsigned int init_chipset_it821x(struct pci_dev *dev) +static unsigned int __devinit init_chipset_it821x(struct pci_dev *dev) { u8 conf; static char *mode[2] = { "pass through", "smart" }; @@ -685,8 +687,6 @@ static struct pci_driver driver = { .id_table = it821x_pci_tbl, .probe = it821x_init_one, .remove = __devexit_p(it821x_remove), - .suspend = ide_pci_suspend, - .resume = ide_pci_resume, }; static int __init it821x_ide_init(void) diff --git a/trunk/drivers/ide/pci/jmicron.c b/trunk/drivers/ide/pci/jmicron.c index acd647110648..bb9d09d8f196 100644 --- a/trunk/drivers/ide/pci/jmicron.c +++ b/trunk/drivers/ide/pci/jmicron.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -154,8 +155,6 @@ static struct pci_driver driver = { .id_table = jmicron_pci_tbl, .probe = jmicron_init_one, .remove = ide_pci_remove, - .suspend = ide_pci_suspend, - .resume = ide_pci_resume, }; static int __init jmicron_ide_init(void) diff --git a/trunk/drivers/ide/pci/ns87415.c b/trunk/drivers/ide/pci/ns87415.c index 53bd645736d9..ffefcd15196c 100644 --- a/trunk/drivers/ide/pci/ns87415.c +++ b/trunk/drivers/ide/pci/ns87415.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -273,9 +274,9 @@ static void __devinit init_hwif_ns87415 (ide_hwif_t *hwif) do { udelay(50); stat = hwif->tp_ops->read_status(hwif); - if (stat == 0xff) - break; - } while ((stat & ATA_BUSY) && --timeout); + if (stat == 0xff) + break; + } while ((stat & BUSY_STAT) && --timeout); #endif } @@ -339,8 +340,6 @@ static struct pci_driver driver = { .id_table = ns87415_pci_tbl, .probe = ns87415_init_one, .remove = ide_pci_remove, - .suspend = ide_pci_suspend, - .resume = ide_pci_resume, }; static int __init ns87415_ide_init(void) diff --git a/trunk/drivers/ide/pci/opti621.c b/trunk/drivers/ide/pci/opti621.c index 3de11ddcf863..e28e672ddafc 100644 --- a/trunk/drivers/ide/pci/opti621.c +++ b/trunk/drivers/ide/pci/opti621.c @@ -85,6 +85,7 @@ #include #include #include +#include #include #include @@ -136,7 +137,7 @@ static u8 read_reg(int reg) static void opti621_set_pio_mode(ide_drive_t *drive, const u8 pio) { ide_hwif_t *hwif = drive->hwif; - ide_drive_t *pair = ide_get_pair_dev(drive); + ide_drive_t *pair = ide_get_paired_drive(drive); unsigned long flags; u8 tim, misc, addr_pio = pio, clk; @@ -152,7 +153,7 @@ static void opti621_set_pio_mode(ide_drive_t *drive, const u8 pio) drive->drive_data = XFER_PIO_0 + pio; - if (pair) { + if (pair->present) { if (pair->drive_data && pair->drive_data < drive->drive_data) addr_pio = pair->drive_data - XFER_PIO_0; } @@ -225,8 +226,6 @@ static struct pci_driver driver = { .id_table = opti621_pci_tbl, .probe = opti621_init_one, .remove = ide_pci_remove, - .suspend = ide_pci_suspend, - .resume = ide_pci_resume, }; static int __init opti621_ide_init(void) diff --git a/trunk/drivers/ide/pci/pdc202xx_new.c b/trunk/drivers/ide/pci/pdc202xx_new.c index 9fc59962553b..d477da6b5858 100644 --- a/trunk/drivers/ide/pci/pdc202xx_new.c +++ b/trunk/drivers/ide/pci/pdc202xx_new.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -202,10 +203,10 @@ static u8 pdcnew_cable_detect(ide_hwif_t *hwif) static void pdcnew_quirkproc(ide_drive_t *drive) { - const char **list, *m = (char *)&drive->id[ATA_ID_PROD]; + const char **list, *model = drive->id->model; for (list = pdc_quirk_drives; *list != NULL; list++) - if (strstr(m, *list) != NULL) { + if (strstr(model, *list) != NULL) { drive->quirk_list = 2; return; } @@ -226,7 +227,7 @@ static void pdcnew_reset(ide_drive_t *drive) * read_counter - Read the byte count registers * @dma_base: for the port address */ -static long read_counter(u32 dma_base) +static long __devinit read_counter(u32 dma_base) { u32 pri_dma_base = dma_base, sec_dma_base = dma_base + 0x08; u8 cnt0, cnt1, cnt2, cnt3; @@ -266,7 +267,7 @@ static long read_counter(u32 dma_base) * @dma_base: for the port address * E.g. 16949000 on 33 MHz PCI bus, i.e. half of the PCI clock. */ -static long detect_pll_input_clock(unsigned long dma_base) +static long __devinit detect_pll_input_clock(unsigned long dma_base) { struct timeval start_time, end_time; long start_count, end_count; @@ -309,7 +310,7 @@ static long detect_pll_input_clock(unsigned long dma_base) } #ifdef CONFIG_PPC_PMAC -static void apple_kiwi_init(struct pci_dev *pdev) +static void __devinit apple_kiwi_init(struct pci_dev *pdev) { struct device_node *np = pci_device_to_OF_node(pdev); u8 conf; @@ -325,7 +326,7 @@ static void apple_kiwi_init(struct pci_dev *pdev) } #endif /* CONFIG_PPC_PMAC */ -static unsigned int init_chipset_pdcnew(struct pci_dev *dev) +static unsigned int __devinit init_chipset_pdcnew(struct pci_dev *dev) { const char *name = DRV_NAME; unsigned long dma_base = pci_resource_start(dev, 4); @@ -566,8 +567,6 @@ static struct pci_driver driver = { .id_table = pdc202new_pci_tbl, .probe = pdc202new_init_one, .remove = __devexit_p(pdc202new_remove), - .suspend = ide_pci_suspend, - .resume = ide_pci_resume, }; static int __init pdc202new_ide_init(void) diff --git a/trunk/drivers/ide/pci/pdc202xx_old.c b/trunk/drivers/ide/pci/pdc202xx_old.c index cb6d2a00c514..de9a27400462 100644 --- a/trunk/drivers/ide/pci/pdc202xx_old.c +++ b/trunk/drivers/ide/pci/pdc202xx_old.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -85,7 +86,7 @@ static void pdc202xx_set_mode(ide_drive_t *drive, const u8 speed) * Prefetch_EN / IORDY_EN / PA[3:0] bits of register A */ AP &= ~0x3f; - if (ata_id_iordy_disable(drive->id)) + if (drive->id->capability & 4) AP |= 0x20; /* set IORDY_EN bit */ if (drive->media == ide_disk) AP |= 0x10; /* set Prefetch_EN bit */ @@ -153,10 +154,10 @@ static void pdc_old_disable_66MHz_clock(ide_hwif_t *hwif) static void pdc202xx_quirkproc(ide_drive_t *drive) { - const char **list, *m = (char *)&drive->id[ATA_ID_PROD]; + const char **list, *model = drive->id->model; for (list = pdc_quirk_drives; *list != NULL; list++) - if (strstr(m, *list) != NULL) { + if (strstr(model, *list) != NULL) { drive->quirk_list = 2; return; } @@ -264,7 +265,7 @@ static void pdc202xx_dma_timeout(ide_drive_t *drive) ide_dma_timeout(drive); } -static unsigned int init_chipset_pdc202xx(struct pci_dev *dev) +static unsigned int __devinit init_chipset_pdc202xx(struct pci_dev *dev) { unsigned long dmabase = pci_resource_start(dev, 4); u8 udma_speed_flag = 0, primary_mode = 0, secondary_mode = 0; @@ -431,8 +432,6 @@ static struct pci_driver driver = { .id_table = pdc202xx_pci_tbl, .probe = pdc202xx_init_one, .remove = ide_pci_remove, - .suspend = ide_pci_suspend, - .resume = ide_pci_resume, }; static int __init pdc202xx_ide_init(void) diff --git a/trunk/drivers/ide/pci/piix.c b/trunk/drivers/ide/pci/piix.c index a06c03f8e295..30cfc815fe31 100644 --- a/trunk/drivers/ide/pci/piix.c +++ b/trunk/drivers/ide/pci/piix.c @@ -48,6 +48,7 @@ #include #include #include +#include #include #include @@ -204,7 +205,7 @@ static void piix_set_dma_mode(ide_drive_t *drive, const u8 speed) * out to be nice and simple. */ -static unsigned int init_chipset_ich(struct pci_dev *dev) +static unsigned int __devinit init_chipset_ich(struct pci_dev *dev) { u32 extra = 0; @@ -449,8 +450,6 @@ static struct pci_driver driver = { .id_table = piix_pci_tbl, .probe = piix_init_one, .remove = ide_pci_remove, - .suspend = ide_pci_suspend, - .resume = ide_pci_resume, }; static int __init piix_ide_init(void) diff --git a/trunk/drivers/ide/pci/rz1000.c b/trunk/drivers/ide/pci/rz1000.c index c117a068761b..8d11ee838a2a 100644 --- a/trunk/drivers/ide/pci/rz1000.c +++ b/trunk/drivers/ide/pci/rz1000.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include diff --git a/trunk/drivers/ide/pci/sc1200.c b/trunk/drivers/ide/pci/sc1200.c index bdc1fed41260..8efaed16fea3 100644 --- a/trunk/drivers/ide/pci/sc1200.c +++ b/trunk/drivers/ide/pci/sc1200.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -103,19 +104,17 @@ static void sc1200_tunepio(ide_drive_t *drive, u8 pio) static u8 sc1200_udma_filter(ide_drive_t *drive) { ide_hwif_t *hwif = drive->hwif; - ide_drive_t *mate = ide_get_pair_dev(drive); - u16 *mateid = mate->id; + ide_drive_t *mate = &hwif->drives[(drive->dn & 1) ^ 1]; + struct hd_driveid *mateid = mate->id; u8 mask = hwif->ultra_mask; - if (mate == NULL) + if (mate->present == 0) goto out; - if (ata_id_has_dma(mateid) && __ide_dma_bad_drive(mate) == 0) { - if ((mateid[ATA_ID_FIELD_VALID] & 4) && - (mateid[ATA_ID_UDMA_MODES] & 7)) + if ((mateid->capability & 1) && __ide_dma_bad_drive(mate) == 0) { + if ((mateid->field_valid & 4) && (mateid->dma_ultra & 7)) goto out; - if ((mateid[ATA_ID_FIELD_VALID] & 2) && - (mateid[ATA_ID_MWDMA_MODES] & 7)) + if ((mateid->field_valid & 2) && (mateid->dma_mword & 7)) mask = 0; } out: diff --git a/trunk/drivers/ide/pci/scc_pata.c b/trunk/drivers/ide/pci/scc_pata.c index e92a874b31df..44cccd1e086a 100644 --- a/trunk/drivers/ide/pci/scc_pata.c +++ b/trunk/drivers/ide/pci/scc_pata.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -399,7 +400,7 @@ static int scc_dma_end(ide_drive_t *drive) /* errata A308 workaround: Step5 (check data loss) */ /* We don't check non ide_disk because it is limited to UDMA4 */ if (!(in_be32((void __iomem *)hwif->io_ports.ctl_addr) - & ATA_ERR) && + & ERR_STAT) && drive->media == ide_disk && drive->current_speed > XFER_UDMA_4) { reg = in_be32((void __iomem *)intsts_port); if (!(reg & INTSTS_ACTEINT)) { @@ -503,7 +504,7 @@ static int scc_dma_test_irq(ide_drive_t *drive) /* SCC errata A252,A308 workaround: Step4 */ if ((in_be32((void __iomem *)hwif->io_ports.ctl_addr) - & ATA_ERR) && + & ERR_STAT) && (int_stat & INTSTS_INTRQ)) return 1; diff --git a/trunk/drivers/ide/pci/serverworks.c b/trunk/drivers/ide/pci/serverworks.c index 3dff2aea317e..c3bdc6e51a48 100644 --- a/trunk/drivers/ide/pci/serverworks.c +++ b/trunk/drivers/ide/pci/serverworks.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include @@ -56,10 +57,8 @@ static struct pci_dev *isa_dev; static int check_in_drive_lists (ide_drive_t *drive, const char **list) { - char *m = (char *)&drive->id[ATA_ID_PROD]; - while (*list) - if (!strcmp(*list++, m)) + if (!strcmp(*list++, drive->id->model)) return 1; return 0; } @@ -175,7 +174,7 @@ static void svwks_set_dma_mode(ide_drive_t *drive, const u8 speed) pci_write_config_byte(dev, 0x54, ultra_enable); } -static unsigned int init_chipset_svwks(struct pci_dev *dev) +static unsigned int __devinit init_chipset_svwks(struct pci_dev *dev) { unsigned int reg; u8 btr; @@ -448,8 +447,6 @@ static struct pci_driver driver = { .id_table = svwks_pci_tbl, .probe = svwks_init_one, .remove = ide_pci_remove, - .suspend = ide_pci_suspend, - .resume = ide_pci_resume, }; static int __init svwks_ide_init(void) diff --git a/trunk/drivers/ide/pci/sgiioc4.c b/trunk/drivers/ide/pci/sgiioc4.c index 1017fb4f6317..681306c9d79b 100644 --- a/trunk/drivers/ide/pci/sgiioc4.c +++ b/trunk/drivers/ide/pci/sgiioc4.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include diff --git a/trunk/drivers/ide/pci/siimage.c b/trunk/drivers/ide/pci/siimage.c index 174a873b4c64..db2b88a369ab 100644 --- a/trunk/drivers/ide/pci/siimage.c +++ b/trunk/drivers/ide/pci/siimage.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include #include @@ -222,9 +223,7 @@ static u8 sil_pata_udma_filter(ide_drive_t *drive) static u8 sil_sata_udma_filter(ide_drive_t *drive) { - char *m = (char *)&drive->id[ATA_ID_PROD]; - - return strstr(m, "Maxtor") ? ATA_UDMA5 : ATA_UDMA6; + return strstr(drive->id->model, "Maxtor") ? ATA_UDMA5 : ATA_UDMA6; } /** @@ -244,7 +243,7 @@ static void sil_set_pio_mode(ide_drive_t *drive, u8 pio) ide_hwif_t *hwif = HWIF(drive); struct pci_dev *dev = to_pci_dev(hwif->dev); - ide_drive_t *pair = ide_get_pair_dev(drive); + ide_drive_t *pair = ide_get_paired_drive(drive); u32 speedt = 0; u16 speedp = 0; unsigned long addr = siimage_seldev(drive, 0x04); @@ -258,7 +257,7 @@ static void sil_set_pio_mode(ide_drive_t *drive, u8 pio) u8 unit = drive->select.b.unit; /* trim *taskfile* PIO to the slowest of the master/slave */ - if (pair) { + if (pair->present) { u8 pair_pio = ide_get_best_pio_mode(pair, 255, 4); if (pair_pio < tf_pio) @@ -463,7 +462,7 @@ static void sil_sata_pre_reset(ide_drive_t *drive) * to 133 MHz clocking if the system isn't already set up to do it. */ -static unsigned int init_chipset_siimage(struct pci_dev *dev) +static unsigned int __devinit init_chipset_siimage(struct pci_dev *dev) { struct ide_host *host = pci_get_drvdata(dev); void __iomem *ioaddr = host->host_priv; @@ -617,8 +616,8 @@ static void __devinit init_mmio_iops_siimage(ide_hwif_t *hwif) static int is_dev_seagate_sata(ide_drive_t *drive) { - const char *s = (const char *)&drive->id[ATA_ID_PROD]; - unsigned len = strnlen(s, ATA_ID_PROD_LEN); + const char *s = &drive->id->model[0]; + unsigned len = strnlen(s, sizeof(drive->id->model)); if ((len > 4) && (!memcmp(s, "ST", 2))) if ((!memcmp(s + len - 2, "AS", 2)) || @@ -834,8 +833,6 @@ static struct pci_driver driver = { .id_table = siimage_pci_tbl, .probe = siimage_init_one, .remove = __devexit_p(siimage_remove), - .suspend = ide_pci_suspend, - .resume = ide_pci_resume, }; static int __init siimage_ide_init(void) diff --git a/trunk/drivers/ide/pci/sis5513.c b/trunk/drivers/ide/pci/sis5513.c index 734dd41f1f67..5efe21d6ef97 100644 --- a/trunk/drivers/ide/pci/sis5513.c +++ b/trunk/drivers/ide/pci/sis5513.c @@ -47,6 +47,7 @@ #include #include #include +#include #include #include #include @@ -447,7 +448,7 @@ static int __devinit sis_find_family(struct pci_dev *dev) return chipset_family; } -static unsigned int init_chipset_sis5513(struct pci_dev *dev) +static unsigned int __devinit init_chipset_sis5513(struct pci_dev *dev) { /* Make general config ops here 1/ tell IDE channels to operate in Compatibility mode only @@ -610,8 +611,6 @@ static struct pci_driver driver = { .id_table = sis5513_pci_tbl, .probe = sis5513_init_one, .remove = __devexit_p(sis5513_remove), - .suspend = ide_pci_suspend, - .resume = ide_pci_resume, }; static int __init sis5513_ide_init(void) diff --git a/trunk/drivers/ide/pci/sl82c105.c b/trunk/drivers/ide/pci/sl82c105.c index 37a6b7bdc040..73905bcc08fb 100644 --- a/trunk/drivers/ide/pci/sl82c105.c +++ b/trunk/drivers/ide/pci/sl82c105.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -61,7 +62,7 @@ static unsigned int get_pio_timings(ide_drive_t *drive, u8 pio) if (cmd_off == 0) cmd_off = 1; - if (pio > 2 || ata_id_has_iordy(drive->id)) + if (pio > 2 || ide_dev_has_iordy(drive->id)) iordy = 0x40; return (cmd_on - 1) << 8 | (cmd_off - 1) | iordy; @@ -271,7 +272,7 @@ static u8 sl82c105_bridge_revision(struct pci_dev *dev) * channel 0 here at least, but channel 1 has to be enabled by * firmware or arch code. We still set both to 16 bits mode. */ -static unsigned int init_chipset_sl82c105(struct pci_dev *dev) +static unsigned int __devinit init_chipset_sl82c105(struct pci_dev *dev) { u32 val; @@ -350,8 +351,6 @@ static struct pci_driver driver = { .id_table = sl82c105_pci_tbl, .probe = sl82c105_init_one, .remove = ide_pci_remove, - .suspend = ide_pci_suspend, - .resume = ide_pci_resume, }; static int __init sl82c105_ide_init(void) diff --git a/trunk/drivers/ide/pci/slc90e66.c b/trunk/drivers/ide/pci/slc90e66.c index a9551a13ac57..866d6c65e3a0 100644 --- a/trunk/drivers/ide/pci/slc90e66.c +++ b/trunk/drivers/ide/pci/slc90e66.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -159,8 +160,6 @@ static struct pci_driver driver = { .id_table = slc90e66_pci_tbl, .probe = slc90e66_init_one, .remove = ide_pci_remove, - .suspend = ide_pci_suspend, - .resume = ide_pci_resume, }; static int __init slc90e66_ide_init(void) diff --git a/trunk/drivers/ide/pci/triflex.c b/trunk/drivers/ide/pci/triflex.c index be8715dcee05..b77ec35151b3 100644 --- a/trunk/drivers/ide/pci/triflex.c +++ b/trunk/drivers/ide/pci/triflex.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -119,8 +120,6 @@ static struct pci_driver driver = { .id_table = triflex_pci_tbl, .probe = triflex_init_one, .remove = ide_pci_remove, - .suspend = ide_pci_suspend, - .resume = ide_pci_resume, }; static int __init triflex_ide_init(void) diff --git a/trunk/drivers/ide/pci/trm290.c b/trunk/drivers/ide/pci/trm290.c index 4dfbc6a68b5b..fd28b49977fd 100644 --- a/trunk/drivers/ide/pci/trm290.c +++ b/trunk/drivers/ide/pci/trm290.c @@ -135,6 +135,7 @@ #include #include #include +#include #include #include diff --git a/trunk/drivers/ide/pci/via82cxxx.c b/trunk/drivers/ide/pci/via82cxxx.c index acacdaab69c2..94fb9ab3223f 100644 --- a/trunk/drivers/ide/pci/via82cxxx.c +++ b/trunk/drivers/ide/pci/via82cxxx.c @@ -154,7 +154,7 @@ static void via_set_speed(ide_hwif_t *hwif, u8 dn, struct ide_timing *timing) static void via_set_drive(ide_drive_t *drive, const u8 speed) { ide_hwif_t *hwif = drive->hwif; - ide_drive_t *peer = ide_get_pair_dev(drive); + ide_drive_t *peer = hwif->drives + (~drive->dn & 1); struct pci_dev *dev = to_pci_dev(hwif->dev); struct ide_host *host = pci_get_drvdata(dev); struct via82cxxx_dev *vdev = host->host_priv; @@ -173,7 +173,7 @@ static void via_set_drive(ide_drive_t *drive, const u8 speed) ide_timing_compute(drive, speed, &t, T, UT); - if (peer) { + if (peer->present) { ide_timing_compute(peer, peer->current_speed, &p, T, UT); ide_timing_merge(&p, &t, &t, IDE_TIMING_8BIT); } @@ -215,7 +215,7 @@ static struct via_isa_bridge *via_config_find(struct pci_dev **isa) /* * Check and handle 80-wire cable presence */ -static void via_cable_detect(struct via82cxxx_dev *vdev, u32 u) +static void __devinit via_cable_detect(struct via82cxxx_dev *vdev, u32 u) { int i; @@ -267,7 +267,7 @@ static void via_cable_detect(struct via82cxxx_dev *vdev, u32 u) * and initialize its drive independent registers. */ -static unsigned int init_chipset_via82cxxx(struct pci_dev *dev) +static unsigned int __devinit init_chipset_via82cxxx(struct pci_dev *dev) { struct ide_host *host = pci_get_drvdata(dev); struct via82cxxx_dev *vdev = host->host_priv; @@ -492,8 +492,6 @@ static struct pci_driver driver = { .id_table = via_pci_tbl, .probe = via_init_one, .remove = __devexit_p(via_remove), - .suspend = ide_pci_suspend, - .resume = ide_pci_resume, }; static int __init via_ide_init(void) diff --git a/trunk/drivers/ide/ppc/pmac.c b/trunk/drivers/ide/ppc/pmac.c index c3432da78d52..fa2be26272d5 100644 --- a/trunk/drivers/ide/ppc/pmac.c +++ b/trunk/drivers/ide/ppc/pmac.c @@ -669,9 +669,9 @@ static void set_timings_mdma(ide_drive_t *drive, int intf_type, u32 *timings, u32 *timings2, u8 speed) { - u16 *id = drive->id; int cycleTime, accessTime = 0, recTime = 0; unsigned accessTicks, recTicks; + struct hd_driveid *id = drive->id; struct mdma_timings_t* tm = NULL; int i; @@ -686,8 +686,8 @@ set_timings_mdma(ide_drive_t *drive, int intf_type, u32 *timings, u32 *timings2, } /* Check if drive provides explicit DMA cycle time */ - if ((id[ATA_ID_FIELD_VALID] & 2) && id[ATA_ID_EIDE_DMA_TIME]) - cycleTime = max_t(int, id[ATA_ID_EIDE_DMA_TIME], cycleTime); + if ((id->field_valid & 2) && id->eide_dma_time) + cycleTime = max_t(int, id->eide_dma_time, cycleTime); /* OHare limits according to some old Apple sources */ if ((intf_type == controller_ohare) && (cycleTime < 150)) diff --git a/trunk/drivers/ide/setup-pci.c b/trunk/drivers/ide/setup-pci.c index 9f1f9163a136..a8e9e8a69a52 100644 --- a/trunk/drivers/ide/setup-pci.c +++ b/trunk/drivers/ide/setup-pci.c @@ -659,36 +659,3 @@ void ide_pci_remove(struct pci_dev *dev) pci_disable_device(dev); } EXPORT_SYMBOL_GPL(ide_pci_remove); - -#ifdef CONFIG_PM -int ide_pci_suspend(struct pci_dev *dev, pm_message_t state) -{ - pci_save_state(dev); - pci_disable_device(dev); - pci_set_power_state(dev, pci_choose_state(dev, state)); - - return 0; -} -EXPORT_SYMBOL_GPL(ide_pci_suspend); - -int ide_pci_resume(struct pci_dev *dev) -{ - struct ide_host *host = pci_get_drvdata(dev); - int rc; - - pci_set_power_state(dev, PCI_D0); - - rc = pci_enable_device(dev); - if (rc) - return rc; - - pci_restore_state(dev); - pci_set_master(dev); - - if (host->init_chipset) - host->init_chipset(dev); - - return 0; -} -EXPORT_SYMBOL_GPL(ide_pci_resume); -#endif diff --git a/trunk/drivers/pci/Makefile b/trunk/drivers/pci/Makefile index 4b47f4ece5b7..7d63f8ced24b 100644 --- a/trunk/drivers/pci/Makefile +++ b/trunk/drivers/pci/Makefile @@ -26,8 +26,6 @@ obj-$(CONFIG_HT_IRQ) += htirq.o # Build Intel IOMMU support obj-$(CONFIG_DMAR) += dmar.o iova.o intel-iommu.o -obj-$(CONFIG_INTR_REMAP) += dmar.o intr_remapping.o - # # Some architectures use the generic PCI setup functions # diff --git a/trunk/drivers/pci/dma_remapping.h b/trunk/drivers/pci/dma_remapping.h deleted file mode 100644 index bff5c65f81dc..000000000000 --- a/trunk/drivers/pci/dma_remapping.h +++ /dev/null @@ -1,157 +0,0 @@ -#ifndef _DMA_REMAPPING_H -#define _DMA_REMAPPING_H - -/* - * We need a fixed PAGE_SIZE of 4K irrespective of - * arch PAGE_SIZE for IOMMU page tables. - */ -#define PAGE_SHIFT_4K (12) -#define PAGE_SIZE_4K (1UL << PAGE_SHIFT_4K) -#define PAGE_MASK_4K (((u64)-1) << PAGE_SHIFT_4K) -#define PAGE_ALIGN_4K(addr) (((addr) + PAGE_SIZE_4K - 1) & PAGE_MASK_4K) - -#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT_4K) -#define DMA_32BIT_PFN IOVA_PFN(DMA_32BIT_MASK) -#define DMA_64BIT_PFN IOVA_PFN(DMA_64BIT_MASK) - - -/* - * 0: Present - * 1-11: Reserved - * 12-63: Context Ptr (12 - (haw-1)) - * 64-127: Reserved - */ -struct root_entry { - u64 val; - u64 rsvd1; -}; -#define ROOT_ENTRY_NR (PAGE_SIZE_4K/sizeof(struct root_entry)) -static inline bool root_present(struct root_entry *root) -{ - return (root->val & 1); -} -static inline void set_root_present(struct root_entry *root) -{ - root->val |= 1; -} -static inline void set_root_value(struct root_entry *root, unsigned long value) -{ - root->val |= value & PAGE_MASK_4K; -} - -struct context_entry; -static inline struct context_entry * -get_context_addr_from_root(struct root_entry *root) -{ - return (struct context_entry *) - (root_present(root)?phys_to_virt( - root->val & PAGE_MASK_4K): - NULL); -} - -/* - * low 64 bits: - * 0: present - * 1: fault processing disable - * 2-3: translation type - * 12-63: address space root - * high 64 bits: - * 0-2: address width - * 3-6: aval - * 8-23: domain id - */ -struct context_entry { - u64 lo; - u64 hi; -}; -#define context_present(c) ((c).lo & 1) -#define context_fault_disable(c) (((c).lo >> 1) & 1) -#define context_translation_type(c) (((c).lo >> 2) & 3) -#define context_address_root(c) ((c).lo & PAGE_MASK_4K) -#define context_address_width(c) ((c).hi & 7) -#define context_domain_id(c) (((c).hi >> 8) & ((1 << 16) - 1)) - -#define context_set_present(c) do {(c).lo |= 1;} while (0) -#define context_set_fault_enable(c) \ - do {(c).lo &= (((u64)-1) << 2) | 1;} while (0) -#define context_set_translation_type(c, val) \ - do { \ - (c).lo &= (((u64)-1) << 4) | 3; \ - (c).lo |= ((val) & 3) << 2; \ - } while (0) -#define CONTEXT_TT_MULTI_LEVEL 0 -#define context_set_address_root(c, val) \ - do {(c).lo |= (val) & PAGE_MASK_4K;} while (0) -#define context_set_address_width(c, val) do {(c).hi |= (val) & 7;} while (0) -#define context_set_domain_id(c, val) \ - do {(c).hi |= ((val) & ((1 << 16) - 1)) << 8;} while (0) -#define context_clear_entry(c) do {(c).lo = 0; (c).hi = 0;} while (0) - -/* - * 0: readable - * 1: writable - * 2-6: reserved - * 7: super page - * 8-11: available - * 12-63: Host physcial address - */ -struct dma_pte { - u64 val; -}; -#define dma_clear_pte(p) do {(p).val = 0;} while (0) - -#define DMA_PTE_READ (1) -#define DMA_PTE_WRITE (2) - -#define dma_set_pte_readable(p) do {(p).val |= DMA_PTE_READ;} while (0) -#define dma_set_pte_writable(p) do {(p).val |= DMA_PTE_WRITE;} while (0) -#define dma_set_pte_prot(p, prot) \ - do {(p).val = ((p).val & ~3) | ((prot) & 3); } while (0) -#define dma_pte_addr(p) ((p).val & PAGE_MASK_4K) -#define dma_set_pte_addr(p, addr) do {\ - (p).val |= ((addr) & PAGE_MASK_4K); } while (0) -#define dma_pte_present(p) (((p).val & 3) != 0) - -struct intel_iommu; - -struct dmar_domain { - int id; /* domain id */ - struct intel_iommu *iommu; /* back pointer to owning iommu */ - - struct list_head devices; /* all devices' list */ - struct iova_domain iovad; /* iova's that belong to this domain */ - - struct dma_pte *pgd; /* virtual address */ - spinlock_t mapping_lock; /* page table lock */ - int gaw; /* max guest address width */ - - /* adjusted guest address width, 0 is level 2 30-bit */ - int agaw; - -#define DOMAIN_FLAG_MULTIPLE_DEVICES 1 - int flags; -}; - -/* PCI domain-device relationship */ -struct device_domain_info { - struct list_head link; /* link to domain siblings */ - struct list_head global; /* link to global list */ - u8 bus; /* PCI bus numer */ - u8 devfn; /* PCI devfn number */ - struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */ - struct dmar_domain *domain; /* pointer to domain */ -}; - -extern int init_dmars(void); -extern void free_dmar_iommu(struct intel_iommu *iommu); - -extern int dmar_disabled; - -#ifndef CONFIG_DMAR_GFX_WA -static inline void iommu_prepare_gfx_mapping(void) -{ - return; -} -#endif /* !CONFIG_DMAR_GFX_WA */ - -#endif diff --git a/trunk/drivers/pci/dmar.c b/trunk/drivers/pci/dmar.c index bd2c01674f5e..8bf86ae2333f 100644 --- a/trunk/drivers/pci/dmar.c +++ b/trunk/drivers/pci/dmar.c @@ -19,16 +19,13 @@ * Author: Shaohua Li * Author: Anil S Keshavamurthy * - * This file implements early detection/parsing of Remapping Devices + * This file implements early detection/parsing of DMA Remapping Devices * reported to OS through BIOS via DMA remapping reporting (DMAR) ACPI * tables. - * - * These routines are used by both DMA-remapping and Interrupt-remapping */ #include #include -#include #include "iova.h" #include "intel-iommu.h" @@ -40,6 +37,7 @@ * these units are not supported by the architecture. */ LIST_HEAD(dmar_drhd_units); +LIST_HEAD(dmar_rmrr_units); static struct acpi_table_header * __initdata dmar_tbl; @@ -55,6 +53,11 @@ static void __init dmar_register_drhd_unit(struct dmar_drhd_unit *drhd) list_add(&drhd->list, &dmar_drhd_units); } +static void __init dmar_register_rmrr_unit(struct dmar_rmrr_unit *rmrr) +{ + list_add(&rmrr->list, &dmar_rmrr_units); +} + static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope, struct pci_dev **dev, u16 segment) { @@ -169,37 +172,19 @@ dmar_parse_one_drhd(struct acpi_dmar_header *header) struct acpi_dmar_hardware_unit *drhd; struct dmar_drhd_unit *dmaru; int ret = 0; + static int include_all; dmaru = kzalloc(sizeof(*dmaru), GFP_KERNEL); if (!dmaru) return -ENOMEM; - dmaru->hdr = header; drhd = (struct acpi_dmar_hardware_unit *)header; dmaru->reg_base_addr = drhd->address; dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */ - ret = alloc_iommu(dmaru); - if (ret) { - kfree(dmaru); - return ret; - } - dmar_register_drhd_unit(dmaru); - return 0; -} - -static int __init -dmar_parse_dev(struct dmar_drhd_unit *dmaru) -{ - struct acpi_dmar_hardware_unit *drhd; - static int include_all; - int ret; - - drhd = (struct acpi_dmar_hardware_unit *) dmaru->hdr; - if (!dmaru->include_all) ret = dmar_parse_dev_scope((void *)(drhd + 1), - ((void *)drhd) + drhd->header.length, + ((void *)drhd) + header->length, &dmaru->devices_cnt, &dmaru->devices, drhd->segment); else { @@ -212,59 +197,37 @@ dmar_parse_dev(struct dmar_drhd_unit *dmaru) include_all = 1; } - if (ret || (dmaru->devices_cnt == 0 && !dmaru->include_all)) { - list_del(&dmaru->list); + if (ret || (dmaru->devices_cnt == 0 && !dmaru->include_all)) kfree(dmaru); - } + else + dmar_register_drhd_unit(dmaru); return ret; } -#ifdef CONFIG_DMAR -LIST_HEAD(dmar_rmrr_units); - -static void __init dmar_register_rmrr_unit(struct dmar_rmrr_unit *rmrr) -{ - list_add(&rmrr->list, &dmar_rmrr_units); -} - - static int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header) { struct acpi_dmar_reserved_memory *rmrr; struct dmar_rmrr_unit *rmrru; + int ret = 0; rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL); if (!rmrru) return -ENOMEM; - rmrru->hdr = header; rmrr = (struct acpi_dmar_reserved_memory *)header; rmrru->base_address = rmrr->base_address; rmrru->end_address = rmrr->end_address; - - dmar_register_rmrr_unit(rmrru); - return 0; -} - -static int __init -rmrr_parse_dev(struct dmar_rmrr_unit *rmrru) -{ - struct acpi_dmar_reserved_memory *rmrr; - int ret; - - rmrr = (struct acpi_dmar_reserved_memory *) rmrru->hdr; ret = dmar_parse_dev_scope((void *)(rmrr + 1), - ((void *)rmrr) + rmrr->header.length, + ((void *)rmrr) + header->length, &rmrru->devices_cnt, &rmrru->devices, rmrr->segment); - if (ret || (rmrru->devices_cnt == 0)) { - list_del(&rmrru->list); + if (ret || (rmrru->devices_cnt == 0)) kfree(rmrru); - } + else + dmar_register_rmrr_unit(rmrru); return ret; } -#endif static void __init dmar_table_print_dmar_entry(struct acpi_dmar_header *header) @@ -289,7 +252,6 @@ dmar_table_print_dmar_entry(struct acpi_dmar_header *header) } } - /** * parse_dmar_table - parses the DMA reporting table */ @@ -322,9 +284,7 @@ parse_dmar_table(void) ret = dmar_parse_one_drhd(entry_header); break; case ACPI_DMAR_TYPE_RESERVED_MEMORY: -#ifdef CONFIG_DMAR ret = dmar_parse_one_rmrr(entry_header); -#endif break; default: printk(KERN_WARNING PREFIX @@ -340,77 +300,15 @@ parse_dmar_table(void) return ret; } -int dmar_pci_device_match(struct pci_dev *devices[], int cnt, - struct pci_dev *dev) -{ - int index; - - while (dev) { - for (index = 0; index < cnt; index++) - if (dev == devices[index]) - return 1; - - /* Check our parent */ - dev = dev->bus->self; - } - - return 0; -} - -struct dmar_drhd_unit * -dmar_find_matched_drhd_unit(struct pci_dev *dev) -{ - struct dmar_drhd_unit *drhd = NULL; - - list_for_each_entry(drhd, &dmar_drhd_units, list) { - if (drhd->include_all || dmar_pci_device_match(drhd->devices, - drhd->devices_cnt, dev)) - return drhd; - } - - return NULL; -} - -int __init dmar_dev_scope_init(void) -{ - struct dmar_drhd_unit *drhd; - int ret = -ENODEV; - - for_each_drhd_unit(drhd) { - ret = dmar_parse_dev(drhd); - if (ret) - return ret; - } - -#ifdef CONFIG_DMAR - { - struct dmar_rmrr_unit *rmrr; - for_each_rmrr_units(rmrr) { - ret = rmrr_parse_dev(rmrr); - if (ret) - return ret; - } - } -#endif - - return ret; -} - int __init dmar_table_init(void) { - static int dmar_table_initialized; - int ret; - - if (dmar_table_initialized) - return 0; - dmar_table_initialized = 1; + int ret; ret = parse_dmar_table(); if (ret) { - if (ret != -ENODEV) - printk(KERN_INFO PREFIX "parse DMAR table failure.\n"); + printk(KERN_INFO PREFIX "parse DMAR table failure.\n"); return ret; } @@ -419,14 +317,9 @@ int __init dmar_table_init(void) return -ENODEV; } -#ifdef CONFIG_DMAR if (list_empty(&dmar_rmrr_units)) printk(KERN_INFO PREFIX "No RMRR found\n"); -#endif -#ifdef CONFIG_INTR_REMAP - parse_ioapics_under_ir(); -#endif return 0; } @@ -448,255 +341,3 @@ int __init early_dmar_detect(void) return (ACPI_SUCCESS(status) ? 1 : 0); } - -void __init detect_intel_iommu(void) -{ - int ret; - - ret = early_dmar_detect(); - -#ifdef CONFIG_DMAR - { - struct acpi_table_dmar *dmar; - /* - * for now we will disable dma-remapping when interrupt - * remapping is enabled. - * When support for queued invalidation for IOTLB invalidation - * is added, we will not need this any more. - */ - dmar = (struct acpi_table_dmar *) dmar_tbl; - if (ret && cpu_has_x2apic && dmar->flags & 0x1) { - printk(KERN_INFO - "Queued invalidation will be enabled to support " - "x2apic and Intr-remapping.\n"); - printk(KERN_INFO - "Disabling IOMMU detection, because of missing " - "queued invalidation support for IOTLB " - "invalidation\n"); - printk(KERN_INFO - "Use \"nox2apic\", if you want to use Intel " - " IOMMU for DMA-remapping and don't care about " - " x2apic support\n"); - - dmar_disabled = 1; - return; - } - - if (ret && !no_iommu && !iommu_detected && !swiotlb && - !dmar_disabled) - iommu_detected = 1; - } -#endif -} - - -int alloc_iommu(struct dmar_drhd_unit *drhd) -{ - struct intel_iommu *iommu; - int map_size; - u32 ver; - static int iommu_allocated = 0; - - iommu = kzalloc(sizeof(*iommu), GFP_KERNEL); - if (!iommu) - return -ENOMEM; - - iommu->seq_id = iommu_allocated++; - - iommu->reg = ioremap(drhd->reg_base_addr, PAGE_SIZE_4K); - if (!iommu->reg) { - printk(KERN_ERR "IOMMU: can't map the region\n"); - goto error; - } - iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG); - iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG); - - /* the registers might be more than one page */ - map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap), - cap_max_fault_reg_offset(iommu->cap)); - map_size = PAGE_ALIGN_4K(map_size); - if (map_size > PAGE_SIZE_4K) { - iounmap(iommu->reg); - iommu->reg = ioremap(drhd->reg_base_addr, map_size); - if (!iommu->reg) { - printk(KERN_ERR "IOMMU: can't map the region\n"); - goto error; - } - } - - ver = readl(iommu->reg + DMAR_VER_REG); - pr_debug("IOMMU %llx: ver %d:%d cap %llx ecap %llx\n", - drhd->reg_base_addr, DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver), - iommu->cap, iommu->ecap); - - spin_lock_init(&iommu->register_lock); - - drhd->iommu = iommu; - return 0; -error: - kfree(iommu); - return -1; -} - -void free_iommu(struct intel_iommu *iommu) -{ - if (!iommu) - return; - -#ifdef CONFIG_DMAR - free_dmar_iommu(iommu); -#endif - - if (iommu->reg) - iounmap(iommu->reg); - kfree(iommu); -} - -/* - * Reclaim all the submitted descriptors which have completed its work. - */ -static inline void reclaim_free_desc(struct q_inval *qi) -{ - while (qi->desc_status[qi->free_tail] == QI_DONE) { - qi->desc_status[qi->free_tail] = QI_FREE; - qi->free_tail = (qi->free_tail + 1) % QI_LENGTH; - qi->free_cnt++; - } -} - -/* - * Submit the queued invalidation descriptor to the remapping - * hardware unit and wait for its completion. - */ -void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu) -{ - struct q_inval *qi = iommu->qi; - struct qi_desc *hw, wait_desc; - int wait_index, index; - unsigned long flags; - - if (!qi) - return; - - hw = qi->desc; - - spin_lock(&qi->q_lock); - while (qi->free_cnt < 3) { - spin_unlock(&qi->q_lock); - cpu_relax(); - spin_lock(&qi->q_lock); - } - - index = qi->free_head; - wait_index = (index + 1) % QI_LENGTH; - - qi->desc_status[index] = qi->desc_status[wait_index] = QI_IN_USE; - - hw[index] = *desc; - - wait_desc.low = QI_IWD_STATUS_DATA(2) | QI_IWD_STATUS_WRITE | QI_IWD_TYPE; - wait_desc.high = virt_to_phys(&qi->desc_status[wait_index]); - - hw[wait_index] = wait_desc; - - __iommu_flush_cache(iommu, &hw[index], sizeof(struct qi_desc)); - __iommu_flush_cache(iommu, &hw[wait_index], sizeof(struct qi_desc)); - - qi->free_head = (qi->free_head + 2) % QI_LENGTH; - qi->free_cnt -= 2; - - spin_lock_irqsave(&iommu->register_lock, flags); - /* - * update the HW tail register indicating the presence of - * new descriptors. - */ - writel(qi->free_head << 4, iommu->reg + DMAR_IQT_REG); - spin_unlock_irqrestore(&iommu->register_lock, flags); - - while (qi->desc_status[wait_index] != QI_DONE) { - spin_unlock(&qi->q_lock); - cpu_relax(); - spin_lock(&qi->q_lock); - } - - qi->desc_status[index] = QI_DONE; - - reclaim_free_desc(qi); - spin_unlock(&qi->q_lock); -} - -/* - * Flush the global interrupt entry cache. - */ -void qi_global_iec(struct intel_iommu *iommu) -{ - struct qi_desc desc; - - desc.low = QI_IEC_TYPE; - desc.high = 0; - - qi_submit_sync(&desc, iommu); -} - -/* - * Enable Queued Invalidation interface. This is a must to support - * interrupt-remapping. Also used by DMA-remapping, which replaces - * register based IOTLB invalidation. - */ -int dmar_enable_qi(struct intel_iommu *iommu) -{ - u32 cmd, sts; - unsigned long flags; - struct q_inval *qi; - - if (!ecap_qis(iommu->ecap)) - return -ENOENT; - - /* - * queued invalidation is already setup and enabled. - */ - if (iommu->qi) - return 0; - - iommu->qi = kmalloc(sizeof(*qi), GFP_KERNEL); - if (!iommu->qi) - return -ENOMEM; - - qi = iommu->qi; - - qi->desc = (void *)(get_zeroed_page(GFP_KERNEL)); - if (!qi->desc) { - kfree(qi); - iommu->qi = 0; - return -ENOMEM; - } - - qi->desc_status = kmalloc(QI_LENGTH * sizeof(int), GFP_KERNEL); - if (!qi->desc_status) { - free_page((unsigned long) qi->desc); - kfree(qi); - iommu->qi = 0; - return -ENOMEM; - } - - qi->free_head = qi->free_tail = 0; - qi->free_cnt = QI_LENGTH; - - spin_lock_init(&qi->q_lock); - - spin_lock_irqsave(&iommu->register_lock, flags); - /* write zero to the tail reg */ - writel(0, iommu->reg + DMAR_IQT_REG); - - dmar_writeq(iommu->reg + DMAR_IQA_REG, virt_to_phys(qi->desc)); - - cmd = iommu->gcmd | DMA_GCMD_QIE; - iommu->gcmd |= DMA_GCMD_QIE; - writel(cmd, iommu->reg + DMAR_GCMD_REG); - - /* Make sure hardware complete it */ - IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_QIES), sts); - spin_unlock_irqrestore(&iommu->register_lock, flags); - - return 0; -} diff --git a/trunk/drivers/pci/intel-iommu.c b/trunk/drivers/pci/intel-iommu.c index 389fdd6f4a9f..6c4c1c3c50ee 100644 --- a/trunk/drivers/pci/intel-iommu.c +++ b/trunk/drivers/pci/intel-iommu.c @@ -49,6 +49,8 @@ #define DEFAULT_DOMAIN_ADDRESS_WIDTH 48 +#define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) /* 10sec */ + #define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1) @@ -56,6 +58,8 @@ static void flush_unmaps_timeout(unsigned long data); DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0); +static struct intel_iommu *g_iommus; + #define HIGH_WATER_MARK 250 struct deferred_flush_tables { int next; @@ -181,6 +185,13 @@ void free_iova_mem(struct iova *iova) kmem_cache_free(iommu_iova_cache, iova); } +static inline void __iommu_flush_cache( + struct intel_iommu *iommu, void *addr, int size) +{ + if (!ecap_coherent(iommu->ecap)) + clflush_cache_range(addr, size); +} + /* Gets context entry for a given bus and devfn */ static struct context_entry * device_to_context_entry(struct intel_iommu *iommu, u8 bus, u8 devfn) @@ -477,6 +488,19 @@ static int iommu_alloc_root_entry(struct intel_iommu *iommu) return 0; } +#define IOMMU_WAIT_OP(iommu, offset, op, cond, sts) \ +{\ + cycles_t start_time = get_cycles();\ + while (1) {\ + sts = op (iommu->reg + offset);\ + if (cond)\ + break;\ + if (DMAR_OPERATION_TIMEOUT < (get_cycles() - start_time))\ + panic("DMAR hardware is malfunctioning\n");\ + cpu_relax();\ + }\ +} + static void iommu_set_root_entry(struct intel_iommu *iommu) { void *addr; @@ -966,8 +990,6 @@ static int iommu_init_domains(struct intel_iommu *iommu) return -ENOMEM; } - spin_lock_init(&iommu->lock); - /* * if Caching mode is set, then invalid translations are tagged * with domainid 0. Hence we need to pre-allocate it. @@ -976,15 +998,62 @@ static int iommu_init_domains(struct intel_iommu *iommu) set_bit(0, iommu->domain_ids); return 0; } +static struct intel_iommu *alloc_iommu(struct intel_iommu *iommu, + struct dmar_drhd_unit *drhd) +{ + int ret; + int map_size; + u32 ver; + + iommu->reg = ioremap(drhd->reg_base_addr, PAGE_SIZE_4K); + if (!iommu->reg) { + printk(KERN_ERR "IOMMU: can't map the region\n"); + goto error; + } + iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG); + iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG); + + /* the registers might be more than one page */ + map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap), + cap_max_fault_reg_offset(iommu->cap)); + map_size = PAGE_ALIGN_4K(map_size); + if (map_size > PAGE_SIZE_4K) { + iounmap(iommu->reg); + iommu->reg = ioremap(drhd->reg_base_addr, map_size); + if (!iommu->reg) { + printk(KERN_ERR "IOMMU: can't map the region\n"); + goto error; + } + } + ver = readl(iommu->reg + DMAR_VER_REG); + pr_debug("IOMMU %llx: ver %d:%d cap %llx ecap %llx\n", + drhd->reg_base_addr, DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver), + iommu->cap, iommu->ecap); + ret = iommu_init_domains(iommu); + if (ret) + goto error_unmap; + spin_lock_init(&iommu->lock); + spin_lock_init(&iommu->register_lock); -static void domain_exit(struct dmar_domain *domain); + drhd->iommu = iommu; + return iommu; +error_unmap: + iounmap(iommu->reg); +error: + kfree(iommu); + return NULL; +} -void free_dmar_iommu(struct intel_iommu *iommu) +static void domain_exit(struct dmar_domain *domain); +static void free_iommu(struct intel_iommu *iommu) { struct dmar_domain *domain; int i; + if (!iommu) + return; + i = find_first_bit(iommu->domain_ids, cap_ndoms(iommu->cap)); for (; i < cap_ndoms(iommu->cap); ) { domain = iommu->domains[i]; @@ -1009,6 +1078,10 @@ void free_dmar_iommu(struct intel_iommu *iommu) /* free context mapping */ free_context_table(iommu); + + if (iommu->reg) + iounmap(iommu->reg); + kfree(iommu); } static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu) @@ -1353,6 +1426,37 @@ find_domain(struct pci_dev *pdev) return NULL; } +static int dmar_pci_device_match(struct pci_dev *devices[], int cnt, + struct pci_dev *dev) +{ + int index; + + while (dev) { + for (index = 0; index < cnt; index++) + if (dev == devices[index]) + return 1; + + /* Check our parent */ + dev = dev->bus->self; + } + + return 0; +} + +static struct dmar_drhd_unit * +dmar_find_matched_drhd_unit(struct pci_dev *dev) +{ + struct dmar_drhd_unit *drhd = NULL; + + list_for_each_entry(drhd, &dmar_drhd_units, list) { + if (drhd->include_all || dmar_pci_device_match(drhd->devices, + drhd->devices_cnt, dev)) + return drhd; + } + + return NULL; +} + /* domain is initialized */ static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw) { @@ -1625,6 +1729,8 @@ int __init init_dmars(void) * endfor */ for_each_drhd_unit(drhd) { + if (drhd->ignored) + continue; g_num_of_iommus++; /* * lock not needed as this is only incremented in the single @@ -1633,6 +1739,12 @@ int __init init_dmars(void) */ } + g_iommus = kzalloc(g_num_of_iommus * sizeof(*iommu), GFP_KERNEL); + if (!g_iommus) { + ret = -ENOMEM; + goto error; + } + deferred_flush = kzalloc(g_num_of_iommus * sizeof(struct deferred_flush_tables), GFP_KERNEL); if (!deferred_flush) { @@ -1640,15 +1752,16 @@ int __init init_dmars(void) goto error; } + i = 0; for_each_drhd_unit(drhd) { if (drhd->ignored) continue; - - iommu = drhd->iommu; - - ret = iommu_init_domains(iommu); - if (ret) + iommu = alloc_iommu(&g_iommus[i], drhd); + i++; + if (!iommu) { + ret = -ENOMEM; goto error; + } /* * TBD: @@ -1732,6 +1845,7 @@ int __init init_dmars(void) iommu = drhd->iommu; free_iommu(iommu); } + kfree(g_iommus); return ret; } @@ -1888,10 +2002,7 @@ static void flush_unmaps(void) /* just flush them all */ for (i = 0; i < g_num_of_iommus; i++) { if (deferred_flush[i].next) { - struct intel_iommu *iommu = - deferred_flush[i].domain[0]->iommu; - - iommu_flush_iotlb_global(iommu, 0); + iommu_flush_iotlb_global(&g_iommus[i], 0); for (j = 0; j < deferred_flush[i].next; j++) { __free_iova(&deferred_flush[i].domain[j]->iovad, deferred_flush[i].iova[j]); @@ -1921,8 +2032,7 @@ static void add_unmap(struct dmar_domain *dom, struct iova *iova) if (list_size == HIGH_WATER_MARK) flush_unmaps(); - iommu_id = dom->iommu->seq_id; - + iommu_id = dom->iommu - g_iommus; next = deferred_flush[iommu_id].next; deferred_flush[iommu_id].domain[next] = dom; deferred_flush[iommu_id].iova[next] = iova; @@ -2238,6 +2348,38 @@ static void __init iommu_exit_mempool(void) } +static int blacklist_iommu(const struct dmi_system_id *id) +{ + printk(KERN_INFO "%s detected; disabling IOMMU\n", + id->ident); + dmar_disabled = 1; + return 0; +} + +static struct dmi_system_id __initdata intel_iommu_dmi_table[] = { + { /* Some DG33BU BIOS revisions advertised non-existent VT-d */ + .callback = blacklist_iommu, + .ident = "Intel DG33BU", + { DMI_MATCH(DMI_BOARD_VENDOR, "Intel Corporation"), + DMI_MATCH(DMI_BOARD_NAME, "DG33BU"), + } + }, + { } +}; + + +void __init detect_intel_iommu(void) +{ + if (swiotlb || no_iommu || iommu_detected || dmar_disabled) + return; + if (early_dmar_detect()) { + dmi_check_system(intel_iommu_dmi_table); + if (dmar_disabled) + return; + iommu_detected = 1; + } +} + static void __init init_no_remapping_devices(void) { struct dmar_drhd_unit *drhd; @@ -2284,19 +2426,12 @@ int __init intel_iommu_init(void) { int ret = 0; - if (dmar_table_init()) - return -ENODEV; - - if (dmar_dev_scope_init()) - return -ENODEV; - - /* - * Check the need for DMA-remapping initialization now. - * Above initialization will also be used by Interrupt-remapping. - */ if (no_iommu || swiotlb || dmar_disabled) return -ENODEV; + if (dmar_table_init()) + return -ENODEV; + iommu_init_mempool(); dmar_init_reserved_ranges(); diff --git a/trunk/drivers/pci/intel-iommu.h b/trunk/drivers/pci/intel-iommu.h index 2142c01e0143..afc0ad96122e 100644 --- a/trunk/drivers/pci/intel-iommu.h +++ b/trunk/drivers/pci/intel-iommu.h @@ -27,8 +27,19 @@ #include #include "iova.h" #include -#include -#include "dma_remapping.h" + +/* + * We need a fixed PAGE_SIZE of 4K irrespective of + * arch PAGE_SIZE for IOMMU page tables. + */ +#define PAGE_SHIFT_4K (12) +#define PAGE_SIZE_4K (1UL << PAGE_SHIFT_4K) +#define PAGE_MASK_4K (((u64)-1) << PAGE_SHIFT_4K) +#define PAGE_ALIGN_4K(addr) (((addr) + PAGE_SIZE_4K - 1) & PAGE_MASK_4K) + +#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT_4K) +#define DMA_32BIT_PFN IOVA_PFN(DMA_32BIT_MASK) +#define DMA_64BIT_PFN IOVA_PFN(DMA_64BIT_MASK) /* * Intel IOMMU register specification per version 1.0 public spec. @@ -52,11 +63,6 @@ #define DMAR_PLMLIMIT_REG 0x6c /* PMRR low limit */ #define DMAR_PHMBASE_REG 0x70 /* pmrr high base addr */ #define DMAR_PHMLIMIT_REG 0x78 /* pmrr high limit */ -#define DMAR_IQH_REG 0x80 /* Invalidation queue head register */ -#define DMAR_IQT_REG 0x88 /* Invalidation queue tail register */ -#define DMAR_IQA_REG 0x90 /* Invalidation queue addr register */ -#define DMAR_ICS_REG 0x98 /* Invalidation complete status register */ -#define DMAR_IRTA_REG 0xb8 /* Interrupt remapping table addr register */ #define OFFSET_STRIDE (9) /* @@ -120,10 +126,6 @@ static inline void dmar_writeq(void __iomem *addr, u64 val) #define ecap_max_iotlb_offset(e) \ (ecap_iotlb_offset(e) + ecap_niotlb_iunits(e) * 16) #define ecap_coherent(e) ((e) & 0x1) -#define ecap_qis(e) ((e) & 0x2) -#define ecap_eim_support(e) ((e >> 4) & 0x1) -#define ecap_ir_support(e) ((e >> 3) & 0x1) -#define ecap_max_handle_mask(e) ((e >> 20) & 0xf) /* IOTLB_REG */ @@ -139,17 +141,6 @@ static inline void dmar_writeq(void __iomem *addr, u64 val) #define DMA_TLB_IH_NONLEAF (((u64)1) << 6) #define DMA_TLB_MAX_SIZE (0x3f) -/* INVALID_DESC */ -#define DMA_ID_TLB_GLOBAL_FLUSH (((u64)1) << 3) -#define DMA_ID_TLB_DSI_FLUSH (((u64)2) << 3) -#define DMA_ID_TLB_PSI_FLUSH (((u64)3) << 3) -#define DMA_ID_TLB_READ_DRAIN (((u64)1) << 7) -#define DMA_ID_TLB_WRITE_DRAIN (((u64)1) << 6) -#define DMA_ID_TLB_DID(id) (((u64)((id & 0xffff) << 16))) -#define DMA_ID_TLB_IH_NONLEAF (((u64)1) << 6) -#define DMA_ID_TLB_ADDR(addr) (addr) -#define DMA_ID_TLB_ADDR_MASK(mask) (mask) - /* PMEN_REG */ #define DMA_PMEN_EPM (((u32)1)<<31) #define DMA_PMEN_PRS (((u32)1)<<0) @@ -160,9 +151,6 @@ static inline void dmar_writeq(void __iomem *addr, u64 val) #define DMA_GCMD_SFL (((u32)1) << 29) #define DMA_GCMD_EAFL (((u32)1) << 28) #define DMA_GCMD_WBF (((u32)1) << 27) -#define DMA_GCMD_QIE (((u32)1) << 26) -#define DMA_GCMD_SIRTP (((u32)1) << 24) -#define DMA_GCMD_IRE (((u32) 1) << 25) /* GSTS_REG */ #define DMA_GSTS_TES (((u32)1) << 31) @@ -170,9 +158,6 @@ static inline void dmar_writeq(void __iomem *addr, u64 val) #define DMA_GSTS_FLS (((u32)1) << 29) #define DMA_GSTS_AFLS (((u32)1) << 28) #define DMA_GSTS_WBFS (((u32)1) << 27) -#define DMA_GSTS_QIES (((u32)1) << 26) -#define DMA_GSTS_IRTPS (((u32)1) << 24) -#define DMA_GSTS_IRES (((u32)1) << 25) /* CCMD_REG */ #define DMA_CCMD_ICC (((u64)1) << 63) @@ -202,106 +187,158 @@ static inline void dmar_writeq(void __iomem *addr, u64 val) #define dma_frcd_source_id(c) (c & 0xffff) #define dma_frcd_page_addr(d) (d & (((u64)-1) << 12)) /* low 64 bit */ -#define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) /* 10sec */ - -#define IOMMU_WAIT_OP(iommu, offset, op, cond, sts) \ -{\ - cycles_t start_time = get_cycles();\ - while (1) {\ - sts = op (iommu->reg + offset);\ - if (cond)\ - break;\ - if (DMAR_OPERATION_TIMEOUT < (get_cycles() - start_time))\ - panic("DMAR hardware is malfunctioning\n");\ - cpu_relax();\ - }\ +/* + * 0: Present + * 1-11: Reserved + * 12-63: Context Ptr (12 - (haw-1)) + * 64-127: Reserved + */ +struct root_entry { + u64 val; + u64 rsvd1; +}; +#define ROOT_ENTRY_NR (PAGE_SIZE_4K/sizeof(struct root_entry)) +static inline bool root_present(struct root_entry *root) +{ + return (root->val & 1); +} +static inline void set_root_present(struct root_entry *root) +{ + root->val |= 1; +} +static inline void set_root_value(struct root_entry *root, unsigned long value) +{ + root->val |= value & PAGE_MASK_4K; } -#define QI_LENGTH 256 /* queue length */ +struct context_entry; +static inline struct context_entry * +get_context_addr_from_root(struct root_entry *root) +{ + return (struct context_entry *) + (root_present(root)?phys_to_virt( + root->val & PAGE_MASK_4K): + NULL); +} + +/* + * low 64 bits: + * 0: present + * 1: fault processing disable + * 2-3: translation type + * 12-63: address space root + * high 64 bits: + * 0-2: address width + * 3-6: aval + * 8-23: domain id + */ +struct context_entry { + u64 lo; + u64 hi; +}; +#define context_present(c) ((c).lo & 1) +#define context_fault_disable(c) (((c).lo >> 1) & 1) +#define context_translation_type(c) (((c).lo >> 2) & 3) +#define context_address_root(c) ((c).lo & PAGE_MASK_4K) +#define context_address_width(c) ((c).hi & 7) +#define context_domain_id(c) (((c).hi >> 8) & ((1 << 16) - 1)) + +#define context_set_present(c) do {(c).lo |= 1;} while (0) +#define context_set_fault_enable(c) \ + do {(c).lo &= (((u64)-1) << 2) | 1;} while (0) +#define context_set_translation_type(c, val) \ + do { \ + (c).lo &= (((u64)-1) << 4) | 3; \ + (c).lo |= ((val) & 3) << 2; \ + } while (0) +#define CONTEXT_TT_MULTI_LEVEL 0 +#define context_set_address_root(c, val) \ + do {(c).lo |= (val) & PAGE_MASK_4K;} while (0) +#define context_set_address_width(c, val) do {(c).hi |= (val) & 7;} while (0) +#define context_set_domain_id(c, val) \ + do {(c).hi |= ((val) & ((1 << 16) - 1)) << 8;} while (0) +#define context_clear_entry(c) do {(c).lo = 0; (c).hi = 0;} while (0) -enum { - QI_FREE, - QI_IN_USE, - QI_DONE +/* + * 0: readable + * 1: writable + * 2-6: reserved + * 7: super page + * 8-11: available + * 12-63: Host physcial address + */ +struct dma_pte { + u64 val; }; +#define dma_clear_pte(p) do {(p).val = 0;} while (0) -#define QI_CC_TYPE 0x1 -#define QI_IOTLB_TYPE 0x2 -#define QI_DIOTLB_TYPE 0x3 -#define QI_IEC_TYPE 0x4 -#define QI_IWD_TYPE 0x5 +#define DMA_PTE_READ (1) +#define DMA_PTE_WRITE (2) -#define QI_IEC_SELECTIVE (((u64)1) << 4) -#define QI_IEC_IIDEX(idx) (((u64)(idx & 0xffff) << 32)) -#define QI_IEC_IM(m) (((u64)(m & 0x1f) << 27)) +#define dma_set_pte_readable(p) do {(p).val |= DMA_PTE_READ;} while (0) +#define dma_set_pte_writable(p) do {(p).val |= DMA_PTE_WRITE;} while (0) +#define dma_set_pte_prot(p, prot) \ + do {(p).val = ((p).val & ~3) | ((prot) & 3); } while (0) +#define dma_pte_addr(p) ((p).val & PAGE_MASK_4K) +#define dma_set_pte_addr(p, addr) do {\ + (p).val |= ((addr) & PAGE_MASK_4K); } while (0) +#define dma_pte_present(p) (((p).val & 3) != 0) -#define QI_IWD_STATUS_DATA(d) (((u64)d) << 32) -#define QI_IWD_STATUS_WRITE (((u64)1) << 5) +struct intel_iommu; -struct qi_desc { - u64 low, high; -}; +struct dmar_domain { + int id; /* domain id */ + struct intel_iommu *iommu; /* back pointer to owning iommu */ -struct q_inval { - spinlock_t q_lock; - struct qi_desc *desc; /* invalidation queue */ - int *desc_status; /* desc status */ - int free_head; /* first free entry */ - int free_tail; /* last free entry */ - int free_cnt; -}; + struct list_head devices; /* all devices' list */ + struct iova_domain iovad; /* iova's that belong to this domain */ -#ifdef CONFIG_INTR_REMAP -/* 1MB - maximum possible interrupt remapping table size */ -#define INTR_REMAP_PAGE_ORDER 8 -#define INTR_REMAP_TABLE_REG_SIZE 0xf + struct dma_pte *pgd; /* virtual address */ + spinlock_t mapping_lock; /* page table lock */ + int gaw; /* max guest address width */ -#define INTR_REMAP_TABLE_ENTRIES 65536 + /* adjusted guest address width, 0 is level 2 30-bit */ + int agaw; -struct ir_table { - struct irte *base; +#define DOMAIN_FLAG_MULTIPLE_DEVICES 1 + int flags; }; -#endif + +/* PCI domain-device relationship */ +struct device_domain_info { + struct list_head link; /* link to domain siblings */ + struct list_head global; /* link to global list */ + u8 bus; /* PCI bus numer */ + u8 devfn; /* PCI devfn number */ + struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */ + struct dmar_domain *domain; /* pointer to domain */ +}; + +extern int init_dmars(void); struct intel_iommu { void __iomem *reg; /* Pointer to hardware regs, virtual addr */ u64 cap; u64 ecap; - int seg; - u32 gcmd; /* Holds TE, EAFL. Don't need SRTP, SFL, WBF */ - spinlock_t register_lock; /* protect register handling */ - int seq_id; /* sequence id of the iommu */ - -#ifdef CONFIG_DMAR unsigned long *domain_ids; /* bitmap of domains */ struct dmar_domain **domains; /* ptr to domains */ + int seg; + u32 gcmd; /* Holds TE, EAFL. Don't need SRTP, SFL, WBF */ spinlock_t lock; /* protect context, domain ids */ + spinlock_t register_lock; /* protect register handling */ struct root_entry *root_entry; /* virtual address */ unsigned int irq; unsigned char name[7]; /* Device Name */ struct msi_msg saved_msg; struct sys_device sysdev; -#endif - struct q_inval *qi; /* Queued invalidation info */ -#ifdef CONFIG_INTR_REMAP - struct ir_table *ir_table; /* Interrupt remapping info */ -#endif }; -static inline void __iommu_flush_cache( - struct intel_iommu *iommu, void *addr, int size) +#ifndef CONFIG_DMAR_GFX_WA +static inline void iommu_prepare_gfx_mapping(void) { - if (!ecap_coherent(iommu->ecap)) - clflush_cache_range(addr, size); + return; } +#endif /* !CONFIG_DMAR_GFX_WA */ -extern struct dmar_drhd_unit * dmar_find_matched_drhd_unit(struct pci_dev *dev); - -extern int alloc_iommu(struct dmar_drhd_unit *drhd); -extern void free_iommu(struct intel_iommu *iommu); -extern int dmar_enable_qi(struct intel_iommu *iommu); -extern void qi_global_iec(struct intel_iommu *iommu); - -extern void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu); #endif diff --git a/trunk/drivers/pci/intr_remapping.c b/trunk/drivers/pci/intr_remapping.c deleted file mode 100644 index bb642cc5e18c..000000000000 --- a/trunk/drivers/pci/intr_remapping.c +++ /dev/null @@ -1,471 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include "intel-iommu.h" -#include "intr_remapping.h" - -static struct ioapic_scope ir_ioapic[MAX_IO_APICS]; -static int ir_ioapic_num; -int intr_remapping_enabled; - -static struct { - struct intel_iommu *iommu; - u16 irte_index; - u16 sub_handle; - u8 irte_mask; -} irq_2_iommu[NR_IRQS]; - -static DEFINE_SPINLOCK(irq_2_ir_lock); - -int irq_remapped(int irq) -{ - if (irq > NR_IRQS) - return 0; - - if (!irq_2_iommu[irq].iommu) - return 0; - - return 1; -} - -int get_irte(int irq, struct irte *entry) -{ - int index; - - if (!entry || irq > NR_IRQS) - return -1; - - spin_lock(&irq_2_ir_lock); - if (!irq_2_iommu[irq].iommu) { - spin_unlock(&irq_2_ir_lock); - return -1; - } - - index = irq_2_iommu[irq].irte_index + irq_2_iommu[irq].sub_handle; - *entry = *(irq_2_iommu[irq].iommu->ir_table->base + index); - - spin_unlock(&irq_2_ir_lock); - return 0; -} - -int alloc_irte(struct intel_iommu *iommu, int irq, u16 count) -{ - struct ir_table *table = iommu->ir_table; - u16 index, start_index; - unsigned int mask = 0; - int i; - - if (!count) - return -1; - - /* - * start the IRTE search from index 0. - */ - index = start_index = 0; - - if (count > 1) { - count = __roundup_pow_of_two(count); - mask = ilog2(count); - } - - if (mask > ecap_max_handle_mask(iommu->ecap)) { - printk(KERN_ERR - "Requested mask %x exceeds the max invalidation handle" - " mask value %Lx\n", mask, - ecap_max_handle_mask(iommu->ecap)); - return -1; - } - - spin_lock(&irq_2_ir_lock); - do { - for (i = index; i < index + count; i++) - if (table->base[i].present) - break; - /* empty index found */ - if (i == index + count) - break; - - index = (index + count) % INTR_REMAP_TABLE_ENTRIES; - - if (index == start_index) { - spin_unlock(&irq_2_ir_lock); - printk(KERN_ERR "can't allocate an IRTE\n"); - return -1; - } - } while (1); - - for (i = index; i < index + count; i++) - table->base[i].present = 1; - - irq_2_iommu[irq].iommu = iommu; - irq_2_iommu[irq].irte_index = index; - irq_2_iommu[irq].sub_handle = 0; - irq_2_iommu[irq].irte_mask = mask; - - spin_unlock(&irq_2_ir_lock); - - return index; -} - -static void qi_flush_iec(struct intel_iommu *iommu, int index, int mask) -{ - struct qi_desc desc; - - desc.low = QI_IEC_IIDEX(index) | QI_IEC_TYPE | QI_IEC_IM(mask) - | QI_IEC_SELECTIVE; - desc.high = 0; - - qi_submit_sync(&desc, iommu); -} - -int map_irq_to_irte_handle(int irq, u16 *sub_handle) -{ - int index; - - spin_lock(&irq_2_ir_lock); - if (irq >= NR_IRQS || !irq_2_iommu[irq].iommu) { - spin_unlock(&irq_2_ir_lock); - return -1; - } - - *sub_handle = irq_2_iommu[irq].sub_handle; - index = irq_2_iommu[irq].irte_index; - spin_unlock(&irq_2_ir_lock); - return index; -} - -int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, u16 subhandle) -{ - spin_lock(&irq_2_ir_lock); - if (irq >= NR_IRQS || irq_2_iommu[irq].iommu) { - spin_unlock(&irq_2_ir_lock); - return -1; - } - - irq_2_iommu[irq].iommu = iommu; - irq_2_iommu[irq].irte_index = index; - irq_2_iommu[irq].sub_handle = subhandle; - irq_2_iommu[irq].irte_mask = 0; - - spin_unlock(&irq_2_ir_lock); - - return 0; -} - -int clear_irte_irq(int irq, struct intel_iommu *iommu, u16 index) -{ - spin_lock(&irq_2_ir_lock); - if (irq >= NR_IRQS || !irq_2_iommu[irq].iommu) { - spin_unlock(&irq_2_ir_lock); - return -1; - } - - irq_2_iommu[irq].iommu = NULL; - irq_2_iommu[irq].irte_index = 0; - irq_2_iommu[irq].sub_handle = 0; - irq_2_iommu[irq].irte_mask = 0; - - spin_unlock(&irq_2_ir_lock); - - return 0; -} - -int modify_irte(int irq, struct irte *irte_modified) -{ - int index; - struct irte *irte; - struct intel_iommu *iommu; - - spin_lock(&irq_2_ir_lock); - if (irq >= NR_IRQS || !irq_2_iommu[irq].iommu) { - spin_unlock(&irq_2_ir_lock); - return -1; - } - - iommu = irq_2_iommu[irq].iommu; - - index = irq_2_iommu[irq].irte_index + irq_2_iommu[irq].sub_handle; - irte = &iommu->ir_table->base[index]; - - set_64bit((unsigned long *)irte, irte_modified->low | (1 << 1)); - __iommu_flush_cache(iommu, irte, sizeof(*irte)); - - qi_flush_iec(iommu, index, 0); - - spin_unlock(&irq_2_ir_lock); - return 0; -} - -int flush_irte(int irq) -{ - int index; - struct intel_iommu *iommu; - - spin_lock(&irq_2_ir_lock); - if (irq >= NR_IRQS || !irq_2_iommu[irq].iommu) { - spin_unlock(&irq_2_ir_lock); - return -1; - } - - iommu = irq_2_iommu[irq].iommu; - - index = irq_2_iommu[irq].irte_index + irq_2_iommu[irq].sub_handle; - - qi_flush_iec(iommu, index, irq_2_iommu[irq].irte_mask); - spin_unlock(&irq_2_ir_lock); - - return 0; -} - -struct intel_iommu *map_ioapic_to_ir(int apic) -{ - int i; - - for (i = 0; i < MAX_IO_APICS; i++) - if (ir_ioapic[i].id == apic) - return ir_ioapic[i].iommu; - return NULL; -} - -struct intel_iommu *map_dev_to_ir(struct pci_dev *dev) -{ - struct dmar_drhd_unit *drhd; - - drhd = dmar_find_matched_drhd_unit(dev); - if (!drhd) - return NULL; - - return drhd->iommu; -} - -int free_irte(int irq) -{ - int index, i; - struct irte *irte; - struct intel_iommu *iommu; - - spin_lock(&irq_2_ir_lock); - if (irq >= NR_IRQS || !irq_2_iommu[irq].iommu) { - spin_unlock(&irq_2_ir_lock); - return -1; - } - - iommu = irq_2_iommu[irq].iommu; - - index = irq_2_iommu[irq].irte_index + irq_2_iommu[irq].sub_handle; - irte = &iommu->ir_table->base[index]; - - if (!irq_2_iommu[irq].sub_handle) { - for (i = 0; i < (1 << irq_2_iommu[irq].irte_mask); i++) - set_64bit((unsigned long *)irte, 0); - qi_flush_iec(iommu, index, irq_2_iommu[irq].irte_mask); - } - - irq_2_iommu[irq].iommu = NULL; - irq_2_iommu[irq].irte_index = 0; - irq_2_iommu[irq].sub_handle = 0; - irq_2_iommu[irq].irte_mask = 0; - - spin_unlock(&irq_2_ir_lock); - - return 0; -} - -static void iommu_set_intr_remapping(struct intel_iommu *iommu, int mode) -{ - u64 addr; - u32 cmd, sts; - unsigned long flags; - - addr = virt_to_phys((void *)iommu->ir_table->base); - - spin_lock_irqsave(&iommu->register_lock, flags); - - dmar_writeq(iommu->reg + DMAR_IRTA_REG, - (addr) | IR_X2APIC_MODE(mode) | INTR_REMAP_TABLE_REG_SIZE); - - /* Set interrupt-remapping table pointer */ - cmd = iommu->gcmd | DMA_GCMD_SIRTP; - writel(cmd, iommu->reg + DMAR_GCMD_REG); - - IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, - readl, (sts & DMA_GSTS_IRTPS), sts); - spin_unlock_irqrestore(&iommu->register_lock, flags); - - /* - * global invalidation of interrupt entry cache before enabling - * interrupt-remapping. - */ - qi_global_iec(iommu); - - spin_lock_irqsave(&iommu->register_lock, flags); - - /* Enable interrupt-remapping */ - cmd = iommu->gcmd | DMA_GCMD_IRE; - iommu->gcmd |= DMA_GCMD_IRE; - writel(cmd, iommu->reg + DMAR_GCMD_REG); - - IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, - readl, (sts & DMA_GSTS_IRES), sts); - - spin_unlock_irqrestore(&iommu->register_lock, flags); -} - - -static int setup_intr_remapping(struct intel_iommu *iommu, int mode) -{ - struct ir_table *ir_table; - struct page *pages; - - ir_table = iommu->ir_table = kzalloc(sizeof(struct ir_table), - GFP_KERNEL); - - if (!iommu->ir_table) - return -ENOMEM; - - pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, INTR_REMAP_PAGE_ORDER); - - if (!pages) { - printk(KERN_ERR "failed to allocate pages of order %d\n", - INTR_REMAP_PAGE_ORDER); - kfree(iommu->ir_table); - return -ENOMEM; - } - - ir_table->base = page_address(pages); - - iommu_set_intr_remapping(iommu, mode); - return 0; -} - -int __init enable_intr_remapping(int eim) -{ - struct dmar_drhd_unit *drhd; - int setup = 0; - - /* - * check for the Interrupt-remapping support - */ - for_each_drhd_unit(drhd) { - struct intel_iommu *iommu = drhd->iommu; - - if (!ecap_ir_support(iommu->ecap)) - continue; - - if (eim && !ecap_eim_support(iommu->ecap)) { - printk(KERN_INFO "DRHD %Lx: EIM not supported by DRHD, " - " ecap %Lx\n", drhd->reg_base_addr, iommu->ecap); - return -1; - } - } - - /* - * Enable queued invalidation for all the DRHD's. - */ - for_each_drhd_unit(drhd) { - int ret; - struct intel_iommu *iommu = drhd->iommu; - ret = dmar_enable_qi(iommu); - - if (ret) { - printk(KERN_ERR "DRHD %Lx: failed to enable queued, " - " invalidation, ecap %Lx, ret %d\n", - drhd->reg_base_addr, iommu->ecap, ret); - return -1; - } - } - - /* - * Setup Interrupt-remapping for all the DRHD's now. - */ - for_each_drhd_unit(drhd) { - struct intel_iommu *iommu = drhd->iommu; - - if (!ecap_ir_support(iommu->ecap)) - continue; - - if (setup_intr_remapping(iommu, eim)) - goto error; - - setup = 1; - } - - if (!setup) - goto error; - - intr_remapping_enabled = 1; - - return 0; - -error: - /* - * handle error condition gracefully here! - */ - return -1; -} - -static int ir_parse_ioapic_scope(struct acpi_dmar_header *header, - struct intel_iommu *iommu) -{ - struct acpi_dmar_hardware_unit *drhd; - struct acpi_dmar_device_scope *scope; - void *start, *end; - - drhd = (struct acpi_dmar_hardware_unit *)header; - - start = (void *)(drhd + 1); - end = ((void *)drhd) + header->length; - - while (start < end) { - scope = start; - if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_IOAPIC) { - if (ir_ioapic_num == MAX_IO_APICS) { - printk(KERN_WARNING "Exceeded Max IO APICS\n"); - return -1; - } - - printk(KERN_INFO "IOAPIC id %d under DRHD base" - " 0x%Lx\n", scope->enumeration_id, - drhd->address); - - ir_ioapic[ir_ioapic_num].iommu = iommu; - ir_ioapic[ir_ioapic_num].id = scope->enumeration_id; - ir_ioapic_num++; - } - start += scope->length; - } - - return 0; -} - -/* - * Finds the assocaition between IOAPIC's and its Interrupt-remapping - * hardware unit. - */ -int __init parse_ioapics_under_ir(void) -{ - struct dmar_drhd_unit *drhd; - int ir_supported = 0; - - for_each_drhd_unit(drhd) { - struct intel_iommu *iommu = drhd->iommu; - - if (ecap_ir_support(iommu->ecap)) { - if (ir_parse_ioapic_scope(drhd->hdr, iommu)) - return -1; - - ir_supported = 1; - } - } - - if (ir_supported && ir_ioapic_num != nr_ioapics) { - printk(KERN_WARNING - "Not all IO-APIC's listed under remapping hardware\n"); - return -1; - } - - return ir_supported; -} diff --git a/trunk/drivers/pci/intr_remapping.h b/trunk/drivers/pci/intr_remapping.h deleted file mode 100644 index 05f2635bbe4e..000000000000 --- a/trunk/drivers/pci/intr_remapping.h +++ /dev/null @@ -1,8 +0,0 @@ -#include "intel-iommu.h" - -struct ioapic_scope { - struct intel_iommu *iommu; - unsigned int id; -}; - -#define IR_X2APIC_MODE(mode) (mode ? (1 << 11) : 0) diff --git a/trunk/drivers/scsi/ide-scsi.c b/trunk/drivers/scsi/ide-scsi.c index 90212ac33be3..81c16cba5417 100644 --- a/trunk/drivers/scsi/ide-scsi.c +++ b/trunk/drivers/scsi/ide-scsi.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include #include @@ -130,6 +131,50 @@ static inline idescsi_scsi_t *drive_to_idescsi(ide_drive_t *ide_drive) return scsihost_to_idescsi(ide_drive->driver_data); } +/* + * PIO data transfer routine using the scatter gather table. + */ +static void ide_scsi_io_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc, + unsigned int bcount, int write) +{ + ide_hwif_t *hwif = drive->hwif; + const struct ide_tp_ops *tp_ops = hwif->tp_ops; + xfer_func_t *xf = write ? tp_ops->output_data : tp_ops->input_data; + char *buf; + int count; + + while (bcount) { + count = min(pc->sg->length - pc->b_count, bcount); + if (PageHighMem(sg_page(pc->sg))) { + unsigned long flags; + + local_irq_save(flags); + buf = kmap_atomic(sg_page(pc->sg), KM_IRQ0) + + pc->sg->offset; + xf(drive, NULL, buf + pc->b_count, count); + kunmap_atomic(buf - pc->sg->offset, KM_IRQ0); + local_irq_restore(flags); + } else { + buf = sg_virt(pc->sg); + xf(drive, NULL, buf + pc->b_count, count); + } + bcount -= count; pc->b_count += count; + if (pc->b_count == pc->sg->length) { + if (!--pc->sg_cnt) + break; + pc->sg = sg_next(pc->sg); + pc->b_count = 0; + } + } + + if (bcount) { + printk(KERN_ERR "%s: scatter gather table too small, %s\n", + drive->name, write ? "padding with zeros" + : "discarding data"); + ide_pad_transfer(drive, write, bcount); + } +} + static void ide_scsi_hex_dump(u8 *data, int len) { print_hex_dump(KERN_CONT, "", DUMP_PREFIX_NONE, 16, 1, data, len, 0); @@ -199,9 +244,9 @@ idescsi_atapi_error(ide_drive_t *drive, struct request *rq, u8 stat, u8 err) { ide_hwif_t *hwif = drive->hwif; - if (hwif->tp_ops->read_status(hwif) & (ATA_BUSY | ATA_DRQ)) + if (hwif->tp_ops->read_status(hwif) & (BUSY_STAT | DRQ_STAT)) /* force an abort */ - hwif->tp_ops->exec_command(hwif, ATA_CMD_IDLEIMMEDIATE); + hwif->tp_ops->exec_command(hwif, WIN_IDLEIMMEDIATE); rq->errors++; @@ -299,7 +344,7 @@ static ide_startstop_t idescsi_pc_intr (ide_drive_t *drive) return ide_pc_intr(drive, pc, idescsi_pc_intr, get_timeout(pc), idescsi_expiry, NULL, NULL, NULL, - ide_io_buffers); + ide_scsi_io_buffers); } static ide_startstop_t idescsi_transfer_pc(ide_drive_t *drive) @@ -385,41 +430,21 @@ static ide_startstop_t idescsi_do_request (ide_drive_t *drive, struct request *r } #ifdef CONFIG_IDE_PROC_FS -#define ide_scsi_devset_get(name, field) \ -static int get_##name(ide_drive_t *drive) \ -{ \ - idescsi_scsi_t *scsi = drive_to_idescsi(drive); \ - return scsi->field; \ -} - -#define ide_scsi_devset_set(name, field) \ -static int set_##name(ide_drive_t *drive, int arg) \ -{ \ - idescsi_scsi_t *scsi = drive_to_idescsi(drive); \ - scsi->field = arg; \ - return 0; \ -} - -#define ide_scsi_devset_rw_field(_name, _field) \ -ide_scsi_devset_get(_name, _field); \ -ide_scsi_devset_set(_name, _field); \ -IDE_DEVSET(_name, DS_SYNC, get_##_name, set_##_name); - -ide_devset_rw_field(bios_cyl, bios_cyl); -ide_devset_rw_field(bios_head, bios_head); -ide_devset_rw_field(bios_sect, bios_sect); - -ide_scsi_devset_rw_field(transform, transform); -ide_scsi_devset_rw_field(log, log); - -static const struct ide_proc_devset idescsi_settings[] = { - IDE_PROC_DEVSET(bios_cyl, 0, 1023), - IDE_PROC_DEVSET(bios_head, 0, 255), - IDE_PROC_DEVSET(bios_sect, 0, 63), - IDE_PROC_DEVSET(log, 0, 1), - IDE_PROC_DEVSET(transform, 0, 3), - { 0 }, -}; +static void idescsi_add_settings(ide_drive_t *drive) +{ + idescsi_scsi_t *scsi = drive_to_idescsi(drive); + +/* + * drive setting name read/write data type min max mul_factor div_factor data pointer set function + */ + ide_add_setting(drive, "bios_cyl", SETTING_RW, TYPE_INT, 0, 1023, 1, 1, &drive->bios_cyl, NULL); + ide_add_setting(drive, "bios_head", SETTING_RW, TYPE_BYTE, 0, 255, 1, 1, &drive->bios_head, NULL); + ide_add_setting(drive, "bios_sect", SETTING_RW, TYPE_BYTE, 0, 63, 1, 1, &drive->bios_sect, NULL); + ide_add_setting(drive, "transform", SETTING_RW, TYPE_INT, 0, 3, 1, 1, &scsi->transform, NULL); + ide_add_setting(drive, "log", SETTING_RW, TYPE_INT, 0, 1, 1, 1, &scsi->log, NULL); +} +#else +static inline void idescsi_add_settings(ide_drive_t *drive) { ; } #endif /* @@ -427,7 +452,7 @@ static const struct ide_proc_devset idescsi_settings[] = { */ static void idescsi_setup (ide_drive_t *drive, idescsi_scsi_t *scsi) { - if ((drive->id[ATA_ID_CONFIG] & 0x0060) == 0x20) + if (drive->id && (drive->id->config & 0x0060) == 0x20) set_bit(IDE_AFLAG_DRQ_INTERRUPT, &drive->atapi_flags); clear_bit(IDESCSI_SG_TRANSFORM, &scsi->transform); #if IDESCSI_DEBUG_LOG @@ -436,7 +461,7 @@ static void idescsi_setup (ide_drive_t *drive, idescsi_scsi_t *scsi) drive->pc_callback = ide_scsi_callback; - ide_proc_register_driver(drive, scsi->driver); + idescsi_add_settings(drive); } static void ide_scsi_remove(ide_drive_t *drive) @@ -478,12 +503,12 @@ static ide_driver_t idescsi_driver = { .remove = ide_scsi_remove, .version = IDESCSI_VERSION, .media = ide_scsi, + .supports_dsc_overlap = 0, .do_request = idescsi_do_request, .end_request = idescsi_end_request, .error = idescsi_atapi_error, #ifdef CONFIG_IDE_PROC_FS .proc = idescsi_proc, - .settings = idescsi_settings, #endif }; @@ -786,7 +811,6 @@ static int ide_scsi_probe(ide_drive_t *drive) struct gendisk *g; static int warned; int err = -ENOMEM; - u16 last_lun; if (!warned && drive->media == ide_cdrom) { printk(KERN_WARNING "ide-scsi is deprecated for cd burning! Use ide-cd and give dev=/dev/hdX as device\n"); @@ -797,6 +821,7 @@ static int ide_scsi_probe(ide_drive_t *drive) return -ENODEV; if (!strstr("ide-scsi", drive->driver_req) || + !drive->present || drive->media == ide_disk || !(host = scsi_host_alloc(&idescsi_template,sizeof(idescsi_scsi_t)))) return -ENODEV; @@ -811,12 +836,12 @@ static int ide_scsi_probe(ide_drive_t *drive) host->max_id = 1; - last_lun = drive->id[ATA_ID_LAST_LUN]; - if (last_lun) - debug_log("%s: last_lun=%u\n", drive->name, last_lun); + if (drive->id->last_lun) + debug_log("%s: id->last_lun=%u\n", drive->name, + drive->id->last_lun); - if ((last_lun & 7) != 7) - host->max_lun = (last_lun & 7) + 1; + if ((drive->id->last_lun & 0x7) != 7) + host->max_lun = (drive->id->last_lun & 0x7) + 1; else host->max_lun = 1; @@ -827,6 +852,7 @@ static int ide_scsi_probe(ide_drive_t *drive) idescsi->host = host; idescsi->disk = g; g->private_data = &idescsi->driver; + ide_proc_register_driver(drive, &idescsi_driver); err = 0; idescsi_setup(drive, idescsi); g->fops = &idescsi_ops; diff --git a/trunk/fs/Kconfig b/trunk/fs/Kconfig index 40183d94b683..abccb5dab9a8 100644 --- a/trunk/fs/Kconfig +++ b/trunk/fs/Kconfig @@ -136,51 +136,37 @@ config EXT3_FS_SECURITY If you are not using a security module that requires using extended attributes for file security labels, say N. -config EXT4_FS - tristate "The Extended 4 (ext4) filesystem" +config EXT4DEV_FS + tristate "Ext4dev/ext4 extended fs support development (EXPERIMENTAL)" + depends on EXPERIMENTAL select JBD2 select CRC16 help - This is the next generation of the ext3 filesystem. + Ext4dev is a predecessor filesystem of the next generation + extended fs ext4, based on ext3 filesystem code. It will be + renamed ext4 fs later, once ext4dev is mature and stabilized. Unlike the change from ext2 filesystem to ext3 filesystem, - the on-disk format of ext4 is not forwards compatible with - ext3; it is based on extent maps and it supports 48-bit - physical block numbers. The ext4 filesystem also supports delayed - allocation, persistent preallocation, high resolution time stamps, - and a number of other features to improve performance and speed - up fsck time. For more information, please see the web pages at - http://ext4.wiki.kernel.org. - - The ext4 filesystem will support mounting an ext3 - filesystem; while there will be some performance gains from - the delayed allocation and inode table readahead, the best - performance gains will require enabling ext4 features in the - filesystem, or formating a new filesystem as an ext4 - filesystem initially. + the on-disk format of ext4dev is not the same as ext3 any more: + it is based on extent maps and it supports 48-bit physical block + numbers. These combined on-disk format changes will allow + ext4dev/ext4 to handle more than 16 TB filesystem volumes -- + a hard limit that ext3 cannot overcome without changing the + on-disk format. + + Other than extent maps and 48-bit block numbers, ext4dev also is + likely to have other new features such as persistent preallocation, + high resolution time stamps, and larger file support etc. These + features will be added to ext4dev gradually. To compile this file system support as a module, choose M here. The module will be called ext4dev. If unsure, say N. -config EXT4DEV_COMPAT - bool "Enable ext4dev compatibility" - depends on EXT4_FS - help - Starting with 2.6.28, the name of the ext4 filesystem was - renamed from ext4dev to ext4. Unfortunately there are some - lagecy userspace programs (such as klibc's fstype) have - "ext4dev" hardcoded. - - To enable backwards compatibility so that systems that are - still expecting to mount ext4 filesystems using ext4dev, - chose Y here. This feature will go away by 2.6.31, so - please arrange to get your userspace programs fixed! - -config EXT4_FS_XATTR - bool "Ext4 extended attributes" - depends on EXT4_FS +config EXT4DEV_FS_XATTR + bool "Ext4dev extended attributes" + depends on EXT4DEV_FS default y help Extended attributes are name:value pairs associated with inodes by @@ -189,11 +175,11 @@ config EXT4_FS_XATTR If unsure, say N. - You need this for POSIX ACL support on ext4. + You need this for POSIX ACL support on ext4dev/ext4. -config EXT4_FS_POSIX_ACL - bool "Ext4 POSIX Access Control Lists" - depends on EXT4_FS_XATTR +config EXT4DEV_FS_POSIX_ACL + bool "Ext4dev POSIX Access Control Lists" + depends on EXT4DEV_FS_XATTR select FS_POSIX_ACL help POSIX Access Control Lists (ACLs) support permissions for users and @@ -204,14 +190,14 @@ config EXT4_FS_POSIX_ACL If you don't know what Access Control Lists are, say N -config EXT4_FS_SECURITY - bool "Ext4 Security Labels" - depends on EXT4_FS_XATTR +config EXT4DEV_FS_SECURITY + bool "Ext4dev Security Labels" + depends on EXT4DEV_FS_XATTR help Security labels support alternative access control models implemented by security modules like SELinux. This option enables an extended attribute handler for file security - labels in the ext4 filesystem. + labels in the ext4dev/ext4 filesystem. If you are not using a security module that requires using extended attributes for file security labels, say N. @@ -254,22 +240,22 @@ config JBD2 help This is a generic journaling layer for block devices that support both 32-bit and 64-bit block numbers. It is currently used by - the ext4 filesystem, but it could also be used to add + the ext4dev/ext4 filesystem, but it could also be used to add journal support to other file systems or block devices such as RAID or LVM. - If you are using ext4, you need to say Y here. If you are not - using ext4 then you will probably want to say N. + If you are using ext4dev/ext4, you need to say Y here. If you are not + using ext4dev/ext4 then you will probably want to say N. To compile this device as a module, choose M here. The module will be - called jbd2. If you are compiling ext4 into the kernel, + called jbd2. If you are compiling ext4dev/ext4 into the kernel, you cannot compile this code as a module. config JBD2_DEBUG - bool "JBD2 (ext4) debugging support" + bool "JBD2 (ext4dev/ext4) debugging support" depends on JBD2 && DEBUG_FS help - If you are using the ext4 journaled file system (or + If you are using the ext4dev/ext4 journaled file system (or potentially any other filesystem/device using JBD2), this option allows you to enable debugging output while the system is running, in order to help track down any problems you are having. @@ -284,9 +270,9 @@ config JBD2_DEBUG config FS_MBCACHE # Meta block cache for Extended Attributes (ext2/ext3/ext4) tristate - depends on EXT2_FS_XATTR || EXT3_FS_XATTR || EXT4_FS_XATTR - default y if EXT2_FS=y || EXT3_FS=y || EXT4_FS=y - default m if EXT2_FS=m || EXT3_FS=m || EXT4_FS=m + depends on EXT2_FS_XATTR || EXT3_FS_XATTR || EXT4DEV_FS_XATTR + default y if EXT2_FS=y || EXT3_FS=y || EXT4DEV_FS=y + default m if EXT2_FS=m || EXT3_FS=m || EXT4DEV_FS=m config REISERFS_FS tristate "Reiserfs support" diff --git a/trunk/fs/Makefile b/trunk/fs/Makefile index de404b00eb0c..a1482a5eff15 100644 --- a/trunk/fs/Makefile +++ b/trunk/fs/Makefile @@ -69,7 +69,7 @@ obj-$(CONFIG_DLM) += dlm/ # Do not add any filesystems before this line obj-$(CONFIG_REISERFS_FS) += reiserfs/ obj-$(CONFIG_EXT3_FS) += ext3/ # Before ext2 so root fs can be ext3 -obj-$(CONFIG_EXT4_FS) += ext4/ # Before ext2 so root fs can be ext4dev +obj-$(CONFIG_EXT4DEV_FS) += ext4/ # Before ext2 so root fs can be ext4dev obj-$(CONFIG_JBD) += jbd/ obj-$(CONFIG_JBD2) += jbd2/ obj-$(CONFIG_EXT2_FS) += ext2/ diff --git a/trunk/fs/ext2/ext2.h b/trunk/fs/ext2/ext2.h index bae998c1e44e..47d88da2d33b 100644 --- a/trunk/fs/ext2/ext2.h +++ b/trunk/fs/ext2/ext2.h @@ -133,8 +133,6 @@ extern void ext2_truncate (struct inode *); extern int ext2_setattr (struct dentry *, struct iattr *); extern void ext2_set_inode_flags(struct inode *inode); extern void ext2_get_inode_flags(struct ext2_inode_info *); -extern int ext2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, - u64 start, u64 len); int __ext2_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata); diff --git a/trunk/fs/ext2/file.c b/trunk/fs/ext2/file.c index 45ed07122182..5f2fa9c36293 100644 --- a/trunk/fs/ext2/file.c +++ b/trunk/fs/ext2/file.c @@ -86,5 +86,4 @@ const struct inode_operations ext2_file_inode_operations = { #endif .setattr = ext2_setattr, .permission = ext2_permission, - .fiemap = ext2_fiemap, }; diff --git a/trunk/fs/ext2/inode.c b/trunk/fs/ext2/inode.c index 7658b33e2653..991d6dfeb51f 100644 --- a/trunk/fs/ext2/inode.c +++ b/trunk/fs/ext2/inode.c @@ -31,7 +31,6 @@ #include #include #include -#include #include "ext2.h" #include "acl.h" #include "xip.h" @@ -705,13 +704,6 @@ int ext2_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_ } -int ext2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, - u64 start, u64 len) -{ - return generic_block_fiemap(inode, fieinfo, start, len, - ext2_get_block); -} - static int ext2_writepage(struct page *page, struct writeback_control *wbc) { return block_write_full_page(page, ext2_get_block, wbc); diff --git a/trunk/fs/ext3/file.c b/trunk/fs/ext3/file.c index 3be1e0689c9a..acc4913d3019 100644 --- a/trunk/fs/ext3/file.c +++ b/trunk/fs/ext3/file.c @@ -134,6 +134,5 @@ const struct inode_operations ext3_file_inode_operations = { .removexattr = generic_removexattr, #endif .permission = ext3_permission, - .fiemap = ext3_fiemap, }; diff --git a/trunk/fs/ext3/inode.c b/trunk/fs/ext3/inode.c index ebfec4d0148e..507d8689b111 100644 --- a/trunk/fs/ext3/inode.c +++ b/trunk/fs/ext3/inode.c @@ -36,7 +36,6 @@ #include #include #include -#include #include "xattr.h" #include "acl.h" @@ -982,13 +981,6 @@ static int ext3_get_block(struct inode *inode, sector_t iblock, return ret; } -int ext3_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, - u64 start, u64 len) -{ - return generic_block_fiemap(inode, fieinfo, start, len, - ext3_get_block); -} - /* * `handle' can be NULL if create is zero */ diff --git a/trunk/fs/ext4/Makefile b/trunk/fs/ext4/Makefile index a8ff003a00f7..ac6fa8ca0a2f 100644 --- a/trunk/fs/ext4/Makefile +++ b/trunk/fs/ext4/Makefile @@ -2,12 +2,12 @@ # Makefile for the linux ext4-filesystem routines. # -obj-$(CONFIG_EXT4_FS) += ext4.o +obj-$(CONFIG_EXT4DEV_FS) += ext4dev.o -ext4-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ +ext4dev-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \ ext4_jbd2.o migrate.o mballoc.o -ext4-$(CONFIG_EXT4_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o -ext4-$(CONFIG_EXT4_FS_POSIX_ACL) += acl.o -ext4-$(CONFIG_EXT4_FS_SECURITY) += xattr_security.o +ext4dev-$(CONFIG_EXT4DEV_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o +ext4dev-$(CONFIG_EXT4DEV_FS_POSIX_ACL) += acl.o +ext4dev-$(CONFIG_EXT4DEV_FS_SECURITY) += xattr_security.o diff --git a/trunk/fs/ext4/acl.h b/trunk/fs/ext4/acl.h index cb45257a246e..cd2b855a07d6 100644 --- a/trunk/fs/ext4/acl.h +++ b/trunk/fs/ext4/acl.h @@ -51,18 +51,18 @@ static inline int ext4_acl_count(size_t size) } } -#ifdef CONFIG_EXT4_FS_POSIX_ACL +#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL /* Value for inode->u.ext4_i.i_acl and inode->u.ext4_i.i_default_acl if the ACL has not been cached */ #define EXT4_ACL_NOT_CACHED ((void *)-1) /* acl.c */ -extern int ext4_permission(struct inode *, int); -extern int ext4_acl_chmod(struct inode *); -extern int ext4_init_acl(handle_t *, struct inode *, struct inode *); +extern int ext4_permission (struct inode *, int); +extern int ext4_acl_chmod (struct inode *); +extern int ext4_init_acl (handle_t *, struct inode *, struct inode *); -#else /* CONFIG_EXT4_FS_POSIX_ACL */ +#else /* CONFIG_EXT4DEV_FS_POSIX_ACL */ #include #define ext4_permission NULL @@ -77,5 +77,5 @@ ext4_init_acl(handle_t *handle, struct inode *inode, struct inode *dir) { return 0; } -#endif /* CONFIG_EXT4_FS_POSIX_ACL */ +#endif /* CONFIG_EXT4DEV_FS_POSIX_ACL */ diff --git a/trunk/fs/ext4/balloc.c b/trunk/fs/ext4/balloc.c index bd2ece228827..e9fa960ba6da 100644 --- a/trunk/fs/ext4/balloc.c +++ b/trunk/fs/ext4/balloc.c @@ -83,7 +83,6 @@ static int ext4_group_used_meta_blocks(struct super_block *sb, } return used_blocks; } - /* Initializes an uninitialized block bitmap if given, and returns the * number of blocks free in the group. */ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, @@ -133,7 +132,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, */ group_blocks = ext4_blocks_count(sbi->s_es) - le32_to_cpu(sbi->s_es->s_first_data_block) - - (EXT4_BLOCKS_PER_GROUP(sb) * (sbi->s_groups_count - 1)); + (EXT4_BLOCKS_PER_GROUP(sb) * (sbi->s_groups_count -1)); } else { group_blocks = EXT4_BLOCKS_PER_GROUP(sb); } @@ -201,20 +200,20 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, * @bh: pointer to the buffer head to store the block * group descriptor */ -struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb, +struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb, ext4_group_t block_group, - struct buffer_head **bh) + struct buffer_head ** bh) { unsigned long group_desc; unsigned long offset; - struct ext4_group_desc *desc; + struct ext4_group_desc * desc; struct ext4_sb_info *sbi = EXT4_SB(sb); if (block_group >= sbi->s_groups_count) { - ext4_error(sb, "ext4_get_group_desc", - "block_group >= groups_count - " - "block_group = %lu, groups_count = %lu", - block_group, sbi->s_groups_count); + ext4_error (sb, "ext4_get_group_desc", + "block_group >= groups_count - " + "block_group = %lu, groups_count = %lu", + block_group, sbi->s_groups_count); return NULL; } @@ -223,10 +222,10 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb, group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb); offset = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1); if (!sbi->s_group_desc[group_desc]) { - ext4_error(sb, "ext4_get_group_desc", - "Group descriptor not loaded - " - "block_group = %lu, group_desc = %lu, desc = %lu", - block_group, group_desc, offset); + ext4_error (sb, "ext4_get_group_desc", + "Group descriptor not loaded - " + "block_group = %lu, group_desc = %lu, desc = %lu", + block_group, group_desc, offset); return NULL; } @@ -303,8 +302,8 @@ static int ext4_valid_block_bitmap(struct super_block *sb, struct buffer_head * ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group) { - struct ext4_group_desc *desc; - struct buffer_head *bh = NULL; + struct ext4_group_desc * desc; + struct buffer_head * bh = NULL; ext4_fsblk_t bitmap_blk; desc = ext4_get_group_desc(sb, block_group, NULL); @@ -319,11 +318,9 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group) block_group, bitmap_blk); return NULL; } - if (buffer_uptodate(bh) && - !(desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) + if (bh_uptodate_or_lock(bh)) return bh; - lock_buffer(bh); spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group)); if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { ext4_init_block_bitmap(sb, bh, block_group, desc); @@ -348,6 +345,301 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group) */ return bh; } +/* + * The reservation window structure operations + * -------------------------------------------- + * Operations include: + * dump, find, add, remove, is_empty, find_next_reservable_window, etc. + * + * We use a red-black tree to represent per-filesystem reservation + * windows. + * + */ + +/** + * __rsv_window_dump() -- Dump the filesystem block allocation reservation map + * @rb_root: root of per-filesystem reservation rb tree + * @verbose: verbose mode + * @fn: function which wishes to dump the reservation map + * + * If verbose is turned on, it will print the whole block reservation + * windows(start, end). Otherwise, it will only print out the "bad" windows, + * those windows that overlap with their immediate neighbors. + */ +#if 1 +static void __rsv_window_dump(struct rb_root *root, int verbose, + const char *fn) +{ + struct rb_node *n; + struct ext4_reserve_window_node *rsv, *prev; + int bad; + +restart: + n = rb_first(root); + bad = 0; + prev = NULL; + + printk("Block Allocation Reservation Windows Map (%s):\n", fn); + while (n) { + rsv = rb_entry(n, struct ext4_reserve_window_node, rsv_node); + if (verbose) + printk("reservation window 0x%p " + "start: %llu, end: %llu\n", + rsv, rsv->rsv_start, rsv->rsv_end); + if (rsv->rsv_start && rsv->rsv_start >= rsv->rsv_end) { + printk("Bad reservation %p (start >= end)\n", + rsv); + bad = 1; + } + if (prev && prev->rsv_end >= rsv->rsv_start) { + printk("Bad reservation %p (prev->end >= start)\n", + rsv); + bad = 1; + } + if (bad) { + if (!verbose) { + printk("Restarting reservation walk in verbose mode\n"); + verbose = 1; + goto restart; + } + } + n = rb_next(n); + prev = rsv; + } + printk("Window map complete.\n"); + BUG_ON(bad); +} +#define rsv_window_dump(root, verbose) \ + __rsv_window_dump((root), (verbose), __func__) +#else +#define rsv_window_dump(root, verbose) do {} while (0) +#endif + +/** + * goal_in_my_reservation() + * @rsv: inode's reservation window + * @grp_goal: given goal block relative to the allocation block group + * @group: the current allocation block group + * @sb: filesystem super block + * + * Test if the given goal block (group relative) is within the file's + * own block reservation window range. + * + * If the reservation window is outside the goal allocation group, return 0; + * grp_goal (given goal block) could be -1, which means no specific + * goal block. In this case, always return 1. + * If the goal block is within the reservation window, return 1; + * otherwise, return 0; + */ +static int +goal_in_my_reservation(struct ext4_reserve_window *rsv, ext4_grpblk_t grp_goal, + ext4_group_t group, struct super_block *sb) +{ + ext4_fsblk_t group_first_block, group_last_block; + + group_first_block = ext4_group_first_block_no(sb, group); + group_last_block = group_first_block + (EXT4_BLOCKS_PER_GROUP(sb) - 1); + + if ((rsv->_rsv_start > group_last_block) || + (rsv->_rsv_end < group_first_block)) + return 0; + if ((grp_goal >= 0) && ((grp_goal + group_first_block < rsv->_rsv_start) + || (grp_goal + group_first_block > rsv->_rsv_end))) + return 0; + return 1; +} + +/** + * search_reserve_window() + * @rb_root: root of reservation tree + * @goal: target allocation block + * + * Find the reserved window which includes the goal, or the previous one + * if the goal is not in any window. + * Returns NULL if there are no windows or if all windows start after the goal. + */ +static struct ext4_reserve_window_node * +search_reserve_window(struct rb_root *root, ext4_fsblk_t goal) +{ + struct rb_node *n = root->rb_node; + struct ext4_reserve_window_node *rsv; + + if (!n) + return NULL; + + do { + rsv = rb_entry(n, struct ext4_reserve_window_node, rsv_node); + + if (goal < rsv->rsv_start) + n = n->rb_left; + else if (goal > rsv->rsv_end) + n = n->rb_right; + else + return rsv; + } while (n); + /* + * We've fallen off the end of the tree: the goal wasn't inside + * any particular node. OK, the previous node must be to one + * side of the interval containing the goal. If it's the RHS, + * we need to back up one. + */ + if (rsv->rsv_start > goal) { + n = rb_prev(&rsv->rsv_node); + rsv = rb_entry(n, struct ext4_reserve_window_node, rsv_node); + } + return rsv; +} + +/** + * ext4_rsv_window_add() -- Insert a window to the block reservation rb tree. + * @sb: super block + * @rsv: reservation window to add + * + * Must be called with rsv_lock hold. + */ +void ext4_rsv_window_add(struct super_block *sb, + struct ext4_reserve_window_node *rsv) +{ + struct rb_root *root = &EXT4_SB(sb)->s_rsv_window_root; + struct rb_node *node = &rsv->rsv_node; + ext4_fsblk_t start = rsv->rsv_start; + + struct rb_node ** p = &root->rb_node; + struct rb_node * parent = NULL; + struct ext4_reserve_window_node *this; + + while (*p) + { + parent = *p; + this = rb_entry(parent, struct ext4_reserve_window_node, rsv_node); + + if (start < this->rsv_start) + p = &(*p)->rb_left; + else if (start > this->rsv_end) + p = &(*p)->rb_right; + else { + rsv_window_dump(root, 1); + BUG(); + } + } + + rb_link_node(node, parent, p); + rb_insert_color(node, root); +} + +/** + * ext4_rsv_window_remove() -- unlink a window from the reservation rb tree + * @sb: super block + * @rsv: reservation window to remove + * + * Mark the block reservation window as not allocated, and unlink it + * from the filesystem reservation window rb tree. Must be called with + * rsv_lock hold. + */ +static void rsv_window_remove(struct super_block *sb, + struct ext4_reserve_window_node *rsv) +{ + rsv->rsv_start = EXT4_RESERVE_WINDOW_NOT_ALLOCATED; + rsv->rsv_end = EXT4_RESERVE_WINDOW_NOT_ALLOCATED; + rsv->rsv_alloc_hit = 0; + rb_erase(&rsv->rsv_node, &EXT4_SB(sb)->s_rsv_window_root); +} + +/* + * rsv_is_empty() -- Check if the reservation window is allocated. + * @rsv: given reservation window to check + * + * returns 1 if the end block is EXT4_RESERVE_WINDOW_NOT_ALLOCATED. + */ +static inline int rsv_is_empty(struct ext4_reserve_window *rsv) +{ + /* a valid reservation end block could not be 0 */ + return rsv->_rsv_end == EXT4_RESERVE_WINDOW_NOT_ALLOCATED; +} + +/** + * ext4_init_block_alloc_info() + * @inode: file inode structure + * + * Allocate and initialize the reservation window structure, and + * link the window to the ext4 inode structure at last + * + * The reservation window structure is only dynamically allocated + * and linked to ext4 inode the first time the open file + * needs a new block. So, before every ext4_new_block(s) call, for + * regular files, we should check whether the reservation window + * structure exists or not. In the latter case, this function is called. + * Fail to do so will result in block reservation being turned off for that + * open file. + * + * This function is called from ext4_get_blocks_handle(), also called + * when setting the reservation window size through ioctl before the file + * is open for write (needs block allocation). + * + * Needs down_write(i_data_sem) protection prior to call this function. + */ +void ext4_init_block_alloc_info(struct inode *inode) +{ + struct ext4_inode_info *ei = EXT4_I(inode); + struct ext4_block_alloc_info *block_i = ei->i_block_alloc_info; + struct super_block *sb = inode->i_sb; + + block_i = kmalloc(sizeof(*block_i), GFP_NOFS); + if (block_i) { + struct ext4_reserve_window_node *rsv = &block_i->rsv_window_node; + + rsv->rsv_start = EXT4_RESERVE_WINDOW_NOT_ALLOCATED; + rsv->rsv_end = EXT4_RESERVE_WINDOW_NOT_ALLOCATED; + + /* + * if filesystem is mounted with NORESERVATION, the goal + * reservation window size is set to zero to indicate + * block reservation is off + */ + if (!test_opt(sb, RESERVATION)) + rsv->rsv_goal_size = 0; + else + rsv->rsv_goal_size = EXT4_DEFAULT_RESERVE_BLOCKS; + rsv->rsv_alloc_hit = 0; + block_i->last_alloc_logical_block = 0; + block_i->last_alloc_physical_block = 0; + } + ei->i_block_alloc_info = block_i; +} + +/** + * ext4_discard_reservation() + * @inode: inode + * + * Discard(free) block reservation window on last file close, or truncate + * or at last iput(). + * + * It is being called in three cases: + * ext4_release_file(): last writer close the file + * ext4_clear_inode(): last iput(), when nobody link to this file. + * ext4_truncate(): when the block indirect map is about to change. + * + */ +void ext4_discard_reservation(struct inode *inode) +{ + struct ext4_inode_info *ei = EXT4_I(inode); + struct ext4_block_alloc_info *block_i = ei->i_block_alloc_info; + struct ext4_reserve_window_node *rsv; + spinlock_t *rsv_lock = &EXT4_SB(inode->i_sb)->s_rsv_window_lock; + + ext4_mb_discard_inode_preallocations(inode); + + if (!block_i) + return; + + rsv = &block_i->rsv_window_node; + if (!rsv_is_empty(&rsv->rsv_window)) { + spin_lock(rsv_lock); + if (!rsv_is_empty(&rsv->rsv_window)) + rsv_window_remove(inode->i_sb, rsv); + spin_unlock(rsv_lock); + } +} /** * ext4_free_blocks_sb() -- Free given blocks and update quota @@ -356,13 +648,6 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group) * @block: start physcial block to free * @count: number of blocks to free * @pdquot_freed_blocks: pointer to quota - * - * XXX This function is only used by the on-line resizing code, which - * should probably be fixed up to call the mballoc variant. There - * this needs to be cleaned up later; in fact, I'm not convinced this - * is 100% correct in the face of the mballoc code. The online resizing - * code needs to be fixed up to more tightly (and correctly) interlock - * with the mballoc code. */ void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb, ext4_fsblk_t block, unsigned long count, @@ -374,8 +659,8 @@ void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb, ext4_grpblk_t bit; unsigned long i; unsigned long overflow; - struct ext4_group_desc *desc; - struct ext4_super_block *es; + struct ext4_group_desc * desc; + struct ext4_super_block * es; struct ext4_sb_info *sbi; int err = 0, ret; ext4_grpblk_t group_freed; @@ -386,13 +671,13 @@ void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb, if (block < le32_to_cpu(es->s_first_data_block) || block + count < block || block + count > ext4_blocks_count(es)) { - ext4_error(sb, "ext4_free_blocks", - "Freeing blocks not in datazone - " - "block = %llu, count = %lu", block, count); + ext4_error (sb, "ext4_free_blocks", + "Freeing blocks not in datazone - " + "block = %llu, count = %lu", block, count); goto error_return; } - ext4_debug("freeing block(s) %llu-%llu\n", block, block + count - 1); + ext4_debug ("freeing block(s) %llu-%llu\n", block, block + count - 1); do_more: overflow = 0; @@ -409,7 +694,7 @@ void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb, bitmap_bh = ext4_read_block_bitmap(sb, block_group); if (!bitmap_bh) goto error_return; - desc = ext4_get_group_desc(sb, block_group, &gd_bh); + desc = ext4_get_group_desc (sb, block_group, &gd_bh); if (!desc) goto error_return; @@ -418,10 +703,10 @@ void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb, in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) || in_range(block + count - 1, ext4_inode_table(sb, desc), sbi->s_itb_per_group)) { - ext4_error(sb, "ext4_free_blocks", - "Freeing blocks in system zones - " - "Block = %llu, count = %lu", - block, count); + ext4_error (sb, "ext4_free_blocks", + "Freeing blocks in system zones - " + "Block = %llu, count = %lu", + block, count); goto error_return; } @@ -563,7 +848,7 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, ext4_fsblk_t block, unsigned long count, int metadata) { - struct super_block *sb; + struct super_block * sb; unsigned long dquot_freed_blocks; /* this isn't the right place to decide whether block is metadata @@ -574,51 +859,747 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, sb = inode->i_sb; - ext4_mb_free_blocks(handle, inode, block, count, - metadata, &dquot_freed_blocks); + if (!test_opt(sb, MBALLOC) || !EXT4_SB(sb)->s_group_info) + ext4_free_blocks_sb(handle, sb, block, count, + &dquot_freed_blocks); + else + ext4_mb_free_blocks(handle, inode, block, count, + metadata, &dquot_freed_blocks); if (dquot_freed_blocks) DQUOT_FREE_BLOCK(inode, dquot_freed_blocks); return; } -int ext4_claim_free_blocks(struct ext4_sb_info *sbi, - s64 nblocks) +/** + * ext4_test_allocatable() + * @nr: given allocation block group + * @bh: bufferhead contains the bitmap of the given block group + * + * For ext4 allocations, we must not reuse any blocks which are + * allocated in the bitmap buffer's "last committed data" copy. This + * prevents deletes from freeing up the page for reuse until we have + * committed the delete transaction. + * + * If we didn't do this, then deleting something and reallocating it as + * data would allow the old block to be overwritten before the + * transaction committed (because we force data to disk before commit). + * This would lead to corruption if we crashed between overwriting the + * data and committing the delete. + * + * @@@ We may want to make this allocation behaviour conditional on + * data-writes at some point, and disable it for metadata allocations or + * sync-data inodes. + */ +static int ext4_test_allocatable(ext4_grpblk_t nr, struct buffer_head *bh) { - s64 free_blocks, dirty_blocks; - s64 root_blocks = 0; - struct percpu_counter *fbc = &sbi->s_freeblocks_counter; - struct percpu_counter *dbc = &sbi->s_dirtyblocks_counter; + int ret; + struct journal_head *jh = bh2jh(bh); - free_blocks = percpu_counter_read_positive(fbc); - dirty_blocks = percpu_counter_read_positive(dbc); + if (ext4_test_bit(nr, bh->b_data)) + return 0; - if (!capable(CAP_SYS_RESOURCE) && - sbi->s_resuid != current->fsuid && - (sbi->s_resgid == 0 || !in_group_p(sbi->s_resgid))) - root_blocks = ext4_r_blocks_count(sbi->s_es); + jbd_lock_bh_state(bh); + if (!jh->b_committed_data) + ret = 1; + else + ret = !ext4_test_bit(nr, jh->b_committed_data); + jbd_unlock_bh_state(bh); + return ret; +} + +/** + * bitmap_search_next_usable_block() + * @start: the starting block (group relative) of the search + * @bh: bufferhead contains the block group bitmap + * @maxblocks: the ending block (group relative) of the reservation + * + * The bitmap search --- search forward alternately through the actual + * bitmap on disk and the last-committed copy in journal, until we find a + * bit free in both bitmaps. + */ +static ext4_grpblk_t +bitmap_search_next_usable_block(ext4_grpblk_t start, struct buffer_head *bh, + ext4_grpblk_t maxblocks) +{ + ext4_grpblk_t next; + struct journal_head *jh = bh2jh(bh); + + while (start < maxblocks) { + next = ext4_find_next_zero_bit(bh->b_data, maxblocks, start); + if (next >= maxblocks) + return -1; + if (ext4_test_allocatable(next, bh)) + return next; + jbd_lock_bh_state(bh); + if (jh->b_committed_data) + start = ext4_find_next_zero_bit(jh->b_committed_data, + maxblocks, next); + jbd_unlock_bh_state(bh); + } + return -1; +} - if (free_blocks - (nblocks + root_blocks + dirty_blocks) < - EXT4_FREEBLOCKS_WATERMARK) { - free_blocks = percpu_counter_sum(fbc); - dirty_blocks = percpu_counter_sum(dbc); - if (dirty_blocks < 0) { - printk(KERN_CRIT "Dirty block accounting " - "went wrong %lld\n", - dirty_blocks); +/** + * find_next_usable_block() + * @start: the starting block (group relative) to find next + * allocatable block in bitmap. + * @bh: bufferhead contains the block group bitmap + * @maxblocks: the ending block (group relative) for the search + * + * Find an allocatable block in a bitmap. We honor both the bitmap and + * its last-committed copy (if that exists), and perform the "most + * appropriate allocation" algorithm of looking for a free block near + * the initial goal; then for a free byte somewhere in the bitmap; then + * for any free bit in the bitmap. + */ +static ext4_grpblk_t +find_next_usable_block(ext4_grpblk_t start, struct buffer_head *bh, + ext4_grpblk_t maxblocks) +{ + ext4_grpblk_t here, next; + char *p, *r; + + if (start > 0) { + /* + * The goal was occupied; search forward for a free + * block within the next XX blocks. + * + * end_goal is more or less random, but it has to be + * less than EXT4_BLOCKS_PER_GROUP. Aligning up to the + * next 64-bit boundary is simple.. + */ + ext4_grpblk_t end_goal = (start + 63) & ~63; + if (end_goal > maxblocks) + end_goal = maxblocks; + here = ext4_find_next_zero_bit(bh->b_data, end_goal, start); + if (here < end_goal && ext4_test_allocatable(here, bh)) + return here; + ext4_debug("Bit not found near goal\n"); + } + + here = start; + if (here < 0) + here = 0; + + p = ((char *)bh->b_data) + (here >> 3); + r = memscan(p, 0, ((maxblocks + 7) >> 3) - (here >> 3)); + next = (r - ((char *)bh->b_data)) << 3; + + if (next < maxblocks && next >= start && ext4_test_allocatable(next, bh)) + return next; + + /* + * The bitmap search --- search forward alternately through the actual + * bitmap and the last-committed copy until we find a bit free in + * both + */ + here = bitmap_search_next_usable_block(here, bh, maxblocks); + return here; +} + +/** + * claim_block() + * @block: the free block (group relative) to allocate + * @bh: the bufferhead containts the block group bitmap + * + * We think we can allocate this block in this bitmap. Try to set the bit. + * If that succeeds then check that nobody has allocated and then freed the + * block since we saw that is was not marked in b_committed_data. If it _was_ + * allocated and freed then clear the bit in the bitmap again and return + * zero (failure). + */ +static inline int +claim_block(spinlock_t *lock, ext4_grpblk_t block, struct buffer_head *bh) +{ + struct journal_head *jh = bh2jh(bh); + int ret; + + if (ext4_set_bit_atomic(lock, block, bh->b_data)) + return 0; + jbd_lock_bh_state(bh); + if (jh->b_committed_data && ext4_test_bit(block,jh->b_committed_data)) { + ext4_clear_bit_atomic(lock, block, bh->b_data); + ret = 0; + } else { + ret = 1; + } + jbd_unlock_bh_state(bh); + return ret; +} + +/** + * ext4_try_to_allocate() + * @sb: superblock + * @handle: handle to this transaction + * @group: given allocation block group + * @bitmap_bh: bufferhead holds the block bitmap + * @grp_goal: given target block within the group + * @count: target number of blocks to allocate + * @my_rsv: reservation window + * + * Attempt to allocate blocks within a give range. Set the range of allocation + * first, then find the first free bit(s) from the bitmap (within the range), + * and at last, allocate the blocks by claiming the found free bit as allocated. + * + * To set the range of this allocation: + * if there is a reservation window, only try to allocate block(s) from the + * file's own reservation window; + * Otherwise, the allocation range starts from the give goal block, ends at + * the block group's last block. + * + * If we failed to allocate the desired block then we may end up crossing to a + * new bitmap. In that case we must release write access to the old one via + * ext4_journal_release_buffer(), else we'll run out of credits. + */ +static ext4_grpblk_t +ext4_try_to_allocate(struct super_block *sb, handle_t *handle, + ext4_group_t group, struct buffer_head *bitmap_bh, + ext4_grpblk_t grp_goal, unsigned long *count, + struct ext4_reserve_window *my_rsv) +{ + ext4_fsblk_t group_first_block; + ext4_grpblk_t start, end; + unsigned long num = 0; + + /* we do allocation within the reservation window if we have a window */ + if (my_rsv) { + group_first_block = ext4_group_first_block_no(sb, group); + if (my_rsv->_rsv_start >= group_first_block) + start = my_rsv->_rsv_start - group_first_block; + else + /* reservation window cross group boundary */ + start = 0; + end = my_rsv->_rsv_end - group_first_block + 1; + if (end > EXT4_BLOCKS_PER_GROUP(sb)) + /* reservation window crosses group boundary */ + end = EXT4_BLOCKS_PER_GROUP(sb); + if ((start <= grp_goal) && (grp_goal < end)) + start = grp_goal; + else + grp_goal = -1; + } else { + if (grp_goal > 0) + start = grp_goal; + else + start = 0; + end = EXT4_BLOCKS_PER_GROUP(sb); + } + + BUG_ON(start > EXT4_BLOCKS_PER_GROUP(sb)); + +repeat: + if (grp_goal < 0 || !ext4_test_allocatable(grp_goal, bitmap_bh)) { + grp_goal = find_next_usable_block(start, bitmap_bh, end); + if (grp_goal < 0) + goto fail_access; + if (!my_rsv) { + int i; + + for (i = 0; i < 7 && grp_goal > start && + ext4_test_allocatable(grp_goal - 1, + bitmap_bh); + i++, grp_goal--) + ; + } + } + start = grp_goal; + + if (!claim_block(sb_bgl_lock(EXT4_SB(sb), group), + grp_goal, bitmap_bh)) { + /* + * The block was allocated by another thread, or it was + * allocated and then freed by another thread + */ + start++; + grp_goal++; + if (start >= end) + goto fail_access; + goto repeat; + } + num++; + grp_goal++; + while (num < *count && grp_goal < end + && ext4_test_allocatable(grp_goal, bitmap_bh) + && claim_block(sb_bgl_lock(EXT4_SB(sb), group), + grp_goal, bitmap_bh)) { + num++; + grp_goal++; + } + *count = num; + return grp_goal - num; +fail_access: + *count = num; + return -1; +} + +/** + * find_next_reservable_window(): + * find a reservable space within the given range. + * It does not allocate the reservation window for now: + * alloc_new_reservation() will do the work later. + * + * @search_head: the head of the searching list; + * This is not necessarily the list head of the whole filesystem + * + * We have both head and start_block to assist the search + * for the reservable space. The list starts from head, + * but we will shift to the place where start_block is, + * then start from there, when looking for a reservable space. + * + * @size: the target new reservation window size + * + * @group_first_block: the first block we consider to start + * the real search from + * + * @last_block: + * the maximum block number that our goal reservable space + * could start from. This is normally the last block in this + * group. The search will end when we found the start of next + * possible reservable space is out of this boundary. + * This could handle the cross boundary reservation window + * request. + * + * basically we search from the given range, rather than the whole + * reservation double linked list, (start_block, last_block) + * to find a free region that is of my size and has not + * been reserved. + * + */ +static int find_next_reservable_window( + struct ext4_reserve_window_node *search_head, + struct ext4_reserve_window_node *my_rsv, + struct super_block * sb, + ext4_fsblk_t start_block, + ext4_fsblk_t last_block) +{ + struct rb_node *next; + struct ext4_reserve_window_node *rsv, *prev; + ext4_fsblk_t cur; + int size = my_rsv->rsv_goal_size; + + /* TODO: make the start of the reservation window byte-aligned */ + /* cur = *start_block & ~7;*/ + cur = start_block; + rsv = search_head; + if (!rsv) + return -1; + + while (1) { + if (cur <= rsv->rsv_end) + cur = rsv->rsv_end + 1; + + /* TODO? + * in the case we could not find a reservable space + * that is what is expected, during the re-search, we could + * remember what's the largest reservable space we could have + * and return that one. + * + * For now it will fail if we could not find the reservable + * space with expected-size (or more)... + */ + if (cur > last_block) + return -1; /* fail */ + + prev = rsv; + next = rb_next(&rsv->rsv_node); + rsv = rb_entry(next,struct ext4_reserve_window_node,rsv_node); + + /* + * Reached the last reservation, we can just append to the + * previous one. + */ + if (!next) + break; + + if (cur + size <= rsv->rsv_start) { + /* + * Found a reserveable space big enough. We could + * have a reservation across the group boundary here + */ + break; } } - /* Check whether we have space after - * accounting for current dirty blocks + /* + * we come here either : + * when we reach the end of the whole list, + * and there is empty reservable space after last entry in the list. + * append it to the end of the list. + * + * or we found one reservable space in the middle of the list, + * return the reservation window that we could append to. + * succeed. */ - if (free_blocks < ((root_blocks + nblocks) + dirty_blocks)) - /* we don't have free space */ - return -ENOSPC; - /* Add the blocks to nblocks */ - percpu_counter_add(dbc, nblocks); + if ((prev != my_rsv) && (!rsv_is_empty(&my_rsv->rsv_window))) + rsv_window_remove(sb, my_rsv); + + /* + * Let's book the whole avaliable window for now. We will check the + * disk bitmap later and then, if there are free blocks then we adjust + * the window size if it's larger than requested. + * Otherwise, we will remove this node from the tree next time + * call find_next_reservable_window. + */ + my_rsv->rsv_start = cur; + my_rsv->rsv_end = cur + size - 1; + my_rsv->rsv_alloc_hit = 0; + + if (prev != my_rsv) + ext4_rsv_window_add(sb, my_rsv); + return 0; } +/** + * alloc_new_reservation()--allocate a new reservation window + * + * To make a new reservation, we search part of the filesystem + * reservation list (the list that inside the group). We try to + * allocate a new reservation window near the allocation goal, + * or the beginning of the group, if there is no goal. + * + * We first find a reservable space after the goal, then from + * there, we check the bitmap for the first free block after + * it. If there is no free block until the end of group, then the + * whole group is full, we failed. Otherwise, check if the free + * block is inside the expected reservable space, if so, we + * succeed. + * If the first free block is outside the reservable space, then + * start from the first free block, we search for next available + * space, and go on. + * + * on succeed, a new reservation will be found and inserted into the list + * It contains at least one free block, and it does not overlap with other + * reservation windows. + * + * failed: we failed to find a reservation window in this group + * + * @rsv: the reservation + * + * @grp_goal: The goal (group-relative). It is where the search for a + * free reservable space should start from. + * if we have a grp_goal(grp_goal >0 ), then start from there, + * no grp_goal(grp_goal = -1), we start from the first block + * of the group. + * + * @sb: the super block + * @group: the group we are trying to allocate in + * @bitmap_bh: the block group block bitmap + * + */ +static int alloc_new_reservation(struct ext4_reserve_window_node *my_rsv, + ext4_grpblk_t grp_goal, struct super_block *sb, + ext4_group_t group, struct buffer_head *bitmap_bh) +{ + struct ext4_reserve_window_node *search_head; + ext4_fsblk_t group_first_block, group_end_block, start_block; + ext4_grpblk_t first_free_block; + struct rb_root *fs_rsv_root = &EXT4_SB(sb)->s_rsv_window_root; + unsigned long size; + int ret; + spinlock_t *rsv_lock = &EXT4_SB(sb)->s_rsv_window_lock; + + group_first_block = ext4_group_first_block_no(sb, group); + group_end_block = group_first_block + (EXT4_BLOCKS_PER_GROUP(sb) - 1); + + if (grp_goal < 0) + start_block = group_first_block; + else + start_block = grp_goal + group_first_block; + + size = my_rsv->rsv_goal_size; + + if (!rsv_is_empty(&my_rsv->rsv_window)) { + /* + * if the old reservation is cross group boundary + * and if the goal is inside the old reservation window, + * we will come here when we just failed to allocate from + * the first part of the window. We still have another part + * that belongs to the next group. In this case, there is no + * point to discard our window and try to allocate a new one + * in this group(which will fail). we should + * keep the reservation window, just simply move on. + * + * Maybe we could shift the start block of the reservation + * window to the first block of next group. + */ + + if ((my_rsv->rsv_start <= group_end_block) && + (my_rsv->rsv_end > group_end_block) && + (start_block >= my_rsv->rsv_start)) + return -1; + + if ((my_rsv->rsv_alloc_hit > + (my_rsv->rsv_end - my_rsv->rsv_start + 1) / 2)) { + /* + * if the previously allocation hit ratio is + * greater than 1/2, then we double the size of + * the reservation window the next time, + * otherwise we keep the same size window + */ + size = size * 2; + if (size > EXT4_MAX_RESERVE_BLOCKS) + size = EXT4_MAX_RESERVE_BLOCKS; + my_rsv->rsv_goal_size= size; + } + } + + spin_lock(rsv_lock); + /* + * shift the search start to the window near the goal block + */ + search_head = search_reserve_window(fs_rsv_root, start_block); + + /* + * find_next_reservable_window() simply finds a reservable window + * inside the given range(start_block, group_end_block). + * + * To make sure the reservation window has a free bit inside it, we + * need to check the bitmap after we found a reservable window. + */ +retry: + ret = find_next_reservable_window(search_head, my_rsv, sb, + start_block, group_end_block); + + if (ret == -1) { + if (!rsv_is_empty(&my_rsv->rsv_window)) + rsv_window_remove(sb, my_rsv); + spin_unlock(rsv_lock); + return -1; + } + + /* + * On success, find_next_reservable_window() returns the + * reservation window where there is a reservable space after it. + * Before we reserve this reservable space, we need + * to make sure there is at least a free block inside this region. + * + * searching the first free bit on the block bitmap and copy of + * last committed bitmap alternatively, until we found a allocatable + * block. Search start from the start block of the reservable space + * we just found. + */ + spin_unlock(rsv_lock); + first_free_block = bitmap_search_next_usable_block( + my_rsv->rsv_start - group_first_block, + bitmap_bh, group_end_block - group_first_block + 1); + + if (first_free_block < 0) { + /* + * no free block left on the bitmap, no point + * to reserve the space. return failed. + */ + spin_lock(rsv_lock); + if (!rsv_is_empty(&my_rsv->rsv_window)) + rsv_window_remove(sb, my_rsv); + spin_unlock(rsv_lock); + return -1; /* failed */ + } + + start_block = first_free_block + group_first_block; + /* + * check if the first free block is within the + * free space we just reserved + */ + if (start_block >= my_rsv->rsv_start && start_block <= my_rsv->rsv_end) + return 0; /* success */ + /* + * if the first free bit we found is out of the reservable space + * continue search for next reservable space, + * start from where the free block is, + * we also shift the list head to where we stopped last time + */ + search_head = my_rsv; + spin_lock(rsv_lock); + goto retry; +} + +/** + * try_to_extend_reservation() + * @my_rsv: given reservation window + * @sb: super block + * @size: the delta to extend + * + * Attempt to expand the reservation window large enough to have + * required number of free blocks + * + * Since ext4_try_to_allocate() will always allocate blocks within + * the reservation window range, if the window size is too small, + * multiple blocks allocation has to stop at the end of the reservation + * window. To make this more efficient, given the total number of + * blocks needed and the current size of the window, we try to + * expand the reservation window size if necessary on a best-effort + * basis before ext4_new_blocks() tries to allocate blocks, + */ +static void try_to_extend_reservation(struct ext4_reserve_window_node *my_rsv, + struct super_block *sb, int size) +{ + struct ext4_reserve_window_node *next_rsv; + struct rb_node *next; + spinlock_t *rsv_lock = &EXT4_SB(sb)->s_rsv_window_lock; + + if (!spin_trylock(rsv_lock)) + return; + + next = rb_next(&my_rsv->rsv_node); + + if (!next) + my_rsv->rsv_end += size; + else { + next_rsv = rb_entry(next, struct ext4_reserve_window_node, rsv_node); + + if ((next_rsv->rsv_start - my_rsv->rsv_end - 1) >= size) + my_rsv->rsv_end += size; + else + my_rsv->rsv_end = next_rsv->rsv_start - 1; + } + spin_unlock(rsv_lock); +} + +/** + * ext4_try_to_allocate_with_rsv() + * @sb: superblock + * @handle: handle to this transaction + * @group: given allocation block group + * @bitmap_bh: bufferhead holds the block bitmap + * @grp_goal: given target block within the group + * @count: target number of blocks to allocate + * @my_rsv: reservation window + * @errp: pointer to store the error code + * + * This is the main function used to allocate a new block and its reservation + * window. + * + * Each time when a new block allocation is need, first try to allocate from + * its own reservation. If it does not have a reservation window, instead of + * looking for a free bit on bitmap first, then look up the reservation list to + * see if it is inside somebody else's reservation window, we try to allocate a + * reservation window for it starting from the goal first. Then do the block + * allocation within the reservation window. + * + * This will avoid keeping on searching the reservation list again and + * again when somebody is looking for a free block (without + * reservation), and there are lots of free blocks, but they are all + * being reserved. + * + * We use a red-black tree for the per-filesystem reservation list. + * + */ +static ext4_grpblk_t +ext4_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle, + ext4_group_t group, struct buffer_head *bitmap_bh, + ext4_grpblk_t grp_goal, + struct ext4_reserve_window_node * my_rsv, + unsigned long *count, int *errp) +{ + ext4_fsblk_t group_first_block, group_last_block; + ext4_grpblk_t ret = 0; + int fatal; + unsigned long num = *count; + + *errp = 0; + + /* + * Make sure we use undo access for the bitmap, because it is critical + * that we do the frozen_data COW on bitmap buffers in all cases even + * if the buffer is in BJ_Forget state in the committing transaction. + */ + BUFFER_TRACE(bitmap_bh, "get undo access for new block"); + fatal = ext4_journal_get_undo_access(handle, bitmap_bh); + if (fatal) { + *errp = fatal; + return -1; + } + + /* + * we don't deal with reservation when + * filesystem is mounted without reservation + * or the file is not a regular file + * or last attempt to allocate a block with reservation turned on failed + */ + if (my_rsv == NULL ) { + ret = ext4_try_to_allocate(sb, handle, group, bitmap_bh, + grp_goal, count, NULL); + goto out; + } + /* + * grp_goal is a group relative block number (if there is a goal) + * 0 <= grp_goal < EXT4_BLOCKS_PER_GROUP(sb) + * first block is a filesystem wide block number + * first block is the block number of the first block in this group + */ + group_first_block = ext4_group_first_block_no(sb, group); + group_last_block = group_first_block + (EXT4_BLOCKS_PER_GROUP(sb) - 1); + + /* + * Basically we will allocate a new block from inode's reservation + * window. + * + * We need to allocate a new reservation window, if: + * a) inode does not have a reservation window; or + * b) last attempt to allocate a block from existing reservation + * failed; or + * c) we come here with a goal and with a reservation window + * + * We do not need to allocate a new reservation window if we come here + * at the beginning with a goal and the goal is inside the window, or + * we don't have a goal but already have a reservation window. + * then we could go to allocate from the reservation window directly. + */ + while (1) { + if (rsv_is_empty(&my_rsv->rsv_window) || (ret < 0) || + !goal_in_my_reservation(&my_rsv->rsv_window, + grp_goal, group, sb)) { + if (my_rsv->rsv_goal_size < *count) + my_rsv->rsv_goal_size = *count; + ret = alloc_new_reservation(my_rsv, grp_goal, sb, + group, bitmap_bh); + if (ret < 0) + break; /* failed */ + + if (!goal_in_my_reservation(&my_rsv->rsv_window, + grp_goal, group, sb)) + grp_goal = -1; + } else if (grp_goal >= 0) { + int curr = my_rsv->rsv_end - + (grp_goal + group_first_block) + 1; + + if (curr < *count) + try_to_extend_reservation(my_rsv, sb, + *count - curr); + } + + if ((my_rsv->rsv_start > group_last_block) || + (my_rsv->rsv_end < group_first_block)) { + rsv_window_dump(&EXT4_SB(sb)->s_rsv_window_root, 1); + BUG(); + } + ret = ext4_try_to_allocate(sb, handle, group, bitmap_bh, + grp_goal, &num, &my_rsv->rsv_window); + if (ret >= 0) { + my_rsv->rsv_alloc_hit += num; + *count = num; + break; /* succeed */ + } + num = *count; + } +out: + if (ret >= 0) { + BUFFER_TRACE(bitmap_bh, "journal_dirty_metadata for " + "bitmap block"); + fatal = ext4_journal_dirty_metadata(handle, bitmap_bh); + if (fatal) { + *errp = fatal; + return -1; + } + return ret; + } + + BUFFER_TRACE(bitmap_bh, "journal_release_buffer"); + ext4_journal_release_buffer(handle, bitmap_bh); + return ret; +} + /** * ext4_has_free_blocks() * @sbi: in-core super block structure. @@ -629,34 +1610,29 @@ int ext4_claim_free_blocks(struct ext4_sb_info *sbi, * On success, return nblocks */ ext4_fsblk_t ext4_has_free_blocks(struct ext4_sb_info *sbi, - s64 nblocks) + ext4_fsblk_t nblocks) { - s64 free_blocks, dirty_blocks; - s64 root_blocks = 0; - struct percpu_counter *fbc = &sbi->s_freeblocks_counter; - struct percpu_counter *dbc = &sbi->s_dirtyblocks_counter; + ext4_fsblk_t free_blocks; + ext4_fsblk_t root_blocks = 0; - free_blocks = percpu_counter_read_positive(fbc); - dirty_blocks = percpu_counter_read_positive(dbc); + free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter); if (!capable(CAP_SYS_RESOURCE) && sbi->s_resuid != current->fsuid && (sbi->s_resgid == 0 || !in_group_p(sbi->s_resgid))) root_blocks = ext4_r_blocks_count(sbi->s_es); - - if (free_blocks - (nblocks + root_blocks + dirty_blocks) < - EXT4_FREEBLOCKS_WATERMARK) { - free_blocks = percpu_counter_sum(fbc); - dirty_blocks = percpu_counter_sum(dbc); - } - if (free_blocks <= (root_blocks + dirty_blocks)) +#ifdef CONFIG_SMP + if (free_blocks - root_blocks < FBC_BATCH) + free_blocks = + percpu_counter_sum_and_set(&sbi->s_freeblocks_counter); +#endif + if (free_blocks <= root_blocks) /* we don't have free space */ return 0; - - if (free_blocks - (root_blocks + dirty_blocks) < nblocks) - return free_blocks - (root_blocks + dirty_blocks); + if (free_blocks - root_blocks < nblocks) + return free_blocks - root_blocks; return nblocks; -} + } /** @@ -681,6 +1657,303 @@ int ext4_should_retry_alloc(struct super_block *sb, int *retries) return jbd2_journal_force_commit_nested(EXT4_SB(sb)->s_journal); } +/** + * ext4_old_new_blocks() -- core block bitmap based block allocation function + * + * @handle: handle to this transaction + * @inode: file inode + * @goal: given target block(filesystem wide) + * @count: target number of blocks to allocate + * @errp: error code + * + * ext4_old_new_blocks uses a goal block to assist allocation and look up + * the block bitmap directly to do block allocation. It tries to + * allocate block(s) from the block group contains the goal block first. If + * that fails, it will try to allocate block(s) from other block groups + * without any specific goal block. + * + * This function is called when -o nomballoc mount option is enabled + * + */ +ext4_fsblk_t ext4_old_new_blocks(handle_t *handle, struct inode *inode, + ext4_fsblk_t goal, unsigned long *count, int *errp) +{ + struct buffer_head *bitmap_bh = NULL; + struct buffer_head *gdp_bh; + ext4_group_t group_no; + ext4_group_t goal_group; + ext4_grpblk_t grp_target_blk; /* blockgroup relative goal block */ + ext4_grpblk_t grp_alloc_blk; /* blockgroup-relative allocated block*/ + ext4_fsblk_t ret_block; /* filesyetem-wide allocated block */ + ext4_group_t bgi; /* blockgroup iteration index */ + int fatal = 0, err; + int performed_allocation = 0; + ext4_grpblk_t free_blocks; /* number of free blocks in a group */ + struct super_block *sb; + struct ext4_group_desc *gdp; + struct ext4_super_block *es; + struct ext4_sb_info *sbi; + struct ext4_reserve_window_node *my_rsv = NULL; + struct ext4_block_alloc_info *block_i; + unsigned short windowsz = 0; + ext4_group_t ngroups; + unsigned long num = *count; + + sb = inode->i_sb; + if (!sb) { + *errp = -ENODEV; + printk("ext4_new_block: nonexistent device"); + return 0; + } + + sbi = EXT4_SB(sb); + if (!EXT4_I(inode)->i_delalloc_reserved_flag) { + /* + * With delalloc we already reserved the blocks + */ + *count = ext4_has_free_blocks(sbi, *count); + } + if (*count == 0) { + *errp = -ENOSPC; + return 0; /*return with ENOSPC error */ + } + num = *count; + + /* + * Check quota for allocation of this block. + */ + if (DQUOT_ALLOC_BLOCK(inode, num)) { + *errp = -EDQUOT; + return 0; + } + + sbi = EXT4_SB(sb); + es = EXT4_SB(sb)->s_es; + ext4_debug("goal=%llu.\n", goal); + /* + * Allocate a block from reservation only when + * filesystem is mounted with reservation(default,-o reservation), and + * it's a regular file, and + * the desired window size is greater than 0 (One could use ioctl + * command EXT4_IOC_SETRSVSZ to set the window size to 0 to turn off + * reservation on that particular file) + */ + block_i = EXT4_I(inode)->i_block_alloc_info; + if (block_i && ((windowsz = block_i->rsv_window_node.rsv_goal_size) > 0)) + my_rsv = &block_i->rsv_window_node; + + /* + * First, test whether the goal block is free. + */ + if (goal < le32_to_cpu(es->s_first_data_block) || + goal >= ext4_blocks_count(es)) + goal = le32_to_cpu(es->s_first_data_block); + ext4_get_group_no_and_offset(sb, goal, &group_no, &grp_target_blk); + goal_group = group_no; +retry_alloc: + gdp = ext4_get_group_desc(sb, group_no, &gdp_bh); + if (!gdp) + goto io_error; + + free_blocks = le16_to_cpu(gdp->bg_free_blocks_count); + /* + * if there is not enough free blocks to make a new resevation + * turn off reservation for this allocation + */ + if (my_rsv && (free_blocks < windowsz) + && (rsv_is_empty(&my_rsv->rsv_window))) + my_rsv = NULL; + + if (free_blocks > 0) { + bitmap_bh = ext4_read_block_bitmap(sb, group_no); + if (!bitmap_bh) + goto io_error; + grp_alloc_blk = ext4_try_to_allocate_with_rsv(sb, handle, + group_no, bitmap_bh, grp_target_blk, + my_rsv, &num, &fatal); + if (fatal) + goto out; + if (grp_alloc_blk >= 0) + goto allocated; + } + + ngroups = EXT4_SB(sb)->s_groups_count; + smp_rmb(); + + /* + * Now search the rest of the groups. We assume that + * group_no and gdp correctly point to the last group visited. + */ + for (bgi = 0; bgi < ngroups; bgi++) { + group_no++; + if (group_no >= ngroups) + group_no = 0; + gdp = ext4_get_group_desc(sb, group_no, &gdp_bh); + if (!gdp) + goto io_error; + free_blocks = le16_to_cpu(gdp->bg_free_blocks_count); + /* + * skip this group if the number of + * free blocks is less than half of the reservation + * window size. + */ + if (free_blocks <= (windowsz/2)) + continue; + + brelse(bitmap_bh); + bitmap_bh = ext4_read_block_bitmap(sb, group_no); + if (!bitmap_bh) + goto io_error; + /* + * try to allocate block(s) from this group, without a goal(-1). + */ + grp_alloc_blk = ext4_try_to_allocate_with_rsv(sb, handle, + group_no, bitmap_bh, -1, my_rsv, + &num, &fatal); + if (fatal) + goto out; + if (grp_alloc_blk >= 0) + goto allocated; + } + /* + * We may end up a bogus ealier ENOSPC error due to + * filesystem is "full" of reservations, but + * there maybe indeed free blocks avaliable on disk + * In this case, we just forget about the reservations + * just do block allocation as without reservations. + */ + if (my_rsv) { + my_rsv = NULL; + windowsz = 0; + group_no = goal_group; + goto retry_alloc; + } + /* No space left on the device */ + *errp = -ENOSPC; + goto out; + +allocated: + + ext4_debug("using block group %lu(%d)\n", + group_no, gdp->bg_free_blocks_count); + + BUFFER_TRACE(gdp_bh, "get_write_access"); + fatal = ext4_journal_get_write_access(handle, gdp_bh); + if (fatal) + goto out; + + ret_block = grp_alloc_blk + ext4_group_first_block_no(sb, group_no); + + if (in_range(ext4_block_bitmap(sb, gdp), ret_block, num) || + in_range(ext4_inode_bitmap(sb, gdp), ret_block, num) || + in_range(ret_block, ext4_inode_table(sb, gdp), + EXT4_SB(sb)->s_itb_per_group) || + in_range(ret_block + num - 1, ext4_inode_table(sb, gdp), + EXT4_SB(sb)->s_itb_per_group)) { + ext4_error(sb, "ext4_new_block", + "Allocating block in system zone - " + "blocks from %llu, length %lu", + ret_block, num); + /* + * claim_block marked the blocks we allocated + * as in use. So we may want to selectively + * mark some of the blocks as free + */ + goto retry_alloc; + } + + performed_allocation = 1; + +#ifdef CONFIG_JBD2_DEBUG + { + struct buffer_head *debug_bh; + + /* Record bitmap buffer state in the newly allocated block */ + debug_bh = sb_find_get_block(sb, ret_block); + if (debug_bh) { + BUFFER_TRACE(debug_bh, "state when allocated"); + BUFFER_TRACE2(debug_bh, bitmap_bh, "bitmap state"); + brelse(debug_bh); + } + } + jbd_lock_bh_state(bitmap_bh); + spin_lock(sb_bgl_lock(sbi, group_no)); + if (buffer_jbd(bitmap_bh) && bh2jh(bitmap_bh)->b_committed_data) { + int i; + + for (i = 0; i < num; i++) { + if (ext4_test_bit(grp_alloc_blk+i, + bh2jh(bitmap_bh)->b_committed_data)) { + printk("%s: block was unexpectedly set in " + "b_committed_data\n", __func__); + } + } + } + ext4_debug("found bit %d\n", grp_alloc_blk); + spin_unlock(sb_bgl_lock(sbi, group_no)); + jbd_unlock_bh_state(bitmap_bh); +#endif + + if (ret_block + num - 1 >= ext4_blocks_count(es)) { + ext4_error(sb, "ext4_new_block", + "block(%llu) >= blocks count(%llu) - " + "block_group = %lu, es == %p ", ret_block, + ext4_blocks_count(es), group_no, es); + goto out; + } + + /* + * It is up to the caller to add the new buffer to a journal + * list of some description. We don't know in advance whether + * the caller wants to use it as metadata or data. + */ + spin_lock(sb_bgl_lock(sbi, group_no)); + if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) + gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); + le16_add_cpu(&gdp->bg_free_blocks_count, -num); + gdp->bg_checksum = ext4_group_desc_csum(sbi, group_no, gdp); + spin_unlock(sb_bgl_lock(sbi, group_no)); + if (!EXT4_I(inode)->i_delalloc_reserved_flag) + percpu_counter_sub(&sbi->s_freeblocks_counter, num); + + if (sbi->s_log_groups_per_flex) { + ext4_group_t flex_group = ext4_flex_group(sbi, group_no); + spin_lock(sb_bgl_lock(sbi, flex_group)); + sbi->s_flex_groups[flex_group].free_blocks -= num; + spin_unlock(sb_bgl_lock(sbi, flex_group)); + } + + BUFFER_TRACE(gdp_bh, "journal_dirty_metadata for group descriptor"); + err = ext4_journal_dirty_metadata(handle, gdp_bh); + if (!fatal) + fatal = err; + + sb->s_dirt = 1; + if (fatal) + goto out; + + *errp = 0; + brelse(bitmap_bh); + DQUOT_FREE_BLOCK(inode, *count-num); + *count = num; + return ret_block; + +io_error: + *errp = -EIO; +out: + if (fatal) { + *errp = fatal; + ext4_std_error(sb, fatal); + } + /* + * Undo the block allocation + */ + if (!performed_allocation) + DQUOT_FREE_BLOCK(inode, *count); + brelse(bitmap_bh); + return 0; +} + #define EXT4_META_BLOCK 0x1 static ext4_fsblk_t do_blk_alloc(handle_t *handle, struct inode *inode, @@ -690,6 +1963,10 @@ static ext4_fsblk_t do_blk_alloc(handle_t *handle, struct inode *inode, struct ext4_allocation_request ar; ext4_fsblk_t ret; + if (!test_opt(inode->i_sb, MBALLOC)) { + return ext4_old_new_blocks(handle, inode, goal, count, errp); + } + memset(&ar, 0, sizeof(ar)); /* Fill with neighbour allocated blocks */ @@ -731,7 +2008,7 @@ ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode, /* * Account for the allocated meta blocks */ - if (!(*errp) && EXT4_I(inode)->i_delalloc_reserved_flag) { + if (!(*errp)) { spin_lock(&EXT4_I(inode)->i_block_reservation_lock); EXT4_I(inode)->i_allocated_meta_blocks += *count; spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); @@ -816,9 +2093,10 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb) bitmap_count += x; } brelse(bitmap_bh); - printk(KERN_DEBUG "ext4_count_free_blocks: stored = %llu" - ", computed = %llu, %llu\n", ext4_free_blocks_count(es), - desc_count, bitmap_count); + printk("ext4_count_free_blocks: stored = %llu" + ", computed = %llu, %llu\n", + ext4_free_blocks_count(es), + desc_count, bitmap_count); return bitmap_count; #else desc_count = 0; @@ -905,9 +2183,8 @@ unsigned long ext4_bg_num_gdb(struct super_block *sb, ext4_group_t group) if (!EXT4_HAS_INCOMPAT_FEATURE(sb,EXT4_FEATURE_INCOMPAT_META_BG) || metagroup < first_meta_bg) - return ext4_bg_num_gdb_nometa(sb, group); + return ext4_bg_num_gdb_nometa(sb,group); return ext4_bg_num_gdb_meta(sb,group); } - diff --git a/trunk/fs/ext4/bitmap.c b/trunk/fs/ext4/bitmap.c index 0a7a6663c190..d37ea6750454 100644 --- a/trunk/fs/ext4/bitmap.c +++ b/trunk/fs/ext4/bitmap.c @@ -15,17 +15,17 @@ static const int nibblemap[] = {4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0}; -unsigned long ext4_count_free(struct buffer_head *map, unsigned int numchars) +unsigned long ext4_count_free (struct buffer_head * map, unsigned int numchars) { unsigned int i; unsigned long sum = 0; if (!map) - return 0; + return (0); for (i = 0; i < numchars; i++) sum += nibblemap[map->b_data[i] & 0xf] + nibblemap[(map->b_data[i] >> 4) & 0xf]; - return sum; + return (sum); } #endif /* EXT4FS_DEBUG */ diff --git a/trunk/fs/ext4/dir.c b/trunk/fs/ext4/dir.c index 3ca6a2b7632d..ec8e33b45219 100644 --- a/trunk/fs/ext4/dir.c +++ b/trunk/fs/ext4/dir.c @@ -33,10 +33,10 @@ static unsigned char ext4_filetype_table[] = { }; static int ext4_readdir(struct file *, void *, filldir_t); -static int ext4_dx_readdir(struct file *filp, - void *dirent, filldir_t filldir); -static int ext4_release_dir(struct inode *inode, - struct file *filp); +static int ext4_dx_readdir(struct file * filp, + void * dirent, filldir_t filldir); +static int ext4_release_dir (struct inode * inode, + struct file * filp); const struct file_operations ext4_dir_operations = { .llseek = generic_file_llseek, @@ -61,12 +61,12 @@ static unsigned char get_dtype(struct super_block *sb, int filetype) } -int ext4_check_dir_entry(const char *function, struct inode *dir, - struct ext4_dir_entry_2 *de, - struct buffer_head *bh, - unsigned long offset) +int ext4_check_dir_entry (const char * function, struct inode * dir, + struct ext4_dir_entry_2 * de, + struct buffer_head * bh, + unsigned long offset) { - const char *error_msg = NULL; + const char * error_msg = NULL; const int rlen = ext4_rec_len_from_disk(de->rec_len); if (rlen < EXT4_DIR_REC_LEN(1)) @@ -82,7 +82,7 @@ int ext4_check_dir_entry(const char *function, struct inode *dir, error_msg = "inode out of bounds"; if (error_msg != NULL) - ext4_error(dir->i_sb, function, + ext4_error (dir->i_sb, function, "bad entry in directory #%lu: %s - " "offset=%lu, inode=%lu, rec_len=%d, name_len=%d", dir->i_ino, error_msg, offset, @@ -91,8 +91,8 @@ int ext4_check_dir_entry(const char *function, struct inode *dir, return error_msg == NULL ? 1 : 0; } -static int ext4_readdir(struct file *filp, - void *dirent, filldir_t filldir) +static int ext4_readdir(struct file * filp, + void * dirent, filldir_t filldir) { int error = 0; unsigned long offset; @@ -102,7 +102,6 @@ static int ext4_readdir(struct file *filp, int err; struct inode *inode = filp->f_path.dentry->d_inode; int ret = 0; - int dir_has_error = 0; sb = inode->i_sb; @@ -149,13 +148,9 @@ static int ext4_readdir(struct file *filp, * of recovering data when there's a bad sector */ if (!bh) { - if (!dir_has_error) { - ext4_error(sb, __func__, "directory #%lu " - "contains a hole at offset %Lu", - inode->i_ino, - (unsigned long long) filp->f_pos); - dir_has_error = 1; - } + ext4_error (sb, "ext4_readdir", + "directory #%lu contains a hole at offset %lu", + inode->i_ino, (unsigned long)filp->f_pos); /* corrupt size? Maybe no more blocks to read */ if (filp->f_pos > inode->i_blocks << 9) break; @@ -192,14 +187,14 @@ static int ext4_readdir(struct file *filp, while (!error && filp->f_pos < inode->i_size && offset < sb->s_blocksize) { de = (struct ext4_dir_entry_2 *) (bh->b_data + offset); - if (!ext4_check_dir_entry("ext4_readdir", inode, de, - bh, offset)) { + if (!ext4_check_dir_entry ("ext4_readdir", inode, de, + bh, offset)) { /* * On error, skip the f_pos to the next block */ filp->f_pos = (filp->f_pos | (sb->s_blocksize - 1)) + 1; - brelse(bh); + brelse (bh); ret = stored; goto out; } @@ -223,12 +218,12 @@ static int ext4_readdir(struct file *filp, break; if (version != filp->f_version) goto revalidate; - stored++; + stored ++; } filp->f_pos += ext4_rec_len_from_disk(de->rec_len); } offset = 0; - brelse(bh); + brelse (bh); } out: return ret; @@ -295,9 +290,9 @@ static void free_rb_tree_fname(struct rb_root *root) parent = rb_parent(n); fname = rb_entry(n, struct fname, rb_hash); while (fname) { - struct fname *old = fname; + struct fname * old = fname; fname = fname->next; - kfree(old); + kfree (old); } if (!parent) root->rb_node = NULL; @@ -336,7 +331,7 @@ int ext4_htree_store_dirent(struct file *dir_file, __u32 hash, struct ext4_dir_entry_2 *dirent) { struct rb_node **p, *parent = NULL; - struct fname *fname, *new_fn; + struct fname * fname, *new_fn; struct dir_private_info *info; int len; @@ -393,20 +388,19 @@ int ext4_htree_store_dirent(struct file *dir_file, __u32 hash, * for all entres on the fname linked list. (Normally there is only * one entry on the linked list, unless there are 62 bit hash collisions.) */ -static int call_filldir(struct file *filp, void *dirent, +static int call_filldir(struct file * filp, void * dirent, filldir_t filldir, struct fname *fname) { struct dir_private_info *info = filp->private_data; loff_t curr_pos; struct inode *inode = filp->f_path.dentry->d_inode; - struct super_block *sb; + struct super_block * sb; int error; sb = inode->i_sb; if (!fname) { - printk(KERN_ERR "ext4: call_filldir: called with " - "null fname?!?\n"); + printk("call_filldir: called with null fname?!?\n"); return 0; } curr_pos = hash2pos(fname->hash, fname->minor_hash); @@ -425,8 +419,8 @@ static int call_filldir(struct file *filp, void *dirent, return 0; } -static int ext4_dx_readdir(struct file *filp, - void *dirent, filldir_t filldir) +static int ext4_dx_readdir(struct file * filp, + void * dirent, filldir_t filldir) { struct dir_private_info *info = filp->private_data; struct inode *inode = filp->f_path.dentry->d_inode; @@ -517,7 +511,7 @@ static int ext4_dx_readdir(struct file *filp, return 0; } -static int ext4_release_dir(struct inode *inode, struct file *filp) +static int ext4_release_dir (struct inode * inode, struct file * filp) { if (filp->private_data) ext4_htree_free_dir_info(filp->private_data); diff --git a/trunk/fs/ext4/ext4.h b/trunk/fs/ext4/ext4.h index f46a513a5157..295003241d3d 100644 --- a/trunk/fs/ext4/ext4.h +++ b/trunk/fs/ext4/ext4.h @@ -44,9 +44,9 @@ #ifdef EXT4FS_DEBUG #define ext4_debug(f, a...) \ do { \ - printk(KERN_DEBUG "EXT4-fs DEBUG (%s, %d): %s:", \ + printk (KERN_DEBUG "EXT4-fs DEBUG (%s, %d): %s:", \ __FILE__, __LINE__, __func__); \ - printk(KERN_DEBUG f, ## a); \ + printk (KERN_DEBUG f, ## a); \ } while (0) #else #define ext4_debug(f, a...) do {} while (0) @@ -128,7 +128,7 @@ struct ext4_allocation_request { #else # define EXT4_BLOCK_SIZE(s) (EXT4_MIN_BLOCK_SIZE << (s)->s_log_block_size) #endif -#define EXT4_ADDR_PER_BLOCK(s) (EXT4_BLOCK_SIZE(s) / sizeof(__u32)) +#define EXT4_ADDR_PER_BLOCK(s) (EXT4_BLOCK_SIZE(s) / sizeof (__u32)) #ifdef __KERNEL__ # define EXT4_BLOCK_SIZE_BITS(s) ((s)->s_blocksize_bits) #else @@ -245,7 +245,7 @@ struct flex_groups { #define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */ #define EXT4_FL_USER_VISIBLE 0x000BDFFF /* User visible flags */ -#define EXT4_FL_USER_MODIFIABLE 0x000B80FF /* User modifiable flags */ +#define EXT4_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */ /* * Inode dynamic state flags @@ -291,6 +291,8 @@ struct ext4_new_group_data { #define EXT4_IOC_SETFLAGS FS_IOC_SETFLAGS #define EXT4_IOC_GETVERSION _IOR('f', 3, long) #define EXT4_IOC_SETVERSION _IOW('f', 4, long) +#define EXT4_IOC_GROUP_EXTEND _IOW('f', 7, unsigned long) +#define EXT4_IOC_GROUP_ADD _IOW('f', 8,struct ext4_new_group_input) #define EXT4_IOC_GETVERSION_OLD FS_IOC_GETVERSION #define EXT4_IOC_SETVERSION_OLD FS_IOC_SETVERSION #ifdef CONFIG_JBD2_DEBUG @@ -298,10 +300,7 @@ struct ext4_new_group_data { #endif #define EXT4_IOC_GETRSVSZ _IOR('f', 5, long) #define EXT4_IOC_SETRSVSZ _IOW('f', 6, long) -#define EXT4_IOC_GROUP_EXTEND _IOW('f', 7, unsigned long) -#define EXT4_IOC_GROUP_ADD _IOW('f', 8, struct ext4_new_group_input) -#define EXT4_IOC_MIGRATE _IO('f', 9) - /* note ioctl 11 reserved for filesystem-independent FIEMAP ioctl */ +#define EXT4_IOC_MIGRATE _IO('f', 7) /* * ioctl commands in 32 bit emulation @@ -539,6 +538,7 @@ do { \ #define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */ #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */ #define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */ +#define EXT4_MOUNT_MBALLOC 0x4000000 /* Buddy allocation support */ #define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */ /* Compatibility, for having both ext2_fs.h and ext4_fs.h included at once */ #ifndef _LINUX_EXT2_FS_H @@ -667,7 +667,7 @@ struct ext4_super_block { }; #ifdef __KERNEL__ -static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb) +static inline struct ext4_sb_info * EXT4_SB(struct super_block *sb) { return sb->s_fs_info; } @@ -725,11 +725,11 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) */ #define EXT4_HAS_COMPAT_FEATURE(sb,mask) \ - (EXT4_SB(sb)->s_es->s_feature_compat & cpu_to_le32(mask)) + ( EXT4_SB(sb)->s_es->s_feature_compat & cpu_to_le32(mask) ) #define EXT4_HAS_RO_COMPAT_FEATURE(sb,mask) \ - (EXT4_SB(sb)->s_es->s_feature_ro_compat & cpu_to_le32(mask)) + ( EXT4_SB(sb)->s_es->s_feature_ro_compat & cpu_to_le32(mask) ) #define EXT4_HAS_INCOMPAT_FEATURE(sb,mask) \ - (EXT4_SB(sb)->s_es->s_feature_incompat & cpu_to_le32(mask)) + ( EXT4_SB(sb)->s_es->s_feature_incompat & cpu_to_le32(mask) ) #define EXT4_SET_COMPAT_FEATURE(sb,mask) \ EXT4_SB(sb)->s_es->s_feature_compat |= cpu_to_le32(mask) #define EXT4_SET_RO_COMPAT_FEATURE(sb,mask) \ @@ -789,8 +789,6 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) #define EXT4_DEF_RESUID 0 #define EXT4_DEF_RESGID 0 -#define EXT4_DEF_INODE_READAHEAD_BLKS 32 - /* * Default mount options */ @@ -956,24 +954,6 @@ ext4_group_first_block_no(struct super_block *sb, ext4_group_t group_no) void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr, unsigned long *blockgrpp, ext4_grpblk_t *offsetp); -extern struct proc_dir_entry *ext4_proc_root; - -#ifdef CONFIG_PROC_FS -extern const struct file_operations ext4_ui_proc_fops; - -#define EXT4_PROC_HANDLER(name, var) \ -do { \ - proc = proc_create_data(name, mode, sbi->s_proc, \ - &ext4_ui_proc_fops, &sbi->s_##var); \ - if (proc == NULL) { \ - printk(KERN_ERR "EXT4-fs: can't create %s\n", name); \ - goto err_out; \ - } \ -} while (0) -#else -#define EXT4_PROC_HANDLER(name, var) -#endif - /* * Function prototypes */ @@ -1001,20 +981,23 @@ extern ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode, extern ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode, ext4_lblk_t iblock, ext4_fsblk_t goal, unsigned long *count, int *errp); -extern int ext4_claim_free_blocks(struct ext4_sb_info *sbi, s64 nblocks); +extern ext4_fsblk_t ext4_old_new_blocks(handle_t *handle, struct inode *inode, + ext4_fsblk_t goal, unsigned long *count, int *errp); extern ext4_fsblk_t ext4_has_free_blocks(struct ext4_sb_info *sbi, - s64 nblocks); -extern void ext4_free_blocks(handle_t *handle, struct inode *inode, + ext4_fsblk_t nblocks); +extern void ext4_free_blocks (handle_t *handle, struct inode *inode, ext4_fsblk_t block, unsigned long count, int metadata); -extern void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb, - ext4_fsblk_t block, unsigned long count, +extern void ext4_free_blocks_sb (handle_t *handle, struct super_block *sb, + ext4_fsblk_t block, unsigned long count, unsigned long *pdquot_freed_blocks); -extern ext4_fsblk_t ext4_count_free_blocks(struct super_block *); -extern void ext4_check_blocks_bitmap(struct super_block *); +extern ext4_fsblk_t ext4_count_free_blocks (struct super_block *); +extern void ext4_check_blocks_bitmap (struct super_block *); extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb, ext4_group_t block_group, struct buffer_head ** bh); extern int ext4_should_retry_alloc(struct super_block *sb, int *retries); +extern void ext4_init_block_alloc_info(struct inode *); +extern void ext4_rsv_window_add(struct super_block *sb, struct ext4_reserve_window_node *rsv); /* dir.c */ extern int ext4_check_dir_entry(const char *, struct inode *, @@ -1026,20 +1009,20 @@ extern int ext4_htree_store_dirent(struct file *dir_file, __u32 hash, extern void ext4_htree_free_dir_info(struct dir_private_info *p); /* fsync.c */ -extern int ext4_sync_file(struct file *, struct dentry *, int); +extern int ext4_sync_file (struct file *, struct dentry *, int); /* hash.c */ extern int ext4fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo); /* ialloc.c */ -extern struct inode * ext4_new_inode(handle_t *, struct inode *, int); -extern void ext4_free_inode(handle_t *, struct inode *); -extern struct inode * ext4_orphan_get(struct super_block *, unsigned long); -extern unsigned long ext4_count_free_inodes(struct super_block *); -extern unsigned long ext4_count_dirs(struct super_block *); -extern void ext4_check_inodes_bitmap(struct super_block *); -extern unsigned long ext4_count_free(struct buffer_head *, unsigned); +extern struct inode * ext4_new_inode (handle_t *, struct inode *, int); +extern void ext4_free_inode (handle_t *, struct inode *); +extern struct inode * ext4_orphan_get (struct super_block *, unsigned long); +extern unsigned long ext4_count_free_inodes (struct super_block *); +extern unsigned long ext4_count_dirs (struct super_block *); +extern void ext4_check_inodes_bitmap (struct super_block *); +extern unsigned long ext4_count_free (struct buffer_head *, unsigned); /* mballoc.c */ extern long ext4_mb_stats; @@ -1049,7 +1032,7 @@ extern int ext4_mb_release(struct super_block *); extern ext4_fsblk_t ext4_mb_new_blocks(handle_t *, struct ext4_allocation_request *, int *); extern int ext4_mb_reserve_blocks(struct super_block *, int); -extern void ext4_discard_preallocations(struct inode *); +extern void ext4_mb_discard_inode_preallocations(struct inode *); extern int __init init_ext4_mballoc(void); extern void exit_ext4_mballoc(void); extern void ext4_mb_free_blocks(handle_t *, struct inode *, @@ -1067,25 +1050,24 @@ struct buffer_head *ext4_getblk(handle_t *, struct inode *, ext4_lblk_t, int, int *); struct buffer_head *ext4_bread(handle_t *, struct inode *, ext4_lblk_t, int, int *); -int ext4_get_block(struct inode *inode, sector_t iblock, - struct buffer_head *bh_result, int create); int ext4_get_blocks_handle(handle_t *handle, struct inode *inode, ext4_lblk_t iblock, unsigned long maxblocks, struct buffer_head *bh_result, int create, int extend_disksize); extern struct inode *ext4_iget(struct super_block *, unsigned long); -extern int ext4_write_inode(struct inode *, int); -extern int ext4_setattr(struct dentry *, struct iattr *); +extern int ext4_write_inode (struct inode *, int); +extern int ext4_setattr (struct dentry *, struct iattr *); extern int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat); -extern void ext4_delete_inode(struct inode *); -extern int ext4_sync_inode(handle_t *, struct inode *); +extern void ext4_delete_inode (struct inode *); +extern int ext4_sync_inode (handle_t *, struct inode *); +extern void ext4_discard_reservation (struct inode *); extern void ext4_dirty_inode(struct inode *); extern int ext4_change_inode_journal_flag(struct inode *, int); extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *); extern int ext4_can_truncate(struct inode *inode); -extern void ext4_truncate(struct inode *); +extern void ext4_truncate (struct inode *); extern void ext4_set_inode_flags(struct inode *); extern void ext4_get_inode_flags(struct ext4_inode_info *); extern void ext4_set_aops(struct inode *inode); @@ -1098,10 +1080,11 @@ extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page); /* ioctl.c */ extern long ext4_ioctl(struct file *, unsigned int, unsigned long); -extern long ext4_compat_ioctl(struct file *, unsigned int, unsigned long); +extern long ext4_compat_ioctl (struct file *, unsigned int, unsigned long); /* migrate.c */ -extern int ext4_ext_migrate(struct inode *); +extern int ext4_ext_migrate(struct inode *, struct file *, unsigned int, + unsigned long); /* namei.c */ extern int ext4_orphan_add(handle_t *, struct inode *); extern int ext4_orphan_del(handle_t *, struct inode *); @@ -1116,14 +1099,14 @@ extern int ext4_group_extend(struct super_block *sb, ext4_fsblk_t n_blocks_count); /* super.c */ -extern void ext4_error(struct super_block *, const char *, const char *, ...) +extern void ext4_error (struct super_block *, const char *, const char *, ...) __attribute__ ((format (printf, 3, 4))); -extern void __ext4_std_error(struct super_block *, const char *, int); -extern void ext4_abort(struct super_block *, const char *, const char *, ...) +extern void __ext4_std_error (struct super_block *, const char *, int); +extern void ext4_abort (struct super_block *, const char *, const char *, ...) __attribute__ ((format (printf, 3, 4))); -extern void ext4_warning(struct super_block *, const char *, const char *, ...) +extern void ext4_warning (struct super_block *, const char *, const char *, ...) __attribute__ ((format (printf, 3, 4))); -extern void ext4_update_dynamic_rev(struct super_block *sb); +extern void ext4_update_dynamic_rev (struct super_block *sb); extern int ext4_update_compat_feature(handle_t *handle, struct super_block *sb, __u32 compat); extern int ext4_update_rocompat_feature(handle_t *handle, @@ -1196,7 +1179,7 @@ static inline void ext4_isize_set(struct ext4_inode *raw_inode, loff_t i_size) static inline struct ext4_group_info *ext4_get_group_info(struct super_block *sb, - ext4_group_t group) + ext4_group_t group) { struct ext4_group_info ***grp_info; long indexv, indexh; @@ -1224,28 +1207,6 @@ do { \ __ext4_std_error((sb), __func__, (errno)); \ } while (0) -#ifdef CONFIG_SMP -/* Each CPU can accumulate FBC_BATCH blocks in their local - * counters. So we need to make sure we have free blocks more - * than FBC_BATCH * nr_cpu_ids. Also add a window of 4 times. - */ -#define EXT4_FREEBLOCKS_WATERMARK (4 * (FBC_BATCH * nr_cpu_ids)) -#else -#define EXT4_FREEBLOCKS_WATERMARK 0 -#endif - -static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize) -{ - /* - * XXX: replace with spinlock if seen contended -bzzz - */ - down_write(&EXT4_I(inode)->i_data_sem); - if (newsize > EXT4_I(inode)->i_disksize) - EXT4_I(inode)->i_disksize = newsize; - up_write(&EXT4_I(inode)->i_data_sem); - return ; -} - /* * Inodes and files operations */ diff --git a/trunk/fs/ext4/ext4_extents.h b/trunk/fs/ext4/ext4_extents.h index bec7ce59fc0d..d33dc56d6986 100644 --- a/trunk/fs/ext4/ext4_extents.h +++ b/trunk/fs/ext4/ext4_extents.h @@ -124,19 +124,6 @@ struct ext4_ext_path { #define EXT4_EXT_CACHE_GAP 1 #define EXT4_EXT_CACHE_EXTENT 2 -/* - * to be called by ext4_ext_walk_space() - * negative retcode - error - * positive retcode - signal for ext4_ext_walk_space(), see below - * callback must return valid extent (passed or newly created) - */ -typedef int (*ext_prepare_callback)(struct inode *, struct ext4_ext_path *, - struct ext4_ext_cache *, - struct ext4_extent *, void *); - -#define EXT_CONTINUE 0 -#define EXT_BREAK 1 -#define EXT_REPEAT 2 #define EXT_MAX_BLOCK 0xffffffff @@ -237,8 +224,6 @@ extern int ext4_ext_try_to_merge(struct inode *inode, struct ext4_extent *); extern unsigned int ext4_ext_check_overlap(struct inode *, struct ext4_extent *, struct ext4_ext_path *); extern int ext4_ext_insert_extent(handle_t *, struct inode *, struct ext4_ext_path *, struct ext4_extent *); -extern int ext4_ext_walk_space(struct inode *, ext4_lblk_t, ext4_lblk_t, - ext_prepare_callback, void *); extern struct ext4_ext_path *ext4_ext_find_extent(struct inode *, ext4_lblk_t, struct ext4_ext_path *); extern int ext4_ext_search_left(struct inode *, struct ext4_ext_path *, diff --git a/trunk/fs/ext4/ext4_i.h b/trunk/fs/ext4/ext4_i.h index 5c124c0ac6d3..ef7409f0e7e4 100644 --- a/trunk/fs/ext4/ext4_i.h +++ b/trunk/fs/ext4/ext4_i.h @@ -33,6 +33,38 @@ typedef __u32 ext4_lblk_t; /* data type for block group number */ typedef unsigned long ext4_group_t; +struct ext4_reserve_window { + ext4_fsblk_t _rsv_start; /* First byte reserved */ + ext4_fsblk_t _rsv_end; /* Last byte reserved or 0 */ +}; + +struct ext4_reserve_window_node { + struct rb_node rsv_node; + __u32 rsv_goal_size; + __u32 rsv_alloc_hit; + struct ext4_reserve_window rsv_window; +}; + +struct ext4_block_alloc_info { + /* information about reservation window */ + struct ext4_reserve_window_node rsv_window_node; + /* + * was i_next_alloc_block in ext4_inode_info + * is the logical (file-relative) number of the + * most-recently-allocated block in this file. + * We use this for detecting linearly ascending allocation requests. + */ + ext4_lblk_t last_alloc_logical_block; + /* + * Was i_next_alloc_goal in ext4_inode_info + * is the *physical* companion to i_next_alloc_block. + * it the physical block number of the block which was most-recentl + * allocated to this file. This give us the goal (target) for the next + * allocation when we detect linearly ascending requests. + */ + ext4_fsblk_t last_alloc_physical_block; +}; + #define rsv_start rsv_window._rsv_start #define rsv_end rsv_window._rsv_end @@ -65,8 +97,11 @@ struct ext4_inode_info { ext4_group_t i_block_group; __u32 i_state; /* Dynamic state flags for ext4 */ + /* block reservation info */ + struct ext4_block_alloc_info *i_block_alloc_info; + ext4_lblk_t i_dir_start_lookup; -#ifdef CONFIG_EXT4_FS_XATTR +#ifdef CONFIG_EXT4DEV_FS_XATTR /* * Extended attributes can be read independently of the main file * data. Taking i_mutex even when reading would cause contention @@ -76,7 +111,7 @@ struct ext4_inode_info { */ struct rw_semaphore xattr_sem; #endif -#ifdef CONFIG_EXT4_FS_POSIX_ACL +#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL struct posix_acl *i_acl; struct posix_acl *i_default_acl; #endif diff --git a/trunk/fs/ext4/ext4_sb.h b/trunk/fs/ext4/ext4_sb.h index 6a0b40d43264..6300226d5531 100644 --- a/trunk/fs/ext4/ext4_sb.h +++ b/trunk/fs/ext4/ext4_sb.h @@ -40,8 +40,8 @@ struct ext4_sb_info { unsigned long s_blocks_last; /* Last seen block count */ loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */ struct buffer_head * s_sbh; /* Buffer containing the super block */ - struct ext4_super_block *s_es; /* Pointer to the super block in the buffer */ - struct buffer_head **s_group_desc; + struct ext4_super_block * s_es; /* Pointer to the super block in the buffer */ + struct buffer_head ** s_group_desc; unsigned long s_mount_opt; ext4_fsblk_t s_sb_block; uid_t s_resuid; @@ -52,7 +52,6 @@ struct ext4_sb_info { int s_desc_per_block_bits; int s_inode_size; int s_first_ino; - unsigned int s_inode_readahead_blks; spinlock_t s_next_gen_lock; u32 s_next_generation; u32 s_hash_seed[4]; @@ -60,17 +59,16 @@ struct ext4_sb_info { struct percpu_counter s_freeblocks_counter; struct percpu_counter s_freeinodes_counter; struct percpu_counter s_dirs_counter; - struct percpu_counter s_dirtyblocks_counter; struct blockgroup_lock s_blockgroup_lock; - struct proc_dir_entry *s_proc; /* root of the per fs reservation window tree */ spinlock_t s_rsv_window_lock; struct rb_root s_rsv_window_root; + struct ext4_reserve_window_node s_rsv_window_head; /* Journaling */ - struct inode *s_journal_inode; - struct journal_s *s_journal; + struct inode * s_journal_inode; + struct journal_s * s_journal; struct list_head s_orphan; unsigned long s_commit_interval; struct block_device *journal_bdev; @@ -108,12 +106,12 @@ struct ext4_sb_info { /* tunables */ unsigned long s_stripe; - unsigned int s_mb_stream_request; - unsigned int s_mb_max_to_scan; - unsigned int s_mb_min_to_scan; - unsigned int s_mb_stats; - unsigned int s_mb_order2_reqs; - unsigned int s_mb_group_prealloc; + unsigned long s_mb_stream_request; + unsigned long s_mb_max_to_scan; + unsigned long s_mb_min_to_scan; + unsigned long s_mb_stats; + unsigned long s_mb_order2_reqs; + unsigned long s_mb_group_prealloc; /* where last allocation was done - for stream allocation */ unsigned long s_mb_last_group; unsigned long s_mb_last_start; @@ -123,6 +121,7 @@ struct ext4_sb_info { int s_mb_history_cur; int s_mb_history_max; int s_mb_history_num; + struct proc_dir_entry *s_mb_proc; spinlock_t s_mb_history_lock; int s_mb_history_filter; diff --git a/trunk/fs/ext4/extents.c b/trunk/fs/ext4/extents.c index ea2ce3c0ae66..b24d3c53f20c 100644 --- a/trunk/fs/ext4/extents.c +++ b/trunk/fs/ext4/extents.c @@ -40,7 +40,6 @@ #include #include #include -#include #include "ext4_jbd2.h" #include "ext4_extents.h" @@ -384,8 +383,8 @@ static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path) ext_debug("\n"); } #else -#define ext4_ext_show_path(inode, path) -#define ext4_ext_show_leaf(inode, path) +#define ext4_ext_show_path(inode,path) +#define ext4_ext_show_leaf(inode,path) #endif void ext4_ext_drop_refs(struct ext4_ext_path *path) @@ -441,10 +440,9 @@ ext4_ext_binsearch_idx(struct inode *inode, for (k = 0; k < le16_to_cpu(eh->eh_entries); k++, ix++) { if (k != 0 && le32_to_cpu(ix->ei_block) <= le32_to_cpu(ix[-1].ei_block)) { - printk(KERN_DEBUG "k=%d, ix=0x%p, " - "first=0x%p\n", k, - ix, EXT_FIRST_INDEX(eh)); - printk(KERN_DEBUG "%u <= %u\n", + printk("k=%d, ix=0x%p, first=0x%p\n", k, + ix, EXT_FIRST_INDEX(eh)); + printk("%u <= %u\n", le32_to_cpu(ix->ei_block), le32_to_cpu(ix[-1].ei_block)); } @@ -1477,7 +1475,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, struct ext4_ext_path *path, struct ext4_extent *newext) { - struct ext4_extent_header *eh; + struct ext4_extent_header * eh; struct ext4_extent *ex, *fex; struct ext4_extent *nearex; /* nearest extent */ struct ext4_ext_path *npath = NULL; @@ -1627,113 +1625,6 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, return err; } -int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block, - ext4_lblk_t num, ext_prepare_callback func, - void *cbdata) -{ - struct ext4_ext_path *path = NULL; - struct ext4_ext_cache cbex; - struct ext4_extent *ex; - ext4_lblk_t next, start = 0, end = 0; - ext4_lblk_t last = block + num; - int depth, exists, err = 0; - - BUG_ON(func == NULL); - BUG_ON(inode == NULL); - - while (block < last && block != EXT_MAX_BLOCK) { - num = last - block; - /* find extent for this block */ - path = ext4_ext_find_extent(inode, block, path); - if (IS_ERR(path)) { - err = PTR_ERR(path); - path = NULL; - break; - } - - depth = ext_depth(inode); - BUG_ON(path[depth].p_hdr == NULL); - ex = path[depth].p_ext; - next = ext4_ext_next_allocated_block(path); - - exists = 0; - if (!ex) { - /* there is no extent yet, so try to allocate - * all requested space */ - start = block; - end = block + num; - } else if (le32_to_cpu(ex->ee_block) > block) { - /* need to allocate space before found extent */ - start = block; - end = le32_to_cpu(ex->ee_block); - if (block + num < end) - end = block + num; - } else if (block >= le32_to_cpu(ex->ee_block) - + ext4_ext_get_actual_len(ex)) { - /* need to allocate space after found extent */ - start = block; - end = block + num; - if (end >= next) - end = next; - } else if (block >= le32_to_cpu(ex->ee_block)) { - /* - * some part of requested space is covered - * by found extent - */ - start = block; - end = le32_to_cpu(ex->ee_block) - + ext4_ext_get_actual_len(ex); - if (block + num < end) - end = block + num; - exists = 1; - } else { - BUG(); - } - BUG_ON(end <= start); - - if (!exists) { - cbex.ec_block = start; - cbex.ec_len = end - start; - cbex.ec_start = 0; - cbex.ec_type = EXT4_EXT_CACHE_GAP; - } else { - cbex.ec_block = le32_to_cpu(ex->ee_block); - cbex.ec_len = ext4_ext_get_actual_len(ex); - cbex.ec_start = ext_pblock(ex); - cbex.ec_type = EXT4_EXT_CACHE_EXTENT; - } - - BUG_ON(cbex.ec_len == 0); - err = func(inode, path, &cbex, ex, cbdata); - ext4_ext_drop_refs(path); - - if (err < 0) - break; - - if (err == EXT_REPEAT) - continue; - else if (err == EXT_BREAK) { - err = 0; - break; - } - - if (ext_depth(inode) != depth) { - /* depth was changed. we have to realloc path */ - kfree(path); - path = NULL; - } - - block = cbex.ec_block + cbex.ec_len; - } - - if (path) { - ext4_ext_drop_refs(path); - kfree(path); - } - - return err; -} - static void ext4_ext_put_in_cache(struct inode *inode, ext4_lblk_t block, __u32 len, ext4_fsblk_t start, int type) @@ -2251,7 +2142,7 @@ void ext4_ext_init(struct super_block *sb) */ if (test_opt(sb, EXTENTS)) { - printk(KERN_INFO "EXT4-fs: file extents enabled"); + printk("EXT4-fs: file extents enabled"); #ifdef AGGRESSIVE_TEST printk(", aggressive tests"); #endif @@ -2805,8 +2696,11 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, goto out2; } /* - * Okay, we need to do block allocation. + * Okay, we need to do block allocation. Lazily initialize the block + * allocation info here if necessary. */ + if (S_ISREG(inode->i_mode) && (!EXT4_I(inode)->i_block_alloc_info)) + ext4_init_block_alloc_info(inode); /* find neighbour allocated blocks */ ar.lleft = iblock; @@ -2866,7 +2760,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, /* free data blocks we just allocated */ /* not a good idea to call discard here directly, * but otherwise we'd need to call it every free() */ - ext4_discard_preallocations(inode); + ext4_mb_discard_inode_preallocations(inode); ext4_free_blocks(handle, inode, ext_pblock(&newex), ext4_ext_get_actual_len(&newex), 0); goto out2; @@ -2930,7 +2824,7 @@ void ext4_ext_truncate(struct inode *inode) down_write(&EXT4_I(inode)->i_data_sem); ext4_ext_invalidate_cache(inode); - ext4_discard_preallocations(inode); + ext4_discard_reservation(inode); /* * TODO: optimization is possible here. @@ -2983,11 +2877,10 @@ static void ext4_falloc_update_inode(struct inode *inode, * Update only when preallocation was requested beyond * the file size. */ - if (!(mode & FALLOC_FL_KEEP_SIZE)) { - if (new_size > i_size_read(inode)) - i_size_write(inode, new_size); - if (new_size > EXT4_I(inode)->i_disksize) - ext4_update_i_disksize(inode, new_size); + if (!(mode & FALLOC_FL_KEEP_SIZE) && + new_size > i_size_read(inode)) { + i_size_write(inode, new_size); + EXT4_I(inode)->i_disksize = new_size; } } @@ -3079,143 +2972,3 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len) mutex_unlock(&inode->i_mutex); return ret > 0 ? ret2 : ret; } - -/* - * Callback function called for each extent to gather FIEMAP information. - */ -int ext4_ext_fiemap_cb(struct inode *inode, struct ext4_ext_path *path, - struct ext4_ext_cache *newex, struct ext4_extent *ex, - void *data) -{ - struct fiemap_extent_info *fieinfo = data; - unsigned long blksize_bits = inode->i_sb->s_blocksize_bits; - __u64 logical; - __u64 physical; - __u64 length; - __u32 flags = 0; - int error; - - logical = (__u64)newex->ec_block << blksize_bits; - - if (newex->ec_type == EXT4_EXT_CACHE_GAP) { - pgoff_t offset; - struct page *page; - struct buffer_head *bh = NULL; - - offset = logical >> PAGE_SHIFT; - page = find_get_page(inode->i_mapping, offset); - if (!page || !page_has_buffers(page)) - return EXT_CONTINUE; - - bh = page_buffers(page); - - if (!bh) - return EXT_CONTINUE; - - if (buffer_delay(bh)) { - flags |= FIEMAP_EXTENT_DELALLOC; - page_cache_release(page); - } else { - page_cache_release(page); - return EXT_CONTINUE; - } - } - - physical = (__u64)newex->ec_start << blksize_bits; - length = (__u64)newex->ec_len << blksize_bits; - - if (ex && ext4_ext_is_uninitialized(ex)) - flags |= FIEMAP_EXTENT_UNWRITTEN; - - /* - * If this extent reaches EXT_MAX_BLOCK, it must be last. - * - * Or if ext4_ext_next_allocated_block is EXT_MAX_BLOCK, - * this also indicates no more allocated blocks. - * - * XXX this might miss a single-block extent at EXT_MAX_BLOCK - */ - if (logical + length - 1 == EXT_MAX_BLOCK || - ext4_ext_next_allocated_block(path) == EXT_MAX_BLOCK) - flags |= FIEMAP_EXTENT_LAST; - - error = fiemap_fill_next_extent(fieinfo, logical, physical, - length, flags); - if (error < 0) - return error; - if (error == 1) - return EXT_BREAK; - - return EXT_CONTINUE; -} - -/* fiemap flags we can handle specified here */ -#define EXT4_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR) - -int ext4_xattr_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo) -{ - __u64 physical = 0; - __u64 length; - __u32 flags = FIEMAP_EXTENT_LAST; - int blockbits = inode->i_sb->s_blocksize_bits; - int error = 0; - - /* in-inode? */ - if (EXT4_I(inode)->i_state & EXT4_STATE_XATTR) { - struct ext4_iloc iloc; - int offset; /* offset of xattr in inode */ - - error = ext4_get_inode_loc(inode, &iloc); - if (error) - return error; - physical = iloc.bh->b_blocknr << blockbits; - offset = EXT4_GOOD_OLD_INODE_SIZE + - EXT4_I(inode)->i_extra_isize; - physical += offset; - length = EXT4_SB(inode->i_sb)->s_inode_size - offset; - flags |= FIEMAP_EXTENT_DATA_INLINE; - } else { /* external block */ - physical = EXT4_I(inode)->i_file_acl << blockbits; - length = inode->i_sb->s_blocksize; - } - - if (physical) - error = fiemap_fill_next_extent(fieinfo, 0, physical, - length, flags); - return (error < 0 ? error : 0); -} - -int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, - __u64 start, __u64 len) -{ - ext4_lblk_t start_blk; - ext4_lblk_t len_blks; - int error = 0; - - /* fallback to generic here if not in extents fmt */ - if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) - return generic_block_fiemap(inode, fieinfo, start, len, - ext4_get_block); - - if (fiemap_check_flags(fieinfo, EXT4_FIEMAP_FLAGS)) - return -EBADR; - - if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) { - error = ext4_xattr_fiemap(inode, fieinfo); - } else { - start_blk = start >> inode->i_sb->s_blocksize_bits; - len_blks = len >> inode->i_sb->s_blocksize_bits; - - /* - * Walk the extent tree gathering extent information. - * ext4_ext_fiemap_cb will push extents back to user. - */ - down_write(&EXT4_I(inode)->i_data_sem); - error = ext4_ext_walk_space(inode, start_blk, len_blks, - ext4_ext_fiemap_cb, fieinfo); - up_write(&EXT4_I(inode)->i_data_sem); - } - - return error; -} - diff --git a/trunk/fs/ext4/file.c b/trunk/fs/ext4/file.c index 6bd11fba71f7..430eb7978db4 100644 --- a/trunk/fs/ext4/file.c +++ b/trunk/fs/ext4/file.c @@ -31,14 +31,14 @@ * from ext4_file_open: open gets called at every open, but release * gets called only when /all/ the files are closed. */ -static int ext4_release_file(struct inode *inode, struct file *filp) +static int ext4_release_file (struct inode * inode, struct file * filp) { /* if we are the last writer on the inode, drop the block reservation */ if ((filp->f_mode & FMODE_WRITE) && (atomic_read(&inode->i_writecount) == 1)) { down_write(&EXT4_I(inode)->i_data_sem); - ext4_discard_preallocations(inode); + ext4_discard_reservation(inode); up_write(&EXT4_I(inode)->i_data_sem); } if (is_dx(inode) && filp->private_data) @@ -140,9 +140,6 @@ static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma) return 0; } -extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, - __u64 start, __u64 len); - const struct file_operations ext4_file_operations = { .llseek = generic_file_llseek, .read = do_sync_read, @@ -165,7 +162,7 @@ const struct inode_operations ext4_file_inode_operations = { .truncate = ext4_truncate, .setattr = ext4_setattr, .getattr = ext4_getattr, -#ifdef CONFIG_EXT4_FS_XATTR +#ifdef CONFIG_EXT4DEV_FS_XATTR .setxattr = generic_setxattr, .getxattr = generic_getxattr, .listxattr = ext4_listxattr, @@ -173,6 +170,5 @@ const struct inode_operations ext4_file_inode_operations = { #endif .permission = ext4_permission, .fallocate = ext4_fallocate, - .fiemap = ext4_fiemap, }; diff --git a/trunk/fs/ext4/fsync.c b/trunk/fs/ext4/fsync.c index 5afe4370840b..a45c3737ad31 100644 --- a/trunk/fs/ext4/fsync.c +++ b/trunk/fs/ext4/fsync.c @@ -28,7 +28,6 @@ #include #include #include -#include #include "ext4.h" #include "ext4_jbd2.h" @@ -44,7 +43,7 @@ * inode to disk. */ -int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync) +int ext4_sync_file(struct file * file, struct dentry *dentry, int datasync) { struct inode *inode = dentry->d_inode; journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; @@ -52,10 +51,6 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync) J_ASSERT(ext4_journal_current_handle() == NULL); - trace_mark(ext4_sync_file, "dev %s datasync %d ino %ld parent %ld", - inode->i_sb->s_id, datasync, inode->i_ino, - dentry->d_parent->d_inode->i_ino); - /* * data=writeback: * The caller's filemap_fdatawrite()/wait will sync the data. diff --git a/trunk/fs/ext4/hash.c b/trunk/fs/ext4/hash.c index 556ca8eba3db..1d6329dbe390 100644 --- a/trunk/fs/ext4/hash.c +++ b/trunk/fs/ext4/hash.c @@ -27,7 +27,7 @@ static void TEA_transform(__u32 buf[4], __u32 const in[]) sum += DELTA; b0 += ((b1 << 4)+a) ^ (b1+sum) ^ ((b1 >> 5)+b); b1 += ((b0 << 4)+c) ^ (b0+sum) ^ ((b0 >> 5)+d); - } while (--n); + } while(--n); buf[0] += b0; buf[1] += b1; @@ -35,7 +35,7 @@ static void TEA_transform(__u32 buf[4], __u32 const in[]) /* The old legacy hash */ -static __u32 dx_hack_hash(const char *name, int len) +static __u32 dx_hack_hash (const char *name, int len) { __u32 hash0 = 0x12a3fe2d, hash1 = 0x37abe8f9; while (len--) { @@ -59,7 +59,7 @@ static void str2hashbuf(const char *msg, int len, __u32 *buf, int num) val = pad; if (len > num*4) len = num * 4; - for (i = 0; i < len; i++) { + for (i=0; i < len; i++) { if ((i % 4) == 0) val = pad; val = msg[i] + (val << 8); @@ -104,7 +104,7 @@ int ext4fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo) /* Check to see if the seed is all zero's */ if (hinfo->seed) { - for (i = 0; i < 4; i++) { + for (i=0; i < 4; i++) { if (hinfo->seed[i]) break; } diff --git a/trunk/fs/ext4/ialloc.c b/trunk/fs/ext4/ialloc.c index fe34d74cfb19..f344834bbf58 100644 --- a/trunk/fs/ext4/ialloc.c +++ b/trunk/fs/ext4/ialloc.c @@ -115,11 +115,9 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) block_group, bitmap_blk); return NULL; } - if (buffer_uptodate(bh) && - !(desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT))) + if (bh_uptodate_or_lock(bh)) return bh; - lock_buffer(bh); spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group)); if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { ext4_init_inode_bitmap(sb, bh, block_group, desc); @@ -156,40 +154,39 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) * though), and then we'd have two inodes sharing the * same inode number and space on the harddisk. */ -void ext4_free_inode(handle_t *handle, struct inode *inode) +void ext4_free_inode (handle_t *handle, struct inode * inode) { - struct super_block *sb = inode->i_sb; + struct super_block * sb = inode->i_sb; int is_directory; unsigned long ino; struct buffer_head *bitmap_bh = NULL; struct buffer_head *bh2; ext4_group_t block_group; unsigned long bit; - struct ext4_group_desc *gdp; - struct ext4_super_block *es; + struct ext4_group_desc * gdp; + struct ext4_super_block * es; struct ext4_sb_info *sbi; int fatal = 0, err; ext4_group_t flex_group; if (atomic_read(&inode->i_count) > 1) { - printk(KERN_ERR "ext4_free_inode: inode has count=%d\n", - atomic_read(&inode->i_count)); + printk ("ext4_free_inode: inode has count=%d\n", + atomic_read(&inode->i_count)); return; } if (inode->i_nlink) { - printk(KERN_ERR "ext4_free_inode: inode has nlink=%d\n", - inode->i_nlink); + printk ("ext4_free_inode: inode has nlink=%d\n", + inode->i_nlink); return; } if (!sb) { - printk(KERN_ERR "ext4_free_inode: inode on " - "nonexistent device\n"); + printk("ext4_free_inode: inode on nonexistent device\n"); return; } sbi = EXT4_SB(sb); ino = inode->i_ino; - ext4_debug("freeing inode %lu\n", ino); + ext4_debug ("freeing inode %lu\n", ino); /* * Note: we must free any quota before locking the superblock, @@ -203,12 +200,12 @@ void ext4_free_inode(handle_t *handle, struct inode *inode) is_directory = S_ISDIR(inode->i_mode); /* Do this BEFORE marking the inode not in use or returning an error */ - clear_inode(inode); + clear_inode (inode); es = EXT4_SB(sb)->s_es; if (ino < EXT4_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) { - ext4_error(sb, "ext4_free_inode", - "reserved or nonexistent inode %lu", ino); + ext4_error (sb, "ext4_free_inode", + "reserved or nonexistent inode %lu", ino); goto error_return; } block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb); @@ -225,10 +222,10 @@ void ext4_free_inode(handle_t *handle, struct inode *inode) /* Ok, now we can actually update the inode bitmaps.. */ if (!ext4_clear_bit_atomic(sb_bgl_lock(sbi, block_group), bit, bitmap_bh->b_data)) - ext4_error(sb, "ext4_free_inode", - "bit already cleared for inode %lu", ino); + ext4_error (sb, "ext4_free_inode", + "bit already cleared for inode %lu", ino); else { - gdp = ext4_get_group_desc(sb, block_group, &bh2); + gdp = ext4_get_group_desc (sb, block_group, &bh2); BUFFER_TRACE(bh2, "get_write_access"); fatal = ext4_journal_get_write_access(handle, bh2); @@ -290,7 +287,7 @@ static int find_group_dir(struct super_block *sb, struct inode *parent, avefreei = freei / ngroups; for (group = 0; group < ngroups; group++) { - desc = ext4_get_group_desc(sb, group, NULL); + desc = ext4_get_group_desc (sb, group, NULL); if (!desc || !desc->bg_free_inodes_count) continue; if (le16_to_cpu(desc->bg_free_inodes_count) < avefreei) @@ -579,16 +576,16 @@ static int find_group_other(struct super_block *sb, struct inode *parent, * For other inodes, search forward from the parent directory's block * group to find a free inode. */ -struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode) +struct inode *ext4_new_inode(handle_t *handle, struct inode * dir, int mode) { struct super_block *sb; struct buffer_head *bitmap_bh = NULL; struct buffer_head *bh2; ext4_group_t group = 0; unsigned long ino = 0; - struct inode *inode; - struct ext4_group_desc *gdp = NULL; - struct ext4_super_block *es; + struct inode * inode; + struct ext4_group_desc * gdp = NULL; + struct ext4_super_block * es; struct ext4_inode_info *ei; struct ext4_sb_info *sbi; int ret2, err = 0; @@ -616,7 +613,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode) } if (S_ISDIR(mode)) { - if (test_opt(sb, OLDALLOC)) + if (test_opt (sb, OLDALLOC)) ret2 = find_group_dir(sb, dir, &group); else ret2 = find_group_orlov(sb, dir, &group); @@ -786,7 +783,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode) } inode->i_uid = current->fsuid; - if (test_opt(sb, GRPID)) + if (test_opt (sb, GRPID)) inode->i_gid = dir->i_gid; else if (dir->i_mode & S_ISGID) { inode->i_gid = dir->i_gid; @@ -819,6 +816,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode) ei->i_flags &= ~EXT4_DIRSYNC_FL; ei->i_file_acl = 0; ei->i_dtime = 0; + ei->i_block_alloc_info = NULL; ei->i_block_group = group; ext4_set_inode_flags(inode); @@ -834,7 +832,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode) ei->i_extra_isize = EXT4_SB(sb)->s_want_extra_isize; ret = inode; - if (DQUOT_ALLOC_INODE(inode)) { + if(DQUOT_ALLOC_INODE(inode)) { err = -EDQUOT; goto fail_drop; } @@ -843,7 +841,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode) if (err) goto fail_free_drop; - err = ext4_init_security(handle, inode, dir); + err = ext4_init_security(handle,inode, dir); if (err) goto fail_free_drop; @@ -961,7 +959,7 @@ struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino) return ERR_PTR(err); } -unsigned long ext4_count_free_inodes(struct super_block *sb) +unsigned long ext4_count_free_inodes (struct super_block * sb) { unsigned long desc_count; struct ext4_group_desc *gdp; @@ -976,7 +974,7 @@ unsigned long ext4_count_free_inodes(struct super_block *sb) bitmap_count = 0; gdp = NULL; for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) { - gdp = ext4_get_group_desc(sb, i, NULL); + gdp = ext4_get_group_desc (sb, i, NULL); if (!gdp) continue; desc_count += le16_to_cpu(gdp->bg_free_inodes_count); @@ -991,14 +989,13 @@ unsigned long ext4_count_free_inodes(struct super_block *sb) bitmap_count += x; } brelse(bitmap_bh); - printk(KERN_DEBUG "ext4_count_free_inodes: " - "stored = %u, computed = %lu, %lu\n", - le32_to_cpu(es->s_free_inodes_count), desc_count, bitmap_count); + printk("ext4_count_free_inodes: stored = %u, computed = %lu, %lu\n", + le32_to_cpu(es->s_free_inodes_count), desc_count, bitmap_count); return desc_count; #else desc_count = 0; for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) { - gdp = ext4_get_group_desc(sb, i, NULL); + gdp = ext4_get_group_desc (sb, i, NULL); if (!gdp) continue; desc_count += le16_to_cpu(gdp->bg_free_inodes_count); @@ -1009,13 +1006,13 @@ unsigned long ext4_count_free_inodes(struct super_block *sb) } /* Called at mount-time, super-block is locked */ -unsigned long ext4_count_dirs(struct super_block * sb) +unsigned long ext4_count_dirs (struct super_block * sb) { unsigned long count = 0; ext4_group_t i; for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) { - struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL); + struct ext4_group_desc *gdp = ext4_get_group_desc (sb, i, NULL); if (!gdp) continue; count += le16_to_cpu(gdp->bg_used_dirs_count); diff --git a/trunk/fs/ext4/inode.c b/trunk/fs/ext4/inode.c index 9b4ec9decfd1..7e91913e325b 100644 --- a/trunk/fs/ext4/inode.c +++ b/trunk/fs/ext4/inode.c @@ -190,7 +190,7 @@ static int ext4_journal_test_restart(handle_t *handle, struct inode *inode) /* * Called at the last iput() if i_nlink is zero. */ -void ext4_delete_inode(struct inode *inode) +void ext4_delete_inode (struct inode * inode) { handle_t *handle; int err; @@ -330,11 +330,11 @@ static int ext4_block_to_path(struct inode *inode, int final = 0; if (i_block < 0) { - ext4_warning(inode->i_sb, "ext4_block_to_path", "block < 0"); + ext4_warning (inode->i_sb, "ext4_block_to_path", "block < 0"); } else if (i_block < direct_blocks) { offsets[n++] = i_block; final = direct_blocks; - } else if ((i_block -= direct_blocks) < indirect_blocks) { + } else if ( (i_block -= direct_blocks) < indirect_blocks) { offsets[n++] = EXT4_IND_BLOCK; offsets[n++] = i_block; final = ptrs; @@ -400,14 +400,14 @@ static Indirect *ext4_get_branch(struct inode *inode, int depth, *err = 0; /* i_data is not going away, no lock needed */ - add_chain(chain, NULL, EXT4_I(inode)->i_data + *offsets); + add_chain (chain, NULL, EXT4_I(inode)->i_data + *offsets); if (!p->key) goto no_block; while (--depth) { bh = sb_bread(sb, le32_to_cpu(p->key)); if (!bh) goto failure; - add_chain(++p, bh, (__le32 *)bh->b_data + *++offsets); + add_chain(++p, bh, (__le32*)bh->b_data + *++offsets); /* Reader: end */ if (!p->key) goto no_block; @@ -443,7 +443,7 @@ static Indirect *ext4_get_branch(struct inode *inode, int depth, static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind) { struct ext4_inode_info *ei = EXT4_I(inode); - __le32 *start = ind->bh ? (__le32 *) ind->bh->b_data : ei->i_data; + __le32 *start = ind->bh ? (__le32*) ind->bh->b_data : ei->i_data; __le32 *p; ext4_fsblk_t bg_start; ext4_fsblk_t last_block; @@ -486,9 +486,18 @@ static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind) static ext4_fsblk_t ext4_find_goal(struct inode *inode, ext4_lblk_t block, Indirect *partial) { + struct ext4_block_alloc_info *block_i; + + block_i = EXT4_I(inode)->i_block_alloc_info; + /* - * XXX need to get goal block from mballoc's data structures + * try the heuristic for sequential allocation, + * failing that at least try to get decent locality. */ + if (block_i && (block == block_i->last_alloc_logical_block + 1) + && (block_i->last_alloc_physical_block != 0)) { + return block_i->last_alloc_physical_block + 1; + } return ext4_find_near(inode, partial); } @@ -621,7 +630,7 @@ static int ext4_alloc_blocks(handle_t *handle, struct inode *inode, *err = 0; return ret; failed_out: - for (i = 0; i < index; i++) + for (i = 0; i b_data + offsets[n]; branch[n].key = cpu_to_le32(new_blocks[n]); *branch[n].p = branch[n].key; - if (n == indirect_blks) { + if ( n == indirect_blks) { current_block = new_blocks[n]; /* * End of chain, update the last new metablock of @@ -721,7 +730,7 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode, BUFFER_TRACE(branch[i].bh, "call jbd2_journal_forget"); ext4_journal_forget(handle, branch[i].bh); } - for (i = 0; i < indirect_blks; i++) + for (i = 0; i i_block_alloc_info; /* * If we're splicing into a [td]indirect block (as opposed to the * inode) then we need to get write access to the [td]indirect block @@ -772,7 +783,18 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode, if (num == 0 && blks > 1) { current_block = le32_to_cpu(where->key) + 1; for (i = 1; i < blks; i++) - *(where->p + i) = cpu_to_le32(current_block++); + *(where->p + i ) = cpu_to_le32(current_block++); + } + + /* + * update the most recently allocated logical & physical block + * in i_block_alloc_info, to assist find the proper goal block for next + * allocation + */ + if (block_i) { + block_i->last_alloc_logical_block = block + blks - 1; + block_i->last_alloc_physical_block = + le32_to_cpu(where[num].key) + blks - 1; } /* We are done with atomic stuff, now do the rest of housekeeping */ @@ -892,8 +914,12 @@ int ext4_get_blocks_handle(handle_t *handle, struct inode *inode, goto cleanup; /* - * Okay, we need to do block allocation. + * Okay, we need to do block allocation. Lazily initialize the block + * allocation info here if necessary */ + if (S_ISREG(inode->i_mode) && (!ei->i_block_alloc_info)) + ext4_init_block_alloc_info(inode); + goal = ext4_find_goal(inode, iblock, partial); /* the number of blocks need to allocate for [d,t]indirect blocks */ @@ -1004,20 +1030,19 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used) BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks); mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb; - if (mdb_free) { - /* Account for allocated meta_blocks */ - mdb_free -= EXT4_I(inode)->i_allocated_meta_blocks; + /* Account for allocated meta_blocks */ + mdb_free -= EXT4_I(inode)->i_allocated_meta_blocks; - /* update fs dirty blocks counter */ - percpu_counter_sub(&sbi->s_dirtyblocks_counter, mdb_free); - EXT4_I(inode)->i_allocated_meta_blocks = 0; - EXT4_I(inode)->i_reserved_meta_blocks = mdb; - } + /* update fs free blocks counter for truncate case */ + percpu_counter_add(&sbi->s_freeblocks_counter, mdb_free); /* update per-inode reservations */ BUG_ON(used > EXT4_I(inode)->i_reserved_data_blocks); EXT4_I(inode)->i_reserved_data_blocks -= used; + BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks); + EXT4_I(inode)->i_reserved_meta_blocks = mdb; + EXT4_I(inode)->i_allocated_meta_blocks = 0; spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); } @@ -1135,8 +1160,8 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block, /* Maximum number of blocks we map for direct IO at once. */ #define DIO_MAX_BLOCKS 4096 -int ext4_get_block(struct inode *inode, sector_t iblock, - struct buffer_head *bh_result, int create) +static int ext4_get_block(struct inode *inode, sector_t iblock, + struct buffer_head *bh_result, int create) { handle_t *handle = ext4_journal_current_handle(); int ret = 0, started = 0; @@ -1216,7 +1241,7 @@ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode, BUFFER_TRACE(bh, "call get_create_access"); fatal = ext4_journal_get_create_access(handle, bh); if (!fatal && !buffer_uptodate(bh)) { - memset(bh->b_data, 0, inode->i_sb->s_blocksize); + memset(bh->b_data,0,inode->i_sb->s_blocksize); set_buffer_uptodate(bh); } unlock_buffer(bh); @@ -1241,7 +1266,7 @@ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode, struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode, ext4_lblk_t block, int create, int *err) { - struct buffer_head *bh; + struct buffer_head * bh; bh = ext4_getblk(handle, inode, block, create, err); if (!bh) @@ -1257,13 +1282,13 @@ struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode, return NULL; } -static int walk_page_buffers(handle_t *handle, - struct buffer_head *head, - unsigned from, - unsigned to, - int *partial, - int (*fn)(handle_t *handle, - struct buffer_head *bh)) +static int walk_page_buffers( handle_t *handle, + struct buffer_head *head, + unsigned from, + unsigned to, + int *partial, + int (*fn)( handle_t *handle, + struct buffer_head *bh)) { struct buffer_head *bh; unsigned block_start, block_end; @@ -1271,9 +1296,9 @@ static int walk_page_buffers(handle_t *handle, int err, ret = 0; struct buffer_head *next; - for (bh = head, block_start = 0; - ret == 0 && (bh != head || !block_start); - block_start = block_end, bh = next) + for ( bh = head, block_start = 0; + ret == 0 && (bh != head || !block_start); + block_start = block_end, bh = next) { next = bh->b_this_page; block_end = block_start + blocksize; @@ -1326,23 +1351,23 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata) { - struct inode *inode = mapping->host; + struct inode *inode = mapping->host; int ret, needed_blocks = ext4_writepage_trans_blocks(inode); handle_t *handle; int retries = 0; - struct page *page; + struct page *page; pgoff_t index; - unsigned from, to; + unsigned from, to; index = pos >> PAGE_CACHE_SHIFT; - from = pos & (PAGE_CACHE_SIZE - 1); - to = from + len; + from = pos & (PAGE_CACHE_SIZE - 1); + to = from + len; retry: - handle = ext4_journal_start(inode, needed_blocks); - if (IS_ERR(handle)) { - ret = PTR_ERR(handle); - goto out; + handle = ext4_journal_start(inode, needed_blocks); + if (IS_ERR(handle)) { + ret = PTR_ERR(handle); + goto out; } page = __grab_cache_page(mapping, index); @@ -1362,16 +1387,9 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping, } if (ret) { - unlock_page(page); + unlock_page(page); ext4_journal_stop(handle); - page_cache_release(page); - /* - * block_write_begin may have instantiated a few blocks - * outside i_size. Trim these off again. Don't need - * i_size_read because we hold i_mutex. - */ - if (pos + len > inode->i_size) - vmtruncate(inode, inode->i_size); + page_cache_release(page); } if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) @@ -1408,18 +1426,16 @@ static int ext4_ordered_write_end(struct file *file, ret = ext4_jbd2_file_inode(handle, inode); if (ret == 0) { + /* + * generic_write_end() will run mark_inode_dirty() if i_size + * changes. So let's piggyback the i_disksize mark_inode_dirty + * into that. + */ loff_t new_i_size; new_i_size = pos + copied; - if (new_i_size > EXT4_I(inode)->i_disksize) { - ext4_update_i_disksize(inode, new_i_size); - /* We need to mark inode dirty even if - * new_i_size is less that inode->i_size - * bu greater than i_disksize.(hint delalloc) - */ - ext4_mark_inode_dirty(handle, inode); - } - + if (new_i_size > EXT4_I(inode)->i_disksize) + EXT4_I(inode)->i_disksize = new_i_size; ret2 = generic_write_end(file, mapping, pos, len, copied, page, fsdata); copied = ret2; @@ -1444,14 +1460,8 @@ static int ext4_writeback_write_end(struct file *file, loff_t new_i_size; new_i_size = pos + copied; - if (new_i_size > EXT4_I(inode)->i_disksize) { - ext4_update_i_disksize(inode, new_i_size); - /* We need to mark inode dirty even if - * new_i_size is less that inode->i_size - * bu greater than i_disksize.(hint delalloc) - */ - ext4_mark_inode_dirty(handle, inode); - } + if (new_i_size > EXT4_I(inode)->i_disksize) + EXT4_I(inode)->i_disksize = new_i_size; ret2 = generic_write_end(file, mapping, pos, len, copied, page, fsdata); @@ -1476,7 +1486,6 @@ static int ext4_journalled_write_end(struct file *file, int ret = 0, ret2; int partial = 0; unsigned from, to; - loff_t new_i_size; from = pos & (PAGE_CACHE_SIZE - 1); to = from + len; @@ -1491,12 +1500,11 @@ static int ext4_journalled_write_end(struct file *file, to, &partial, write_end_fn); if (!partial) SetPageUptodate(page); - new_i_size = pos + copied; - if (new_i_size > inode->i_size) + if (pos+copied > inode->i_size) i_size_write(inode, pos+copied); EXT4_I(inode)->i_state |= EXT4_STATE_JDATA; - if (new_i_size > EXT4_I(inode)->i_disksize) { - ext4_update_i_disksize(inode, new_i_size); + if (inode->i_size > EXT4_I(inode)->i_disksize) { + EXT4_I(inode)->i_disksize = inode->i_size; ret2 = ext4_mark_inode_dirty(handle, inode); if (!ret) ret = ret2; @@ -1513,7 +1521,6 @@ static int ext4_journalled_write_end(struct file *file, static int ext4_da_reserve_space(struct inode *inode, int nrblocks) { - int retries = 0; struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); unsigned long md_needed, mdblocks, total = 0; @@ -1522,7 +1529,6 @@ static int ext4_da_reserve_space(struct inode *inode, int nrblocks) * in order to allocate nrblocks * worse case is one extent per block */ -repeat: spin_lock(&EXT4_I(inode)->i_block_reservation_lock); total = EXT4_I(inode)->i_reserved_data_blocks + nrblocks; mdblocks = ext4_calc_metadata_amount(inode, total); @@ -1531,14 +1537,13 @@ static int ext4_da_reserve_space(struct inode *inode, int nrblocks) md_needed = mdblocks - EXT4_I(inode)->i_reserved_meta_blocks; total = md_needed + nrblocks; - if (ext4_claim_free_blocks(sbi, total)) { + if (ext4_has_free_blocks(sbi, total) < total) { spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); - if (ext4_should_retry_alloc(inode->i_sb, &retries)) { - yield(); - goto repeat; - } return -ENOSPC; } + /* reduce fs free blocks counter */ + percpu_counter_sub(&sbi->s_freeblocks_counter, total); + EXT4_I(inode)->i_reserved_data_blocks += nrblocks; EXT4_I(inode)->i_reserved_meta_blocks = mdblocks; @@ -1580,8 +1585,8 @@ static void ext4_da_release_space(struct inode *inode, int to_free) release = to_free + mdb_free; - /* update fs dirty blocks counter for truncate case */ - percpu_counter_sub(&sbi->s_dirtyblocks_counter, release); + /* update fs free blocks counter for truncate case */ + percpu_counter_add(&sbi->s_freeblocks_counter, release); /* update per-inode reservations */ BUG_ON(to_free > EXT4_I(inode)->i_reserved_data_blocks); @@ -1625,7 +1630,6 @@ struct mpage_da_data { struct writeback_control *wbc; int io_done; long pages_written; - int retval; }; /* @@ -1779,57 +1783,6 @@ static inline void __unmap_underlying_blocks(struct inode *inode, unmap_underlying_metadata(bdev, bh->b_blocknr + i); } -static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd, - sector_t logical, long blk_cnt) -{ - int nr_pages, i; - pgoff_t index, end; - struct pagevec pvec; - struct inode *inode = mpd->inode; - struct address_space *mapping = inode->i_mapping; - - index = logical >> (PAGE_CACHE_SHIFT - inode->i_blkbits); - end = (logical + blk_cnt - 1) >> - (PAGE_CACHE_SHIFT - inode->i_blkbits); - while (index <= end) { - nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE); - if (nr_pages == 0) - break; - for (i = 0; i < nr_pages; i++) { - struct page *page = pvec.pages[i]; - index = page->index; - if (index > end) - break; - index++; - - BUG_ON(!PageLocked(page)); - BUG_ON(PageWriteback(page)); - block_invalidatepage(page, 0); - ClearPageUptodate(page); - unlock_page(page); - } - } - return; -} - -static void ext4_print_free_blocks(struct inode *inode) -{ - struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); - printk(KERN_EMERG "Total free blocks count %lld\n", - ext4_count_free_blocks(inode->i_sb)); - printk(KERN_EMERG "Free/Dirty block details\n"); - printk(KERN_EMERG "free_blocks=%lld\n", - percpu_counter_sum(&sbi->s_freeblocks_counter)); - printk(KERN_EMERG "dirty_blocks=%lld\n", - percpu_counter_sum(&sbi->s_dirtyblocks_counter)); - printk(KERN_EMERG "Block reservation details\n"); - printk(KERN_EMERG "i_reserved_data_blocks=%lu\n", - EXT4_I(inode)->i_reserved_data_blocks); - printk(KERN_EMERG "i_reserved_meta_blocks=%lu\n", - EXT4_I(inode)->i_reserved_meta_blocks); - return; -} - /* * mpage_da_map_blocks - go through given space * @@ -1839,69 +1792,32 @@ static void ext4_print_free_blocks(struct inode *inode) * The function skips space we know is already mapped to disk blocks. * */ -static int mpage_da_map_blocks(struct mpage_da_data *mpd) +static void mpage_da_map_blocks(struct mpage_da_data *mpd) { int err = 0; - struct buffer_head new; struct buffer_head *lbh = &mpd->lbh; - sector_t next; + sector_t next = lbh->b_blocknr; + struct buffer_head new; /* * We consider only non-mapped and non-allocated blocks */ if (buffer_mapped(lbh) && !buffer_delay(lbh)) - return 0; + return; + new.b_state = lbh->b_state; new.b_blocknr = 0; new.b_size = lbh->b_size; - next = lbh->b_blocknr; + /* * If we didn't accumulate anything * to write simply return */ if (!new.b_size) - return 0; + return; err = mpd->get_block(mpd->inode, next, &new, 1); - if (err) { - - /* If get block returns with error - * we simply return. Later writepage - * will redirty the page and writepages - * will find the dirty page again - */ - if (err == -EAGAIN) - return 0; - - if (err == -ENOSPC && - ext4_count_free_blocks(mpd->inode->i_sb)) { - mpd->retval = err; - return 0; - } - - /* - * get block failure will cause us - * to loop in writepages. Because - * a_ops->writepage won't be able to - * make progress. The page will be redirtied - * by writepage and writepages will again - * try to write the same. - */ - printk(KERN_EMERG "%s block allocation failed for inode %lu " - "at logical offset %llu with max blocks " - "%zd with error %d\n", - __func__, mpd->inode->i_ino, - (unsigned long long)next, - lbh->b_size >> mpd->inode->i_blkbits, err); - printk(KERN_EMERG "This should not happen.!! " - "Data will be lost\n"); - if (err == -ENOSPC) { - ext4_print_free_blocks(mpd->inode); - } - /* invlaidate all the pages */ - ext4_da_block_invalidatepages(mpd, next, - lbh->b_size >> mpd->inode->i_blkbits); - return err; - } + if (err) + return; BUG_ON(new.b_size == 0); if (buffer_new(&new)) @@ -1914,7 +1830,7 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd) if (buffer_delay(lbh) || buffer_unwritten(lbh)) mpage_put_bnr_to_bhs(mpd, next, &new); - return 0; + return; } #define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | \ @@ -1983,8 +1899,8 @@ static void mpage_add_bh_to_extent(struct mpage_da_data *mpd, * We couldn't merge the block to our extent, so we * need to flush current extent and start new one */ - if (mpage_da_map_blocks(mpd) == 0) - mpage_da_submit_io(mpd); + mpage_da_map_blocks(mpd); + mpage_da_submit_io(mpd); mpd->io_done = 1; return; } @@ -2026,8 +1942,8 @@ static int __mpage_da_writepage(struct page *page, * and start IO on them using writepage() */ if (mpd->next_page != mpd->first_page) { - if (mpage_da_map_blocks(mpd) == 0) - mpage_da_submit_io(mpd); + mpage_da_map_blocks(mpd); + mpage_da_submit_io(mpd); /* * skip rest of the page in the page_vec */ @@ -2102,36 +2018,39 @@ static int __mpage_da_writepage(struct page *page, */ static int mpage_da_writepages(struct address_space *mapping, struct writeback_control *wbc, - struct mpage_da_data *mpd) + get_block_t get_block) { + struct mpage_da_data mpd; long to_write; int ret; - if (!mpd->get_block) + if (!get_block) return generic_writepages(mapping, wbc); - mpd->lbh.b_size = 0; - mpd->lbh.b_state = 0; - mpd->lbh.b_blocknr = 0; - mpd->first_page = 0; - mpd->next_page = 0; - mpd->io_done = 0; - mpd->pages_written = 0; - mpd->retval = 0; + mpd.wbc = wbc; + mpd.inode = mapping->host; + mpd.lbh.b_size = 0; + mpd.lbh.b_state = 0; + mpd.lbh.b_blocknr = 0; + mpd.first_page = 0; + mpd.next_page = 0; + mpd.get_block = get_block; + mpd.io_done = 0; + mpd.pages_written = 0; to_write = wbc->nr_to_write; - ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, mpd); + ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, &mpd); /* * Handle last extent of pages */ - if (!mpd->io_done && mpd->next_page != mpd->first_page) { - if (mpage_da_map_blocks(mpd) == 0) - mpage_da_submit_io(mpd); + if (!mpd.io_done && mpd.next_page != mpd.first_page) { + mpage_da_map_blocks(&mpd); + mpage_da_submit_io(&mpd); } - wbc->nr_to_write = to_write - mpd->pages_written; + wbc->nr_to_write = to_write - mpd.pages_written; return ret; } @@ -2184,24 +2103,18 @@ static int ext4_da_get_block_write(struct inode *inode, sector_t iblock, handle_t *handle = NULL; handle = ext4_journal_current_handle(); - BUG_ON(!handle); - ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks, - bh_result, create, 0, EXT4_DELALLOC_RSVED); - if (ret > 0) { + if (!handle) { + ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks, + bh_result, 0, 0, 0); + BUG_ON(!ret); + } else { + ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks, + bh_result, create, 0, EXT4_DELALLOC_RSVED); + } + if (ret > 0) { bh_result->b_size = (ret << inode->i_blkbits); - if (ext4_should_order_data(inode)) { - int retval; - retval = ext4_jbd2_file_inode(handle, inode); - if (retval) - /* - * Failed to add inode for ordered - * mode. Don't update file size - */ - return retval; - } - /* * Update on-disk size along with block allocation * we don't use 'extend_disksize' as size may change @@ -2211,9 +2124,18 @@ static int ext4_da_get_block_write(struct inode *inode, sector_t iblock, if (disksize > i_size_read(inode)) disksize = i_size_read(inode); if (disksize > EXT4_I(inode)->i_disksize) { - ext4_update_i_disksize(inode, disksize); - ret = ext4_mark_inode_dirty(handle, inode); - return ret; + /* + * XXX: replace with spinlock if seen contended -bzzz + */ + down_write(&EXT4_I(inode)->i_data_sem); + if (disksize > EXT4_I(inode)->i_disksize) + EXT4_I(inode)->i_disksize = disksize; + up_write(&EXT4_I(inode)->i_data_sem); + + if (EXT4_I(inode)->i_disksize == disksize) { + ret = ext4_mark_inode_dirty(handle, inode); + return ret; + } } ret = 0; } @@ -2362,7 +2284,6 @@ static int ext4_da_writepages(struct address_space *mapping, { handle_t *handle = NULL; loff_t range_start = 0; - struct mpage_da_data mpd; struct inode *inode = mapping->host; int needed_blocks, ret = 0, nr_to_writebump = 0; long to_write, pages_skipped = 0; @@ -2396,9 +2317,6 @@ static int ext4_da_writepages(struct address_space *mapping, range_start = wbc->range_start; pages_skipped = wbc->pages_skipped; - mpd.wbc = wbc; - mpd.inode = mapping->host; - restart_loop: to_write = wbc->nr_to_write; while (!ret && to_write > 0) { @@ -2422,17 +2340,23 @@ static int ext4_da_writepages(struct address_space *mapping, dump_stack(); goto out_writepages; } - to_write -= wbc->nr_to_write; - - mpd.get_block = ext4_da_get_block_write; - ret = mpage_da_writepages(mapping, wbc, &mpd); + if (ext4_should_order_data(inode)) { + /* + * With ordered mode we need to add + * the inode to the journal handl + * when we do block allocation. + */ + ret = ext4_jbd2_file_inode(handle, inode); + if (ret) { + ext4_journal_stop(handle); + goto out_writepages; + } + } + to_write -= wbc->nr_to_write; + ret = mpage_da_writepages(mapping, wbc, + ext4_da_get_block_write); ext4_journal_stop(handle); - - if (mpd.retval == -ENOSPC) - jbd2_journal_force_commit_nested(sbi->s_journal); - - /* reset the retry count */ if (ret == MPAGE_DA_EXTENT_TAIL) { /* * got one extent now try with @@ -2467,33 +2391,6 @@ static int ext4_da_writepages(struct address_space *mapping, return ret; } -#define FALL_BACK_TO_NONDELALLOC 1 -static int ext4_nonda_switch(struct super_block *sb) -{ - s64 free_blocks, dirty_blocks; - struct ext4_sb_info *sbi = EXT4_SB(sb); - - /* - * switch to non delalloc mode if we are running low - * on free block. The free block accounting via percpu - * counters can get slightly wrong with FBC_BATCH getting - * accumulated on each CPU without updating global counters - * Delalloc need an accurate free block accounting. So switch - * to non delalloc when we are near to error range. - */ - free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter); - dirty_blocks = percpu_counter_read_positive(&sbi->s_dirtyblocks_counter); - if (2 * free_blocks < 3 * dirty_blocks || - free_blocks < (dirty_blocks + EXT4_FREEBLOCKS_WATERMARK)) { - /* - * free block count is less that 150% of dirty blocks - * or free blocks is less that watermark - */ - return 1; - } - return 0; -} - static int ext4_da_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata) @@ -2509,12 +2406,6 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping, from = pos & (PAGE_CACHE_SIZE - 1); to = from + len; - if (ext4_nonda_switch(inode->i_sb)) { - *fsdata = (void *)FALL_BACK_TO_NONDELALLOC; - return ext4_write_begin(file, mapping, pos, - len, flags, pagep, fsdata); - } - *fsdata = (void *)0; retry: /* * With delayed allocation, we don't log the i_disksize update @@ -2542,13 +2433,6 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping, unlock_page(page); ext4_journal_stop(handle); page_cache_release(page); - /* - * block_write_begin may have instantiated a few blocks - * outside i_size. Trim these off again. Don't need - * i_size_read because we hold i_mutex. - */ - if (pos + len > inode->i_size) - vmtruncate(inode, inode->i_size); } if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) @@ -2572,7 +2456,7 @@ static int ext4_da_should_update_i_disksize(struct page *page, bh = page_buffers(page); idx = offset >> inode->i_blkbits; - for (i = 0; i < idx; i++) + for (i=0; i < idx; i++) bh = bh->b_this_page; if (!buffer_mapped(bh) || (buffer_delay(bh))) @@ -2590,22 +2474,9 @@ static int ext4_da_write_end(struct file *file, handle_t *handle = ext4_journal_current_handle(); loff_t new_i_size; unsigned long start, end; - int write_mode = (int)(unsigned long)fsdata; - - if (write_mode == FALL_BACK_TO_NONDELALLOC) { - if (ext4_should_order_data(inode)) { - return ext4_ordered_write_end(file, mapping, pos, - len, copied, page, fsdata); - } else if (ext4_should_writeback_data(inode)) { - return ext4_writeback_write_end(file, mapping, pos, - len, copied, page, fsdata); - } else { - BUG(); - } - } start = pos & (PAGE_CACHE_SIZE - 1); - end = start + copied - 1; + end = start + copied -1; /* * generic_write_end() will run mark_inode_dirty() if i_size @@ -2629,11 +2500,6 @@ static int ext4_da_write_end(struct file *file, EXT4_I(inode)->i_disksize = new_i_size; } up_write(&EXT4_I(inode)->i_data_sem); - /* We need to mark inode dirty even if - * new_i_size is less that inode->i_size - * bu greater than i_disksize.(hint delalloc) - */ - ext4_mark_inode_dirty(handle, inode); } } ret2 = generic_write_end(file, mapping, pos, len, copied, @@ -2725,7 +2591,7 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block) return 0; } - return generic_block_bmap(mapping, block, ext4_get_block); + return generic_block_bmap(mapping,block,ext4_get_block); } static int bget_one(handle_t *handle, struct buffer_head *bh) @@ -3331,7 +3197,7 @@ static Indirect *ext4_find_shared(struct inode *inode, int depth, if (!partial->key && *partial->p) /* Writer: end */ goto no_top; - for (p = partial; (p > chain) && all_zeroes((__le32 *) p->bh->b_data, p->p); p--) + for (p=partial; p>chain && all_zeroes((__le32*)p->bh->b_data,p->p); p--) ; /* * OK, we've found the last block that must survive. The rest of our @@ -3350,7 +3216,7 @@ static Indirect *ext4_find_shared(struct inode *inode, int depth, } /* Writer: end */ - while (partial > p) { + while(partial > p) { brelse(partial->bh); partial--; } @@ -3542,9 +3408,9 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode, /* This zaps the entire block. Bottom up. */ BUFFER_TRACE(bh, "free child branches"); ext4_free_branches(handle, inode, bh, - (__le32 *) bh->b_data, - (__le32 *) bh->b_data + addr_per_block, - depth); + (__le32*)bh->b_data, + (__le32*)bh->b_data + addr_per_block, + depth); /* * We've probably journalled the indirect block several @@ -3712,7 +3578,7 @@ void ext4_truncate(struct inode *inode) */ down_write(&ei->i_data_sem); - ext4_discard_preallocations(inode); + ext4_discard_reservation(inode); /* * The orphan list entry will now protect us from any crash which @@ -3807,6 +3673,41 @@ void ext4_truncate(struct inode *inode) ext4_journal_stop(handle); } +static ext4_fsblk_t ext4_get_inode_block(struct super_block *sb, + unsigned long ino, struct ext4_iloc *iloc) +{ + ext4_group_t block_group; + unsigned long offset; + ext4_fsblk_t block; + struct ext4_group_desc *gdp; + + if (!ext4_valid_inum(sb, ino)) { + /* + * This error is already checked for in namei.c unless we are + * looking at an NFS filehandle, in which case no error + * report is needed + */ + return 0; + } + + block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb); + gdp = ext4_get_group_desc(sb, block_group, NULL); + if (!gdp) + return 0; + + /* + * Figure out the offset within the block group inode table + */ + offset = ((ino - 1) % EXT4_INODES_PER_GROUP(sb)) * + EXT4_INODE_SIZE(sb); + block = ext4_inode_table(sb, gdp) + + (offset >> EXT4_BLOCK_SIZE_BITS(sb)); + + iloc->block_group = block_group; + iloc->offset = offset & (EXT4_BLOCK_SIZE(sb) - 1); + return block; +} + /* * ext4_get_inode_loc returns with an extra refcount against the inode's * underlying buffer_head on success. If 'in_mem' is true, we have all @@ -3816,35 +3717,19 @@ void ext4_truncate(struct inode *inode) static int __ext4_get_inode_loc(struct inode *inode, struct ext4_iloc *iloc, int in_mem) { - struct ext4_group_desc *gdp; - struct buffer_head *bh; - struct super_block *sb = inode->i_sb; - ext4_fsblk_t block; - int inodes_per_block, inode_offset; - - iloc->bh = 0; - if (!ext4_valid_inum(sb, inode->i_ino)) - return -EIO; + ext4_fsblk_t block; + struct buffer_head *bh; - iloc->block_group = (inode->i_ino - 1) / EXT4_INODES_PER_GROUP(sb); - gdp = ext4_get_group_desc(sb, iloc->block_group, NULL); - if (!gdp) + block = ext4_get_inode_block(inode->i_sb, inode->i_ino, iloc); + if (!block) return -EIO; - /* - * Figure out the offset within the block group inode table - */ - inodes_per_block = (EXT4_BLOCK_SIZE(sb) / EXT4_INODE_SIZE(sb)); - inode_offset = ((inode->i_ino - 1) % - EXT4_INODES_PER_GROUP(sb)); - block = ext4_inode_table(sb, gdp) + (inode_offset / inodes_per_block); - iloc->offset = (inode_offset % inodes_per_block) * EXT4_INODE_SIZE(sb); - - bh = sb_getblk(sb, block); + bh = sb_getblk(inode->i_sb, block); if (!bh) { - ext4_error(sb, "ext4_get_inode_loc", "unable to read " - "inode block - inode=%lu, block=%llu", - inode->i_ino, block); + ext4_error (inode->i_sb, "ext4_get_inode_loc", + "unable to read inode block - " + "inode=%lu, block=%llu", + inode->i_ino, block); return -EIO; } if (!buffer_uptodate(bh)) { @@ -3872,12 +3757,28 @@ static int __ext4_get_inode_loc(struct inode *inode, */ if (in_mem) { struct buffer_head *bitmap_bh; - int i, start; - - start = inode_offset & ~(inodes_per_block - 1); + struct ext4_group_desc *desc; + int inodes_per_buffer; + int inode_offset, i; + ext4_group_t block_group; + int start; + + block_group = (inode->i_ino - 1) / + EXT4_INODES_PER_GROUP(inode->i_sb); + inodes_per_buffer = bh->b_size / + EXT4_INODE_SIZE(inode->i_sb); + inode_offset = ((inode->i_ino - 1) % + EXT4_INODES_PER_GROUP(inode->i_sb)); + start = inode_offset & ~(inodes_per_buffer - 1); /* Is the inode bitmap in cache? */ - bitmap_bh = sb_getblk(sb, ext4_inode_bitmap(sb, gdp)); + desc = ext4_get_group_desc(inode->i_sb, + block_group, NULL); + if (!desc) + goto make_io; + + bitmap_bh = sb_getblk(inode->i_sb, + ext4_inode_bitmap(inode->i_sb, desc)); if (!bitmap_bh) goto make_io; @@ -3890,14 +3791,14 @@ static int __ext4_get_inode_loc(struct inode *inode, brelse(bitmap_bh); goto make_io; } - for (i = start; i < start + inodes_per_block; i++) { + for (i = start; i < start + inodes_per_buffer; i++) { if (i == inode_offset) continue; if (ext4_test_bit(i, bitmap_bh->b_data)) break; } brelse(bitmap_bh); - if (i == start + inodes_per_block) { + if (i == start + inodes_per_buffer) { /* all other inodes are free, so skip I/O */ memset(bh->b_data, 0, bh->b_size); set_buffer_uptodate(bh); @@ -3907,36 +3808,6 @@ static int __ext4_get_inode_loc(struct inode *inode, } make_io: - /* - * If we need to do any I/O, try to pre-readahead extra - * blocks from the inode table. - */ - if (EXT4_SB(sb)->s_inode_readahead_blks) { - ext4_fsblk_t b, end, table; - unsigned num; - - table = ext4_inode_table(sb, gdp); - /* Make sure s_inode_readahead_blks is a power of 2 */ - while (EXT4_SB(sb)->s_inode_readahead_blks & - (EXT4_SB(sb)->s_inode_readahead_blks-1)) - EXT4_SB(sb)->s_inode_readahead_blks = - (EXT4_SB(sb)->s_inode_readahead_blks & - (EXT4_SB(sb)->s_inode_readahead_blks-1)); - b = block & ~(EXT4_SB(sb)->s_inode_readahead_blks-1); - if (table > b) - b = table; - end = b + EXT4_SB(sb)->s_inode_readahead_blks; - num = EXT4_INODES_PER_GROUP(sb); - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) - num -= le16_to_cpu(gdp->bg_itable_unused); - table += num / inodes_per_block; - if (end > table) - end = table; - while (b <= end) - sb_breadahead(sb, b++); - } - /* * There are other valid inodes in the buffer, this inode * has in-inode xattrs, or we don't have this inode in memory. @@ -3947,9 +3818,10 @@ static int __ext4_get_inode_loc(struct inode *inode, submit_bh(READ_META, bh); wait_on_buffer(bh); if (!buffer_uptodate(bh)) { - ext4_error(sb, __func__, - "unable to read inode block - inode=%lu, " - "block=%llu", inode->i_ino, block); + ext4_error(inode->i_sb, "ext4_get_inode_loc", + "unable to read inode block - " + "inode=%lu, block=%llu", + inode->i_ino, block); brelse(bh); return -EIO; } @@ -4041,10 +3913,11 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) return inode; ei = EXT4_I(inode); -#ifdef CONFIG_EXT4_FS_POSIX_ACL +#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL ei->i_acl = EXT4_ACL_NOT_CACHED; ei->i_default_acl = EXT4_ACL_NOT_CACHED; #endif + ei->i_block_alloc_info = NULL; ret = __ext4_get_inode_loc(inode, &iloc, 0); if (ret < 0) @@ -4054,7 +3927,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) inode->i_mode = le16_to_cpu(raw_inode->i_mode); inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low); - if (!(test_opt(inode->i_sb, NO_UID32))) { + if(!(test_opt (inode->i_sb, NO_UID32))) { inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16; inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16; } @@ -4072,7 +3945,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) if (inode->i_mode == 0 || !(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS)) { /* this inode is deleted */ - brelse(bh); + brelse (bh); ret = -ESTALE; goto bad_inode; } @@ -4105,7 +3978,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize); if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize > EXT4_INODE_SIZE(inode->i_sb)) { - brelse(bh); + brelse (bh); ret = -EIO; goto bad_inode; } @@ -4158,7 +4031,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) init_special_inode(inode, inode->i_mode, new_decode_dev(le32_to_cpu(raw_inode->i_block[1]))); } - brelse(iloc.bh); + brelse (iloc.bh); ext4_set_inode_flags(inode); unlock_new_inode(inode); return inode; @@ -4240,14 +4113,14 @@ static int ext4_do_update_inode(handle_t *handle, ext4_get_inode_flags(ei); raw_inode->i_mode = cpu_to_le16(inode->i_mode); - if (!(test_opt(inode->i_sb, NO_UID32))) { + if(!(test_opt(inode->i_sb, NO_UID32))) { raw_inode->i_uid_low = cpu_to_le16(low_16_bits(inode->i_uid)); raw_inode->i_gid_low = cpu_to_le16(low_16_bits(inode->i_gid)); /* * Fix up interoperability with old kernels. Otherwise, old inodes get * re-used with the upper 16 bits of the uid/gid intact */ - if (!ei->i_dtime) { + if(!ei->i_dtime) { raw_inode->i_uid_high = cpu_to_le16(high_16_bits(inode->i_uid)); raw_inode->i_gid_high = @@ -4335,7 +4208,7 @@ static int ext4_do_update_inode(handle_t *handle, ei->i_state &= ~EXT4_STATE_NEW; out_brelse: - brelse(bh); + brelse (bh); ext4_std_error(inode->i_sb, err); return err; } @@ -4938,7 +4811,6 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page) loff_t size; unsigned long len; int ret = -EINVAL; - void *fsdata; struct file *file = vma->vm_file; struct inode *inode = file->f_path.dentry->d_inode; struct address_space *mapping = inode->i_mapping; @@ -4977,11 +4849,11 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page) * on the same page though */ ret = mapping->a_ops->write_begin(file, mapping, page_offset(page), - len, AOP_FLAG_UNINTERRUPTIBLE, &page, &fsdata); + len, AOP_FLAG_UNINTERRUPTIBLE, &page, NULL); if (ret < 0) goto out_unlock; ret = mapping->a_ops->write_end(file, mapping, page_offset(page), - len, len, page, fsdata); + len, len, page, NULL); if (ret < 0) goto out_unlock; ret = 0; diff --git a/trunk/fs/ext4/ioctl.c b/trunk/fs/ext4/ioctl.c index ea27eaa0cfe5..7a6c2f1faba6 100644 --- a/trunk/fs/ext4/ioctl.c +++ b/trunk/fs/ext4/ioctl.c @@ -23,8 +23,9 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) struct inode *inode = filp->f_dentry->d_inode; struct ext4_inode_info *ei = EXT4_I(inode); unsigned int flags; + unsigned short rsv_window_size; - ext4_debug("cmd = %u, arg = %lu\n", cmd, arg); + ext4_debug ("cmd = %u, arg = %lu\n", cmd, arg); switch (cmd) { case EXT4_IOC_GETFLAGS: @@ -33,7 +34,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return put_user(flags, (int __user *) arg); case EXT4_IOC_SETFLAGS: { handle_t *handle = NULL; - int err, migrate = 0; + int err; struct ext4_iloc iloc; unsigned int oldflags; unsigned int jflag; @@ -81,17 +82,6 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) if (!capable(CAP_SYS_RESOURCE)) goto flags_out; } - if (oldflags & EXT4_EXTENTS_FL) { - /* We don't support clearning extent flags */ - if (!(flags & EXT4_EXTENTS_FL)) { - err = -EOPNOTSUPP; - goto flags_out; - } - } else if (flags & EXT4_EXTENTS_FL) { - /* migrate the file */ - migrate = 1; - flags &= ~EXT4_EXTENTS_FL; - } handle = ext4_journal_start(inode, 1); if (IS_ERR(handle)) { @@ -119,10 +109,6 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) err = ext4_change_inode_journal_flag(inode, jflag); - if (err) - goto flags_out; - if (migrate) - err = ext4_ext_migrate(inode); flags_out: mutex_unlock(&inode->i_mutex); mnt_drop_write(filp->f_path.mnt); @@ -189,6 +175,49 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return ret; } #endif + case EXT4_IOC_GETRSVSZ: + if (test_opt(inode->i_sb, RESERVATION) + && S_ISREG(inode->i_mode) + && ei->i_block_alloc_info) { + rsv_window_size = ei->i_block_alloc_info->rsv_window_node.rsv_goal_size; + return put_user(rsv_window_size, (int __user *)arg); + } + return -ENOTTY; + case EXT4_IOC_SETRSVSZ: { + int err; + + if (!test_opt(inode->i_sb, RESERVATION) ||!S_ISREG(inode->i_mode)) + return -ENOTTY; + + if (!is_owner_or_cap(inode)) + return -EACCES; + + if (get_user(rsv_window_size, (int __user *)arg)) + return -EFAULT; + + err = mnt_want_write(filp->f_path.mnt); + if (err) + return err; + + if (rsv_window_size > EXT4_MAX_RESERVE_BLOCKS) + rsv_window_size = EXT4_MAX_RESERVE_BLOCKS; + + /* + * need to allocate reservation structure for this inode + * before set the window size + */ + down_write(&ei->i_data_sem); + if (!ei->i_block_alloc_info) + ext4_init_block_alloc_info(inode); + + if (ei->i_block_alloc_info){ + struct ext4_reserve_window_node *rsv = &ei->i_block_alloc_info->rsv_window_node; + rsv->rsv_goal_size = rsv_window_size; + } + up_write(&ei->i_data_sem); + mnt_drop_write(filp->f_path.mnt); + return 0; + } case EXT4_IOC_GROUP_EXTEND: { ext4_fsblk_t n_blocks_count; struct super_block *sb = inode->i_sb; @@ -238,26 +267,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) } case EXT4_IOC_MIGRATE: - { - int err; - if (!is_owner_or_cap(inode)) - return -EACCES; - - err = mnt_want_write(filp->f_path.mnt); - if (err) - return err; - /* - * inode_mutex prevent write and truncate on the file. - * Read still goes through. We take i_data_sem in - * ext4_ext_swap_inode_data before we switch the - * inode format to prevent read. - */ - mutex_lock(&(inode->i_mutex)); - err = ext4_ext_migrate(inode); - mutex_unlock(&(inode->i_mutex)); - mnt_drop_write(filp->f_path.mnt); - return err; - } + return ext4_ext_migrate(inode, filp, cmd, arg); default: return -ENOTTY; diff --git a/trunk/fs/ext4/mballoc.c b/trunk/fs/ext4/mballoc.c index b580714f0d85..e0e3a5eb1ddb 100644 --- a/trunk/fs/ext4/mballoc.c +++ b/trunk/fs/ext4/mballoc.c @@ -477,10 +477,9 @@ static void mb_cmp_bitmaps(struct ext4_buddy *e4b, void *bitmap) b2 = (unsigned char *) bitmap; for (i = 0; i < e4b->bd_sb->s_blocksize; i++) { if (b1[i] != b2[i]) { - printk(KERN_ERR "corruption in group %lu " - "at byte %u(%u): %x in copy != %x " - "on disk/prealloc\n", - e4b->bd_group, i, i * 8, b1[i], b2[i]); + printk("corruption in group %lu at byte %u(%u):" + " %x in copy != %x on disk/prealloc\n", + e4b->bd_group, i, i * 8, b1[i], b2[i]); BUG(); } } @@ -534,6 +533,9 @@ static int __mb_check_buddy(struct ext4_buddy *e4b, char *file, void *buddy; void *buddy2; + if (!test_opt(sb, MBALLOC)) + return 0; + { static int mb_check_counter; if (mb_check_counter++ % 100 != 0) @@ -782,11 +784,9 @@ static int ext4_mb_init_cache(struct page *page, char *incore) if (bh[i] == NULL) goto out; - if (buffer_uptodate(bh[i]) && - !(desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) + if (bh_uptodate_or_lock(bh[i])) continue; - lock_buffer(bh[i]); spin_lock(sb_bgl_lock(EXT4_SB(sb), first_group + i)); if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { ext4_init_block_bitmap(sb, bh[i], @@ -2169,10 +2169,9 @@ static void ext4_mb_history_release(struct super_block *sb) { struct ext4_sb_info *sbi = EXT4_SB(sb); - if (sbi->s_proc != NULL) { - remove_proc_entry("mb_groups", sbi->s_proc); - remove_proc_entry("mb_history", sbi->s_proc); - } + remove_proc_entry("mb_groups", sbi->s_mb_proc); + remove_proc_entry("mb_history", sbi->s_mb_proc); + kfree(sbi->s_mb_history); } @@ -2181,10 +2180,10 @@ static void ext4_mb_history_init(struct super_block *sb) struct ext4_sb_info *sbi = EXT4_SB(sb); int i; - if (sbi->s_proc != NULL) { - proc_create_data("mb_history", S_IRUGO, sbi->s_proc, + if (sbi->s_mb_proc != NULL) { + proc_create_data("mb_history", S_IRUGO, sbi->s_mb_proc, &ext4_mb_seq_history_fops, sb); - proc_create_data("mb_groups", S_IRUGO, sbi->s_proc, + proc_create_data("mb_groups", S_IRUGO, sbi->s_mb_proc, &ext4_mb_seq_groups_fops, sb); } @@ -2486,14 +2485,19 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) unsigned max; int ret; + if (!test_opt(sb, MBALLOC)) + return 0; + i = (sb->s_blocksize_bits + 2) * sizeof(unsigned short); sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL); if (sbi->s_mb_offsets == NULL) { + clear_opt(sbi->s_mount_opt, MBALLOC); return -ENOMEM; } sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL); if (sbi->s_mb_maxs == NULL) { + clear_opt(sbi->s_mount_opt, MBALLOC); kfree(sbi->s_mb_maxs); return -ENOMEM; } @@ -2516,6 +2520,7 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) /* init file for buddy data */ ret = ext4_mb_init_backend(sb); if (ret != 0) { + clear_opt(sbi->s_mount_opt, MBALLOC); kfree(sbi->s_mb_offsets); kfree(sbi->s_mb_maxs); return ret; @@ -2535,15 +2540,17 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) sbi->s_mb_history_filter = EXT4_MB_HISTORY_DEFAULT; sbi->s_mb_group_prealloc = MB_DEFAULT_GROUP_PREALLOC; - sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group); + i = sizeof(struct ext4_locality_group) * nr_cpu_ids; + sbi->s_locality_groups = kmalloc(i, GFP_KERNEL); if (sbi->s_locality_groups == NULL) { + clear_opt(sbi->s_mount_opt, MBALLOC); kfree(sbi->s_mb_offsets); kfree(sbi->s_mb_maxs); return -ENOMEM; } - for_each_possible_cpu(i) { + for (i = 0; i < nr_cpu_ids; i++) { struct ext4_locality_group *lg; - lg = per_cpu_ptr(sbi->s_locality_groups, i); + lg = &sbi->s_locality_groups[i]; mutex_init(&lg->lg_mutex); for (j = 0; j < PREALLOC_TB_SIZE; j++) INIT_LIST_HEAD(&lg->lg_prealloc_list[j]); @@ -2553,7 +2560,7 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) ext4_mb_init_per_dev_proc(sb); ext4_mb_history_init(sb); - printk(KERN_INFO "EXT4-fs: mballoc enabled\n"); + printk("EXT4-fs: mballoc enabled\n"); return 0; } @@ -2582,6 +2589,9 @@ int ext4_mb_release(struct super_block *sb) struct ext4_group_info *grinfo; struct ext4_sb_info *sbi = EXT4_SB(sb); + if (!test_opt(sb, MBALLOC)) + return 0; + /* release freed, non-committed blocks */ spin_lock(&sbi->s_md_lock); list_splice_init(&sbi->s_closed_transaction, @@ -2637,7 +2647,8 @@ int ext4_mb_release(struct super_block *sb) atomic_read(&sbi->s_mb_discarded)); } - free_percpu(sbi->s_locality_groups); + kfree(sbi->s_locality_groups); + ext4_mb_history_release(sb); ext4_mb_destroy_per_dev_proc(sb); @@ -2710,46 +2721,118 @@ ext4_mb_free_committed_blocks(struct super_block *sb) #define EXT4_MB_STREAM_REQ "stream_req" #define EXT4_MB_GROUP_PREALLOC "group_prealloc" + + +#define MB_PROC_FOPS(name) \ +static int ext4_mb_##name##_proc_show(struct seq_file *m, void *v) \ +{ \ + struct ext4_sb_info *sbi = m->private; \ + \ + seq_printf(m, "%ld\n", sbi->s_mb_##name); \ + return 0; \ +} \ + \ +static int ext4_mb_##name##_proc_open(struct inode *inode, struct file *file)\ +{ \ + return single_open(file, ext4_mb_##name##_proc_show, PDE(inode)->data);\ +} \ + \ +static ssize_t ext4_mb_##name##_proc_write(struct file *file, \ + const char __user *buf, size_t cnt, loff_t *ppos) \ +{ \ + struct ext4_sb_info *sbi = PDE(file->f_path.dentry->d_inode)->data;\ + char str[32]; \ + long value; \ + if (cnt >= sizeof(str)) \ + return -EINVAL; \ + if (copy_from_user(str, buf, cnt)) \ + return -EFAULT; \ + value = simple_strtol(str, NULL, 0); \ + if (value <= 0) \ + return -ERANGE; \ + sbi->s_mb_##name = value; \ + return cnt; \ +} \ + \ +static const struct file_operations ext4_mb_##name##_proc_fops = { \ + .owner = THIS_MODULE, \ + .open = ext4_mb_##name##_proc_open, \ + .read = seq_read, \ + .llseek = seq_lseek, \ + .release = single_release, \ + .write = ext4_mb_##name##_proc_write, \ +}; + +MB_PROC_FOPS(stats); +MB_PROC_FOPS(max_to_scan); +MB_PROC_FOPS(min_to_scan); +MB_PROC_FOPS(order2_reqs); +MB_PROC_FOPS(stream_request); +MB_PROC_FOPS(group_prealloc); + +#define MB_PROC_HANDLER(name, var) \ +do { \ + proc = proc_create_data(name, mode, sbi->s_mb_proc, \ + &ext4_mb_##var##_proc_fops, sbi); \ + if (proc == NULL) { \ + printk(KERN_ERR "EXT4-fs: can't to create %s\n", name); \ + goto err_out; \ + } \ +} while (0) + static int ext4_mb_init_per_dev_proc(struct super_block *sb) { mode_t mode = S_IFREG | S_IRUGO | S_IWUSR; struct ext4_sb_info *sbi = EXT4_SB(sb); struct proc_dir_entry *proc; + char devname[64]; - if (sbi->s_proc == NULL) + if (proc_root_ext4 == NULL) { + sbi->s_mb_proc = NULL; return -EINVAL; + } + bdevname(sb->s_bdev, devname); + sbi->s_mb_proc = proc_mkdir(devname, proc_root_ext4); + + MB_PROC_HANDLER(EXT4_MB_STATS_NAME, stats); + MB_PROC_HANDLER(EXT4_MB_MAX_TO_SCAN_NAME, max_to_scan); + MB_PROC_HANDLER(EXT4_MB_MIN_TO_SCAN_NAME, min_to_scan); + MB_PROC_HANDLER(EXT4_MB_ORDER2_REQ, order2_reqs); + MB_PROC_HANDLER(EXT4_MB_STREAM_REQ, stream_request); + MB_PROC_HANDLER(EXT4_MB_GROUP_PREALLOC, group_prealloc); - EXT4_PROC_HANDLER(EXT4_MB_STATS_NAME, mb_stats); - EXT4_PROC_HANDLER(EXT4_MB_MAX_TO_SCAN_NAME, mb_max_to_scan); - EXT4_PROC_HANDLER(EXT4_MB_MIN_TO_SCAN_NAME, mb_min_to_scan); - EXT4_PROC_HANDLER(EXT4_MB_ORDER2_REQ, mb_order2_reqs); - EXT4_PROC_HANDLER(EXT4_MB_STREAM_REQ, mb_stream_request); - EXT4_PROC_HANDLER(EXT4_MB_GROUP_PREALLOC, mb_group_prealloc); return 0; err_out: - remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_proc); - remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_proc); - remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_proc); - remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_proc); - remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_proc); - remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_proc); + printk(KERN_ERR "EXT4-fs: Unable to create %s\n", devname); + remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_mb_proc); + remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_mb_proc); + remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_mb_proc); + remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_mb_proc); + remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_mb_proc); + remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_mb_proc); + remove_proc_entry(devname, proc_root_ext4); + sbi->s_mb_proc = NULL; + return -ENOMEM; } static int ext4_mb_destroy_per_dev_proc(struct super_block *sb) { struct ext4_sb_info *sbi = EXT4_SB(sb); + char devname[64]; - if (sbi->s_proc == NULL) + if (sbi->s_mb_proc == NULL) return -EINVAL; - remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_proc); - remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_proc); - remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_proc); - remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_proc); - remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_proc); - remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_proc); + bdevname(sb->s_bdev, devname); + remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_mb_proc); + remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_mb_proc); + remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_mb_proc); + remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_mb_proc); + remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_mb_proc); + remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_mb_proc); + remove_proc_entry(devname, proc_root_ext4); return 0; } @@ -2771,6 +2854,11 @@ int __init init_ext4_mballoc(void) kmem_cache_destroy(ext4_pspace_cachep); return -ENOMEM; } +#ifdef CONFIG_PROC_FS + proc_root_ext4 = proc_mkdir("fs/ext4", NULL); + if (proc_root_ext4 == NULL) + printk(KERN_ERR "EXT4-fs: Unable to create fs/ext4\n"); +#endif return 0; } @@ -2779,6 +2867,9 @@ void exit_ext4_mballoc(void) /* XXX: synchronize_rcu(); */ kmem_cache_destroy(ext4_pspace_cachep); kmem_cache_destroy(ext4_ac_cachep); +#ifdef CONFIG_PROC_FS + remove_proc_entry("fs/ext4", NULL); +#endif } @@ -2788,7 +2879,7 @@ void exit_ext4_mballoc(void) */ static noinline_for_stack int ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, - handle_t *handle, unsigned long reserv_blks) + handle_t *handle) { struct buffer_head *bitmap_bh = NULL; struct ext4_super_block *es; @@ -2877,16 +2968,15 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, le16_add_cpu(&gdp->bg_free_blocks_count, -ac->ac_b_ex.fe_len); gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp); spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group)); - percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len); + /* - * Now reduce the dirty block count also. Should not go negative + * free blocks account has already be reduced/reserved + * at write_begin() time for delayed allocation + * do not double accounting */ if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED)) - /* release all the reserved blocks if non delalloc */ - percpu_counter_sub(&sbi->s_dirtyblocks_counter, reserv_blks); - else - percpu_counter_sub(&sbi->s_dirtyblocks_counter, - ac->ac_b_ex.fe_len); + percpu_counter_sub(&sbi->s_freeblocks_counter, + ac->ac_b_ex.fe_len); if (sbi->s_log_groups_per_flex) { ext4_group_t flex_group = ext4_flex_group(sbi, @@ -3794,7 +3884,7 @@ ext4_mb_discard_group_preallocations(struct super_block *sb, * * FIXME!! Make sure it is valid at all the call sites */ -void ext4_discard_preallocations(struct inode *inode) +void ext4_mb_discard_inode_preallocations(struct inode *inode) { struct ext4_inode_info *ei = EXT4_I(inode); struct super_block *sb = inode->i_sb; @@ -3806,7 +3896,7 @@ void ext4_discard_preallocations(struct inode *inode) struct ext4_buddy e4b; int err; - if (!S_ISREG(inode->i_mode)) { + if (!test_opt(sb, MBALLOC) || !S_ISREG(inode->i_mode)) { /*BUG_ON(!list_empty(&ei->i_prealloc_list));*/ return; } @@ -4004,7 +4094,8 @@ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac) * per cpu locality group is to reduce the contention between block * request from multiple CPUs. */ - ac->ac_lg = per_cpu_ptr(sbi->s_locality_groups, raw_smp_processor_id()); + ac->ac_lg = &sbi->s_locality_groups[get_cpu()]; + put_cpu(); /* we're going to use group allocation */ ac->ac_flags |= EXT4_MB_HINT_GROUP_ALLOC; @@ -4278,32 +4369,33 @@ static int ext4_mb_discard_preallocations(struct super_block *sb, int needed) ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, struct ext4_allocation_request *ar, int *errp) { - int freed; struct ext4_allocation_context *ac = NULL; struct ext4_sb_info *sbi; struct super_block *sb; ext4_fsblk_t block = 0; - unsigned long inquota; - unsigned long reserv_blks = 0; + int freed; + int inquota; sb = ar->inode->i_sb; sbi = EXT4_SB(sb); + if (!test_opt(sb, MBALLOC)) { + block = ext4_old_new_blocks(handle, ar->inode, ar->goal, + &(ar->len), errp); + return block; + } if (!EXT4_I(ar->inode)->i_delalloc_reserved_flag) { /* * With delalloc we already reserved the blocks */ - while (ar->len && ext4_claim_free_blocks(sbi, ar->len)) { - /* let others to free the space */ - yield(); - ar->len = ar->len >> 1; - } - if (!ar->len) { - *errp = -ENOSPC; - return 0; - } - reserv_blks = ar->len; + ar->len = ext4_has_free_blocks(sbi, ar->len); } + + if (ar->len == 0) { + *errp = -ENOSPC; + return 0; + } + while (ar->len && DQUOT_ALLOC_BLOCK(ar->inode, ar->len)) { ar->flags |= EXT4_MB_HINT_NOPREALLOC; ar->len--; @@ -4349,7 +4441,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, } if (likely(ac->ac_status == AC_STATUS_FOUND)) { - *errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_blks); + *errp = ext4_mb_mark_diskspace_used(ac, handle); if (*errp == -EAGAIN) { ac->ac_b_ex.fe_group = 0; ac->ac_b_ex.fe_start = 0; diff --git a/trunk/fs/ext4/mballoc.h b/trunk/fs/ext4/mballoc.h index b3b4828f8b89..c7c9906c2a75 100644 --- a/trunk/fs/ext4/mballoc.h +++ b/trunk/fs/ext4/mballoc.h @@ -257,6 +257,7 @@ static void ext4_mb_store_history(struct ext4_allocation_context *ac); #define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) +static struct proc_dir_entry *proc_root_ext4; struct buffer_head *read_block_bitmap(struct super_block *, ext4_group_t); static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, diff --git a/trunk/fs/ext4/migrate.c b/trunk/fs/ext4/migrate.c index f2a9cf498ecd..46fc0b5b12ba 100644 --- a/trunk/fs/ext4/migrate.c +++ b/trunk/fs/ext4/migrate.c @@ -447,7 +447,8 @@ static int free_ext_block(handle_t *handle, struct inode *inode) } -int ext4_ext_migrate(struct inode *inode) +int ext4_ext_migrate(struct inode *inode, struct file *filp, + unsigned int cmd, unsigned long arg) { handle_t *handle; int retval = 0, i; @@ -514,6 +515,12 @@ int ext4_ext_migrate(struct inode *inode) * trascation that created the inode. Later as and * when we add extents we extent the journal */ + /* + * inode_mutex prevent write and truncate on the file. Read still goes + * through. We take i_data_sem in ext4_ext_swap_inode_data before we + * switch the inode format to prevent read. + */ + mutex_lock(&(inode->i_mutex)); /* * Even though we take i_mutex we can still cause block allocation * via mmap write to holes. If we have allocated new blocks we fail @@ -616,6 +623,7 @@ int ext4_ext_migrate(struct inode *inode) tmp_inode->i_nlink = 0; ext4_journal_stop(handle); + mutex_unlock(&(inode->i_mutex)); if (tmp_inode) iput(tmp_inode); diff --git a/trunk/fs/ext4/namei.c b/trunk/fs/ext4/namei.c index 92db9e945147..387ad98350c3 100644 --- a/trunk/fs/ext4/namei.c +++ b/trunk/fs/ext4/namei.c @@ -151,36 +151,34 @@ struct dx_map_entry static inline ext4_lblk_t dx_get_block(struct dx_entry *entry); static void dx_set_block(struct dx_entry *entry, ext4_lblk_t value); -static inline unsigned dx_get_hash(struct dx_entry *entry); -static void dx_set_hash(struct dx_entry *entry, unsigned value); -static unsigned dx_get_count(struct dx_entry *entries); -static unsigned dx_get_limit(struct dx_entry *entries); -static void dx_set_count(struct dx_entry *entries, unsigned value); -static void dx_set_limit(struct dx_entry *entries, unsigned value); -static unsigned dx_root_limit(struct inode *dir, unsigned infosize); -static unsigned dx_node_limit(struct inode *dir); -static struct dx_frame *dx_probe(const struct qstr *d_name, +static inline unsigned dx_get_hash (struct dx_entry *entry); +static void dx_set_hash (struct dx_entry *entry, unsigned value); +static unsigned dx_get_count (struct dx_entry *entries); +static unsigned dx_get_limit (struct dx_entry *entries); +static void dx_set_count (struct dx_entry *entries, unsigned value); +static void dx_set_limit (struct dx_entry *entries, unsigned value); +static unsigned dx_root_limit (struct inode *dir, unsigned infosize); +static unsigned dx_node_limit (struct inode *dir); +static struct dx_frame *dx_probe(struct dentry *dentry, struct inode *dir, struct dx_hash_info *hinfo, struct dx_frame *frame, int *err); -static void dx_release(struct dx_frame *frames); -static int dx_make_map(struct ext4_dir_entry_2 *de, int size, - struct dx_hash_info *hinfo, struct dx_map_entry map[]); +static void dx_release (struct dx_frame *frames); +static int dx_make_map (struct ext4_dir_entry_2 *de, int size, + struct dx_hash_info *hinfo, struct dx_map_entry map[]); static void dx_sort_map(struct dx_map_entry *map, unsigned count); -static struct ext4_dir_entry_2 *dx_move_dirents(char *from, char *to, +static struct ext4_dir_entry_2 *dx_move_dirents (char *from, char *to, struct dx_map_entry *offsets, int count); -static struct ext4_dir_entry_2* dx_pack_dirents(char *base, int size); +static struct ext4_dir_entry_2* dx_pack_dirents (char *base, int size); static void dx_insert_block(struct dx_frame *frame, u32 hash, ext4_lblk_t block); static int ext4_htree_next_block(struct inode *dir, __u32 hash, struct dx_frame *frame, struct dx_frame *frames, __u32 *start_hash); -static struct buffer_head * ext4_dx_find_entry(struct inode *dir, - const struct qstr *d_name, - struct ext4_dir_entry_2 **res_dir, - int *err); +static struct buffer_head * ext4_dx_find_entry(struct dentry *dentry, + struct ext4_dir_entry_2 **res_dir, int *err); static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, struct inode *inode); @@ -209,44 +207,44 @@ static inline void dx_set_block(struct dx_entry *entry, ext4_lblk_t value) entry->block = cpu_to_le32(value); } -static inline unsigned dx_get_hash(struct dx_entry *entry) +static inline unsigned dx_get_hash (struct dx_entry *entry) { return le32_to_cpu(entry->hash); } -static inline void dx_set_hash(struct dx_entry *entry, unsigned value) +static inline void dx_set_hash (struct dx_entry *entry, unsigned value) { entry->hash = cpu_to_le32(value); } -static inline unsigned dx_get_count(struct dx_entry *entries) +static inline unsigned dx_get_count (struct dx_entry *entries) { return le16_to_cpu(((struct dx_countlimit *) entries)->count); } -static inline unsigned dx_get_limit(struct dx_entry *entries) +static inline unsigned dx_get_limit (struct dx_entry *entries) { return le16_to_cpu(((struct dx_countlimit *) entries)->limit); } -static inline void dx_set_count(struct dx_entry *entries, unsigned value) +static inline void dx_set_count (struct dx_entry *entries, unsigned value) { ((struct dx_countlimit *) entries)->count = cpu_to_le16(value); } -static inline void dx_set_limit(struct dx_entry *entries, unsigned value) +static inline void dx_set_limit (struct dx_entry *entries, unsigned value) { ((struct dx_countlimit *) entries)->limit = cpu_to_le16(value); } -static inline unsigned dx_root_limit(struct inode *dir, unsigned infosize) +static inline unsigned dx_root_limit (struct inode *dir, unsigned infosize) { unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(1) - EXT4_DIR_REC_LEN(2) - infosize; return entry_space / sizeof(struct dx_entry); } -static inline unsigned dx_node_limit(struct inode *dir) +static inline unsigned dx_node_limit (struct inode *dir) { unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(0); return entry_space / sizeof(struct dx_entry); @@ -256,12 +254,12 @@ static inline unsigned dx_node_limit(struct inode *dir) * Debug */ #ifdef DX_DEBUG -static void dx_show_index(char * label, struct dx_entry *entries) +static void dx_show_index (char * label, struct dx_entry *entries) { int i, n = dx_get_count (entries); - printk(KERN_DEBUG "%s index ", label); + printk("%s index ", label); for (i = 0; i < n; i++) { - printk("%x->%lu ", i ? dx_get_hash(entries + i) : + printk("%x->%lu ", i? dx_get_hash(entries + i) : 0, (unsigned long)dx_get_block(entries + i)); } printk("\n"); @@ -308,7 +306,7 @@ struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir, struct dx_entry *entries, int levels) { unsigned blocksize = dir->i_sb->s_blocksize; - unsigned count = dx_get_count(entries), names = 0, space = 0, i; + unsigned count = dx_get_count (entries), names = 0, space = 0, i; unsigned bcount = 0; struct buffer_head *bh; int err; @@ -327,12 +325,11 @@ struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir, names += stats.names; space += stats.space; bcount += stats.bcount; - brelse(bh); + brelse (bh); } if (bcount) - printk(KERN_DEBUG "%snames %u, fullness %u (%u%%)\n", - levels ? "" : " ", names, space/bcount, - (space/bcount)*100/blocksize); + printk("%snames %u, fullness %u (%u%%)\n", levels?"":" ", + names, space/bcount,(space/bcount)*100/blocksize); return (struct stats) { names, space, bcount}; } #endif /* DX_DEBUG */ @@ -347,7 +344,7 @@ struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir, * back to userspace. */ static struct dx_frame * -dx_probe(const struct qstr *d_name, struct inode *dir, +dx_probe(struct dentry *dentry, struct inode *dir, struct dx_hash_info *hinfo, struct dx_frame *frame_in, int *err) { unsigned count, indirect; @@ -358,6 +355,8 @@ dx_probe(const struct qstr *d_name, struct inode *dir, u32 hash; frame->bh = NULL; + if (dentry) + dir = dentry->d_parent->d_inode; if (!(bh = ext4_bread (NULL,dir, 0, 0, err))) goto fail; root = (struct dx_root *) bh->b_data; @@ -373,8 +372,8 @@ dx_probe(const struct qstr *d_name, struct inode *dir, } hinfo->hash_version = root->info.hash_version; hinfo->seed = EXT4_SB(dir->i_sb)->s_hash_seed; - if (d_name) - ext4fs_dirhash(d_name->name, d_name->len, hinfo); + if (dentry) + ext4fs_dirhash(dentry->d_name.name, dentry->d_name.len, hinfo); hash = hinfo->hash; if (root->info.unused_flags & 1) { @@ -407,7 +406,7 @@ dx_probe(const struct qstr *d_name, struct inode *dir, goto fail; } - dxtrace(printk("Look up %x", hash)); + dxtrace (printk("Look up %x", hash)); while (1) { count = dx_get_count(entries); @@ -556,7 +555,7 @@ static int ext4_htree_next_block(struct inode *dir, __u32 hash, 0, &err))) return err; /* Failure */ p++; - brelse(p->bh); + brelse (p->bh); p->bh = bh; p->at = p->entries = ((struct dx_node *) bh->b_data)->entries; } @@ -594,7 +593,7 @@ static int htree_dirblock_to_tree(struct file *dir_file, /* On error, skip the f_pos to the next block. */ dir_file->f_pos = (dir_file->f_pos | (dir->i_sb->s_blocksize - 1)) + 1; - brelse(bh); + brelse (bh); return count; } ext4fs_dirhash(de->name, de->name_len, hinfo); @@ -636,8 +635,8 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash, int ret, err; __u32 hashval; - dxtrace(printk(KERN_DEBUG "In htree_fill_tree, start hash: %x:%x\n", - start_hash, start_minor_hash)); + dxtrace(printk("In htree_fill_tree, start hash: %x:%x\n", start_hash, + start_minor_hash)); dir = dir_file->f_path.dentry->d_inode; if (!(EXT4_I(dir)->i_flags & EXT4_INDEX_FL)) { hinfo.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version; @@ -649,7 +648,7 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash, } hinfo.hash = start_hash; hinfo.minor_hash = 0; - frame = dx_probe(NULL, dir, &hinfo, frames, &err); + frame = dx_probe(NULL, dir_file->f_path.dentry->d_inode, &hinfo, frames, &err); if (!frame) return err; @@ -695,8 +694,8 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash, break; } dx_release(frames); - dxtrace(printk(KERN_DEBUG "Fill tree: returned %d entries, " - "next hash: %x\n", count, *next_hash)); + dxtrace(printk("Fill tree: returned %d entries, next hash: %x\n", + count, *next_hash)); return count; errout: dx_release(frames); @@ -803,17 +802,17 @@ static inline int ext4_match (int len, const char * const name, /* * Returns 0 if not found, -1 on failure, and 1 on success */ -static inline int search_dirblock(struct buffer_head *bh, +static inline int search_dirblock(struct buffer_head * bh, struct inode *dir, - const struct qstr *d_name, + struct dentry *dentry, unsigned long offset, struct ext4_dir_entry_2 ** res_dir) { struct ext4_dir_entry_2 * de; char * dlimit; int de_len; - const char *name = d_name->name; - int namelen = d_name->len; + const char *name = dentry->d_name.name; + int namelen = dentry->d_name.len; de = (struct ext4_dir_entry_2 *) bh->b_data; dlimit = bh->b_data + dir->i_sb->s_blocksize; @@ -852,13 +851,12 @@ static inline int search_dirblock(struct buffer_head *bh, * The returned buffer_head has ->b_count elevated. The caller is expected * to brelse() it when appropriate. */ -static struct buffer_head * ext4_find_entry (struct inode *dir, - const struct qstr *d_name, +static struct buffer_head * ext4_find_entry (struct dentry *dentry, struct ext4_dir_entry_2 ** res_dir) { - struct super_block *sb; - struct buffer_head *bh_use[NAMEI_RA_SIZE]; - struct buffer_head *bh, *ret = NULL; + struct super_block * sb; + struct buffer_head * bh_use[NAMEI_RA_SIZE]; + struct buffer_head * bh, *ret = NULL; ext4_lblk_t start, block, b; int ra_max = 0; /* Number of bh's in the readahead buffer, bh_use[] */ @@ -867,15 +865,16 @@ static struct buffer_head * ext4_find_entry (struct inode *dir, int num = 0; ext4_lblk_t nblocks; int i, err; + struct inode *dir = dentry->d_parent->d_inode; int namelen; *res_dir = NULL; sb = dir->i_sb; - namelen = d_name->len; + namelen = dentry->d_name.len; if (namelen > EXT4_NAME_LEN) return NULL; if (is_dx(dir)) { - bh = ext4_dx_find_entry(dir, d_name, res_dir, &err); + bh = ext4_dx_find_entry(dentry, res_dir, &err); /* * On success, or if the error was file not found, * return. Otherwise, fall back to doing a search the @@ -883,8 +882,7 @@ static struct buffer_head * ext4_find_entry (struct inode *dir, */ if (bh || (err != ERR_BAD_DX_DIR)) return bh; - dxtrace(printk(KERN_DEBUG "ext4_find_entry: dx failed, " - "falling back\n")); + dxtrace(printk("ext4_find_entry: dx failed, falling back\n")); } nblocks = dir->i_size >> EXT4_BLOCK_SIZE_BITS(sb); start = EXT4_I(dir)->i_dir_start_lookup; @@ -928,7 +926,7 @@ static struct buffer_head * ext4_find_entry (struct inode *dir, brelse(bh); goto next; } - i = search_dirblock(bh, dir, d_name, + i = search_dirblock(bh, dir, dentry, block << EXT4_BLOCK_SIZE_BITS(sb), res_dir); if (i == 1) { EXT4_I(dir)->i_dir_start_lookup = block; @@ -958,11 +956,11 @@ static struct buffer_head * ext4_find_entry (struct inode *dir, cleanup_and_exit: /* Clean up the read-ahead blocks */ for (; ra_ptr < ra_max; ra_ptr++) - brelse(bh_use[ra_ptr]); + brelse (bh_use[ra_ptr]); return ret; } -static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct qstr *d_name, +static struct buffer_head * ext4_dx_find_entry(struct dentry *dentry, struct ext4_dir_entry_2 **res_dir, int *err) { struct super_block * sb; @@ -973,13 +971,14 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct q struct buffer_head *bh; ext4_lblk_t block; int retval; - int namelen = d_name->len; - const u8 *name = d_name->name; + int namelen = dentry->d_name.len; + const u8 *name = dentry->d_name.name; + struct inode *dir = dentry->d_parent->d_inode; sb = dir->i_sb; /* NFS may look up ".." - look at dx_root directory block */ if (namelen > 2 || name[0] != '.'||(name[1] != '.' && name[1] != '\0')){ - if (!(frame = dx_probe(d_name, dir, &hinfo, frames, err))) + if (!(frame = dx_probe(dentry, NULL, &hinfo, frames, err))) return NULL; } else { frame = frames; @@ -1011,7 +1010,7 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct q return bh; } } - brelse(bh); + brelse (bh); /* Check to see if we should continue to search */ retval = ext4_htree_next_block(dir, hash, frame, frames, NULL); @@ -1026,25 +1025,25 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct q *err = -ENOENT; errout: - dxtrace(printk(KERN_DEBUG "%s not found\n", name)); + dxtrace(printk("%s not found\n", name)); dx_release (frames); return NULL; } -static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) +static struct dentry *ext4_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd) { - struct inode *inode; - struct ext4_dir_entry_2 *de; - struct buffer_head *bh; + struct inode * inode; + struct ext4_dir_entry_2 * de; + struct buffer_head * bh; if (dentry->d_name.len > EXT4_NAME_LEN) return ERR_PTR(-ENAMETOOLONG); - bh = ext4_find_entry(dir, &dentry->d_name, &de); + bh = ext4_find_entry(dentry, &de); inode = NULL; if (bh) { unsigned long ino = le32_to_cpu(de->inode); - brelse(bh); + brelse (bh); if (!ext4_valid_inum(dir->i_sb, ino)) { ext4_error(dir->i_sb, "ext4_lookup", "bad inode number: %lu", ino); @@ -1063,14 +1062,15 @@ struct dentry *ext4_get_parent(struct dentry *child) unsigned long ino; struct dentry *parent; struct inode *inode; - static const struct qstr dotdot = { - .name = "..", - .len = 2, - }; + struct dentry dotdot; struct ext4_dir_entry_2 * de; struct buffer_head *bh; - bh = ext4_find_entry(child->d_inode, &dotdot, &de); + dotdot.d_name.name = ".."; + dotdot.d_name.len = 2; + dotdot.d_parent = child; /* confusing, isn't it! */ + + bh = ext4_find_entry(&dotdot, &de); inode = NULL; if (!bh) return ERR_PTR(-ENOENT); @@ -1201,10 +1201,10 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, /* create map in the end of data2 block */ map = (struct dx_map_entry *) (data2 + blocksize); - count = dx_make_map((struct ext4_dir_entry_2 *) data1, + count = dx_make_map ((struct ext4_dir_entry_2 *) data1, blocksize, hinfo, map); map -= count; - dx_sort_map(map, count); + dx_sort_map (map, count); /* Split the existing block in the middle, size-wise */ size = 0; move = 0; @@ -1225,7 +1225,7 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, /* Fancy dance to stay within two buffers */ de2 = dx_move_dirents(data1, data2, map + split, count - split); - de = dx_pack_dirents(data1, blocksize); + de = dx_pack_dirents(data1,blocksize); de->rec_len = ext4_rec_len_to_disk(data1 + blocksize - (char *) de); de2->rec_len = ext4_rec_len_to_disk(data2 + blocksize - (char *) de2); dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data1, blocksize, 1)); @@ -1237,15 +1237,15 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, swap(*bh, bh2); de = de2; } - dx_insert_block(frame, hash2 + continued, newblock); - err = ext4_journal_dirty_metadata(handle, bh2); + dx_insert_block (frame, hash2 + continued, newblock); + err = ext4_journal_dirty_metadata (handle, bh2); if (err) goto journal_error; - err = ext4_journal_dirty_metadata(handle, frame->bh); + err = ext4_journal_dirty_metadata (handle, frame->bh); if (err) goto journal_error; - brelse(bh2); - dxtrace(dx_show_index("frame", frame->entries)); + brelse (bh2); + dxtrace(dx_show_index ("frame", frame->entries)); return de; journal_error: @@ -1271,7 +1271,7 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, */ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, struct inode *inode, struct ext4_dir_entry_2 *de, - struct buffer_head *bh) + struct buffer_head * bh) { struct inode *dir = dentry->d_parent->d_inode; const char *name = dentry->d_name.name; @@ -1288,11 +1288,11 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, while ((char *) de <= top) { if (!ext4_check_dir_entry("ext4_add_entry", dir, de, bh, offset)) { - brelse(bh); + brelse (bh); return -EIO; } - if (ext4_match(namelen, name, de)) { - brelse(bh); + if (ext4_match (namelen, name, de)) { + brelse (bh); return -EEXIST; } nlen = EXT4_DIR_REC_LEN(de->name_len); @@ -1329,7 +1329,7 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, } else de->inode = 0; de->name_len = namelen; - memcpy(de->name, name, namelen); + memcpy (de->name, name, namelen); /* * XXX shouldn't update any times until successful * completion of syscall, but too many callers depend @@ -1377,7 +1377,7 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, struct fake_dirent *fde; blocksize = dir->i_sb->s_blocksize; - dxtrace(printk(KERN_DEBUG "Creating index\n")); + dxtrace(printk("Creating index\n")); retval = ext4_journal_get_write_access(handle, bh); if (retval) { ext4_std_error(dir->i_sb, retval); @@ -1386,7 +1386,7 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, } root = (struct dx_root *) bh->b_data; - bh2 = ext4_append(handle, dir, &block, &retval); + bh2 = ext4_append (handle, dir, &block, &retval); if (!(bh2)) { brelse(bh); return retval; @@ -1412,9 +1412,9 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, root->info.info_length = sizeof(root->info); root->info.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version; entries = root->entries; - dx_set_block(entries, 1); - dx_set_count(entries, 1); - dx_set_limit(entries, dx_root_limit(dir, sizeof(root->info))); + dx_set_block (entries, 1); + dx_set_count (entries, 1); + dx_set_limit (entries, dx_root_limit(dir, sizeof(root->info))); /* Initialize as for dx_probe */ hinfo.hash_version = root->info.hash_version; @@ -1443,14 +1443,14 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, * may not sleep between calling this and putting something into * the entry, as someone else might have used it while you slept. */ -static int ext4_add_entry(handle_t *handle, struct dentry *dentry, - struct inode *inode) +static int ext4_add_entry (handle_t *handle, struct dentry *dentry, + struct inode *inode) { struct inode *dir = dentry->d_parent->d_inode; unsigned long offset; - struct buffer_head *bh; + struct buffer_head * bh; struct ext4_dir_entry_2 *de; - struct super_block *sb; + struct super_block * sb; int retval; int dx_fallback=0; unsigned blocksize; @@ -1500,13 +1500,13 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, struct dx_frame frames[2], *frame; struct dx_entry *entries, *at; struct dx_hash_info hinfo; - struct buffer_head *bh; + struct buffer_head * bh; struct inode *dir = dentry->d_parent->d_inode; - struct super_block *sb = dir->i_sb; + struct super_block * sb = dir->i_sb; struct ext4_dir_entry_2 *de; int err; - frame = dx_probe(&dentry->d_name, dir, &hinfo, frames, &err); + frame = dx_probe(dentry, NULL, &hinfo, frames, &err); if (!frame) return err; entries = frame->entries; @@ -1527,7 +1527,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, } /* Block full, should compress but for now just split */ - dxtrace(printk(KERN_DEBUG "using %u of %u node entries\n", + dxtrace(printk("using %u of %u node entries\n", dx_get_count(entries), dx_get_limit(entries))); /* Need to split index? */ if (dx_get_count(entries) == dx_get_limit(entries)) { @@ -1559,8 +1559,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, if (levels) { unsigned icount1 = icount/2, icount2 = icount - icount1; unsigned hash2 = dx_get_hash(entries + icount1); - dxtrace(printk(KERN_DEBUG "Split index %i/%i\n", - icount1, icount2)); + dxtrace(printk("Split index %i/%i\n", icount1, icount2)); BUFFER_TRACE(frame->bh, "get_write_access"); /* index root */ err = ext4_journal_get_write_access(handle, @@ -1568,11 +1567,11 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, if (err) goto journal_error; - memcpy((char *) entries2, (char *) (entries + icount1), - icount2 * sizeof(struct dx_entry)); - dx_set_count(entries, icount1); - dx_set_count(entries2, icount2); - dx_set_limit(entries2, dx_node_limit(dir)); + memcpy ((char *) entries2, (char *) (entries + icount1), + icount2 * sizeof(struct dx_entry)); + dx_set_count (entries, icount1); + dx_set_count (entries2, icount2); + dx_set_limit (entries2, dx_node_limit(dir)); /* Which index block gets the new entry? */ if (at - entries >= icount1) { @@ -1580,17 +1579,16 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, frame->entries = entries = entries2; swap(frame->bh, bh2); } - dx_insert_block(frames + 0, hash2, newblock); - dxtrace(dx_show_index("node", frames[1].entries)); - dxtrace(dx_show_index("node", + dx_insert_block (frames + 0, hash2, newblock); + dxtrace(dx_show_index ("node", frames[1].entries)); + dxtrace(dx_show_index ("node", ((struct dx_node *) bh2->b_data)->entries)); err = ext4_journal_dirty_metadata(handle, bh2); if (err) goto journal_error; brelse (bh2); } else { - dxtrace(printk(KERN_DEBUG - "Creating second level index...\n")); + dxtrace(printk("Creating second level index...\n")); memcpy((char *) entries2, (char *) entries, icount * sizeof(struct dx_entry)); dx_set_limit(entries2, dx_node_limit(dir)); @@ -1632,12 +1630,12 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, * ext4_delete_entry deletes a directory entry by merging it with the * previous entry */ -static int ext4_delete_entry(handle_t *handle, - struct inode *dir, - struct ext4_dir_entry_2 *de_del, - struct buffer_head *bh) +static int ext4_delete_entry (handle_t *handle, + struct inode * dir, + struct ext4_dir_entry_2 * de_del, + struct buffer_head * bh) { - struct ext4_dir_entry_2 *de, *pde; + struct ext4_dir_entry_2 * de, * pde; int i; i = 0; @@ -1718,11 +1716,11 @@ static int ext4_add_nondir(handle_t *handle, * If the create succeeds, we fill in the inode information * with d_instantiate(). */ -static int ext4_create(struct inode *dir, struct dentry *dentry, int mode, - struct nameidata *nd) +static int ext4_create (struct inode * dir, struct dentry * dentry, int mode, + struct nameidata *nd) { handle_t *handle; - struct inode *inode; + struct inode * inode; int err, retries = 0; retry: @@ -1749,8 +1747,8 @@ static int ext4_create(struct inode *dir, struct dentry *dentry, int mode, return err; } -static int ext4_mknod(struct inode *dir, struct dentry *dentry, - int mode, dev_t rdev) +static int ext4_mknod (struct inode * dir, struct dentry *dentry, + int mode, dev_t rdev) { handle_t *handle; struct inode *inode; @@ -1769,11 +1767,11 @@ static int ext4_mknod(struct inode *dir, struct dentry *dentry, if (IS_DIRSYNC(dir)) handle->h_sync = 1; - inode = ext4_new_inode(handle, dir, mode); + inode = ext4_new_inode (handle, dir, mode); err = PTR_ERR(inode); if (!IS_ERR(inode)) { init_special_inode(inode, inode->i_mode, rdev); -#ifdef CONFIG_EXT4_FS_XATTR +#ifdef CONFIG_EXT4DEV_FS_XATTR inode->i_op = &ext4_special_inode_operations; #endif err = ext4_add_nondir(handle, dentry, inode); @@ -1784,12 +1782,12 @@ static int ext4_mknod(struct inode *dir, struct dentry *dentry, return err; } -static int ext4_mkdir(struct inode *dir, struct dentry *dentry, int mode) +static int ext4_mkdir(struct inode * dir, struct dentry * dentry, int mode) { handle_t *handle; - struct inode *inode; - struct buffer_head *dir_block; - struct ext4_dir_entry_2 *de; + struct inode * inode; + struct buffer_head * dir_block; + struct ext4_dir_entry_2 * de; int err, retries = 0; if (EXT4_DIR_LINK_MAX(dir)) @@ -1805,7 +1803,7 @@ static int ext4_mkdir(struct inode *dir, struct dentry *dentry, int mode) if (IS_DIRSYNC(dir)) handle->h_sync = 1; - inode = ext4_new_inode(handle, dir, S_IFDIR | mode); + inode = ext4_new_inode (handle, dir, S_IFDIR | mode); err = PTR_ERR(inode); if (IS_ERR(inode)) goto out_stop; @@ -1813,7 +1811,7 @@ static int ext4_mkdir(struct inode *dir, struct dentry *dentry, int mode) inode->i_op = &ext4_dir_inode_operations; inode->i_fop = &ext4_dir_operations; inode->i_size = EXT4_I(inode)->i_disksize = inode->i_sb->s_blocksize; - dir_block = ext4_bread(handle, inode, 0, 1, &err); + dir_block = ext4_bread (handle, inode, 0, 1, &err); if (!dir_block) goto out_clear_inode; BUFFER_TRACE(dir_block, "get_write_access"); @@ -1822,26 +1820,26 @@ static int ext4_mkdir(struct inode *dir, struct dentry *dentry, int mode) de->inode = cpu_to_le32(inode->i_ino); de->name_len = 1; de->rec_len = ext4_rec_len_to_disk(EXT4_DIR_REC_LEN(de->name_len)); - strcpy(de->name, "."); + strcpy (de->name, "."); ext4_set_de_type(dir->i_sb, de, S_IFDIR); de = ext4_next_entry(de); de->inode = cpu_to_le32(dir->i_ino); de->rec_len = ext4_rec_len_to_disk(inode->i_sb->s_blocksize - EXT4_DIR_REC_LEN(1)); de->name_len = 2; - strcpy(de->name, ".."); + strcpy (de->name, ".."); ext4_set_de_type(dir->i_sb, de, S_IFDIR); inode->i_nlink = 2; BUFFER_TRACE(dir_block, "call ext4_journal_dirty_metadata"); ext4_journal_dirty_metadata(handle, dir_block); - brelse(dir_block); + brelse (dir_block); ext4_mark_inode_dirty(handle, inode); - err = ext4_add_entry(handle, dentry, inode); + err = ext4_add_entry (handle, dentry, inode); if (err) { out_clear_inode: clear_nlink(inode); ext4_mark_inode_dirty(handle, inode); - iput(inode); + iput (inode); goto out_stop; } ext4_inc_count(handle, dir); @@ -1858,17 +1856,17 @@ static int ext4_mkdir(struct inode *dir, struct dentry *dentry, int mode) /* * routine to check that the specified directory is empty (for rmdir) */ -static int empty_dir(struct inode *inode) +static int empty_dir (struct inode * inode) { unsigned long offset; - struct buffer_head *bh; - struct ext4_dir_entry_2 *de, *de1; - struct super_block *sb; + struct buffer_head * bh; + struct ext4_dir_entry_2 * de, * de1; + struct super_block * sb; int err = 0; sb = inode->i_sb; if (inode->i_size < EXT4_DIR_REC_LEN(1) + EXT4_DIR_REC_LEN(2) || - !(bh = ext4_bread(NULL, inode, 0, 0, &err))) { + !(bh = ext4_bread (NULL, inode, 0, 0, &err))) { if (err) ext4_error(inode->i_sb, __func__, "error %d reading directory #%lu offset 0", @@ -1883,23 +1881,23 @@ static int empty_dir(struct inode *inode) de1 = ext4_next_entry(de); if (le32_to_cpu(de->inode) != inode->i_ino || !le32_to_cpu(de1->inode) || - strcmp(".", de->name) || - strcmp("..", de1->name)) { - ext4_warning(inode->i_sb, "empty_dir", - "bad directory (dir #%lu) - no `.' or `..'", - inode->i_ino); - brelse(bh); + strcmp (".", de->name) || + strcmp ("..", de1->name)) { + ext4_warning (inode->i_sb, "empty_dir", + "bad directory (dir #%lu) - no `.' or `..'", + inode->i_ino); + brelse (bh); return 1; } offset = ext4_rec_len_from_disk(de->rec_len) + ext4_rec_len_from_disk(de1->rec_len); de = ext4_next_entry(de1); - while (offset < inode->i_size) { + while (offset < inode->i_size ) { if (!bh || (void *) de >= (void *) (bh->b_data+sb->s_blocksize)) { err = 0; - brelse(bh); - bh = ext4_bread(NULL, inode, + brelse (bh); + bh = ext4_bread (NULL, inode, offset >> EXT4_BLOCK_SIZE_BITS(sb), 0, &err); if (!bh) { if (err) @@ -1919,13 +1917,13 @@ static int empty_dir(struct inode *inode) continue; } if (le32_to_cpu(de->inode)) { - brelse(bh); + brelse (bh); return 0; } offset += ext4_rec_len_from_disk(de->rec_len); de = ext4_next_entry(de); } - brelse(bh); + brelse (bh); return 1; } @@ -1956,8 +1954,8 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode) * ->i_nlink. For, say it, character device. Not a regular file, * not a directory, not a symlink and ->i_nlink > 0. */ - J_ASSERT((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || - S_ISLNK(inode->i_mode)) || inode->i_nlink == 0); + J_ASSERT ((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || + S_ISLNK(inode->i_mode)) || inode->i_nlink == 0); BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get_write_access"); err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh); @@ -2071,12 +2069,12 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode) goto out_err; } -static int ext4_rmdir(struct inode *dir, struct dentry *dentry) +static int ext4_rmdir (struct inode * dir, struct dentry *dentry) { int retval; - struct inode *inode; - struct buffer_head *bh; - struct ext4_dir_entry_2 *de; + struct inode * inode; + struct buffer_head * bh; + struct ext4_dir_entry_2 * de; handle_t *handle; /* Initialize quotas before so that eventual writes go in @@ -2087,7 +2085,7 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry) return PTR_ERR(handle); retval = -ENOENT; - bh = ext4_find_entry(dir, &dentry->d_name, &de); + bh = ext4_find_entry (dentry, &de); if (!bh) goto end_rmdir; @@ -2101,16 +2099,16 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry) goto end_rmdir; retval = -ENOTEMPTY; - if (!empty_dir(inode)) + if (!empty_dir (inode)) goto end_rmdir; retval = ext4_delete_entry(handle, dir, de, bh); if (retval) goto end_rmdir; if (!EXT4_DIR_LINK_EMPTY(inode)) - ext4_warning(inode->i_sb, "ext4_rmdir", - "empty directory has too many links (%d)", - inode->i_nlink); + ext4_warning (inode->i_sb, "ext4_rmdir", + "empty directory has too many links (%d)", + inode->i_nlink); inode->i_version++; clear_nlink(inode); /* There's no need to set i_disksize: the fact that i_nlink is @@ -2126,16 +2124,16 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry) end_rmdir: ext4_journal_stop(handle); - brelse(bh); + brelse (bh); return retval; } -static int ext4_unlink(struct inode *dir, struct dentry *dentry) +static int ext4_unlink(struct inode * dir, struct dentry *dentry) { int retval; - struct inode *inode; - struct buffer_head *bh; - struct ext4_dir_entry_2 *de; + struct inode * inode; + struct buffer_head * bh; + struct ext4_dir_entry_2 * de; handle_t *handle; /* Initialize quotas before so that eventual writes go @@ -2149,7 +2147,7 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry) handle->h_sync = 1; retval = -ENOENT; - bh = ext4_find_entry(dir, &dentry->d_name, &de); + bh = ext4_find_entry (dentry, &de); if (!bh) goto end_unlink; @@ -2160,9 +2158,9 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry) goto end_unlink; if (!inode->i_nlink) { - ext4_warning(inode->i_sb, "ext4_unlink", - "Deleting nonexistent file (%lu), %d", - inode->i_ino, inode->i_nlink); + ext4_warning (inode->i_sb, "ext4_unlink", + "Deleting nonexistent file (%lu), %d", + inode->i_ino, inode->i_nlink); inode->i_nlink = 1; } retval = ext4_delete_entry(handle, dir, de, bh); @@ -2180,15 +2178,15 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry) end_unlink: ext4_journal_stop(handle); - brelse(bh); + brelse (bh); return retval; } -static int ext4_symlink(struct inode *dir, - struct dentry *dentry, const char *symname) +static int ext4_symlink (struct inode * dir, + struct dentry *dentry, const char * symname) { handle_t *handle; - struct inode *inode; + struct inode * inode; int l, err, retries = 0; l = strlen(symname)+1; @@ -2205,12 +2203,12 @@ static int ext4_symlink(struct inode *dir, if (IS_DIRSYNC(dir)) handle->h_sync = 1; - inode = ext4_new_inode(handle, dir, S_IFLNK|S_IRWXUGO); + inode = ext4_new_inode (handle, dir, S_IFLNK|S_IRWXUGO); err = PTR_ERR(inode); if (IS_ERR(inode)) goto out_stop; - if (l > sizeof(EXT4_I(inode)->i_data)) { + if (l > sizeof (EXT4_I(inode)->i_data)) { inode->i_op = &ext4_symlink_inode_operations; ext4_set_aops(inode); /* @@ -2223,14 +2221,14 @@ static int ext4_symlink(struct inode *dir, if (err) { clear_nlink(inode); ext4_mark_inode_dirty(handle, inode); - iput(inode); + iput (inode); goto out_stop; } } else { /* clear the extent format for fast symlink */ EXT4_I(inode)->i_flags &= ~EXT4_EXTENTS_FL; inode->i_op = &ext4_fast_symlink_inode_operations; - memcpy((char *)&EXT4_I(inode)->i_data, symname, l); + memcpy((char*)&EXT4_I(inode)->i_data,symname,l); inode->i_size = l-1; } EXT4_I(inode)->i_disksize = inode->i_size; @@ -2242,8 +2240,8 @@ static int ext4_symlink(struct inode *dir, return err; } -static int ext4_link(struct dentry *old_dentry, - struct inode *dir, struct dentry *dentry) +static int ext4_link (struct dentry * old_dentry, + struct inode * dir, struct dentry *dentry) { handle_t *handle; struct inode *inode = old_dentry->d_inode; @@ -2286,13 +2284,13 @@ static int ext4_link(struct dentry *old_dentry, * Anybody can rename anything with this: the permission checks are left to the * higher-level routines. */ -static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, - struct inode *new_dir, struct dentry *new_dentry) +static int ext4_rename (struct inode * old_dir, struct dentry *old_dentry, + struct inode * new_dir,struct dentry *new_dentry) { handle_t *handle; - struct inode *old_inode, *new_inode; - struct buffer_head *old_bh, *new_bh, *dir_bh; - struct ext4_dir_entry_2 *old_de, *new_de; + struct inode * old_inode, * new_inode; + struct buffer_head * old_bh, * new_bh, * dir_bh; + struct ext4_dir_entry_2 * old_de, * new_de; int retval; old_bh = new_bh = dir_bh = NULL; @@ -2310,7 +2308,7 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir)) handle->h_sync = 1; - old_bh = ext4_find_entry(old_dir, &old_dentry->d_name, &old_de); + old_bh = ext4_find_entry (old_dentry, &old_de); /* * Check for inode number is _not_ due to possible IO errors. * We might rmdir the source, keep it as pwd of some process @@ -2323,32 +2321,32 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, goto end_rename; new_inode = new_dentry->d_inode; - new_bh = ext4_find_entry(new_dir, &new_dentry->d_name, &new_de); + new_bh = ext4_find_entry (new_dentry, &new_de); if (new_bh) { if (!new_inode) { - brelse(new_bh); + brelse (new_bh); new_bh = NULL; } } if (S_ISDIR(old_inode->i_mode)) { if (new_inode) { retval = -ENOTEMPTY; - if (!empty_dir(new_inode)) + if (!empty_dir (new_inode)) goto end_rename; } retval = -EIO; - dir_bh = ext4_bread(handle, old_inode, 0, 0, &retval); + dir_bh = ext4_bread (handle, old_inode, 0, 0, &retval); if (!dir_bh) goto end_rename; if (le32_to_cpu(PARENT_INO(dir_bh->b_data)) != old_dir->i_ino) goto end_rename; retval = -EMLINK; - if (!new_inode && new_dir != old_dir && + if (!new_inode && new_dir!=old_dir && new_dir->i_nlink >= EXT4_LINK_MAX) goto end_rename; } if (!new_bh) { - retval = ext4_add_entry(handle, new_dentry, old_inode); + retval = ext4_add_entry (handle, new_dentry, old_inode); if (retval) goto end_rename; } else { @@ -2390,7 +2388,7 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, struct buffer_head *old_bh2; struct ext4_dir_entry_2 *old_de2; - old_bh2 = ext4_find_entry(old_dir, &old_dentry->d_name, &old_de2); + old_bh2 = ext4_find_entry(old_dentry, &old_de2); if (old_bh2) { retval = ext4_delete_entry(handle, old_dir, old_de2, old_bh2); @@ -2435,9 +2433,9 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, retval = 0; end_rename: - brelse(dir_bh); - brelse(old_bh); - brelse(new_bh); + brelse (dir_bh); + brelse (old_bh); + brelse (new_bh); ext4_journal_stop(handle); return retval; } @@ -2456,7 +2454,7 @@ const struct inode_operations ext4_dir_inode_operations = { .mknod = ext4_mknod, .rename = ext4_rename, .setattr = ext4_setattr, -#ifdef CONFIG_EXT4_FS_XATTR +#ifdef CONFIG_EXT4DEV_FS_XATTR .setxattr = generic_setxattr, .getxattr = generic_getxattr, .listxattr = ext4_listxattr, @@ -2467,7 +2465,7 @@ const struct inode_operations ext4_dir_inode_operations = { const struct inode_operations ext4_special_inode_operations = { .setattr = ext4_setattr, -#ifdef CONFIG_EXT4_FS_XATTR +#ifdef CONFIG_EXT4DEV_FS_XATTR .setxattr = generic_setxattr, .getxattr = generic_getxattr, .listxattr = ext4_listxattr, diff --git a/trunk/fs/ext4/resize.c b/trunk/fs/ext4/resize.c index b6ec1843a015..b3d35604ea18 100644 --- a/trunk/fs/ext4/resize.c +++ b/trunk/fs/ext4/resize.c @@ -416,8 +416,8 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, "EXT4-fs: ext4_add_new_gdb: adding group block %lu\n", gdb_num); - /* - * If we are not using the primary superblock/GDT copy don't resize, + /* + * If we are not using the primary superblock/GDT copy don't resize, * because the user tools have no way of handling this. Probably a * bad time to do it anyways. */ @@ -870,10 +870,11 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) * We can allocate memory for mb_alloc based on the new group * descriptor */ - err = ext4_mb_add_more_groupinfo(sb, input->group, gdp); - if (err) - goto exit_journal; - + if (test_opt(sb, MBALLOC)) { + err = ext4_mb_add_more_groupinfo(sb, input->group, gdp); + if (err) + goto exit_journal; + } /* * Make the new blocks and inodes valid next. We do this before * increasing the group count so that once the group is enabled, @@ -928,15 +929,6 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) percpu_counter_add(&sbi->s_freeinodes_counter, EXT4_INODES_PER_GROUP(sb)); - if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) { - ext4_group_t flex_group; - flex_group = ext4_flex_group(sbi, input->group); - sbi->s_flex_groups[flex_group].free_blocks += - input->free_blocks_count; - sbi->s_flex_groups[flex_group].free_inodes += - EXT4_INODES_PER_GROUP(sb); - } - ext4_journal_dirty_metadata(handle, sbi->s_sbh); sb->s_dirt = 1; @@ -972,7 +964,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, ext4_group_t o_groups_count; ext4_grpblk_t last; ext4_grpblk_t add; - struct buffer_head *bh; + struct buffer_head * bh; handle_t *handle; int err; unsigned long freed_blocks; @@ -1085,15 +1077,8 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, /* * Mark mballoc pages as not up to date so that they will be updated * next time they are loaded by ext4_mb_load_buddy. - * - * XXX Bad, Bad, BAD!!! We should not be overloading the - * Uptodate flag, particularly on thte bitmap bh, as way of - * hinting to ext4_mb_load_buddy() that it needs to be - * overloaded. A user could take a LVM snapshot, then do an - * on-line fsck, and clear the uptodate flag, and this would - * not be a bug in userspace, but a bug in the kernel. FIXME!!! */ - { + if (test_opt(sb, MBALLOC)) { struct ext4_sb_info *sbi = EXT4_SB(sb); struct inode *inode = sbi->s_buddy_cache; int blocks_per_page; diff --git a/trunk/fs/ext4/super.c b/trunk/fs/ext4/super.c index 0e661c569660..566344b926b7 100644 --- a/trunk/fs/ext4/super.c +++ b/trunk/fs/ext4/super.c @@ -34,8 +34,6 @@ #include #include #include -#include -#include #include #include #include @@ -47,8 +45,6 @@ #include "namei.h" #include "group.h" -struct proc_dir_entry *ext4_proc_root; - static int ext4_load_journal(struct super_block *, struct ext4_super_block *, unsigned long journal_devnum); static int ext4_create_journal(struct super_block *, struct ext4_super_block *, @@ -512,12 +508,10 @@ static void ext4_put_super(struct super_block *sb) if (!(sb->s_flags & MS_RDONLY)) { EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); es->s_state = cpu_to_le16(sbi->s_mount_state); + BUFFER_TRACE(sbi->s_sbh, "marking dirty"); + mark_buffer_dirty(sbi->s_sbh); ext4_commit_super(sb, es, 1); } - if (sbi->s_proc) { - remove_proc_entry("inode_readahead_blks", sbi->s_proc); - remove_proc_entry(sb->s_id, ext4_proc_root); - } for (i = 0; i < sbi->s_gdb_count; i++) brelse(sbi->s_group_desc[i]); @@ -526,7 +520,6 @@ static void ext4_put_super(struct super_block *sb) percpu_counter_destroy(&sbi->s_freeblocks_counter); percpu_counter_destroy(&sbi->s_freeinodes_counter); percpu_counter_destroy(&sbi->s_dirs_counter); - percpu_counter_destroy(&sbi->s_dirtyblocks_counter); brelse(sbi->s_sbh); #ifdef CONFIG_QUOTA for (i = 0; i < MAXQUOTAS; i++) @@ -569,10 +562,11 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) ei = kmem_cache_alloc(ext4_inode_cachep, GFP_NOFS); if (!ei) return NULL; -#ifdef CONFIG_EXT4_FS_POSIX_ACL +#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL ei->i_acl = EXT4_ACL_NOT_CACHED; ei->i_default_acl = EXT4_ACL_NOT_CACHED; #endif + ei->i_block_alloc_info = NULL; ei->vfs_inode.i_version = 1; ei->vfs_inode.i_data.writeback_index = 0; memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache)); @@ -605,7 +599,7 @@ static void init_once(void *foo) struct ext4_inode_info *ei = (struct ext4_inode_info *) foo; INIT_LIST_HEAD(&ei->i_orphan); -#ifdef CONFIG_EXT4_FS_XATTR +#ifdef CONFIG_EXT4DEV_FS_XATTR init_rwsem(&ei->xattr_sem); #endif init_rwsem(&ei->i_data_sem); @@ -631,7 +625,8 @@ static void destroy_inodecache(void) static void ext4_clear_inode(struct inode *inode) { -#ifdef CONFIG_EXT4_FS_POSIX_ACL + struct ext4_block_alloc_info *rsv = EXT4_I(inode)->i_block_alloc_info; +#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL if (EXT4_I(inode)->i_acl && EXT4_I(inode)->i_acl != EXT4_ACL_NOT_CACHED) { posix_acl_release(EXT4_I(inode)->i_acl); @@ -643,7 +638,10 @@ static void ext4_clear_inode(struct inode *inode) EXT4_I(inode)->i_default_acl = EXT4_ACL_NOT_CACHED; } #endif - ext4_discard_preallocations(inode); + ext4_discard_reservation(inode); + EXT4_I(inode)->i_block_alloc_info = NULL; + if (unlikely(rsv)) + kfree(rsv); jbd2_journal_release_jbd_inode(EXT4_SB(inode->i_sb)->s_journal, &EXT4_I(inode)->jinode); } @@ -656,7 +654,7 @@ static inline void ext4_show_quota_options(struct seq_file *seq, if (sbi->s_jquota_fmt) seq_printf(seq, ",jqfmt=%s", - (sbi->s_jquota_fmt == QFMT_VFS_OLD) ? "vfsold" : "vfsv0"); + (sbi->s_jquota_fmt == QFMT_VFS_OLD) ? "vfsold": "vfsv0"); if (sbi->s_qf_names[USRQUOTA]) seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]); @@ -720,7 +718,7 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) seq_puts(seq, ",debug"); if (test_opt(sb, OLDALLOC)) seq_puts(seq, ",oldalloc"); -#ifdef CONFIG_EXT4_FS_XATTR +#ifdef CONFIG_EXT4DEV_FS_XATTR if (test_opt(sb, XATTR_USER) && !(def_mount_opts & EXT4_DEFM_XATTR_USER)) seq_puts(seq, ",user_xattr"); @@ -729,7 +727,7 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) seq_puts(seq, ",nouser_xattr"); } #endif -#ifdef CONFIG_EXT4_FS_POSIX_ACL +#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL if (test_opt(sb, POSIX_ACL) && !(def_mount_opts & EXT4_DEFM_ACL)) seq_puts(seq, ",acl"); if (!test_opt(sb, POSIX_ACL) && (def_mount_opts & EXT4_DEFM_ACL)) @@ -754,6 +752,8 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) seq_puts(seq, ",nobh"); if (!test_opt(sb, EXTENTS)) seq_puts(seq, ",noextents"); + if (!test_opt(sb, MBALLOC)) + seq_puts(seq, ",nomballoc"); if (test_opt(sb, I_VERSION)) seq_puts(seq, ",i_version"); if (!test_opt(sb, DELALLOC)) @@ -773,10 +773,6 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA) seq_puts(seq, ",data=writeback"); - if (sbi->s_inode_readahead_blks != EXT4_DEF_INODE_READAHEAD_BLKS) - seq_printf(seq, ",inode_readahead_blks=%u", - sbi->s_inode_readahead_blks); - ext4_show_quota_options(seq, sb); return 0; } @@ -826,7 +822,7 @@ static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid, } #ifdef CONFIG_QUOTA -#define QTYPE2NAME(t) ((t) == USRQUOTA ? "user" : "group") +#define QTYPE2NAME(t) ((t) == USRQUOTA?"user":"group") #define QTYPE2MOPT(on, t) ((t) == USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA)) static int ext4_dquot_initialize(struct inode *inode, int type); @@ -911,7 +907,6 @@ enum { Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota, Opt_grpquota, Opt_extents, Opt_noextents, Opt_i_version, Opt_mballoc, Opt_nomballoc, Opt_stripe, Opt_delalloc, Opt_nodelalloc, - Opt_inode_readahead_blks }; static match_table_t tokens = { @@ -972,7 +967,6 @@ static match_table_t tokens = { {Opt_resize, "resize"}, {Opt_delalloc, "delalloc"}, {Opt_nodelalloc, "nodelalloc"}, - {Opt_inode_readahead_blks, "inode_readahead_blks=%u"}, {Opt_err, NULL}, }; @@ -987,7 +981,7 @@ static ext4_fsblk_t get_sb_block(void **data) /*todo: use simple_strtoll with >32bit ext4 */ sb_block = simple_strtoul(options, &options, 0); if (*options && *options != ',') { - printk(KERN_ERR "EXT4-fs: Invalid sb specification: %s\n", + printk("EXT4-fs: Invalid sb specification: %s\n", (char *) *data); return 1; } @@ -1078,7 +1072,7 @@ static int parse_options(char *options, struct super_block *sb, case Opt_orlov: clear_opt(sbi->s_mount_opt, OLDALLOC); break; -#ifdef CONFIG_EXT4_FS_XATTR +#ifdef CONFIG_EXT4DEV_FS_XATTR case Opt_user_xattr: set_opt(sbi->s_mount_opt, XATTR_USER); break; @@ -1088,11 +1082,10 @@ static int parse_options(char *options, struct super_block *sb, #else case Opt_user_xattr: case Opt_nouser_xattr: - printk(KERN_ERR "EXT4 (no)user_xattr options " - "not supported\n"); + printk("EXT4 (no)user_xattr options not supported\n"); break; #endif -#ifdef CONFIG_EXT4_FS_POSIX_ACL +#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL case Opt_acl: set_opt(sbi->s_mount_opt, POSIX_ACL); break; @@ -1102,8 +1095,7 @@ static int parse_options(char *options, struct super_block *sb, #else case Opt_acl: case Opt_noacl: - printk(KERN_ERR "EXT4 (no)acl options " - "not supported\n"); + printk("EXT4 (no)acl options not supported\n"); break; #endif case Opt_reservation: @@ -1197,8 +1189,8 @@ static int parse_options(char *options, struct super_block *sb, sb_any_quota_suspended(sb)) && !sbi->s_qf_names[qtype]) { printk(KERN_ERR - "EXT4-fs: Cannot change journaled " - "quota options when quota turned on.\n"); + "EXT4-fs: Cannot change journaled " + "quota options when quota turned on.\n"); return 0; } qname = match_strdup(&args[0]); @@ -1365,6 +1357,12 @@ static int parse_options(char *options, struct super_block *sb, case Opt_nodelalloc: clear_opt(sbi->s_mount_opt, DELALLOC); break; + case Opt_mballoc: + set_opt(sbi->s_mount_opt, MBALLOC); + break; + case Opt_nomballoc: + clear_opt(sbi->s_mount_opt, MBALLOC); + break; case Opt_stripe: if (match_int(&args[0], &option)) return 0; @@ -1375,13 +1373,6 @@ static int parse_options(char *options, struct super_block *sb, case Opt_delalloc: set_opt(sbi->s_mount_opt, DELALLOC); break; - case Opt_inode_readahead_blks: - if (match_int(&args[0], &option)) - return 0; - if (option < 0 || option > (1 << 30)) - return 0; - sbi->s_inode_readahead_blks = option; - break; default: printk(KERN_ERR "EXT4-fs: Unrecognized mount option \"%s\" " @@ -1482,9 +1473,15 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, EXT4_INODES_PER_GROUP(sb), sbi->s_mount_opt); - printk(KERN_INFO "EXT4 FS on %s, %s journal on %s\n", - sb->s_id, EXT4_SB(sb)->s_journal->j_inode ? "internal" : - "external", EXT4_SB(sb)->s_journal->j_devname); + printk(KERN_INFO "EXT4 FS on %s, ", sb->s_id); + if (EXT4_SB(sb)->s_journal->j_inode == NULL) { + char b[BDEVNAME_SIZE]; + + printk("external journal on %s\n", + bdevname(EXT4_SB(sb)->s_journal->j_dev, b)); + } else { + printk("internal journal\n"); + } return res; } @@ -1507,11 +1504,8 @@ static int ext4_fill_flex_info(struct super_block *sb) sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex; groups_per_flex = 1 << sbi->s_log_groups_per_flex; - /* We allocate both existing and potentially added groups */ - flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) + - ((sbi->s_es->s_reserved_gdt_blocks +1 ) << - EXT4_DESC_PER_BLOCK_BITS(sb))) / - groups_per_flex; + flex_group_count = (sbi->s_groups_count + groups_per_flex - 1) / + groups_per_flex; sbi->s_flex_groups = kzalloc(flex_group_count * sizeof(struct flex_groups), GFP_KERNEL); if (sbi->s_flex_groups == NULL) { @@ -1590,7 +1584,7 @@ static int ext4_check_descriptors(struct super_block *sb) if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) flexbg_flag = 1; - ext4_debug("Checking group descriptors"); + ext4_debug ("Checking group descriptors"); for (i = 0; i < sbi->s_groups_count; i++) { struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL); @@ -1629,10 +1623,8 @@ static int ext4_check_descriptors(struct super_block *sb) "Checksum for group %lu failed (%u!=%u)\n", i, le16_to_cpu(ext4_group_desc_csum(sbi, i, gdp)), le16_to_cpu(gdp->bg_checksum)); - if (!(sb->s_flags & MS_RDONLY)) { - spin_unlock(sb_bgl_lock(sbi, i)); + if (!(sb->s_flags & MS_RDONLY)) return 0; - } } spin_unlock(sb_bgl_lock(sbi, i)); if (!flexbg_flag) @@ -1722,9 +1714,9 @@ static void ext4_orphan_cleanup(struct super_block *sb, DQUOT_INIT(inode); if (inode->i_nlink) { printk(KERN_DEBUG - "%s: truncating inode %lu to %lld bytes\n", + "%s: truncating inode %lu to %Ld bytes\n", __func__, inode->i_ino, inode->i_size); - jbd_debug(2, "truncating inode %lu to %lld bytes\n", + jbd_debug(2, "truncating inode %lu to %Ld bytes\n", inode->i_ino, inode->i_size); ext4_truncate(inode); nr_truncates++; @@ -1922,7 +1914,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) unsigned long journal_devnum = 0; unsigned long def_mount_opts; struct inode *root; - char *cp; int ret = -EINVAL; int blocksize; int db_count; @@ -1939,15 +1930,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) sbi->s_mount_opt = 0; sbi->s_resuid = EXT4_DEF_RESUID; sbi->s_resgid = EXT4_DEF_RESGID; - sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS; sbi->s_sb_block = sb_block; unlock_kernel(); - /* Cleanup superblock name */ - for (cp = sb->s_id; (cp = strchr(cp, '/'));) - *cp = '!'; - blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE); if (!blocksize) { printk(KERN_ERR "EXT4-fs: unable to set blocksize\n"); @@ -1987,11 +1973,11 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) set_opt(sbi->s_mount_opt, GRPID); if (def_mount_opts & EXT4_DEFM_UID16) set_opt(sbi->s_mount_opt, NO_UID32); -#ifdef CONFIG_EXT4_FS_XATTR +#ifdef CONFIG_EXT4DEV_FS_XATTR if (def_mount_opts & EXT4_DEFM_XATTR_USER) set_opt(sbi->s_mount_opt, XATTR_USER); #endif -#ifdef CONFIG_EXT4_FS_POSIX_ACL +#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL if (def_mount_opts & EXT4_DEFM_ACL) set_opt(sbi->s_mount_opt, POSIX_ACL); #endif @@ -2026,6 +2012,11 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) ext4_warning(sb, __func__, "extents feature not enabled on this filesystem, " "use tune2fs.\n"); + /* + * turn on mballoc code by default in ext4 filesystem + * Use -o nomballoc to turn it off + */ + set_opt(sbi->s_mount_opt, MBALLOC); /* * enable delayed allocation by default @@ -2049,6 +2040,16 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) "EXT4-fs warning: feature flags set on rev 0 fs, " "running e2fsck is recommended\n"); + /* + * Since ext4 is still considered development code, we require + * that the TEST_FILESYS flag in s->flags be set. + */ + if (!(le32_to_cpu(es->s_flags) & EXT2_FLAGS_TEST_FILESYS)) { + printk(KERN_WARNING "EXT4-fs: %s: not marked " + "OK to use with test code.\n", sb->s_id); + goto failed_mount; + } + /* * Check feature flags regardless of the revision level, since we * previously didn't change the revision level when setting the flags, @@ -2218,14 +2219,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) goto failed_mount; } - if (ext4_proc_root) - sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root); - - if (sbi->s_proc) - proc_create_data("inode_readahead_blks", 0644, sbi->s_proc, - &ext4_ui_proc_fops, - &sbi->s_inode_readahead_blks); - bgl_lock_init(&sbi->s_blockgroup_lock); for (i = 0; i < db_count; i++) { @@ -2264,14 +2257,24 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) err = percpu_counter_init(&sbi->s_dirs_counter, ext4_count_dirs(sb)); } - if (!err) { - err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0); - } if (err) { printk(KERN_ERR "EXT4-fs: insufficient memory\n"); goto failed_mount3; } + /* per fileystem reservation list head & lock */ + spin_lock_init(&sbi->s_rsv_window_lock); + sbi->s_rsv_window_root = RB_ROOT; + /* Add a single, static dummy reservation to the start of the + * reservation window list --- it gives us a placeholder for + * append-at-start-of-list which makes the allocation logic + * _much_ simpler. */ + sbi->s_rsv_window_head.rsv_start = EXT4_RESERVE_WINDOW_NOT_ALLOCATED; + sbi->s_rsv_window_head.rsv_end = EXT4_RESERVE_WINDOW_NOT_ALLOCATED; + sbi->s_rsv_window_head.rsv_alloc_hit = 0; + sbi->s_rsv_window_head.rsv_goal_size = 0; + ext4_rsv_window_add(sb, &sbi->s_rsv_window_head); + sbi->s_stripe = ext4_get_stripe_size(sbi); /* @@ -2468,12 +2471,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) printk(KERN_INFO "EXT4-fs: delayed allocation enabled\n"); ext4_ext_init(sb); - err = ext4_mb_init(sb, needs_recovery); - if (err) { - printk(KERN_ERR "EXT4-fs: failed to initalize mballoc (%d)\n", - err); - goto failed_mount4; - } + ext4_mb_init(sb, needs_recovery); lock_kernel(); return 0; @@ -2491,16 +2489,11 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) percpu_counter_destroy(&sbi->s_freeblocks_counter); percpu_counter_destroy(&sbi->s_freeinodes_counter); percpu_counter_destroy(&sbi->s_dirs_counter); - percpu_counter_destroy(&sbi->s_dirtyblocks_counter); failed_mount2: for (i = 0; i < db_count; i++) brelse(sbi->s_group_desc[i]); kfree(sbi->s_group_desc); failed_mount: - if (sbi->s_proc) { - remove_proc_entry("inode_readahead_blks", sbi->s_proc); - remove_proc_entry(sb->s_id, ext4_proc_root); - } #ifdef CONFIG_QUOTA for (i = 0; i < MAXQUOTAS; i++) kfree(sbi->s_qf_names[i]); @@ -2559,7 +2552,7 @@ static journal_t *ext4_get_journal(struct super_block *sb, return NULL; } - jbd_debug(2, "Journal inode found at %p: %lld bytes\n", + jbd_debug(2, "Journal inode found at %p: %Ld bytes\n", journal_inode, journal_inode->i_size); if (!S_ISREG(journal_inode->i_mode)) { printk(KERN_ERR "EXT4-fs: invalid journal inode.\n"); @@ -2722,11 +2715,6 @@ static int ext4_load_journal(struct super_block *sb, return -EINVAL; } - if (journal->j_flags & JBD2_BARRIER) - printk(KERN_INFO "EXT4-fs: barriers enabled\n"); - else - printk(KERN_INFO "EXT4-fs: barriers disabled\n"); - if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) { err = jbd2_journal_update_format(journal); if (err) { @@ -2811,34 +2799,13 @@ static void ext4_commit_super(struct super_block *sb, if (!sbh) return; - if (buffer_write_io_error(sbh)) { - /* - * Oh, dear. A previous attempt to write the - * superblock failed. This could happen because the - * USB device was yanked out. Or it could happen to - * be a transient write error and maybe the block will - * be remapped. Nothing we can do but to retry the - * write and hope for the best. - */ - printk(KERN_ERR "ext4: previous I/O error to " - "superblock detected for %s.\n", sb->s_id); - clear_buffer_write_io_error(sbh); - set_buffer_uptodate(sbh); - } es->s_wtime = cpu_to_le32(get_seconds()); ext4_free_blocks_count_set(es, ext4_count_free_blocks(sb)); es->s_free_inodes_count = cpu_to_le32(ext4_count_free_inodes(sb)); BUFFER_TRACE(sbh, "marking dirty"); mark_buffer_dirty(sbh); - if (sync) { + if (sync) sync_dirty_buffer(sbh); - if (buffer_write_io_error(sbh)) { - printk(KERN_ERR "ext4: I/O error while writing " - "superblock for %s.\n", sb->s_id); - clear_buffer_write_io_error(sbh); - set_buffer_uptodate(sbh); - } - } } @@ -2940,7 +2907,6 @@ static int ext4_sync_fs(struct super_block *sb, int wait) { tid_t target; - trace_mark(ext4_sync_fs, "dev %s wait %d", sb->s_id, wait); sb->s_dirt = 0; if (jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, &target)) { if (wait) @@ -3196,8 +3162,7 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_type = EXT4_SUPER_MAGIC; buf->f_bsize = sb->s_blocksize; buf->f_blocks = ext4_blocks_count(es) - sbi->s_overhead_last; - buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter) - - percpu_counter_sum_positive(&sbi->s_dirtyblocks_counter); + buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter); ext4_free_blocks_count_set(es, buf->f_bfree); buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es); if (buf->f_bfree < ext4_r_blocks_count(es)) @@ -3467,7 +3432,7 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type, handle_t *handle = journal_current_handle(); if (!handle) { - printk(KERN_WARNING "EXT4-fs: Quota write (off=%llu, len=%llu)" + printk(KERN_WARNING "EXT4-fs: Quota write (off=%Lu, len=%Lu)" " cancelled because transaction is not started.\n", (unsigned long long)off, (unsigned long long)len); return -EIO; @@ -3528,82 +3493,18 @@ static int ext4_get_sb(struct file_system_type *fs_type, return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super, mnt); } -#ifdef CONFIG_PROC_FS -static int ext4_ui_proc_show(struct seq_file *m, void *v) -{ - unsigned int *p = m->private; - - seq_printf(m, "%u\n", *p); - return 0; -} - -static int ext4_ui_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, ext4_ui_proc_show, PDE(inode)->data); -} - -static ssize_t ext4_ui_proc_write(struct file *file, const char __user *buf, - size_t cnt, loff_t *ppos) -{ - unsigned int *p = PDE(file->f_path.dentry->d_inode)->data; - char str[32]; - unsigned long value; - - if (cnt >= sizeof(str)) - return -EINVAL; - if (copy_from_user(str, buf, cnt)) - return -EFAULT; - value = simple_strtol(str, NULL, 0); - if (value < 0) - return -ERANGE; - *p = value; - return cnt; -} - -const struct file_operations ext4_ui_proc_fops = { - .owner = THIS_MODULE, - .open = ext4_ui_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, - .write = ext4_ui_proc_write, -}; -#endif - -static struct file_system_type ext4_fs_type = { - .owner = THIS_MODULE, - .name = "ext4", - .get_sb = ext4_get_sb, - .kill_sb = kill_block_super, - .fs_flags = FS_REQUIRES_DEV, -}; - -#ifdef CONFIG_EXT4DEV_COMPAT -static int ext4dev_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, struct vfsmount *mnt) -{ - printk(KERN_WARNING "EXT4-fs: Update your userspace programs " - "to mount using ext4\n"); - printk(KERN_WARNING "EXT4-fs: ext4dev backwards compatibility " - "will go away by 2.6.31\n"); - return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super, mnt); -} - static struct file_system_type ext4dev_fs_type = { .owner = THIS_MODULE, .name = "ext4dev", - .get_sb = ext4dev_get_sb, + .get_sb = ext4_get_sb, .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; -MODULE_ALIAS("ext4dev"); -#endif static int __init init_ext4_fs(void) { int err; - ext4_proc_root = proc_mkdir("fs/ext4", NULL); err = init_ext4_mballoc(); if (err) return err; @@ -3614,16 +3515,9 @@ static int __init init_ext4_fs(void) err = init_inodecache(); if (err) goto out1; - err = register_filesystem(&ext4_fs_type); - if (err) - goto out; -#ifdef CONFIG_EXT4DEV_COMPAT err = register_filesystem(&ext4dev_fs_type); - if (err) { - unregister_filesystem(&ext4_fs_type); + if (err) goto out; - } -#endif return 0; out: destroy_inodecache(); @@ -3636,14 +3530,10 @@ static int __init init_ext4_fs(void) static void __exit exit_ext4_fs(void) { - unregister_filesystem(&ext4_fs_type); -#ifdef CONFIG_EXT4DEV_COMPAT unregister_filesystem(&ext4dev_fs_type); -#endif destroy_inodecache(); exit_ext4_xattr(); exit_ext4_mballoc(); - remove_proc_entry("fs/ext4", NULL); } MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); diff --git a/trunk/fs/ext4/symlink.c b/trunk/fs/ext4/symlink.c index 00740cb32be3..e9178643dc01 100644 --- a/trunk/fs/ext4/symlink.c +++ b/trunk/fs/ext4/symlink.c @@ -23,10 +23,10 @@ #include "ext4.h" #include "xattr.h" -static void *ext4_follow_link(struct dentry *dentry, struct nameidata *nd) +static void * ext4_follow_link(struct dentry *dentry, struct nameidata *nd) { struct ext4_inode_info *ei = EXT4_I(dentry->d_inode); - nd_set_link(nd, (char *) ei->i_data); + nd_set_link(nd, (char*)ei->i_data); return NULL; } @@ -34,7 +34,7 @@ const struct inode_operations ext4_symlink_inode_operations = { .readlink = generic_readlink, .follow_link = page_follow_link_light, .put_link = page_put_link, -#ifdef CONFIG_EXT4_FS_XATTR +#ifdef CONFIG_EXT4DEV_FS_XATTR .setxattr = generic_setxattr, .getxattr = generic_getxattr, .listxattr = ext4_listxattr, @@ -45,7 +45,7 @@ const struct inode_operations ext4_symlink_inode_operations = { const struct inode_operations ext4_fast_symlink_inode_operations = { .readlink = generic_readlink, .follow_link = ext4_follow_link, -#ifdef CONFIG_EXT4_FS_XATTR +#ifdef CONFIG_EXT4DEV_FS_XATTR .setxattr = generic_setxattr, .getxattr = generic_getxattr, .listxattr = ext4_listxattr, diff --git a/trunk/fs/ext4/xattr.c b/trunk/fs/ext4/xattr.c index 80626d516fee..8954208b4893 100644 --- a/trunk/fs/ext4/xattr.c +++ b/trunk/fs/ext4/xattr.c @@ -99,12 +99,12 @@ static struct mb_cache *ext4_xattr_cache; static struct xattr_handler *ext4_xattr_handler_map[] = { [EXT4_XATTR_INDEX_USER] = &ext4_xattr_user_handler, -#ifdef CONFIG_EXT4_FS_POSIX_ACL +#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL [EXT4_XATTR_INDEX_POSIX_ACL_ACCESS] = &ext4_xattr_acl_access_handler, [EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT] = &ext4_xattr_acl_default_handler, #endif [EXT4_XATTR_INDEX_TRUSTED] = &ext4_xattr_trusted_handler, -#ifdef CONFIG_EXT4_FS_SECURITY +#ifdef CONFIG_EXT4DEV_FS_SECURITY [EXT4_XATTR_INDEX_SECURITY] = &ext4_xattr_security_handler, #endif }; @@ -112,11 +112,11 @@ static struct xattr_handler *ext4_xattr_handler_map[] = { struct xattr_handler *ext4_xattr_handlers[] = { &ext4_xattr_user_handler, &ext4_xattr_trusted_handler, -#ifdef CONFIG_EXT4_FS_POSIX_ACL +#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL &ext4_xattr_acl_access_handler, &ext4_xattr_acl_default_handler, #endif -#ifdef CONFIG_EXT4_FS_SECURITY +#ifdef CONFIG_EXT4DEV_FS_SECURITY &ext4_xattr_security_handler, #endif NULL @@ -959,7 +959,6 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, struct ext4_xattr_block_find bs = { .s = { .not_found = -ENODATA, }, }; - unsigned long no_expand; int error; if (!name) @@ -967,9 +966,6 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, if (strlen(name) > 255) return -ERANGE; down_write(&EXT4_I(inode)->xattr_sem); - no_expand = EXT4_I(inode)->i_state & EXT4_STATE_NO_EXPAND; - EXT4_I(inode)->i_state |= EXT4_STATE_NO_EXPAND; - error = ext4_get_inode_loc(inode, &is.iloc); if (error) goto cleanup; @@ -1046,8 +1042,6 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, cleanup: brelse(is.iloc.bh); brelse(bs.bh); - if (no_expand == 0) - EXT4_I(inode)->i_state &= ~EXT4_STATE_NO_EXPAND; up_write(&EXT4_I(inode)->xattr_sem); return error; } diff --git a/trunk/fs/ext4/xattr.h b/trunk/fs/ext4/xattr.h index 8ede88b18c29..5992fe979bb9 100644 --- a/trunk/fs/ext4/xattr.h +++ b/trunk/fs/ext4/xattr.h @@ -51,8 +51,8 @@ struct ext4_xattr_entry { (((name_len) + EXT4_XATTR_ROUND + \ sizeof(struct ext4_xattr_entry)) & ~EXT4_XATTR_ROUND) #define EXT4_XATTR_NEXT(entry) \ - ((struct ext4_xattr_entry *)( \ - (char *)(entry) + EXT4_XATTR_LEN((entry)->e_name_len))) + ( (struct ext4_xattr_entry *)( \ + (char *)(entry) + EXT4_XATTR_LEN((entry)->e_name_len)) ) #define EXT4_XATTR_SIZE(size) \ (((size) + EXT4_XATTR_ROUND) & ~EXT4_XATTR_ROUND) @@ -63,7 +63,7 @@ struct ext4_xattr_entry { EXT4_I(inode)->i_extra_isize)) #define IFIRST(hdr) ((struct ext4_xattr_entry *)((hdr)+1)) -# ifdef CONFIG_EXT4_FS_XATTR +# ifdef CONFIG_EXT4DEV_FS_XATTR extern struct xattr_handler ext4_xattr_user_handler; extern struct xattr_handler ext4_xattr_trusted_handler; @@ -88,7 +88,7 @@ extern void exit_ext4_xattr(void); extern struct xattr_handler *ext4_xattr_handlers[]; -# else /* CONFIG_EXT4_FS_XATTR */ +# else /* CONFIG_EXT4DEV_FS_XATTR */ static inline int ext4_xattr_get(struct inode *inode, int name_index, const char *name, @@ -141,9 +141,9 @@ ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, #define ext4_xattr_handlers NULL -# endif /* CONFIG_EXT4_FS_XATTR */ +# endif /* CONFIG_EXT4DEV_FS_XATTR */ -#ifdef CONFIG_EXT4_FS_SECURITY +#ifdef CONFIG_EXT4DEV_FS_SECURITY extern int ext4_init_security(handle_t *handle, struct inode *inode, struct inode *dir); #else diff --git a/trunk/fs/ioctl.c b/trunk/fs/ioctl.c index 33a6b7ecb8b8..7db32b3382d3 100644 --- a/trunk/fs/ioctl.c +++ b/trunk/fs/ioctl.c @@ -13,14 +13,9 @@ #include #include #include -#include -#include #include -/* So that the fiemap access checks can't overflow on 32 bit machines. */ -#define FIEMAP_MAX_EXTENTS (UINT_MAX / sizeof(struct fiemap_extent)) - /** * vfs_ioctl - call filesystem specific ioctl methods * @filp: open file to invoke ioctl method on @@ -76,272 +71,6 @@ static int ioctl_fibmap(struct file *filp, int __user *p) return put_user(res, p); } -/** - * fiemap_fill_next_extent - Fiemap helper function - * @fieinfo: Fiemap context passed into ->fiemap - * @logical: Extent logical start offset, in bytes - * @phys: Extent physical start offset, in bytes - * @len: Extent length, in bytes - * @flags: FIEMAP_EXTENT flags that describe this extent - * - * Called from file system ->fiemap callback. Will populate extent - * info as passed in via arguments and copy to user memory. On - * success, extent count on fieinfo is incremented. - * - * Returns 0 on success, -errno on error, 1 if this was the last - * extent that will fit in user array. - */ -#define SET_UNKNOWN_FLAGS (FIEMAP_EXTENT_DELALLOC) -#define SET_NO_UNMOUNTED_IO_FLAGS (FIEMAP_EXTENT_DATA_ENCRYPTED) -#define SET_NOT_ALIGNED_FLAGS (FIEMAP_EXTENT_DATA_TAIL|FIEMAP_EXTENT_DATA_INLINE) -int fiemap_fill_next_extent(struct fiemap_extent_info *fieinfo, u64 logical, - u64 phys, u64 len, u32 flags) -{ - struct fiemap_extent extent; - struct fiemap_extent *dest = fieinfo->fi_extents_start; - - /* only count the extents */ - if (fieinfo->fi_extents_max == 0) { - fieinfo->fi_extents_mapped++; - return (flags & FIEMAP_EXTENT_LAST) ? 1 : 0; - } - - if (fieinfo->fi_extents_mapped >= fieinfo->fi_extents_max) - return 1; - - if (flags & SET_UNKNOWN_FLAGS) - flags |= FIEMAP_EXTENT_UNKNOWN; - if (flags & SET_NO_UNMOUNTED_IO_FLAGS) - flags |= FIEMAP_EXTENT_ENCODED; - if (flags & SET_NOT_ALIGNED_FLAGS) - flags |= FIEMAP_EXTENT_NOT_ALIGNED; - - memset(&extent, 0, sizeof(extent)); - extent.fe_logical = logical; - extent.fe_physical = phys; - extent.fe_length = len; - extent.fe_flags = flags; - - dest += fieinfo->fi_extents_mapped; - if (copy_to_user(dest, &extent, sizeof(extent))) - return -EFAULT; - - fieinfo->fi_extents_mapped++; - if (fieinfo->fi_extents_mapped == fieinfo->fi_extents_max) - return 1; - return (flags & FIEMAP_EXTENT_LAST) ? 1 : 0; -} -EXPORT_SYMBOL(fiemap_fill_next_extent); - -/** - * fiemap_check_flags - check validity of requested flags for fiemap - * @fieinfo: Fiemap context passed into ->fiemap - * @fs_flags: Set of fiemap flags that the file system understands - * - * Called from file system ->fiemap callback. This will compute the - * intersection of valid fiemap flags and those that the fs supports. That - * value is then compared against the user supplied flags. In case of bad user - * flags, the invalid values will be written into the fieinfo structure, and - * -EBADR is returned, which tells ioctl_fiemap() to return those values to - * userspace. For this reason, a return code of -EBADR should be preserved. - * - * Returns 0 on success, -EBADR on bad flags. - */ -int fiemap_check_flags(struct fiemap_extent_info *fieinfo, u32 fs_flags) -{ - u32 incompat_flags; - - incompat_flags = fieinfo->fi_flags & ~(FIEMAP_FLAGS_COMPAT & fs_flags); - if (incompat_flags) { - fieinfo->fi_flags = incompat_flags; - return -EBADR; - } - return 0; -} -EXPORT_SYMBOL(fiemap_check_flags); - -static int fiemap_check_ranges(struct super_block *sb, - u64 start, u64 len, u64 *new_len) -{ - *new_len = len; - - if (len == 0) - return -EINVAL; - - if (start > sb->s_maxbytes) - return -EFBIG; - - /* - * Shrink request scope to what the fs can actually handle. - */ - if ((len > sb->s_maxbytes) || - (sb->s_maxbytes - len) < start) - *new_len = sb->s_maxbytes - start; - - return 0; -} - -static int ioctl_fiemap(struct file *filp, unsigned long arg) -{ - struct fiemap fiemap; - struct fiemap_extent_info fieinfo = { 0, }; - struct inode *inode = filp->f_path.dentry->d_inode; - struct super_block *sb = inode->i_sb; - u64 len; - int error; - - if (!inode->i_op->fiemap) - return -EOPNOTSUPP; - - if (copy_from_user(&fiemap, (struct fiemap __user *)arg, - sizeof(struct fiemap))) - return -EFAULT; - - if (fiemap.fm_extent_count > FIEMAP_MAX_EXTENTS) - return -EINVAL; - - error = fiemap_check_ranges(sb, fiemap.fm_start, fiemap.fm_length, - &len); - if (error) - return error; - - fieinfo.fi_flags = fiemap.fm_flags; - fieinfo.fi_extents_max = fiemap.fm_extent_count; - fieinfo.fi_extents_start = (struct fiemap_extent *)(arg + sizeof(fiemap)); - - if (fiemap.fm_extent_count != 0 && - !access_ok(VERIFY_WRITE, fieinfo.fi_extents_start, - fieinfo.fi_extents_max * sizeof(struct fiemap_extent))) - return -EFAULT; - - if (fieinfo.fi_flags & FIEMAP_FLAG_SYNC) - filemap_write_and_wait(inode->i_mapping); - - error = inode->i_op->fiemap(inode, &fieinfo, fiemap.fm_start, len); - fiemap.fm_flags = fieinfo.fi_flags; - fiemap.fm_mapped_extents = fieinfo.fi_extents_mapped; - if (copy_to_user((char *)arg, &fiemap, sizeof(fiemap))) - error = -EFAULT; - - return error; -} - -#define blk_to_logical(inode, blk) (blk << (inode)->i_blkbits) -#define logical_to_blk(inode, offset) (offset >> (inode)->i_blkbits); - -/* - * @inode - the inode to map - * @arg - the pointer to userspace where we copy everything to - * @get_block - the fs's get_block function - * - * This does FIEMAP for block based inodes. Basically it will just loop - * through get_block until we hit the number of extents we want to map, or we - * go past the end of the file and hit a hole. - * - * If it is possible to have data blocks beyond a hole past @inode->i_size, then - * please do not use this function, it will stop at the first unmapped block - * beyond i_size - */ -int generic_block_fiemap(struct inode *inode, - struct fiemap_extent_info *fieinfo, u64 start, - u64 len, get_block_t *get_block) -{ - struct buffer_head tmp; - unsigned int start_blk; - long long length = 0, map_len = 0; - u64 logical = 0, phys = 0, size = 0; - u32 flags = FIEMAP_EXTENT_MERGED; - int ret = 0; - - if ((ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC))) - return ret; - - start_blk = logical_to_blk(inode, start); - - /* guard against change */ - mutex_lock(&inode->i_mutex); - - length = (long long)min_t(u64, len, i_size_read(inode)); - map_len = length; - - do { - /* - * we set b_size to the total size we want so it will map as - * many contiguous blocks as possible at once - */ - memset(&tmp, 0, sizeof(struct buffer_head)); - tmp.b_size = map_len; - - ret = get_block(inode, start_blk, &tmp, 0); - if (ret) - break; - - /* HOLE */ - if (!buffer_mapped(&tmp)) { - /* - * first hole after going past the EOF, this is our - * last extent - */ - if (length <= 0) { - flags = FIEMAP_EXTENT_MERGED|FIEMAP_EXTENT_LAST; - ret = fiemap_fill_next_extent(fieinfo, logical, - phys, size, - flags); - break; - } - - length -= blk_to_logical(inode, 1); - - /* if we have holes up to/past EOF then we're done */ - if (length <= 0) - break; - - start_blk++; - } else { - if (length <= 0 && size) { - ret = fiemap_fill_next_extent(fieinfo, logical, - phys, size, - flags); - if (ret) - break; - } - - logical = blk_to_logical(inode, start_blk); - phys = blk_to_logical(inode, tmp.b_blocknr); - size = tmp.b_size; - flags = FIEMAP_EXTENT_MERGED; - - length -= tmp.b_size; - start_blk += logical_to_blk(inode, size); - - /* - * if we are past the EOF we need to loop again to see - * if there is a hole so we can mark this extent as the - * last one, and if not keep mapping things until we - * find a hole, or we run out of slots in the extent - * array - */ - if (length <= 0) - continue; - - ret = fiemap_fill_next_extent(fieinfo, logical, phys, - size, flags); - if (ret) - break; - } - cond_resched(); - } while (1); - - mutex_unlock(&inode->i_mutex); - - /* if ret is 1 then we just hit the end of the extent array */ - if (ret == 1) - ret = 0; - - return ret; -} -EXPORT_SYMBOL(generic_block_fiemap); - static int file_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { @@ -351,8 +80,6 @@ static int file_ioctl(struct file *filp, unsigned int cmd, switch (cmd) { case FIBMAP: return ioctl_fibmap(filp, p); - case FS_IOC_FIEMAP: - return ioctl_fiemap(filp, arg); case FIGETBSZ: return put_user(inode->i_sb->s_blocksize, p); case FIONREAD: diff --git a/trunk/fs/jbd2/checkpoint.c b/trunk/fs/jbd2/checkpoint.c index 42895d369458..91389c8aee8a 100644 --- a/trunk/fs/jbd2/checkpoint.c +++ b/trunk/fs/jbd2/checkpoint.c @@ -20,7 +20,6 @@ #include #include #include -#include #include #include @@ -127,29 +126,14 @@ void __jbd2_log_wait_for_space(journal_t *journal) /* * Test again, another process may have checkpointed while we - * were waiting for the checkpoint lock. If there are no - * outstanding transactions there is nothing to checkpoint and - * we can't make progress. Abort the journal in this case. + * were waiting for the checkpoint lock */ spin_lock(&journal->j_state_lock); - spin_lock(&journal->j_list_lock); nblocks = jbd_space_needed(journal); if (__jbd2_log_space_left(journal) < nblocks) { - int chkpt = journal->j_checkpoint_transactions != NULL; - - spin_unlock(&journal->j_list_lock); spin_unlock(&journal->j_state_lock); - if (chkpt) { - jbd2_log_do_checkpoint(journal); - } else { - printk(KERN_ERR "%s: no transactions\n", - __func__); - jbd2_journal_abort(journal, 0); - } - + jbd2_log_do_checkpoint(journal); spin_lock(&journal->j_state_lock); - } else { - spin_unlock(&journal->j_list_lock); } mutex_unlock(&journal->j_checkpoint_mutex); } @@ -329,8 +313,6 @@ int jbd2_log_do_checkpoint(journal_t *journal) * journal straight away. */ result = jbd2_cleanup_journal_tail(journal); - trace_mark(jbd2_checkpoint, "dev %s need_checkpoint %d", - journal->j_devname, result); jbd_debug(1, "cleanup_journal_tail returned %d\n", result); if (result <= 0) return result; diff --git a/trunk/fs/jbd2/commit.c b/trunk/fs/jbd2/commit.c index 0d3814a35ed1..f2ad061e95ec 100644 --- a/trunk/fs/jbd2/commit.c +++ b/trunk/fs/jbd2/commit.c @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include @@ -127,7 +126,8 @@ static int journal_submit_commit_record(journal_t *journal, JBUFFER_TRACE(descriptor, "submit commit block"); lock_buffer(bh); - clear_buffer_dirty(bh); + get_bh(bh); + set_buffer_dirty(bh); set_buffer_uptodate(bh); bh->b_end_io = journal_end_buffer_io_sync; @@ -147,9 +147,12 @@ static int journal_submit_commit_record(journal_t *journal, * to remember if we sent a barrier request */ if (ret == -EOPNOTSUPP && barrier_done) { + char b[BDEVNAME_SIZE]; + printk(KERN_WARNING - "JBD: barrier-based sync failed on %s - " - "disabling barriers\n", journal->j_devname); + "JBD: barrier-based sync failed on %s - " + "disabling barriers\n", + bdevname(journal->j_dev, b)); spin_lock(&journal->j_state_lock); journal->j_flags &= ~JBD2_BARRIER; spin_unlock(&journal->j_state_lock); @@ -157,7 +160,7 @@ static int journal_submit_commit_record(journal_t *journal, /* And try again, without the barrier */ lock_buffer(bh); set_buffer_uptodate(bh); - clear_buffer_dirty(bh); + set_buffer_dirty(bh); ret = submit_bh(WRITE, bh); } *cbh = bh; @@ -368,8 +371,6 @@ void jbd2_journal_commit_transaction(journal_t *journal) commit_transaction = journal->j_running_transaction; J_ASSERT(commit_transaction->t_state == T_RUNNING); - trace_mark(jbd2_start_commit, "dev %s transaction %d", - journal->j_devname, commit_transaction->t_tid); jbd_debug(1, "JBD: starting commit of transaction %d\n", commit_transaction->t_tid); @@ -680,9 +681,11 @@ void jbd2_journal_commit_transaction(journal_t *journal) */ err = journal_finish_inode_data_buffers(journal, commit_transaction); if (err) { + char b[BDEVNAME_SIZE]; + printk(KERN_WARNING "JBD2: Detected IO errors while flushing file data " - "on %s\n", journal->j_devname); + "on %s\n", bdevname(journal->j_fs_dev, b)); err = 0; } @@ -987,9 +990,6 @@ void jbd2_journal_commit_transaction(journal_t *journal) } spin_unlock(&journal->j_list_lock); - trace_mark(jbd2_end_commit, "dev %s transaction %d head %d", - journal->j_devname, commit_transaction->t_tid, - journal->j_tail_sequence); jbd_debug(1, "JBD: commit %d complete, head %d\n", journal->j_commit_sequence, journal->j_tail_sequence); diff --git a/trunk/fs/jbd2/journal.c b/trunk/fs/jbd2/journal.c index 01c3901c3a07..8207a01c4edb 100644 --- a/trunk/fs/jbd2/journal.c +++ b/trunk/fs/jbd2/journal.c @@ -597,9 +597,13 @@ int jbd2_journal_bmap(journal_t *journal, unsigned long blocknr, if (ret) *retp = ret; else { + char b[BDEVNAME_SIZE]; + printk(KERN_ALERT "%s: journal block not found " "at offset %lu on %s\n", - __func__, blocknr, journal->j_devname); + __func__, + blocknr, + bdevname(journal->j_dev, b)); err = -EIO; __journal_abort_soft(journal, err); } @@ -897,7 +901,10 @@ static struct proc_dir_entry *proc_jbd2_stats; static void jbd2_stats_proc_init(journal_t *journal) { - journal->j_proc_entry = proc_mkdir(journal->j_devname, proc_jbd2_stats); + char name[BDEVNAME_SIZE]; + + bdevname(journal->j_dev, name); + journal->j_proc_entry = proc_mkdir(name, proc_jbd2_stats); if (journal->j_proc_entry) { proc_create_data("history", S_IRUGO, journal->j_proc_entry, &jbd2_seq_history_fops, journal); @@ -908,9 +915,12 @@ static void jbd2_stats_proc_init(journal_t *journal) static void jbd2_stats_proc_exit(journal_t *journal) { + char name[BDEVNAME_SIZE]; + + bdevname(journal->j_dev, name); remove_proc_entry("info", journal->j_proc_entry); remove_proc_entry("history", journal->j_proc_entry); - remove_proc_entry(journal->j_devname, proc_jbd2_stats); + remove_proc_entry(name, proc_jbd2_stats); } static void journal_init_stats(journal_t *journal) @@ -1008,7 +1018,6 @@ journal_t * jbd2_journal_init_dev(struct block_device *bdev, { journal_t *journal = journal_init_common(); struct buffer_head *bh; - char *p; int n; if (!journal) @@ -1030,10 +1039,6 @@ journal_t * jbd2_journal_init_dev(struct block_device *bdev, journal->j_fs_dev = fs_dev; journal->j_blk_offset = start; journal->j_maxlen = len; - bdevname(journal->j_dev, journal->j_devname); - p = journal->j_devname; - while ((p = strchr(p, '/'))) - *p = '!'; jbd2_stats_proc_init(journal); bh = __getblk(journal->j_dev, start, journal->j_blocksize); @@ -1056,7 +1061,6 @@ journal_t * jbd2_journal_init_inode (struct inode *inode) { struct buffer_head *bh; journal_t *journal = journal_init_common(); - char *p; int err; int n; unsigned long long blocknr; @@ -1066,12 +1070,6 @@ journal_t * jbd2_journal_init_inode (struct inode *inode) journal->j_dev = journal->j_fs_dev = inode->i_sb->s_bdev; journal->j_inode = inode; - bdevname(journal->j_dev, journal->j_devname); - p = journal->j_devname; - while ((p = strchr(p, '/'))) - *p = '!'; - p = journal->j_devname + strlen(journal->j_devname); - sprintf(p, ":%lu", journal->j_inode->i_ino); jbd_debug(1, "journal %p: inode %s/%ld, size %Ld, bits %d, blksize %ld\n", journal, inode->i_sb->s_id, inode->i_ino, @@ -1255,22 +1253,6 @@ void jbd2_journal_update_superblock(journal_t *journal, int wait) goto out; } - if (buffer_write_io_error(bh)) { - /* - * Oh, dear. A previous attempt to write the journal - * superblock failed. This could happen because the - * USB device was yanked out. Or it could happen to - * be a transient write error and maybe the block will - * be remapped. Nothing we can do but to retry the - * write and hope for the best. - */ - printk(KERN_ERR "JBD2: previous I/O error detected " - "for journal superblock update for %s.\n", - journal->j_devname); - clear_buffer_write_io_error(bh); - set_buffer_uptodate(bh); - } - spin_lock(&journal->j_state_lock); jbd_debug(1,"JBD: updating superblock (start %ld, seq %d, errno %d)\n", journal->j_tail, journal->j_tail_sequence, journal->j_errno); @@ -1282,16 +1264,9 @@ void jbd2_journal_update_superblock(journal_t *journal, int wait) BUFFER_TRACE(bh, "marking dirty"); mark_buffer_dirty(bh); - if (wait) { + if (wait) sync_dirty_buffer(bh); - if (buffer_write_io_error(bh)) { - printk(KERN_ERR "JBD2: I/O error detected " - "when updating journal superblock for %s.\n", - journal->j_devname); - clear_buffer_write_io_error(bh); - set_buffer_uptodate(bh); - } - } else + else ll_rw_block(SWRITE, 1, &bh); out: @@ -1785,6 +1760,23 @@ int jbd2_journal_wipe(journal_t *journal, int write) return err; } +/* + * journal_dev_name: format a character string to describe on what + * device this journal is present. + */ + +static const char *journal_dev_name(journal_t *journal, char *buffer) +{ + struct block_device *bdev; + + if (journal->j_inode) + bdev = journal->j_inode->i_sb->s_bdev; + else + bdev = journal->j_dev; + + return bdevname(bdev, buffer); +} + /* * Journal abort has very specific semantics, which we describe * for journal abort. @@ -1801,12 +1793,13 @@ int jbd2_journal_wipe(journal_t *journal, int write) void __jbd2_journal_abort_hard(journal_t *journal) { transaction_t *transaction; + char b[BDEVNAME_SIZE]; if (journal->j_flags & JBD2_ABORT) return; printk(KERN_ERR "Aborting journal on device %s.\n", - journal->j_devname); + journal_dev_name(journal, b)); spin_lock(&journal->j_state_lock); journal->j_flags |= JBD2_ABORT; diff --git a/trunk/fs/ocfs2/alloc.c b/trunk/fs/ocfs2/alloc.c index 29ff57ec5d1f..10bfb466e068 100644 --- a/trunk/fs/ocfs2/alloc.c +++ b/trunk/fs/ocfs2/alloc.c @@ -989,6 +989,15 @@ static int ocfs2_grow_tree(struct inode *inode, handle_t *handle, return ret; } +/* + * This is only valid for leaf nodes, which are the only ones that can + * have empty extents anyway. + */ +static inline int ocfs2_is_empty_extent(struct ocfs2_extent_rec *rec) +{ + return !rec->e_leaf_clusters; +} + /* * This function will discard the rightmost extent record. */ diff --git a/trunk/fs/ocfs2/alloc.h b/trunk/fs/ocfs2/alloc.h index 60cd3d59230c..42ff94bd8011 100644 --- a/trunk/fs/ocfs2/alloc.h +++ b/trunk/fs/ocfs2/alloc.h @@ -146,13 +146,4 @@ static inline unsigned int ocfs2_rec_clusters(struct ocfs2_extent_list *el, return le16_to_cpu(rec->e_leaf_clusters); } -/* - * This is only valid for leaf nodes, which are the only ones that can - * have empty extents anyway. - */ -static inline int ocfs2_is_empty_extent(struct ocfs2_extent_rec *rec) -{ - return !rec->e_leaf_clusters; -} - #endif /* OCFS2_ALLOC_H */ diff --git a/trunk/fs/ocfs2/extent_map.c b/trunk/fs/ocfs2/extent_map.c index aed268e80b49..c58668a326fe 100644 --- a/trunk/fs/ocfs2/extent_map.c +++ b/trunk/fs/ocfs2/extent_map.c @@ -25,7 +25,6 @@ #include #include #include -#include #define MLOG_MASK_PREFIX ML_EXTENT_MAP #include @@ -33,7 +32,6 @@ #include "ocfs2.h" #include "alloc.h" -#include "dlmglue.h" #include "extent_map.h" #include "inode.h" #include "super.h" @@ -284,51 +282,6 @@ void ocfs2_extent_map_insert_rec(struct inode *inode, kfree(new_emi); } -static int ocfs2_last_eb_is_empty(struct inode *inode, - struct ocfs2_dinode *di) -{ - int ret, next_free; - u64 last_eb_blk = le64_to_cpu(di->i_last_eb_blk); - struct buffer_head *eb_bh = NULL; - struct ocfs2_extent_block *eb; - struct ocfs2_extent_list *el; - - ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), last_eb_blk, - &eb_bh, OCFS2_BH_CACHED, inode); - if (ret) { - mlog_errno(ret); - goto out; - } - - eb = (struct ocfs2_extent_block *) eb_bh->b_data; - el = &eb->h_list; - - if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) { - ret = -EROFS; - OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb); - goto out; - } - - if (el->l_tree_depth) { - ocfs2_error(inode->i_sb, - "Inode %lu has non zero tree depth in " - "leaf block %llu\n", inode->i_ino, - (unsigned long long)eb_bh->b_blocknr); - ret = -EROFS; - goto out; - } - - next_free = le16_to_cpu(el->l_next_free_rec); - - if (next_free == 0 || - (next_free == 1 && ocfs2_is_empty_extent(&el->l_recs[0]))) - ret = 1; - -out: - brelse(eb_bh); - return ret; -} - /* * Return the 1st index within el which contains an extent start * larger than v_cluster. @@ -420,28 +373,42 @@ static int ocfs2_figure_hole_clusters(struct inode *inode, return ret; } -static int ocfs2_get_clusters_nocache(struct inode *inode, - struct buffer_head *di_bh, - u32 v_cluster, unsigned int *hole_len, - struct ocfs2_extent_rec *ret_rec, - unsigned int *is_last) +int ocfs2_get_clusters(struct inode *inode, u32 v_cluster, + u32 *p_cluster, u32 *num_clusters, + unsigned int *extent_flags) { - int i, ret, tree_height, len; + int ret, i; + unsigned int flags = 0; + struct buffer_head *di_bh = NULL; + struct buffer_head *eb_bh = NULL; struct ocfs2_dinode *di; - struct ocfs2_extent_block *uninitialized_var(eb); + struct ocfs2_extent_block *eb; struct ocfs2_extent_list *el; struct ocfs2_extent_rec *rec; - struct buffer_head *eb_bh = NULL; + u32 coff; + + if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { + ret = -ERANGE; + mlog_errno(ret); + goto out; + } + + ret = ocfs2_extent_map_lookup(inode, v_cluster, p_cluster, + num_clusters, extent_flags); + if (ret == 0) + goto out; - memset(ret_rec, 0, sizeof(*ret_rec)); - if (is_last) - *is_last = 0; + ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), OCFS2_I(inode)->ip_blkno, + &di_bh, OCFS2_BH_CACHED, inode); + if (ret) { + mlog_errno(ret); + goto out; + } di = (struct ocfs2_dinode *) di_bh->b_data; el = &di->id2.i_list; - tree_height = le16_to_cpu(el->l_tree_depth); - if (tree_height > 0) { + if (el->l_tree_depth) { ret = ocfs2_find_leaf(inode, el, v_cluster, &eb_bh); if (ret) { mlog_errno(ret); @@ -464,143 +431,46 @@ static int ocfs2_get_clusters_nocache(struct inode *inode, i = ocfs2_search_extent_list(el, v_cluster); if (i == -1) { /* - * Holes can be larger than the maximum size of an - * extent, so we return their lengths in a seperate - * field. + * A hole was found. Return some canned values that + * callers can key on. If asked for, num_clusters will + * be populated with the size of the hole. */ - if (hole_len) { + *p_cluster = 0; + if (num_clusters) { ret = ocfs2_figure_hole_clusters(inode, el, eb_bh, - v_cluster, &len); + v_cluster, + num_clusters); if (ret) { mlog_errno(ret); goto out; } - - *hole_len = len; } - goto out_hole; - } - - rec = &el->l_recs[i]; - - BUG_ON(v_cluster < le32_to_cpu(rec->e_cpos)); - - if (!rec->e_blkno) { - ocfs2_error(inode->i_sb, "Inode %lu has bad extent " - "record (%u, %u, 0)", inode->i_ino, - le32_to_cpu(rec->e_cpos), - ocfs2_rec_clusters(el, rec)); - ret = -EROFS; - goto out; - } + } else { + rec = &el->l_recs[i]; - *ret_rec = *rec; + BUG_ON(v_cluster < le32_to_cpu(rec->e_cpos)); - /* - * Checking for last extent is potentially expensive - we - * might have to look at the next leaf over to see if it's - * empty. - * - * The first two checks are to see whether the caller even - * cares for this information, and if the extent is at least - * the last in it's list. - * - * If those hold true, then the extent is last if any of the - * additional conditions hold true: - * - Extent list is in-inode - * - Extent list is right-most - * - Extent list is 2nd to rightmost, with empty right-most - */ - if (is_last) { - if (i == (le16_to_cpu(el->l_next_free_rec) - 1)) { - if (tree_height == 0) - *is_last = 1; - else if (eb->h_blkno == di->i_last_eb_blk) - *is_last = 1; - else if (eb->h_next_leaf_blk == di->i_last_eb_blk) { - ret = ocfs2_last_eb_is_empty(inode, di); - if (ret < 0) { - mlog_errno(ret); - goto out; - } - if (ret == 1) - *is_last = 1; - } + if (!rec->e_blkno) { + ocfs2_error(inode->i_sb, "Inode %lu has bad extent " + "record (%u, %u, 0)", inode->i_ino, + le32_to_cpu(rec->e_cpos), + ocfs2_rec_clusters(el, rec)); + ret = -EROFS; + goto out; } - } - -out_hole: - ret = 0; -out: - brelse(eb_bh); - return ret; -} - -static void ocfs2_relative_extent_offsets(struct super_block *sb, - u32 v_cluster, - struct ocfs2_extent_rec *rec, - u32 *p_cluster, u32 *num_clusters) - -{ - u32 coff = v_cluster - le32_to_cpu(rec->e_cpos); - - *p_cluster = ocfs2_blocks_to_clusters(sb, le64_to_cpu(rec->e_blkno)); - *p_cluster = *p_cluster + coff; - - if (num_clusters) - *num_clusters = le16_to_cpu(rec->e_leaf_clusters) - coff; -} - -int ocfs2_get_clusters(struct inode *inode, u32 v_cluster, - u32 *p_cluster, u32 *num_clusters, - unsigned int *extent_flags) -{ - int ret; - unsigned int uninitialized_var(hole_len), flags = 0; - struct buffer_head *di_bh = NULL; - struct ocfs2_extent_rec rec; - if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { - ret = -ERANGE; - mlog_errno(ret); - goto out; - } + coff = v_cluster - le32_to_cpu(rec->e_cpos); - ret = ocfs2_extent_map_lookup(inode, v_cluster, p_cluster, - num_clusters, extent_flags); - if (ret == 0) - goto out; + *p_cluster = ocfs2_blocks_to_clusters(inode->i_sb, + le64_to_cpu(rec->e_blkno)); + *p_cluster = *p_cluster + coff; - ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), OCFS2_I(inode)->ip_blkno, - &di_bh, OCFS2_BH_CACHED, inode); - if (ret) { - mlog_errno(ret); - goto out; - } + if (num_clusters) + *num_clusters = ocfs2_rec_clusters(el, rec) - coff; - ret = ocfs2_get_clusters_nocache(inode, di_bh, v_cluster, &hole_len, - &rec, NULL); - if (ret) { - mlog_errno(ret); - goto out; - } + flags = rec->e_flags; - if (rec.e_blkno == 0ULL) { - /* - * A hole was found. Return some canned values that - * callers can key on. If asked for, num_clusters will - * be populated with the size of the hole. - */ - *p_cluster = 0; - if (num_clusters) { - *num_clusters = hole_len; - } - } else { - ocfs2_relative_extent_offsets(inode->i_sb, v_cluster, &rec, - p_cluster, num_clusters); - flags = rec.e_flags; - - ocfs2_extent_map_insert_rec(inode, &rec); + ocfs2_extent_map_insert_rec(inode, rec); } if (extent_flags) @@ -608,6 +478,7 @@ int ocfs2_get_clusters(struct inode *inode, u32 v_cluster, out: brelse(di_bh); + brelse(eb_bh); return ret; } @@ -650,114 +521,3 @@ int ocfs2_extent_map_get_blocks(struct inode *inode, u64 v_blkno, u64 *p_blkno, out: return ret; } - -static int ocfs2_fiemap_inline(struct inode *inode, struct buffer_head *di_bh, - struct fiemap_extent_info *fieinfo, - u64 map_start) -{ - int ret; - unsigned int id_count; - struct ocfs2_dinode *di; - u64 phys; - u32 flags = FIEMAP_EXTENT_DATA_INLINE|FIEMAP_EXTENT_LAST; - struct ocfs2_inode_info *oi = OCFS2_I(inode); - - di = (struct ocfs2_dinode *)di_bh->b_data; - id_count = le16_to_cpu(di->id2.i_data.id_count); - - if (map_start < id_count) { - phys = oi->ip_blkno << inode->i_sb->s_blocksize_bits; - phys += offsetof(struct ocfs2_dinode, id2.i_data.id_data); - - ret = fiemap_fill_next_extent(fieinfo, 0, phys, id_count, - flags); - if (ret < 0) - return ret; - } - - return 0; -} - -#define OCFS2_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC) - -int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, - u64 map_start, u64 map_len) -{ - int ret, is_last; - u32 mapping_end, cpos; - unsigned int hole_size; - struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); - u64 len_bytes, phys_bytes, virt_bytes; - struct buffer_head *di_bh = NULL; - struct ocfs2_extent_rec rec; - - ret = fiemap_check_flags(fieinfo, OCFS2_FIEMAP_FLAGS); - if (ret) - return ret; - - ret = ocfs2_inode_lock(inode, &di_bh, 0); - if (ret) { - mlog_errno(ret); - goto out; - } - - down_read(&OCFS2_I(inode)->ip_alloc_sem); - - /* - * Handle inline-data separately. - */ - if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { - ret = ocfs2_fiemap_inline(inode, di_bh, fieinfo, map_start); - goto out_unlock; - } - - cpos = map_start >> osb->s_clustersize_bits; - mapping_end = ocfs2_clusters_for_bytes(inode->i_sb, - map_start + map_len); - mapping_end -= cpos; - is_last = 0; - while (cpos < mapping_end && !is_last) { - u32 fe_flags; - - ret = ocfs2_get_clusters_nocache(inode, di_bh, cpos, - &hole_size, &rec, &is_last); - if (ret) { - mlog_errno(ret); - goto out; - } - - if (rec.e_blkno == 0ULL) { - cpos += hole_size; - continue; - } - - fe_flags = 0; - if (rec.e_flags & OCFS2_EXT_UNWRITTEN) - fe_flags |= FIEMAP_EXTENT_UNWRITTEN; - if (is_last) - fe_flags |= FIEMAP_EXTENT_LAST; - len_bytes = (u64)le16_to_cpu(rec.e_leaf_clusters) << osb->s_clustersize_bits; - phys_bytes = le64_to_cpu(rec.e_blkno) << osb->sb->s_blocksize_bits; - virt_bytes = (u64)le32_to_cpu(rec.e_cpos) << osb->s_clustersize_bits; - - ret = fiemap_fill_next_extent(fieinfo, virt_bytes, phys_bytes, - len_bytes, fe_flags); - if (ret) - break; - - cpos = le32_to_cpu(rec.e_cpos)+ le16_to_cpu(rec.e_leaf_clusters); - } - - if (ret > 0) - ret = 0; - -out_unlock: - brelse(di_bh); - - up_read(&OCFS2_I(inode)->ip_alloc_sem); - - ocfs2_inode_unlock(inode, 0); -out: - - return ret; -} diff --git a/trunk/fs/ocfs2/extent_map.h b/trunk/fs/ocfs2/extent_map.h index 1b97490e1ea8..de91e3e41a22 100644 --- a/trunk/fs/ocfs2/extent_map.h +++ b/trunk/fs/ocfs2/extent_map.h @@ -50,7 +50,4 @@ int ocfs2_get_clusters(struct inode *inode, u32 v_cluster, u32 *p_cluster, int ocfs2_extent_map_get_blocks(struct inode *inode, u64 v_blkno, u64 *p_blkno, u64 *ret_count, unsigned int *extent_flags); -int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, - u64 map_start, u64 map_len); - #endif /* _EXTENT_MAP_H */ diff --git a/trunk/fs/ocfs2/file.c b/trunk/fs/ocfs2/file.c index ed38796052d2..ec2ed15c3daa 100644 --- a/trunk/fs/ocfs2/file.c +++ b/trunk/fs/ocfs2/file.c @@ -2228,7 +2228,6 @@ const struct inode_operations ocfs2_file_iops = { .getattr = ocfs2_getattr, .permission = ocfs2_permission, .fallocate = ocfs2_fallocate, - .fiemap = ocfs2_fiemap, }; const struct inode_operations ocfs2_special_file_iops = { diff --git a/trunk/include/asm-generic/vmlinux.lds.h b/trunk/include/asm-generic/vmlinux.lds.h index 7440a0dceddb..cb752ba72466 100644 --- a/trunk/include/asm-generic/vmlinux.lds.h +++ b/trunk/include/asm-generic/vmlinux.lds.h @@ -385,7 +385,6 @@ . = ALIGN(align); \ VMLINUX_SYMBOL(__per_cpu_start) = .; \ .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { \ - *(.data.percpu.page_aligned) \ *(.data.percpu) \ *(.data.percpu.shared_aligned) \ } \ diff --git a/trunk/include/asm-x86/apic.h b/trunk/include/asm-x86/apic.h index d76a0839abe9..65590c9aecd4 100644 --- a/trunk/include/asm-x86/apic.h +++ b/trunk/include/asm-x86/apic.h @@ -9,8 +9,6 @@ #include #include #include -#include -#include #define ARCH_APICTIMER_STOPS_ON_C3 1 @@ -49,6 +47,8 @@ extern int disable_apic; #ifdef CONFIG_PARAVIRT #include #else +#define apic_write native_apic_write +#define apic_read native_apic_read #define setup_boot_clock setup_boot_APIC_clock #define setup_secondary_clock setup_secondary_APIC_clock #endif @@ -60,7 +60,7 @@ extern u64 xapic_icr_read(void); extern void xapic_icr_write(u32, u32); extern int setup_profiling_timer(unsigned int); -static inline void native_apic_mem_write(u32 reg, u32 v) +static inline void native_apic_write(unsigned long reg, u32 v) { volatile u32 *addr = (volatile u32 *)(APIC_BASE + reg); @@ -69,68 +69,15 @@ static inline void native_apic_mem_write(u32 reg, u32 v) ASM_OUTPUT2("0" (v), "m" (*addr))); } -static inline u32 native_apic_mem_read(u32 reg) +static inline u32 native_apic_read(unsigned long reg) { return *((volatile u32 *)(APIC_BASE + reg)); } -static inline void native_apic_msr_write(u32 reg, u32 v) -{ - if (reg == APIC_DFR || reg == APIC_ID || reg == APIC_LDR || - reg == APIC_LVR) - return; - - wrmsr(APIC_BASE_MSR + (reg >> 4), v, 0); -} - -static inline u32 native_apic_msr_read(u32 reg) -{ - u32 low, high; - - if (reg == APIC_DFR) - return -1; - - rdmsr(APIC_BASE_MSR + (reg >> 4), low, high); - return low; -} - -#ifndef CONFIG_X86_32 -extern int x2apic, x2apic_preenabled; -extern void check_x2apic(void); -extern void enable_x2apic(void); -extern void enable_IR_x2apic(void); -extern void x2apic_icr_write(u32 low, u32 id); -#endif - -struct apic_ops { - u32 (*read)(u32 reg); - void (*write)(u32 reg, u32 v); - u64 (*icr_read)(void); - void (*icr_write)(u32 low, u32 high); - void (*wait_icr_idle)(void); - u32 (*safe_wait_icr_idle)(void); -}; - -extern struct apic_ops *apic_ops; - -#define apic_read (apic_ops->read) -#define apic_write (apic_ops->write) -#define apic_icr_read (apic_ops->icr_read) -#define apic_icr_write (apic_ops->icr_write) -#define apic_wait_icr_idle (apic_ops->wait_icr_idle) -#define safe_apic_wait_icr_idle (apic_ops->safe_wait_icr_idle) - +extern void apic_wait_icr_idle(void); +extern u32 safe_apic_wait_icr_idle(void); extern int get_physical_broadcast(void); -#ifdef CONFIG_X86_64 -static inline void ack_x2APIC_irq(void) -{ - /* Docs say use 0 for future compatibility */ - native_apic_msr_write(APIC_EOI, 0); -} -#endif - - static inline void ack_APIC_irq(void) { /* diff --git a/trunk/include/asm-x86/apicdef.h b/trunk/include/asm-x86/apicdef.h index b922c85ac91d..c40687da20fc 100644 --- a/trunk/include/asm-x86/apicdef.h +++ b/trunk/include/asm-x86/apicdef.h @@ -105,7 +105,6 @@ #define APIC_TMICT 0x380 #define APIC_TMCCT 0x390 #define APIC_TDCR 0x3E0 -#define APIC_SELF_IPI 0x3F0 #define APIC_TDR_DIV_TMBASE (1 << 2) #define APIC_TDR_DIV_1 0xB #define APIC_TDR_DIV_2 0x0 @@ -129,8 +128,6 @@ #define APIC_EILVT3 0x530 #define APIC_BASE (fix_to_virt(FIX_APIC_BASE)) -#define APIC_BASE_MSR 0x800 -#define X2APIC_ENABLE (1UL << 10) #ifdef CONFIG_X86_32 # define MAX_IO_APICS 64 diff --git a/trunk/include/asm-x86/arch_hooks.h b/trunk/include/asm-x86/arch_hooks.h index de4596b24c23..72adc3a109cc 100644 --- a/trunk/include/asm-x86/arch_hooks.h +++ b/trunk/include/asm-x86/arch_hooks.h @@ -12,6 +12,8 @@ /* these aren't arch hooks, they are generic routines * that can be used by the hooks */ extern void init_ISA_irqs(void); +extern void apic_intr_init(void); +extern void smp_intr_init(void); extern irqreturn_t timer_interrupt(int irq, void *dev_id); /* these are the defined hooks */ diff --git a/trunk/include/asm-x86/bigsmp/apicdef.h b/trunk/include/asm-x86/bigsmp/apicdef.h deleted file mode 100644 index 392c3f5ef2fe..000000000000 --- a/trunk/include/asm-x86/bigsmp/apicdef.h +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef __ASM_MACH_APICDEF_H -#define __ASM_MACH_APICDEF_H - -#define APIC_ID_MASK (0xFF<<24) - -static inline unsigned get_apic_id(unsigned long x) -{ - return (((x)>>24)&0xFF); -} - -#define GET_APIC_ID(x) get_apic_id(x) - -#endif diff --git a/trunk/include/asm-x86/bugs.h b/trunk/include/asm-x86/bugs.h index dc604985f2ad..4761c461d23a 100644 --- a/trunk/include/asm-x86/bugs.h +++ b/trunk/include/asm-x86/bugs.h @@ -2,11 +2,6 @@ #define ASM_X86__BUGS_H extern void check_bugs(void); - -#if defined(CONFIG_CPU_SUP_INTEL) && defined(CONFIG_X86_32) int ppro_with_ram_bug(void); -#else -static inline int ppro_with_ram_bug(void) { return 0; } -#endif #endif /* ASM_X86__BUGS_H */ diff --git a/trunk/include/asm-x86/cpufeature.h b/trunk/include/asm-x86/cpufeature.h index adfeae6586e1..250fa0cb144b 100644 --- a/trunk/include/asm-x86/cpufeature.h +++ b/trunk/include/asm-x86/cpufeature.h @@ -6,13 +6,7 @@ #include -#define NCAPINTS 9 /* N 32-bit words worth of info */ - -/* - * Note: If the comment begins with a quoted string, that string is used - * in /proc/cpuinfo instead of the macro name. If the string is "", - * this feature bit is not displayed in /proc/cpuinfo at all. - */ +#define NCAPINTS 8 /* N 32-bit words worth of info */ /* Intel-defined CPU features, CPUID level 0x00000001 (edx), word 0 */ #define X86_FEATURE_FPU (0*32+ 0) /* Onboard FPU */ @@ -20,7 +14,7 @@ #define X86_FEATURE_DE (0*32+ 2) /* Debugging Extensions */ #define X86_FEATURE_PSE (0*32+ 3) /* Page Size Extensions */ #define X86_FEATURE_TSC (0*32+ 4) /* Time Stamp Counter */ -#define X86_FEATURE_MSR (0*32+ 5) /* Model-Specific Registers */ +#define X86_FEATURE_MSR (0*32+ 5) /* Model-Specific Registers, RDMSR, WRMSR */ #define X86_FEATURE_PAE (0*32+ 6) /* Physical Address Extensions */ #define X86_FEATURE_MCE (0*32+ 7) /* Machine Check Architecture */ #define X86_FEATURE_CX8 (0*32+ 8) /* CMPXCHG8 instruction */ @@ -29,23 +23,22 @@ #define X86_FEATURE_MTRR (0*32+12) /* Memory Type Range Registers */ #define X86_FEATURE_PGE (0*32+13) /* Page Global Enable */ #define X86_FEATURE_MCA (0*32+14) /* Machine Check Architecture */ -#define X86_FEATURE_CMOV (0*32+15) /* CMOV instructions */ - /* (plus FCMOVcc, FCOMI with FPU) */ +#define X86_FEATURE_CMOV (0*32+15) /* CMOV instruction (FCMOVCC and FCOMI too if FPU present) */ #define X86_FEATURE_PAT (0*32+16) /* Page Attribute Table */ #define X86_FEATURE_PSE36 (0*32+17) /* 36-bit PSEs */ #define X86_FEATURE_PN (0*32+18) /* Processor serial number */ -#define X86_FEATURE_CLFLSH (0*32+19) /* "clflush" CLFLUSH instruction */ -#define X86_FEATURE_DS (0*32+21) /* "dts" Debug Store */ +#define X86_FEATURE_CLFLSH (0*32+19) /* Supports the CLFLUSH instruction */ +#define X86_FEATURE_DS (0*32+21) /* Debug Store */ #define X86_FEATURE_ACPI (0*32+22) /* ACPI via MSR */ #define X86_FEATURE_MMX (0*32+23) /* Multimedia Extensions */ -#define X86_FEATURE_FXSR (0*32+24) /* FXSAVE/FXRSTOR, CR4.OSFXSR */ -#define X86_FEATURE_XMM (0*32+25) /* "sse" */ -#define X86_FEATURE_XMM2 (0*32+26) /* "sse2" */ -#define X86_FEATURE_SELFSNOOP (0*32+27) /* "ss" CPU self snoop */ +#define X86_FEATURE_FXSR (0*32+24) /* FXSAVE and FXRSTOR instructions (fast save and restore */ + /* of FPU context), and CR4.OSFXSR available */ +#define X86_FEATURE_XMM (0*32+25) /* Streaming SIMD Extensions */ +#define X86_FEATURE_XMM2 (0*32+26) /* Streaming SIMD Extensions-2 */ +#define X86_FEATURE_SELFSNOOP (0*32+27) /* CPU self snoop */ #define X86_FEATURE_HT (0*32+28) /* Hyper-Threading */ -#define X86_FEATURE_ACC (0*32+29) /* "tm" Automatic clock control */ +#define X86_FEATURE_ACC (0*32+29) /* Automatic clock control */ #define X86_FEATURE_IA64 (0*32+30) /* IA-64 processor */ -#define X86_FEATURE_PBE (0*32+31) /* Pending Break Enable */ /* AMD-defined CPU features, CPUID level 0x80000001, word 1 */ /* Don't duplicate feature flags which are redundant with Intel! */ @@ -53,8 +46,7 @@ #define X86_FEATURE_MP (1*32+19) /* MP Capable. */ #define X86_FEATURE_NX (1*32+20) /* Execute Disable */ #define X86_FEATURE_MMXEXT (1*32+22) /* AMD MMX extensions */ -#define X86_FEATURE_FXSR_OPT (1*32+25) /* FXSAVE/FXRSTOR optimizations */ -#define X86_FEATURE_GBPAGES (1*32+26) /* "pdpe1gb" GB pages */ +#define X86_FEATURE_GBPAGES (1*32+26) /* GB pages */ #define X86_FEATURE_RDTSCP (1*32+27) /* RDTSCP */ #define X86_FEATURE_LM (1*32+29) /* Long Mode (x86-64) */ #define X86_FEATURE_3DNOWEXT (1*32+30) /* AMD 3DNow! extensions */ @@ -72,79 +64,53 @@ #define X86_FEATURE_CYRIX_ARR (3*32+ 2) /* Cyrix ARRs (= MTRRs) */ #define X86_FEATURE_CENTAUR_MCR (3*32+ 3) /* Centaur MCRs (= MTRRs) */ /* cpu types for specific tunings: */ -#define X86_FEATURE_K8 (3*32+ 4) /* "" Opteron, Athlon64 */ -#define X86_FEATURE_K7 (3*32+ 5) /* "" Athlon */ -#define X86_FEATURE_P3 (3*32+ 6) /* "" P3 */ -#define X86_FEATURE_P4 (3*32+ 7) /* "" P4 */ +#define X86_FEATURE_K8 (3*32+ 4) /* Opteron, Athlon64 */ +#define X86_FEATURE_K7 (3*32+ 5) /* Athlon */ +#define X86_FEATURE_P3 (3*32+ 6) /* P3 */ +#define X86_FEATURE_P4 (3*32+ 7) /* P4 */ #define X86_FEATURE_CONSTANT_TSC (3*32+ 8) /* TSC ticks at a constant rate */ #define X86_FEATURE_UP (3*32+ 9) /* smp kernel running on up */ -#define X86_FEATURE_FXSAVE_LEAK (3*32+10) /* "" FXSAVE leaks FOP/FIP/FOP */ +#define X86_FEATURE_FXSAVE_LEAK (3*32+10) /* FXSAVE leaks FOP/FIP/FOP */ #define X86_FEATURE_ARCH_PERFMON (3*32+11) /* Intel Architectural PerfMon */ -#define X86_FEATURE_NOPL (3*32+20) /* The NOPL (0F 1F) instructions */ #define X86_FEATURE_PEBS (3*32+12) /* Precise-Event Based Sampling */ #define X86_FEATURE_BTS (3*32+13) /* Branch Trace Store */ -#define X86_FEATURE_SYSCALL32 (3*32+14) /* "" syscall in ia32 userspace */ -#define X86_FEATURE_SYSENTER32 (3*32+15) /* "" sysenter in ia32 userspace */ -#define X86_FEATURE_REP_GOOD (3*32+16) /* rep microcode works well */ -#define X86_FEATURE_MFENCE_RDTSC (3*32+17) /* "" Mfence synchronizes RDTSC */ -#define X86_FEATURE_LFENCE_RDTSC (3*32+18) /* "" Lfence synchronizes RDTSC */ -#define X86_FEATURE_11AP (3*32+19) /* "" Bad local APIC aka 11AP */ +#define X86_FEATURE_SYSCALL32 (3*32+14) /* syscall in ia32 userspace */ +#define X86_FEATURE_SYSENTER32 (3*32+15) /* sysenter in ia32 userspace */ +#define X86_FEATURE_REP_GOOD (3*32+16) /* rep microcode works well on this CPU */ +#define X86_FEATURE_MFENCE_RDTSC (3*32+17) /* Mfence synchronizes RDTSC */ +#define X86_FEATURE_LFENCE_RDTSC (3*32+18) /* Lfence synchronizes RDTSC */ +#define X86_FEATURE_11AP (3*32+19) /* Bad local APIC aka 11AP */ #define X86_FEATURE_NOPL (3*32+20) /* The NOPL (0F 1F) instructions */ #define X86_FEATURE_AMDC1E (3*32+21) /* AMD C1E detected */ -#define X86_FEATURE_XTOPOLOGY (3*32+21) /* cpu topology enum extensions */ /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ -#define X86_FEATURE_XMM3 (4*32+ 0) /* "pni" SSE-3 */ -#define X86_FEATURE_PCLMULQDQ (4*32+ 1) /* PCLMULQDQ instruction */ -#define X86_FEATURE_DTES64 (4*32+ 2) /* 64-bit Debug Store */ -#define X86_FEATURE_MWAIT (4*32+ 3) /* "monitor" Monitor/Mwait support */ -#define X86_FEATURE_DSCPL (4*32+ 4) /* "ds_cpl" CPL Qual. Debug Store */ -#define X86_FEATURE_VMX (4*32+ 5) /* Hardware virtualization */ -#define X86_FEATURE_SMX (4*32+ 6) /* Safer mode */ +#define X86_FEATURE_XMM3 (4*32+ 0) /* Streaming SIMD Extensions-3 */ +#define X86_FEATURE_MWAIT (4*32+ 3) /* Monitor/Mwait support */ +#define X86_FEATURE_DSCPL (4*32+ 4) /* CPL Qualified Debug Store */ #define X86_FEATURE_EST (4*32+ 7) /* Enhanced SpeedStep */ #define X86_FEATURE_TM2 (4*32+ 8) /* Thermal Monitor 2 */ -#define X86_FEATURE_SSSE3 (4*32+ 9) /* Supplemental SSE-3 */ #define X86_FEATURE_CID (4*32+10) /* Context ID */ -#define X86_FEATURE_FMA (4*32+12) /* Fused multiply-add */ #define X86_FEATURE_CX16 (4*32+13) /* CMPXCHG16B */ #define X86_FEATURE_XTPR (4*32+14) /* Send Task Priority Messages */ -#define X86_FEATURE_PDCM (4*32+15) /* Performance Capabilities */ #define X86_FEATURE_DCA (4*32+18) /* Direct Cache Access */ -#define X86_FEATURE_XMM4_1 (4*32+19) /* "sse4_1" SSE-4.1 */ -#define X86_FEATURE_XMM4_2 (4*32+20) /* "sse4_2" SSE-4.2 */ -#define X86_FEATURE_X2APIC (4*32+21) /* x2APIC */ -#define X86_FEATURE_AES (4*32+25) /* AES instructions */ -#define X86_FEATURE_XSAVE (4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */ -#define X86_FEATURE_OSXSAVE (4*32+27) /* "" XSAVE enabled in the OS */ -#define X86_FEATURE_AVX (4*32+28) /* Advanced Vector Extensions */ +#define X86_FEATURE_XMM4_2 (4*32+20) /* Streaming SIMD Extensions-4.2 */ /* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */ -#define X86_FEATURE_XSTORE (5*32+ 2) /* "rng" RNG present (xstore) */ -#define X86_FEATURE_XSTORE_EN (5*32+ 3) /* "rng_en" RNG enabled */ -#define X86_FEATURE_XCRYPT (5*32+ 6) /* "ace" on-CPU crypto (xcrypt) */ -#define X86_FEATURE_XCRYPT_EN (5*32+ 7) /* "ace_en" on-CPU crypto enabled */ +#define X86_FEATURE_XSTORE (5*32+ 2) /* on-CPU RNG present (xstore insn) */ +#define X86_FEATURE_XSTORE_EN (5*32+ 3) /* on-CPU RNG enabled */ +#define X86_FEATURE_XCRYPT (5*32+ 6) /* on-CPU crypto (xcrypt insn) */ +#define X86_FEATURE_XCRYPT_EN (5*32+ 7) /* on-CPU crypto enabled */ #define X86_FEATURE_ACE2 (5*32+ 8) /* Advanced Cryptography Engine v2 */ #define X86_FEATURE_ACE2_EN (5*32+ 9) /* ACE v2 enabled */ -#define X86_FEATURE_PHE (5*32+10) /* PadLock Hash Engine */ -#define X86_FEATURE_PHE_EN (5*32+11) /* PHE enabled */ -#define X86_FEATURE_PMM (5*32+12) /* PadLock Montgomery Multiplier */ -#define X86_FEATURE_PMM_EN (5*32+13) /* PMM enabled */ +#define X86_FEATURE_PHE (5*32+ 10) /* PadLock Hash Engine */ +#define X86_FEATURE_PHE_EN (5*32+ 11) /* PHE enabled */ +#define X86_FEATURE_PMM (5*32+ 12) /* PadLock Montgomery Multiplier */ +#define X86_FEATURE_PMM_EN (5*32+ 13) /* PMM enabled */ /* More extended AMD flags: CPUID level 0x80000001, ecx, word 6 */ #define X86_FEATURE_LAHF_LM (6*32+ 0) /* LAHF/SAHF in long mode */ #define X86_FEATURE_CMP_LEGACY (6*32+ 1) /* If yes HyperThreading not valid */ -#define X86_FEATURE_SVM (6*32+ 2) /* Secure virtual machine */ -#define X86_FEATURE_EXTAPIC (6*32+ 3) /* Extended APIC space */ -#define X86_FEATURE_CR8_LEGACY (6*32+ 4) /* CR8 in 32-bit mode */ -#define X86_FEATURE_ABM (6*32+ 5) /* Advanced bit manipulation */ -#define X86_FEATURE_SSE4A (6*32+ 6) /* SSE-4A */ -#define X86_FEATURE_MISALIGNSSE (6*32+ 7) /* Misaligned SSE mode */ -#define X86_FEATURE_3DNOWPREFETCH (6*32+ 8) /* 3DNow prefetch instructions */ -#define X86_FEATURE_OSVW (6*32+ 9) /* OS Visible Workaround */ -#define X86_FEATURE_IBS (6*32+10) /* Instruction Based Sampling */ -#define X86_FEATURE_SSE5 (6*32+11) /* SSE-5 */ -#define X86_FEATURE_SKINIT (6*32+12) /* SKINIT/STGI instructions */ -#define X86_FEATURE_WDT (6*32+13) /* Watchdog timer */ +#define X86_FEATURE_IBS (6*32+ 10) /* Instruction Based Sampling */ /* * Auxiliary flags: Linux defined - For features scattered in various @@ -152,13 +118,6 @@ */ #define X86_FEATURE_IDA (7*32+ 0) /* Intel Dynamic Acceleration */ -/* Virtualization flags: Linux defined */ -#define X86_FEATURE_TPR_SHADOW (8*32+ 0) /* Intel TPR Shadow */ -#define X86_FEATURE_VNMI (8*32+ 1) /* Intel Virtual NMI */ -#define X86_FEATURE_FLEXPRIORITY (8*32+ 2) /* Intel FlexPriority */ -#define X86_FEATURE_EPT (8*32+ 3) /* Intel Extended Page Table */ -#define X86_FEATURE_VPID (8*32+ 4) /* Intel Virtual Processor ID */ - #if defined(__KERNEL__) && !defined(__ASSEMBLY__) #include @@ -192,7 +151,7 @@ extern const char * const x86_power_flags[32]; } while (0) #define setup_force_cpu_cap(bit) do { \ set_cpu_cap(&boot_cpu_data, bit); \ - clear_bit(bit, (unsigned long *)cleared_cpu_caps); \ + clear_bit(bit, (unsigned long *)cleared_cpu_caps); \ } while (0) #define cpu_has_fpu boot_cpu_has(X86_FEATURE_FPU) @@ -233,10 +192,7 @@ extern const char * const x86_power_flags[32]; #define cpu_has_gbpages boot_cpu_has(X86_FEATURE_GBPAGES) #define cpu_has_arch_perfmon boot_cpu_has(X86_FEATURE_ARCH_PERFMON) #define cpu_has_pat boot_cpu_has(X86_FEATURE_PAT) -#define cpu_has_xmm4_1 boot_cpu_has(X86_FEATURE_XMM4_1) #define cpu_has_xmm4_2 boot_cpu_has(X86_FEATURE_XMM4_2) -#define cpu_has_x2apic boot_cpu_has(X86_FEATURE_X2APIC) -#define cpu_has_xsave boot_cpu_has(X86_FEATURE_XSAVE) #if defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_64) # define cpu_has_invlpg 1 diff --git a/trunk/include/asm-x86/e820.h b/trunk/include/asm-x86/e820.h index 5abbdec06bd2..f52daf176bcb 100644 --- a/trunk/include/asm-x86/e820.h +++ b/trunk/include/asm-x86/e820.h @@ -43,7 +43,6 @@ #define E820_RESERVED 2 #define E820_ACPI 3 #define E820_NVS 4 -#define E820_UNUSABLE 5 /* reserved RAM used by kernel itself */ #define E820_RESERVED_KERN 128 @@ -122,7 +121,6 @@ extern void e820_register_active_regions(int nid, unsigned long start_pfn, extern u64 e820_hole_size(u64 start, u64 end); extern void finish_e820_parsing(void); extern void e820_reserve_resources(void); -extern void e820_reserve_resources_late(void); extern void setup_memory_map(void); extern char *default_machine_specific_memory_setup(void); extern char *machine_specific_memory_setup(void); diff --git a/trunk/include/asm-x86/es7000/apicdef.h b/trunk/include/asm-x86/es7000/apicdef.h deleted file mode 100644 index 8b234a3cb851..000000000000 --- a/trunk/include/asm-x86/es7000/apicdef.h +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef __ASM_ES7000_APICDEF_H -#define __ASM_ES7000_APICDEF_H - -#define APIC_ID_MASK (0xFF<<24) - -static inline unsigned get_apic_id(unsigned long x) -{ - return (((x)>>24)&0xFF); -} - -#define GET_APIC_ID(x) get_apic_id(x) - -#endif diff --git a/trunk/include/asm-x86/genapic_64.h b/trunk/include/asm-x86/genapic_64.h index ed6a4886c082..25097a8cc5ef 100644 --- a/trunk/include/asm-x86/genapic_64.h +++ b/trunk/include/asm-x86/genapic_64.h @@ -14,7 +14,6 @@ struct genapic { char *name; - int (*acpi_madt_oem_check)(char *oem_id, char *oem_table_id); u32 int_delivery_mode; u32 int_dest_mode; int (*apic_id_registered)(void); @@ -25,24 +24,17 @@ struct genapic { void (*send_IPI_mask)(cpumask_t mask, int vector); void (*send_IPI_allbutself)(int vector); void (*send_IPI_all)(int vector); - void (*send_IPI_self)(int vector); /* */ unsigned int (*cpu_mask_to_apicid)(cpumask_t cpumask); unsigned int (*phys_pkg_id)(int index_msb); - unsigned int (*get_apic_id)(unsigned long x); - unsigned long (*set_apic_id)(unsigned int id); - unsigned long apic_id_mask; }; extern struct genapic *genapic; extern struct genapic apic_flat; extern struct genapic apic_physflat; -extern struct genapic apic_x2apic_cluster; -extern struct genapic apic_x2apic_phys; extern int acpi_madt_oem_check(char *, char *); -extern void apic_send_IPI_self(int vector); enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC}; extern enum uv_system_type get_uv_system_type(void); extern int is_uv_system(void); diff --git a/trunk/include/asm-x86/hw_irq.h b/trunk/include/asm-x86/hw_irq.h index 50f6e0316b50..65997b15d56a 100644 --- a/trunk/include/asm-x86/hw_irq.h +++ b/trunk/include/asm-x86/hw_irq.h @@ -64,6 +64,7 @@ extern unsigned long io_apic_irqs; extern void init_VISWS_APIC_irqs(void); extern void setup_IO_APIC(void); extern void disable_IO_APIC(void); +extern void print_IO_APIC(void); extern int IO_APIC_get_PCI_irq_vector(int bus, int slot, int fn); extern void setup_ioapic_dest(void); @@ -72,9 +73,7 @@ extern void enable_IO_APIC(void); #endif /* IPI functions */ -#ifdef CONFIG_X86_32 extern void send_IPI_self(int vector); -#endif extern void send_IPI(int dest, int vector); /* Statistics */ diff --git a/trunk/include/asm-x86/i387.h b/trunk/include/asm-x86/i387.h index 9ba862a4eac0..1ecdc3ed96e4 100644 --- a/trunk/include/asm-x86/i387.h +++ b/trunk/include/asm-x86/i387.h @@ -19,9 +19,7 @@ #include #include #include -#include -extern unsigned int sig_xstate_size; extern void fpu_init(void); extern void mxcsr_feature_mask_init(void); extern int init_fpu(struct task_struct *child); @@ -33,18 +31,12 @@ extern user_regset_active_fn fpregs_active, xfpregs_active; extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get; extern user_regset_set_fn fpregs_set, xfpregs_set, fpregs_soft_set; -extern struct _fpx_sw_bytes fx_sw_reserved; #ifdef CONFIG_IA32_EMULATION -extern unsigned int sig_xstate_ia32_size; -extern struct _fpx_sw_bytes fx_sw_reserved_ia32; struct _fpstate_ia32; -struct _xstate_ia32; -extern int save_i387_xstate_ia32(void __user *buf); -extern int restore_i387_xstate_ia32(void __user *buf); +extern int save_i387_ia32(struct _fpstate_ia32 __user *buf); +extern int restore_i387_ia32(struct _fpstate_ia32 __user *buf); #endif -#define X87_FSW_ES (1 << 7) /* Exception Summary */ - #ifdef CONFIG_X86_64 /* Ignore delayed exceptions from user space */ @@ -55,7 +47,7 @@ static inline void tolerant_fwait(void) _ASM_EXTABLE(1b, 2b)); } -static inline int fxrstor_checking(struct i387_fxsave_struct *fx) +static inline int restore_fpu_checking(struct i387_fxsave_struct *fx) { int err; @@ -75,31 +67,15 @@ static inline int fxrstor_checking(struct i387_fxsave_struct *fx) return err; } -static inline int restore_fpu_checking(struct task_struct *tsk) -{ - if (task_thread_info(tsk)->status & TS_XSAVE) - return xrstor_checking(&tsk->thread.xstate->xsave); - else - return fxrstor_checking(&tsk->thread.xstate->fxsave); -} +#define X87_FSW_ES (1 << 7) /* Exception Summary */ /* AMD CPUs don't save/restore FDP/FIP/FOP unless an exception is pending. Clear the x87 state here by setting it to fixed values. The kernel data segment can be sometimes 0 and sometimes new user value. Both should be ok. Use the PDA as safe address because it should be already in L1. */ -static inline void clear_fpu_state(struct task_struct *tsk) +static inline void clear_fpu_state(struct i387_fxsave_struct *fx) { - struct xsave_struct *xstate = &tsk->thread.xstate->xsave; - struct i387_fxsave_struct *fx = &tsk->thread.xstate->fxsave; - - /* - * xsave header may indicate the init state of the FP. - */ - if ((task_thread_info(tsk)->status & TS_XSAVE) && - !(xstate->xsave_hdr.xstate_bv & XSTATE_FP)) - return; - if (unlikely(fx->swd & X87_FSW_ES)) asm volatile("fnclex"); alternative_input(ASM_NOP8 ASM_NOP2, @@ -108,7 +84,7 @@ static inline void clear_fpu_state(struct task_struct *tsk) X86_FEATURE_FXSAVE_LEAK); } -static inline int fxsave_user(struct i387_fxsave_struct __user *fx) +static inline int save_i387_checking(struct i387_fxsave_struct __user *fx) { int err; @@ -132,7 +108,7 @@ static inline int fxsave_user(struct i387_fxsave_struct __user *fx) return err; } -static inline void fxsave(struct task_struct *tsk) +static inline void __save_init_fpu(struct task_struct *tsk) { /* Using "rex64; fxsave %0" is broken because, if the memory operand uses any extended registers for addressing, a second REX prefix @@ -157,16 +133,7 @@ static inline void fxsave(struct task_struct *tsk) : "=m" (tsk->thread.xstate->fxsave) : "cdaSDb" (&tsk->thread.xstate->fxsave)); #endif -} - -static inline void __save_init_fpu(struct task_struct *tsk) -{ - if (task_thread_info(tsk)->status & TS_XSAVE) - xsave(tsk); - else - fxsave(tsk); - - clear_fpu_state(tsk); + clear_fpu_state(&tsk->thread.xstate->fxsave); task_thread_info(tsk)->status &= ~TS_USEDFPU; } @@ -181,10 +148,6 @@ static inline void tolerant_fwait(void) static inline void restore_fpu(struct task_struct *tsk) { - if (task_thread_info(tsk)->status & TS_XSAVE) { - xrstor_checking(&tsk->thread.xstate->xsave); - return; - } /* * The "nop" is needed to make the instructions the same * length. @@ -210,27 +173,6 @@ static inline void restore_fpu(struct task_struct *tsk) */ static inline void __save_init_fpu(struct task_struct *tsk) { - if (task_thread_info(tsk)->status & TS_XSAVE) { - struct xsave_struct *xstate = &tsk->thread.xstate->xsave; - struct i387_fxsave_struct *fx = &tsk->thread.xstate->fxsave; - - xsave(tsk); - - /* - * xsave header may indicate the init state of the FP. - */ - if (!(xstate->xsave_hdr.xstate_bv & XSTATE_FP)) - goto end; - - if (unlikely(fx->swd & X87_FSW_ES)) - asm volatile("fnclex"); - - /* - * we can do a simple return here or be paranoid :) - */ - goto clear_state; - } - /* Use more nops than strictly needed in case the compiler varies code */ alternative_input( @@ -240,7 +182,6 @@ static inline void __save_init_fpu(struct task_struct *tsk) X86_FEATURE_FXSR, [fx] "m" (tsk->thread.xstate->fxsave), [fsw] "m" (tsk->thread.xstate->fxsave.swd) : "memory"); -clear_state: /* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception is pending. Clear the x87 state here by setting it to fixed values. safe_address is a random variable that should be in L1 */ @@ -250,17 +191,16 @@ static inline void __save_init_fpu(struct task_struct *tsk) "fildl %[addr]", /* set F?P to defined value */ X86_FEATURE_FXSAVE_LEAK, [addr] "m" (safe_address)); -end: task_thread_info(tsk)->status &= ~TS_USEDFPU; } -#endif /* CONFIG_X86_64 */ - /* * Signal frame handlers... */ -extern int save_i387_xstate(void __user *buf); -extern int restore_i387_xstate(void __user *buf); +extern int save_i387(struct _fpstate __user *buf); +extern int restore_i387(struct _fpstate __user *buf); + +#endif /* CONFIG_X86_64 */ static inline void __unlazy_fpu(struct task_struct *tsk) { diff --git a/trunk/include/asm-x86/i8259.h b/trunk/include/asm-x86/i8259.h index 23c1b3baaecd..c586559a6957 100644 --- a/trunk/include/asm-x86/i8259.h +++ b/trunk/include/asm-x86/i8259.h @@ -57,7 +57,4 @@ static inline void outb_pic(unsigned char value, unsigned int port) extern struct irq_chip i8259A_chip; -extern void mask_8259A(void); -extern void unmask_8259A(void); - #endif /* ASM_X86__I8259_H */ diff --git a/trunk/include/asm-x86/io_apic.h b/trunk/include/asm-x86/io_apic.h index 8ec68a50cf10..be62847ab07e 100644 --- a/trunk/include/asm-x86/io_apic.h +++ b/trunk/include/asm-x86/io_apic.h @@ -107,20 +107,6 @@ struct IO_APIC_route_entry { } __attribute__ ((packed)); -struct IR_IO_APIC_route_entry { - __u64 vector : 8, - zero : 3, - index2 : 1, - delivery_status : 1, - polarity : 1, - irr : 1, - trigger : 1, - mask : 1, - reserved : 31, - format : 1, - index : 15; -} __attribute__ ((packed)); - #ifdef CONFIG_X86_IO_APIC /* @@ -197,12 +183,6 @@ extern int io_apic_set_pci_routing(int ioapic, int pin, int irq, extern int (*ioapic_renumber_irq)(int ioapic, int irq); extern void ioapic_init_mappings(void); -#ifdef CONFIG_X86_64 -extern int save_mask_IO_APIC_setup(void); -extern void restore_IO_APIC_setup(void); -extern void reinit_intr_remapped_IO_APIC(int); -#endif - #else /* !CONFIG_X86_IO_APIC */ #define io_apic_assign_pci_irqs 0 static const int timer_through_8259 = 0; diff --git a/trunk/include/asm-x86/ipi.h b/trunk/include/asm-x86/ipi.h index 30a692cfaff8..c1b226797518 100644 --- a/trunk/include/asm-x86/ipi.h +++ b/trunk/include/asm-x86/ipi.h @@ -49,12 +49,6 @@ static inline int __prepare_ICR2(unsigned int mask) return SET_APIC_DEST_FIELD(mask); } -static inline void __xapic_wait_icr_idle(void) -{ - while (native_apic_mem_read(APIC_ICR) & APIC_ICR_BUSY) - cpu_relax(); -} - static inline void __send_IPI_shortcut(unsigned int shortcut, int vector, unsigned int dest) { @@ -70,7 +64,7 @@ static inline void __send_IPI_shortcut(unsigned int shortcut, int vector, /* * Wait for idle. */ - __xapic_wait_icr_idle(); + apic_wait_icr_idle(); /* * No need to touch the target chip field @@ -80,7 +74,7 @@ static inline void __send_IPI_shortcut(unsigned int shortcut, int vector, /* * Send the IPI. The write to APIC_ICR fires this off. */ - native_apic_mem_write(APIC_ICR, cfg); + apic_write(APIC_ICR, cfg); } /* @@ -98,13 +92,13 @@ static inline void __send_IPI_dest_field(unsigned int mask, int vector, if (unlikely(vector == NMI_VECTOR)) safe_apic_wait_icr_idle(); else - __xapic_wait_icr_idle(); + apic_wait_icr_idle(); /* * prepare target chip field */ cfg = __prepare_ICR2(mask); - native_apic_mem_write(APIC_ICR2, cfg); + apic_write(APIC_ICR2, cfg); /* * program the ICR @@ -114,7 +108,7 @@ static inline void __send_IPI_dest_field(unsigned int mask, int vector, /* * Send the IPI. The write to APIC_ICR fires this off. */ - native_apic_mem_write(APIC_ICR, cfg); + apic_write(APIC_ICR, cfg); } static inline void send_IPI_mask_sequence(cpumask_t mask, int vector) diff --git a/trunk/include/asm-x86/irq_remapping.h b/trunk/include/asm-x86/irq_remapping.h deleted file mode 100644 index 78242c6ffa58..000000000000 --- a/trunk/include/asm-x86/irq_remapping.h +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef _ASM_IRQ_REMAPPING_H -#define _ASM_IRQ_REMAPPING_H - -extern int x2apic; - -#define IRTE_DEST(dest) ((x2apic) ? dest : dest << 8) - -#endif diff --git a/trunk/include/asm-x86/bigsmp/apic.h b/trunk/include/asm-x86/mach-bigsmp/mach_apic.h similarity index 94% rename from trunk/include/asm-x86/bigsmp/apic.h rename to trunk/include/asm-x86/mach-bigsmp/mach_apic.h index 0a9cd7c5ca0c..05362d44a3ee 100644 --- a/trunk/include/asm-x86/bigsmp/apic.h +++ b/trunk/include/asm-x86/mach-bigsmp/mach_apic.h @@ -1,5 +1,5 @@ -#ifndef __ASM_MACH_APIC_H -#define __ASM_MACH_APIC_H +#ifndef ASM_X86__MACH_BIGSMP__MACH_APIC_H +#define ASM_X86__MACH_BIGSMP__MACH_APIC_H #define xapic_phys_to_log_apicid(cpu) (per_cpu(x86_bios_cpu_apicid, cpu)) #define esr_disable (1) @@ -11,7 +11,7 @@ static inline int apic_id_registered(void) /* Round robin the irqs amoung the online cpus */ static inline cpumask_t target_cpus(void) -{ +{ static unsigned long cpu = NR_CPUS; do { if (cpu >= NR_CPUS) @@ -23,7 +23,7 @@ static inline cpumask_t target_cpus(void) } #undef APIC_DEST_LOGICAL -#define APIC_DEST_LOGICAL 0 +#define APIC_DEST_LOGICAL 0 #define TARGET_CPUS (target_cpus()) #define APIC_DFR_VALUE (APIC_DFR_FLAT) #define INT_DELIVERY_MODE (dest_Fixed) @@ -141,4 +141,4 @@ static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb) return cpuid_apic >> index_msb; } -#endif /* __ASM_MACH_APIC_H */ +#endif /* ASM_X86__MACH_BIGSMP__MACH_APIC_H */ diff --git a/trunk/include/asm-x86/mach-bigsmp/mach_apicdef.h b/trunk/include/asm-x86/mach-bigsmp/mach_apicdef.h new file mode 100644 index 000000000000..811935d9d49b --- /dev/null +++ b/trunk/include/asm-x86/mach-bigsmp/mach_apicdef.h @@ -0,0 +1,13 @@ +#ifndef ASM_X86__MACH_BIGSMP__MACH_APICDEF_H +#define ASM_X86__MACH_BIGSMP__MACH_APICDEF_H + +#define APIC_ID_MASK (0xFF<<24) + +static inline unsigned get_apic_id(unsigned long x) +{ + return (((x)>>24)&0xFF); +} + +#define GET_APIC_ID(x) get_apic_id(x) + +#endif /* ASM_X86__MACH_BIGSMP__MACH_APICDEF_H */ diff --git a/trunk/include/asm-x86/bigsmp/ipi.h b/trunk/include/asm-x86/mach-bigsmp/mach_ipi.h similarity index 77% rename from trunk/include/asm-x86/bigsmp/ipi.h rename to trunk/include/asm-x86/mach-bigsmp/mach_ipi.h index 9404c535b7ec..b1b0f966a009 100644 --- a/trunk/include/asm-x86/bigsmp/ipi.h +++ b/trunk/include/asm-x86/mach-bigsmp/mach_ipi.h @@ -1,5 +1,5 @@ -#ifndef __ASM_MACH_IPI_H -#define __ASM_MACH_IPI_H +#ifndef ASM_X86__MACH_BIGSMP__MACH_IPI_H +#define ASM_X86__MACH_BIGSMP__MACH_IPI_H void send_IPI_mask_sequence(cpumask_t mask, int vector); @@ -22,4 +22,4 @@ static inline void send_IPI_all(int vector) send_IPI_mask(cpu_online_map, vector); } -#endif /* __ASM_MACH_IPI_H */ +#endif /* ASM_X86__MACH_BIGSMP__MACH_IPI_H */ diff --git a/trunk/include/asm-x86/mach-default/mach_apic.h b/trunk/include/asm-x86/mach-default/mach_apic.h index 2a330a41b3dd..b615f40736be 100644 --- a/trunk/include/asm-x86/mach-default/mach_apic.h +++ b/trunk/include/asm-x86/mach-default/mach_apic.h @@ -30,8 +30,6 @@ static inline cpumask_t target_cpus(void) #define cpu_mask_to_apicid (genapic->cpu_mask_to_apicid) #define phys_pkg_id (genapic->phys_pkg_id) #define vector_allocation_domain (genapic->vector_allocation_domain) -#define read_apic_id() (GET_APIC_ID(apic_read(APIC_ID))) -#define send_IPI_self (genapic->send_IPI_self) extern void setup_apic_routing(void); #else #define INT_DELIVERY_MODE dest_LowestPrio @@ -56,7 +54,7 @@ static inline void init_apic_ldr(void) static inline int apic_id_registered(void) { - return physid_isset(read_apic_id(), phys_cpu_present_map); + return physid_isset(GET_APIC_ID(read_apic_id()), phys_cpu_present_map); } static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) diff --git a/trunk/include/asm-x86/mach-default/mach_apicdef.h b/trunk/include/asm-x86/mach-default/mach_apicdef.h index 0c2d41c41b20..936704f816d6 100644 --- a/trunk/include/asm-x86/mach-default/mach_apicdef.h +++ b/trunk/include/asm-x86/mach-default/mach_apicdef.h @@ -4,9 +4,9 @@ #include #ifdef CONFIG_X86_64 -#define APIC_ID_MASK (genapic->apic_id_mask) -#define GET_APIC_ID(x) (genapic->get_apic_id(x)) -#define SET_APIC_ID(x) (genapic->set_apic_id(x)) +#define APIC_ID_MASK (0xFFu<<24) +#define GET_APIC_ID(x) (((x)>>24)&0xFFu) +#define SET_APIC_ID(x) (((x)<<24)) #else #define APIC_ID_MASK (0xF<<24) static inline unsigned get_apic_id(unsigned long x) diff --git a/trunk/include/asm-x86/es7000/apic.h b/trunk/include/asm-x86/mach-es7000/mach_apic.h similarity index 91% rename from trunk/include/asm-x86/es7000/apic.h rename to trunk/include/asm-x86/mach-es7000/mach_apic.h index bd2c44d1f7ac..c1f6f682d619 100644 --- a/trunk/include/asm-x86/es7000/apic.h +++ b/trunk/include/asm-x86/mach-es7000/mach_apic.h @@ -1,5 +1,5 @@ -#ifndef __ASM_ES7000_APIC_H -#define __ASM_ES7000_APIC_H +#ifndef ASM_X86__MACH_ES7000__MACH_APIC_H +#define ASM_X86__MACH_ES7000__MACH_APIC_H #define xapic_phys_to_log_apicid(cpu) per_cpu(x86_bios_cpu_apicid, cpu) #define esr_disable (1) @@ -10,7 +10,7 @@ static inline int apic_id_registered(void) } static inline cpumask_t target_cpus(void) -{ +{ #if defined CONFIG_ES7000_CLUSTERED_APIC return CPU_MASK_ALL; #else @@ -23,24 +23,24 @@ static inline cpumask_t target_cpus(void) #define APIC_DFR_VALUE (APIC_DFR_CLUSTER) #define INT_DELIVERY_MODE (dest_LowestPrio) #define INT_DEST_MODE (1) /* logical delivery broadcast to all procs */ -#define NO_BALANCE_IRQ (1) +#define NO_BALANCE_IRQ (1) #undef WAKE_SECONDARY_VIA_INIT #define WAKE_SECONDARY_VIA_MIP #else #define APIC_DFR_VALUE (APIC_DFR_FLAT) #define INT_DELIVERY_MODE (dest_Fixed) #define INT_DEST_MODE (0) /* phys delivery to target procs */ -#define NO_BALANCE_IRQ (0) +#define NO_BALANCE_IRQ (0) #undef APIC_DEST_LOGICAL #define APIC_DEST_LOGICAL 0x0 #define WAKE_SECONDARY_VIA_INIT #endif static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid) -{ +{ return 0; -} -static inline unsigned long check_apicid_present(int bit) +} +static inline unsigned long check_apicid_present(int bit) { return physid_isset(bit, phys_cpu_present_map); } @@ -80,7 +80,7 @@ static inline void setup_apic_routing(void) { int apic = per_cpu(x86_bios_cpu_apicid, smp_processor_id()); printk("Enabling APIC mode: %s. Using %d I/O APICs, target cpus %lx\n", - (apic_version[apic] == 0x14) ? + (apic_version[apic] == 0x14) ? "Physical Cluster" : "Logical Cluster", nr_ioapics, cpus_addr(TARGET_CPUS)[0]); } @@ -141,7 +141,7 @@ static inline void setup_portio_remap(void) extern unsigned int boot_cpu_physical_apicid; static inline int check_phys_apicid_present(int cpu_physical_apicid) { - boot_cpu_physical_apicid = read_apic_id(); + boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); return (1); } @@ -150,7 +150,7 @@ static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) int num_bits_set; int cpus_found = 0; int cpu; - int apicid; + int apicid; num_bits_set = cpus_weight(cpumask); /* Return id to all */ @@ -160,16 +160,16 @@ static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) #else return cpu_to_logical_apicid(0); #endif - /* - * The cpus in the mask must all be on the apic cluster. If are not - * on the same apicid cluster return default value of TARGET_CPUS. + /* + * The cpus in the mask must all be on the apic cluster. If are not + * on the same apicid cluster return default value of TARGET_CPUS. */ cpu = first_cpu(cpumask); apicid = cpu_to_logical_apicid(cpu); while (cpus_found < num_bits_set) { if (cpu_isset(cpu, cpumask)) { int new_apicid = cpu_to_logical_apicid(cpu); - if (apicid_cluster(apicid) != + if (apicid_cluster(apicid) != apicid_cluster(new_apicid)){ printk ("%s: Not a valid mask!\n",__FUNCTION__); #if defined CONFIG_ES7000_CLUSTERED_APIC @@ -191,4 +191,4 @@ static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb) return cpuid_apic >> index_msb; } -#endif /* __ASM_ES7000_APIC_H */ +#endif /* ASM_X86__MACH_ES7000__MACH_APIC_H */ diff --git a/trunk/include/asm-x86/mach-es7000/mach_apicdef.h b/trunk/include/asm-x86/mach-es7000/mach_apicdef.h new file mode 100644 index 000000000000..a07e56744028 --- /dev/null +++ b/trunk/include/asm-x86/mach-es7000/mach_apicdef.h @@ -0,0 +1,13 @@ +#ifndef ASM_X86__MACH_ES7000__MACH_APICDEF_H +#define ASM_X86__MACH_ES7000__MACH_APICDEF_H + +#define APIC_ID_MASK (0xFF<<24) + +static inline unsigned get_apic_id(unsigned long x) +{ + return (((x)>>24)&0xFF); +} + +#define GET_APIC_ID(x) get_apic_id(x) + +#endif /* ASM_X86__MACH_ES7000__MACH_APICDEF_H */ diff --git a/trunk/include/asm-x86/es7000/ipi.h b/trunk/include/asm-x86/mach-es7000/mach_ipi.h similarity index 77% rename from trunk/include/asm-x86/es7000/ipi.h rename to trunk/include/asm-x86/mach-es7000/mach_ipi.h index 632a955fcc0a..3a21240e03dc 100644 --- a/trunk/include/asm-x86/es7000/ipi.h +++ b/trunk/include/asm-x86/mach-es7000/mach_ipi.h @@ -1,5 +1,5 @@ -#ifndef __ASM_ES7000_IPI_H -#define __ASM_ES7000_IPI_H +#ifndef ASM_X86__MACH_ES7000__MACH_IPI_H +#define ASM_X86__MACH_ES7000__MACH_IPI_H void send_IPI_mask_sequence(cpumask_t mask, int vector); @@ -21,4 +21,4 @@ static inline void send_IPI_all(int vector) send_IPI_mask(cpu_online_map, vector); } -#endif /* __ASM_ES7000_IPI_H */ +#endif /* ASM_X86__MACH_ES7000__MACH_IPI_H */ diff --git a/trunk/include/asm-x86/es7000/mpparse.h b/trunk/include/asm-x86/mach-es7000/mach_mpparse.h similarity index 81% rename from trunk/include/asm-x86/es7000/mpparse.h rename to trunk/include/asm-x86/mach-es7000/mach_mpparse.h index 7b5c889d8e7d..befde24705b7 100644 --- a/trunk/include/asm-x86/es7000/mpparse.h +++ b/trunk/include/asm-x86/mach-es7000/mach_mpparse.h @@ -1,5 +1,5 @@ -#ifndef __ASM_ES7000_MPPARSE_H -#define __ASM_ES7000_MPPARSE_H +#ifndef ASM_X86__MACH_ES7000__MACH_MPPARSE_H +#define ASM_X86__MACH_ES7000__MACH_MPPARSE_H #include @@ -26,4 +26,4 @@ static inline int es7000_check_dsdt(void) } #endif -#endif /* __ASM_MACH_MPPARSE_H */ +#endif /* ASM_X86__MACH_ES7000__MACH_MPPARSE_H */ diff --git a/trunk/include/asm-x86/es7000/wakecpu.h b/trunk/include/asm-x86/mach-es7000/mach_wakecpu.h similarity index 89% rename from trunk/include/asm-x86/es7000/wakecpu.h rename to trunk/include/asm-x86/mach-es7000/mach_wakecpu.h index 3ffc5a7bf667..97c776ce13f2 100644 --- a/trunk/include/asm-x86/es7000/wakecpu.h +++ b/trunk/include/asm-x86/mach-es7000/mach_wakecpu.h @@ -1,7 +1,7 @@ -#ifndef __ASM_ES7000_WAKECPU_H -#define __ASM_ES7000_WAKECPU_H +#ifndef ASM_X86__MACH_ES7000__MACH_WAKECPU_H +#define ASM_X86__MACH_ES7000__MACH_WAKECPU_H -/* +/* * This file copes with machines that wakeup secondary CPUs by the * INIT, INIT, STARTUP sequence. */ @@ -56,4 +56,4 @@ static inline void restore_NMI_vector(unsigned short *high, unsigned short *low) #define inquire_remote_apic(apicid) {} #endif -#endif /* __ASM_MACH_WAKECPU_H */ +#endif /* ASM_X86__MACH_ES7000__MACH_WAKECPU_H */ diff --git a/trunk/include/asm-x86/numaq/apic.h b/trunk/include/asm-x86/mach-numaq/mach_apic.h similarity index 96% rename from trunk/include/asm-x86/numaq/apic.h rename to trunk/include/asm-x86/mach-numaq/mach_apic.h index a8344ba6ea15..7a0d39edfcfa 100644 --- a/trunk/include/asm-x86/numaq/apic.h +++ b/trunk/include/asm-x86/mach-numaq/mach_apic.h @@ -1,5 +1,5 @@ -#ifndef __ASM_NUMAQ_APIC_H -#define __ASM_NUMAQ_APIC_H +#ifndef ASM_X86__MACH_NUMAQ__MACH_APIC_H +#define ASM_X86__MACH_NUMAQ__MACH_APIC_H #include #include @@ -135,4 +135,4 @@ static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb) return cpuid_apic >> index_msb; } -#endif /* __ASM_NUMAQ_APIC_H */ +#endif /* ASM_X86__MACH_NUMAQ__MACH_APIC_H */ diff --git a/trunk/include/asm-x86/numaq/apicdef.h b/trunk/include/asm-x86/mach-numaq/mach_apicdef.h similarity index 55% rename from trunk/include/asm-x86/numaq/apicdef.h rename to trunk/include/asm-x86/mach-numaq/mach_apicdef.h index e012a46cc22a..f870ec5f7782 100644 --- a/trunk/include/asm-x86/numaq/apicdef.h +++ b/trunk/include/asm-x86/mach-numaq/mach_apicdef.h @@ -1,5 +1,5 @@ -#ifndef __ASM_NUMAQ_APICDEF_H -#define __ASM_NUMAQ_APICDEF_H +#ifndef ASM_X86__MACH_NUMAQ__MACH_APICDEF_H +#define ASM_X86__MACH_NUMAQ__MACH_APICDEF_H #define APIC_ID_MASK (0xF<<24) @@ -11,4 +11,4 @@ static inline unsigned get_apic_id(unsigned long x) #define GET_APIC_ID(x) get_apic_id(x) -#endif +#endif /* ASM_X86__MACH_NUMAQ__MACH_APICDEF_H */ diff --git a/trunk/include/asm-x86/numaq/ipi.h b/trunk/include/asm-x86/mach-numaq/mach_ipi.h similarity index 77% rename from trunk/include/asm-x86/numaq/ipi.h rename to trunk/include/asm-x86/mach-numaq/mach_ipi.h index 935588d286cf..1e835823f4bc 100644 --- a/trunk/include/asm-x86/numaq/ipi.h +++ b/trunk/include/asm-x86/mach-numaq/mach_ipi.h @@ -1,5 +1,5 @@ -#ifndef __ASM_NUMAQ_IPI_H -#define __ASM_NUMAQ_IPI_H +#ifndef ASM_X86__MACH_NUMAQ__MACH_IPI_H +#define ASM_X86__MACH_NUMAQ__MACH_IPI_H void send_IPI_mask_sequence(cpumask_t, int vector); @@ -22,4 +22,4 @@ static inline void send_IPI_all(int vector) send_IPI_mask(cpu_online_map, vector); } -#endif /* __ASM_NUMAQ_IPI_H */ +#endif /* ASM_X86__MACH_NUMAQ__MACH_IPI_H */ diff --git a/trunk/include/asm-x86/mach-numaq/mach_mpparse.h b/trunk/include/asm-x86/mach-numaq/mach_mpparse.h new file mode 100644 index 000000000000..74ade184920b --- /dev/null +++ b/trunk/include/asm-x86/mach-numaq/mach_mpparse.h @@ -0,0 +1,7 @@ +#ifndef ASM_X86__MACH_NUMAQ__MACH_MPPARSE_H +#define ASM_X86__MACH_NUMAQ__MACH_MPPARSE_H + +extern void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem, + char *productid); + +#endif /* ASM_X86__MACH_NUMAQ__MACH_MPPARSE_H */ diff --git a/trunk/include/asm-x86/numaq/wakecpu.h b/trunk/include/asm-x86/mach-numaq/mach_wakecpu.h similarity index 88% rename from trunk/include/asm-x86/numaq/wakecpu.h rename to trunk/include/asm-x86/mach-numaq/mach_wakecpu.h index c577bda5b1c5..0db8cea643c0 100644 --- a/trunk/include/asm-x86/numaq/wakecpu.h +++ b/trunk/include/asm-x86/mach-numaq/mach_wakecpu.h @@ -1,5 +1,5 @@ -#ifndef __ASM_NUMAQ_WAKECPU_H -#define __ASM_NUMAQ_WAKECPU_H +#ifndef ASM_X86__MACH_NUMAQ__MACH_WAKECPU_H +#define ASM_X86__MACH_NUMAQ__MACH_WAKECPU_H /* This file copes with machines that wakeup secondary CPUs by NMIs */ @@ -40,4 +40,4 @@ static inline void restore_NMI_vector(unsigned short *high, unsigned short *low) #define inquire_remote_apic(apicid) {} -#endif /* __ASM_NUMAQ_WAKECPU_H */ +#endif /* ASM_X86__MACH_NUMAQ__MACH_WAKECPU_H */ diff --git a/trunk/include/asm-x86/summit/irq_vectors_limits.h b/trunk/include/asm-x86/mach-summit/irq_vectors_limits.h similarity index 65% rename from trunk/include/asm-x86/summit/irq_vectors_limits.h rename to trunk/include/asm-x86/mach-summit/irq_vectors_limits.h index 890ce3f5e09a..22f376ad68e1 100644 --- a/trunk/include/asm-x86/summit/irq_vectors_limits.h +++ b/trunk/include/asm-x86/mach-summit/irq_vectors_limits.h @@ -1,5 +1,5 @@ -#ifndef _ASM_IRQ_VECTORS_LIMITS_H -#define _ASM_IRQ_VECTORS_LIMITS_H +#ifndef ASM_X86__MACH_SUMMIT__IRQ_VECTORS_LIMITS_H +#define ASM_X86__MACH_SUMMIT__IRQ_VECTORS_LIMITS_H /* * For Summit or generic (i.e. installer) kernels, we have lots of I/O APICs, @@ -11,4 +11,4 @@ #define NR_IRQS 224 #define NR_IRQ_VECTORS 1024 -#endif /* _ASM_IRQ_VECTORS_LIMITS_H */ +#endif /* ASM_X86__MACH_SUMMIT__IRQ_VECTORS_LIMITS_H */ diff --git a/trunk/include/asm-x86/summit/apic.h b/trunk/include/asm-x86/mach-summit/mach_apic.h similarity index 93% rename from trunk/include/asm-x86/summit/apic.h rename to trunk/include/asm-x86/mach-summit/mach_apic.h index c5b2e4b10358..7a66758d701d 100644 --- a/trunk/include/asm-x86/summit/apic.h +++ b/trunk/include/asm-x86/mach-summit/mach_apic.h @@ -1,5 +1,5 @@ -#ifndef __ASM_SUMMIT_APIC_H -#define __ASM_SUMMIT_APIC_H +#ifndef ASM_X86__MACH_SUMMIT__MACH_APIC_H +#define ASM_X86__MACH_SUMMIT__MACH_APIC_H #include @@ -21,7 +21,7 @@ static inline cpumask_t target_cpus(void) * Just start on cpu 0. IRQ balancing will spread load */ return cpumask_of_cpu(0); -} +} #define TARGET_CPUS (target_cpus()) #define INT_DELIVERY_MODE (dest_LowestPrio) @@ -30,10 +30,10 @@ static inline cpumask_t target_cpus(void) static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid) { return 0; -} +} /* we don't use the phys_cpu_present_map to indicate apicid presence */ -static inline unsigned long check_apicid_present(int bit) +static inline unsigned long check_apicid_present(int bit) { return 1; } @@ -122,7 +122,7 @@ static inline physid_mask_t ioapic_phys_id_map(physid_mask_t phys_id_map) static inline physid_mask_t apicid_to_cpu_present(int apicid) { - return physid_mask_of_physid(0); + return physid_mask_of_physid(apicid); } static inline void setup_portio_remap(void) @@ -143,22 +143,22 @@ static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) int num_bits_set; int cpus_found = 0; int cpu; - int apicid; + int apicid; num_bits_set = cpus_weight(cpumask); /* Return id to all */ if (num_bits_set == NR_CPUS) return (int) 0xFF; - /* - * The cpus in the mask must all be on the apic cluster. If are not - * on the same apicid cluster return default value of TARGET_CPUS. + /* + * The cpus in the mask must all be on the apic cluster. If are not + * on the same apicid cluster return default value of TARGET_CPUS. */ cpu = first_cpu(cpumask); apicid = cpu_to_logical_apicid(cpu); while (cpus_found < num_bits_set) { if (cpu_isset(cpu, cpumask)) { int new_apicid = cpu_to_logical_apicid(cpu); - if (apicid_cluster(apicid) != + if (apicid_cluster(apicid) != apicid_cluster(new_apicid)){ printk ("%s: Not a valid mask!\n",__FUNCTION__); return 0xFF; @@ -182,4 +182,4 @@ static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb) return hard_smp_processor_id() >> index_msb; } -#endif /* __ASM_SUMMIT_APIC_H */ +#endif /* ASM_X86__MACH_SUMMIT__MACH_APIC_H */ diff --git a/trunk/include/asm-x86/mach-summit/mach_apicdef.h b/trunk/include/asm-x86/mach-summit/mach_apicdef.h new file mode 100644 index 000000000000..d4bc8590c4f6 --- /dev/null +++ b/trunk/include/asm-x86/mach-summit/mach_apicdef.h @@ -0,0 +1,13 @@ +#ifndef ASM_X86__MACH_SUMMIT__MACH_APICDEF_H +#define ASM_X86__MACH_SUMMIT__MACH_APICDEF_H + +#define APIC_ID_MASK (0xFF<<24) + +static inline unsigned get_apic_id(unsigned long x) +{ + return (((x)>>24)&0xFF); +} + +#define GET_APIC_ID(x) get_apic_id(x) + +#endif /* ASM_X86__MACH_SUMMIT__MACH_APICDEF_H */ diff --git a/trunk/include/asm-x86/summit/ipi.h b/trunk/include/asm-x86/mach-summit/mach_ipi.h similarity index 77% rename from trunk/include/asm-x86/summit/ipi.h rename to trunk/include/asm-x86/mach-summit/mach_ipi.h index 53bd1e7bd7b4..a3b31c528d90 100644 --- a/trunk/include/asm-x86/summit/ipi.h +++ b/trunk/include/asm-x86/mach-summit/mach_ipi.h @@ -1,5 +1,5 @@ -#ifndef __ASM_SUMMIT_IPI_H -#define __ASM_SUMMIT_IPI_H +#ifndef ASM_X86__MACH_SUMMIT__MACH_IPI_H +#define ASM_X86__MACH_SUMMIT__MACH_IPI_H void send_IPI_mask_sequence(cpumask_t mask, int vector); @@ -22,4 +22,4 @@ static inline void send_IPI_all(int vector) send_IPI_mask(cpu_online_map, vector); } -#endif /* __ASM_SUMMIT_IPI_H */ +#endif /* ASM_X86__MACH_SUMMIT__MACH_IPI_H */ diff --git a/trunk/include/asm-x86/summit/mpparse.h b/trunk/include/asm-x86/mach-summit/mach_mpparse.h similarity index 95% rename from trunk/include/asm-x86/summit/mpparse.h rename to trunk/include/asm-x86/mach-summit/mach_mpparse.h index 013ce6fab2d5..92396f28772b 100644 --- a/trunk/include/asm-x86/summit/mpparse.h +++ b/trunk/include/asm-x86/mach-summit/mach_mpparse.h @@ -1,6 +1,7 @@ -#ifndef __ASM_SUMMIT_MPPARSE_H -#define __ASM_SUMMIT_MPPARSE_H +#ifndef ASM_X86__MACH_SUMMIT__MACH_MPPARSE_H +#define ASM_X86__MACH_SUMMIT__MACH_MPPARSE_H +#include #include extern int use_cyclone; @@ -11,11 +12,11 @@ extern void setup_summit(void); #define setup_summit() {} #endif -static inline int mps_oem_check(struct mp_config_table *mpc, char *oem, +static inline int mps_oem_check(struct mp_config_table *mpc, char *oem, char *productid) { - if (!strncmp(oem, "IBM ENSW", 8) && - (!strncmp(productid, "VIGIL SMP", 9) + if (!strncmp(oem, "IBM ENSW", 8) && + (!strncmp(productid, "VIGIL SMP", 9) || !strncmp(productid, "EXA", 3) || !strncmp(productid, "RUTHLESS SMP", 12))){ mark_tsc_unstable("Summit based system"); @@ -106,4 +107,4 @@ static inline int is_WPEG(struct rio_detail *rio){ rio->type == LookOutAWPEG || rio->type == LookOutBWPEG); } -#endif /* __ASM_SUMMIT_MPPARSE_H */ +#endif /* ASM_X86__MACH_SUMMIT__MACH_MPPARSE_H */ diff --git a/trunk/include/asm-x86/mpspec.h b/trunk/include/asm-x86/mpspec.h index be2241a818f1..118da365e371 100644 --- a/trunk/include/asm-x86/mpspec.h +++ b/trunk/include/asm-x86/mpspec.h @@ -5,12 +5,11 @@ #include -extern int apic_version[MAX_APICS]; - #ifdef CONFIG_X86_32 #include extern unsigned int def_to_bigsmp; +extern int apic_version[MAX_APICS]; extern u8 apicid_2_node[]; extern int pic_mode; diff --git a/trunk/include/asm-x86/msidef.h b/trunk/include/asm-x86/msidef.h index ed9190246876..3139666a94fa 100644 --- a/trunk/include/asm-x86/msidef.h +++ b/trunk/include/asm-x86/msidef.h @@ -48,8 +48,4 @@ #define MSI_ADDR_DEST_ID(dest) (((dest) << MSI_ADDR_DEST_ID_SHIFT) & \ MSI_ADDR_DEST_ID_MASK) -#define MSI_ADDR_IR_EXT_INT (1 << 4) -#define MSI_ADDR_IR_SHV (1 << 3) -#define MSI_ADDR_IR_INDEX1(index) ((index & 0x8000) >> 13) -#define MSI_ADDR_IR_INDEX2(index) ((index & 0x7fff) << 5) #endif /* ASM_X86__MSIDEF_H */ diff --git a/trunk/include/asm-x86/msr-index.h b/trunk/include/asm-x86/msr-index.h index 0bb43301a202..3052f058ab06 100644 --- a/trunk/include/asm-x86/msr-index.h +++ b/trunk/include/asm-x86/msr-index.h @@ -176,7 +176,6 @@ #define MSR_IA32_TSC 0x00000010 #define MSR_IA32_PLATFORM_ID 0x00000017 #define MSR_IA32_EBL_CR_POWERON 0x0000002a -#define MSR_IA32_FEATURE_CONTROL 0x0000003a #define MSR_IA32_APICBASE 0x0000001b #define MSR_IA32_APICBASE_BSP (1<<8) @@ -311,19 +310,4 @@ /* Geode defined MSRs */ #define MSR_GEODE_BUSCONT_CONF0 0x00001900 -/* Intel VT MSRs */ -#define MSR_IA32_VMX_BASIC 0x00000480 -#define MSR_IA32_VMX_PINBASED_CTLS 0x00000481 -#define MSR_IA32_VMX_PROCBASED_CTLS 0x00000482 -#define MSR_IA32_VMX_EXIT_CTLS 0x00000483 -#define MSR_IA32_VMX_ENTRY_CTLS 0x00000484 -#define MSR_IA32_VMX_MISC 0x00000485 -#define MSR_IA32_VMX_CR0_FIXED0 0x00000486 -#define MSR_IA32_VMX_CR0_FIXED1 0x00000487 -#define MSR_IA32_VMX_CR4_FIXED0 0x00000488 -#define MSR_IA32_VMX_CR4_FIXED1 0x00000489 -#define MSR_IA32_VMX_VMCS_ENUM 0x0000048a -#define MSR_IA32_VMX_PROCBASED_CTLS2 0x0000048b -#define MSR_IA32_VMX_EPT_VPID_CAP 0x0000048c - #endif /* ASM_X86__MSR_INDEX_H */ diff --git a/trunk/include/asm-x86/numaq/mpparse.h b/trunk/include/asm-x86/numaq/mpparse.h deleted file mode 100644 index 252292e077b6..000000000000 --- a/trunk/include/asm-x86/numaq/mpparse.h +++ /dev/null @@ -1,7 +0,0 @@ -#ifndef __ASM_NUMAQ_MPPARSE_H -#define __ASM_NUMAQ_MPPARSE_H - -extern void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem, - char *productid); - -#endif /* __ASM_NUMAQ_MPPARSE_H */ diff --git a/trunk/include/asm-x86/paravirt.h b/trunk/include/asm-x86/paravirt.h index d7d358a43996..891971f57d35 100644 --- a/trunk/include/asm-x86/paravirt.h +++ b/trunk/include/asm-x86/paravirt.h @@ -201,6 +201,12 @@ struct pv_irq_ops { struct pv_apic_ops { #ifdef CONFIG_X86_LOCAL_APIC + /* + * Direct APIC operations, principally for VMI. Ideally + * these shouldn't be in this interface. + */ + void (*apic_write)(unsigned long reg, u32 v); + u32 (*apic_read)(unsigned long reg); void (*setup_boot_clock)(void); void (*setup_secondary_clock)(void); @@ -904,6 +910,19 @@ static inline void slow_down_io(void) } #ifdef CONFIG_X86_LOCAL_APIC +/* + * Basic functions accessing APICs. + */ +static inline void apic_write(unsigned long reg, u32 v) +{ + PVOP_VCALL2(pv_apic_ops.apic_write, reg, v); +} + +static inline u32 apic_read(unsigned long reg) +{ + return PVOP_CALL1(unsigned long, pv_apic_ops.apic_read, reg); +} + static inline void setup_boot_clock(void) { PVOP_VCALL0(pv_apic_ops.setup_boot_clock); diff --git a/trunk/include/asm-x86/processor-cyrix.h b/trunk/include/asm-x86/processor-cyrix.h index 1198f2a0e42c..97568ada1f97 100644 --- a/trunk/include/asm-x86/processor-cyrix.h +++ b/trunk/include/asm-x86/processor-cyrix.h @@ -28,11 +28,3 @@ static inline void setCx86(u8 reg, u8 data) outb(reg, 0x22); outb(data, 0x23); } - -#define getCx86_old(reg) ({ outb((reg), 0x22); inb(0x23); }) - -#define setCx86_old(reg, data) do { \ - outb((reg), 0x22); \ - outb((data), 0x23); \ -} while (0) - diff --git a/trunk/include/asm-x86/processor-flags.h b/trunk/include/asm-x86/processor-flags.h index dc5f0712f9fa..5dd79774f693 100644 --- a/trunk/include/asm-x86/processor-flags.h +++ b/trunk/include/asm-x86/processor-flags.h @@ -59,7 +59,6 @@ #define X86_CR4_OSFXSR 0x00000200 /* enable fast FPU save and restore */ #define X86_CR4_OSXMMEXCPT 0x00000400 /* enable unmasked SSE exceptions */ #define X86_CR4_VMXE 0x00002000 /* enable VMX virtualization */ -#define X86_CR4_OSXSAVE 0x00040000 /* enable xsave and xrestore */ /* * x86-64 Task Priority Register, CR8 diff --git a/trunk/include/asm-x86/processor.h b/trunk/include/asm-x86/processor.h index c7d35464a4bb..5eaf9bf0a623 100644 --- a/trunk/include/asm-x86/processor.h +++ b/trunk/include/asm-x86/processor.h @@ -76,11 +76,11 @@ struct cpuinfo_x86 { int x86_tlbsize; __u8 x86_virt_bits; __u8 x86_phys_bits; -#endif /* CPUID returned core id bits: */ __u8 x86_coreid_bits; /* Max extended CPUID function supported: */ __u32 extended_cpuid_level; +#endif /* Maximum supported CPUID level, -1=no CPUID: */ int cpuid_level; __u32 x86_capability[NCAPINTS]; @@ -166,8 +166,11 @@ extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c); extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c); extern unsigned short num_cache_leaves; -extern void detect_extended_topology(struct cpuinfo_x86 *c); +#if defined(CONFIG_X86_HT) || defined(CONFIG_X86_64) extern void detect_ht(struct cpuinfo_x86 *c); +#else +static inline void detect_ht(struct cpuinfo_x86 *c) {} +#endif static inline void native_cpuid(unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx) @@ -324,12 +327,7 @@ struct i387_fxsave_struct { /* 16*16 bytes for each XMM-reg = 256 bytes: */ u32 xmm_space[64]; - u32 padding[12]; - - union { - u32 padding1[12]; - u32 sw_reserved[12]; - }; + u32 padding[24]; } __attribute__((aligned(16))); @@ -353,23 +351,10 @@ struct i387_soft_struct { u32 entry_eip; }; -struct xsave_hdr_struct { - u64 xstate_bv; - u64 reserved1[2]; - u64 reserved2[5]; -} __attribute__((packed)); - -struct xsave_struct { - struct i387_fxsave_struct i387; - struct xsave_hdr_struct xsave_hdr; - /* new processor state extensions will go here */ -} __attribute__ ((packed, aligned (64))); - union thread_xstate { struct i387_fsave_struct fsave; struct i387_fxsave_struct fxsave; struct i387_soft_struct soft; - struct xsave_struct xsave; }; #ifdef CONFIG_X86_64 diff --git a/trunk/include/asm-x86/setup.h b/trunk/include/asm-x86/setup.h index 11b6cc14b289..9030cb73c4d7 100644 --- a/trunk/include/asm-x86/setup.h +++ b/trunk/include/asm-x86/setup.h @@ -38,7 +38,6 @@ struct x86_quirks { void (*mpc_oem_pci_bus)(struct mpc_config_bus *m); void (*smp_read_mpc_oem)(struct mp_config_oemtable *oemtable, unsigned short oemsize); - int (*setup_ioapic_ids)(void); }; extern struct x86_quirks *x86_quirks; diff --git a/trunk/include/asm-x86/sigcontext.h b/trunk/include/asm-x86/sigcontext.h index ee813f4fe5d5..24879c85b291 100644 --- a/trunk/include/asm-x86/sigcontext.h +++ b/trunk/include/asm-x86/sigcontext.h @@ -4,40 +4,6 @@ #include #include -#define FP_XSTATE_MAGIC1 0x46505853U -#define FP_XSTATE_MAGIC2 0x46505845U -#define FP_XSTATE_MAGIC2_SIZE sizeof(FP_XSTATE_MAGIC2) - -/* - * bytes 464..511 in the current 512byte layout of fxsave/fxrstor frame - * are reserved for SW usage. On cpu's supporting xsave/xrstor, these bytes - * are used to extended the fpstate pointer in the sigcontext, which now - * includes the extended state information along with fpstate information. - * - * Presence of FP_XSTATE_MAGIC1 at the beginning of this SW reserved - * area and FP_XSTATE_MAGIC2 at the end of memory layout - * (extended_size - FP_XSTATE_MAGIC2_SIZE) indicates the presence of the - * extended state information in the memory layout pointed by the fpstate - * pointer in sigcontext. - */ -struct _fpx_sw_bytes { - __u32 magic1; /* FP_XSTATE_MAGIC1 */ - __u32 extended_size; /* total size of the layout referred by - * fpstate pointer in the sigcontext. - */ - __u64 xstate_bv; - /* feature bit mask (including fp/sse/extended - * state) that is present in the memory - * layout. - */ - __u32 xstate_size; /* actual xsave state size, based on the - * features saved in the layout. - * 'extended_size' will be greater than - * 'xstate_size'. - */ - __u32 padding[7]; /* for future use. */ -}; - #ifdef __i386__ /* * As documented in the iBCS2 standard.. @@ -87,13 +53,7 @@ struct _fpstate { unsigned long reserved; struct _fpxreg _fxsr_st[8]; /* FXSR FPU reg data is ignored */ struct _xmmreg _xmm[8]; - unsigned long padding1[44]; - - union { - unsigned long padding2[12]; - struct _fpx_sw_bytes sw_reserved; /* represents the extended - * state info */ - }; + unsigned long padding[56]; }; #define X86_FXSR_MAGIC 0x0000 @@ -119,15 +79,7 @@ struct sigcontext { unsigned long flags; unsigned long sp_at_signal; unsigned short ss, __ssh; - - /* - * fpstate is really (struct _fpstate *) or (struct _xstate *) - * depending on the FP_XSTATE_MAGIC1 encoded in the SW reserved - * bytes of (struct _fpstate) and FP_XSTATE_MAGIC2 present at the end - * of extended memory layout. See comments at the defintion of - * (struct _fpx_sw_bytes) - */ - void __user *fpstate; /* zero when no FPU/extended context */ + struct _fpstate __user *fpstate; unsigned long oldmask; unsigned long cr2; }; @@ -178,12 +130,7 @@ struct _fpstate { __u32 mxcsr_mask; __u32 st_space[32]; /* 8*16 bytes for each FP-reg */ __u32 xmm_space[64]; /* 16*16 bytes for each XMM-reg */ - __u32 reserved2[12]; - union { - __u32 reserved3[12]; - struct _fpx_sw_bytes sw_reserved; /* represents the extended - * state information */ - }; + __u32 reserved2[24]; }; #ifdef __KERNEL__ @@ -214,15 +161,7 @@ struct sigcontext { unsigned long trapno; unsigned long oldmask; unsigned long cr2; - - /* - * fpstate is really (struct _fpstate *) or (struct _xstate *) - * depending on the FP_XSTATE_MAGIC1 encoded in the SW reserved - * bytes of (struct _fpstate) and FP_XSTATE_MAGIC2 present at the end - * of extended memory layout. See comments at the defintion of - * (struct _fpx_sw_bytes) - */ - void __user *fpstate; /* zero when no FPU/extended context */ + struct _fpstate __user *fpstate; /* zero when no FPU context */ unsigned long reserved1[8]; }; #else /* __KERNEL__ */ @@ -263,22 +202,4 @@ struct sigcontext { #endif /* !__i386__ */ -struct _xsave_hdr { - __u64 xstate_bv; - __u64 reserved1[2]; - __u64 reserved2[5]; -}; - -/* - * Extended state pointed by the fpstate pointer in the sigcontext. - * In addition to the fpstate, information encoded in the xstate_hdr - * indicates the presence of other extended state information - * supported by the processor and OS. - */ -struct _xstate { - struct _fpstate fpstate; - struct _xsave_hdr xstate_hdr; - /* new processor state extensions go here */ -}; - #endif /* ASM_X86__SIGCONTEXT_H */ diff --git a/trunk/include/asm-x86/sigcontext32.h b/trunk/include/asm-x86/sigcontext32.h index 8c347032c2f2..4e2ec732dd01 100644 --- a/trunk/include/asm-x86/sigcontext32.h +++ b/trunk/include/asm-x86/sigcontext32.h @@ -40,11 +40,7 @@ struct _fpstate_ia32 { __u32 reserved; struct _fpxreg _fxsr_st[8]; struct _xmmreg _xmm[8]; /* It's actually 16 */ - __u32 padding[44]; - union { - __u32 padding2[12]; - struct _fpx_sw_bytes sw_reserved; - }; + __u32 padding[56]; }; struct sigcontext_ia32 { diff --git a/trunk/include/asm-x86/smp.h b/trunk/include/asm-x86/smp.h index 29324c103341..04f84f4e2c8b 100644 --- a/trunk/include/asm-x86/smp.h +++ b/trunk/include/asm-x86/smp.h @@ -167,33 +167,30 @@ extern int safe_smp_processor_id(void); #ifdef CONFIG_X86_LOCAL_APIC -#ifndef CONFIG_X86_64 static inline int logical_smp_processor_id(void) { /* we don't want to mark this access volatile - bad code generation */ return GET_APIC_LOGICAL_ID(*(u32 *)(APIC_BASE + APIC_LDR)); } -#include +#ifndef CONFIG_X86_64 static inline unsigned int read_apic_id(void) { - unsigned int reg; - - reg = *(u32 *)(APIC_BASE + APIC_ID); - - return GET_APIC_ID(reg); + return *(u32 *)(APIC_BASE + APIC_ID); } +#else +extern unsigned int read_apic_id(void); #endif -# if defined(APIC_DEFINITION) || defined(CONFIG_X86_64) +# ifdef APIC_DEFINITION extern int hard_smp_processor_id(void); # else -#include +# include static inline int hard_smp_processor_id(void) { /* we don't want to mark this access volatile - bad code generation */ - return read_apic_id(); + return GET_APIC_ID(read_apic_id()); } # endif /* APIC_DEFINITION */ diff --git a/trunk/include/asm-x86/summit/apicdef.h b/trunk/include/asm-x86/summit/apicdef.h deleted file mode 100644 index f3fbca1f61c1..000000000000 --- a/trunk/include/asm-x86/summit/apicdef.h +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef __ASM_SUMMIT_APICDEF_H -#define __ASM_SUMMIT_APICDEF_H - -#define APIC_ID_MASK (0xFF<<24) - -static inline unsigned get_apic_id(unsigned long x) -{ - return (x>>24)&0xFF; -} - -#define GET_APIC_ID(x) get_apic_id(x) - -#endif diff --git a/trunk/include/asm-x86/thread_info.h b/trunk/include/asm-x86/thread_info.h index 3f4e52bb77f5..4db0066a3a35 100644 --- a/trunk/include/asm-x86/thread_info.h +++ b/trunk/include/asm-x86/thread_info.h @@ -241,7 +241,6 @@ static inline struct thread_info *stack_thread_info(void) #define TS_POLLING 0x0004 /* true if in idle loop and not sleeping */ #define TS_RESTORE_SIGMASK 0x0008 /* restore signal mask in do_signal() */ -#define TS_XSAVE 0x0010 /* Use xsave/xrstor */ #define tsk_is_polling(t) (task_thread_info(t)->status & TS_POLLING) diff --git a/trunk/include/asm-x86/ucontext.h b/trunk/include/asm-x86/ucontext.h index 89eaa5456a7e..9948dd328084 100644 --- a/trunk/include/asm-x86/ucontext.h +++ b/trunk/include/asm-x86/ucontext.h @@ -1,12 +1,6 @@ #ifndef ASM_X86__UCONTEXT_H #define ASM_X86__UCONTEXT_H -#define UC_FP_XSTATE 0x1 /* indicates the presence of extended state - * information in the memory layout pointed - * by the fpstate pointer in the ucontext's - * sigcontext struct (uc_mcontext). - */ - struct ucontext { unsigned long uc_flags; struct ucontext *uc_link; diff --git a/trunk/include/asm-x86/xcr.h b/trunk/include/asm-x86/xcr.h deleted file mode 100644 index f2cba4e79a23..000000000000 --- a/trunk/include/asm-x86/xcr.h +++ /dev/null @@ -1,49 +0,0 @@ -/* -*- linux-c -*- ------------------------------------------------------- * - * - * Copyright 2008 rPath, Inc. - All Rights Reserved - * - * This file is part of the Linux kernel, and is made available under - * the terms of the GNU General Public License version 2 or (at your - * option) any later version; incorporated herein by reference. - * - * ----------------------------------------------------------------------- */ - -/* - * asm-x86/xcr.h - * - * Definitions for the eXtended Control Register instructions - */ - -#ifndef _ASM_X86_XCR_H -#define _ASM_X86_XCR_H - -#define XCR_XFEATURE_ENABLED_MASK 0x00000000 - -#ifdef __KERNEL__ -# ifndef __ASSEMBLY__ - -#include - -static inline u64 xgetbv(u32 index) -{ - u32 eax, edx; - - asm volatile(".byte 0x0f,0x01,0xd0" /* xgetbv */ - : "=a" (eax), "=d" (edx) - : "c" (index)); - return eax + ((u64)edx << 32); -} - -static inline void xsetbv(u32 index, u64 value) -{ - u32 eax = value; - u32 edx = value >> 32; - - asm volatile(".byte 0x0f,0x01,0xd1" /* xsetbv */ - : : "a" (eax), "d" (edx), "c" (index)); -} - -# endif /* __ASSEMBLY__ */ -#endif /* __KERNEL__ */ - -#endif /* _ASM_X86_XCR_H */ diff --git a/trunk/include/asm-x86/xsave.h b/trunk/include/asm-x86/xsave.h deleted file mode 100644 index 08e9a1ac07a9..000000000000 --- a/trunk/include/asm-x86/xsave.h +++ /dev/null @@ -1,118 +0,0 @@ -#ifndef __ASM_X86_XSAVE_H -#define __ASM_X86_XSAVE_H - -#include -#include -#include - -#define XSTATE_FP 0x1 -#define XSTATE_SSE 0x2 - -#define XSTATE_FPSSE (XSTATE_FP | XSTATE_SSE) - -#define FXSAVE_SIZE 512 - -/* - * These are the features that the OS can handle currently. - */ -#define XCNTXT_MASK (XSTATE_FP | XSTATE_SSE) - -#ifdef CONFIG_X86_64 -#define REX_PREFIX "0x48, " -#else -#define REX_PREFIX -#endif - -extern unsigned int xstate_size; -extern u64 pcntxt_mask; -extern struct xsave_struct *init_xstate_buf; - -extern void xsave_cntxt_init(void); -extern void xsave_init(void); -extern int init_fpu(struct task_struct *child); -extern int check_for_xstate(struct i387_fxsave_struct __user *buf, - void __user *fpstate, - struct _fpx_sw_bytes *sw); - -static inline int xrstor_checking(struct xsave_struct *fx) -{ - int err; - - asm volatile("1: .byte " REX_PREFIX "0x0f,0xae,0x2f\n\t" - "2:\n" - ".section .fixup,\"ax\"\n" - "3: movl $-1,%[err]\n" - " jmp 2b\n" - ".previous\n" - _ASM_EXTABLE(1b, 3b) - : [err] "=r" (err) - : "D" (fx), "m" (*fx), "a" (-1), "d" (-1), "0" (0) - : "memory"); - - return err; -} - -static inline int xsave_user(struct xsave_struct __user *buf) -{ - int err; - __asm__ __volatile__("1: .byte " REX_PREFIX "0x0f,0xae,0x27\n" - "2:\n" - ".section .fixup,\"ax\"\n" - "3: movl $-1,%[err]\n" - " jmp 2b\n" - ".previous\n" - ".section __ex_table,\"a\"\n" - _ASM_ALIGN "\n" - _ASM_PTR "1b,3b\n" - ".previous" - : [err] "=r" (err) - : "D" (buf), "a" (-1), "d" (-1), "0" (0) - : "memory"); - if (unlikely(err) && __clear_user(buf, xstate_size)) - err = -EFAULT; - /* No need to clear here because the caller clears USED_MATH */ - return err; -} - -static inline int xrestore_user(struct xsave_struct __user *buf, u64 mask) -{ - int err; - struct xsave_struct *xstate = ((__force struct xsave_struct *)buf); - u32 lmask = mask; - u32 hmask = mask >> 32; - - __asm__ __volatile__("1: .byte " REX_PREFIX "0x0f,0xae,0x2f\n" - "2:\n" - ".section .fixup,\"ax\"\n" - "3: movl $-1,%[err]\n" - " jmp 2b\n" - ".previous\n" - ".section __ex_table,\"a\"\n" - _ASM_ALIGN "\n" - _ASM_PTR "1b,3b\n" - ".previous" - : [err] "=r" (err) - : "D" (xstate), "a" (lmask), "d" (hmask), "0" (0) - : "memory"); /* memory required? */ - return err; -} - -static inline void xrstor_state(struct xsave_struct *fx, u64 mask) -{ - u32 lmask = mask; - u32 hmask = mask >> 32; - - asm volatile(".byte " REX_PREFIX "0x0f,0xae,0x2f\n\t" - : : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) - : "memory"); -} - -static inline void xsave(struct task_struct *tsk) -{ - /* This, however, we can work around by forcing the compiler to select - an addressing mode that doesn't require extended registers. */ - __asm__ __volatile__(".byte " REX_PREFIX "0x0f,0xae,0x27" - : : "D" (&(tsk->thread.xstate->xsave)), - "a" (-1), "d"(-1) : "memory"); -} -#endif diff --git a/trunk/include/linux/ata.h b/trunk/include/linux/ata.h index a53318b8cbd0..be00973d1a8c 100644 --- a/trunk/include/linux/ata.h +++ b/trunk/include/linux/ata.h @@ -30,7 +30,6 @@ #define __LINUX_ATA_H__ #include -#include /* defines only for the constants which don't work well as enums */ #define ATA_DMA_BOUNDARY 0xffffUL @@ -559,15 +558,6 @@ static inline int ata_id_has_flush(const u16 *id) return id[ATA_ID_COMMAND_SET_2] & (1 << 12); } -static inline int ata_id_flush_enabled(const u16 *id) -{ - if (ata_id_has_flush(id) == 0) - return 0; - if ((id[ATA_ID_CSF_DEFAULT] & 0xC000) != 0x4000) - return 0; - return id[ATA_ID_CFS_ENABLE_2] & (1 << 12); -} - static inline int ata_id_has_flush_ext(const u16 *id) { if ((id[ATA_ID_COMMAND_SET_2] & 0xC000) != 0x4000) @@ -575,19 +565,6 @@ static inline int ata_id_has_flush_ext(const u16 *id) return id[ATA_ID_COMMAND_SET_2] & (1 << 13); } -static inline int ata_id_flush_ext_enabled(const u16 *id) -{ - if (ata_id_has_flush_ext(id) == 0) - return 0; - if ((id[ATA_ID_CSF_DEFAULT] & 0xC000) != 0x4000) - return 0; - /* - * some Maxtor disks have bit 13 defined incorrectly - * so check bit 10 too - */ - return (id[ATA_ID_CFS_ENABLE_2] & 0x2400) == 0x2400; -} - static inline int ata_id_has_lba48(const u16 *id) { if ((id[ATA_ID_COMMAND_SET_2] & 0xC000) != 0x4000) @@ -597,15 +574,6 @@ static inline int ata_id_has_lba48(const u16 *id) return id[ATA_ID_COMMAND_SET_2] & (1 << 10); } -static inline int ata_id_lba48_enabled(const u16 *id) -{ - if (ata_id_has_lba48(id) == 0) - return 0; - if ((id[ATA_ID_CSF_DEFAULT] & 0xC000) != 0x4000) - return 0; - return id[ATA_ID_CFS_ENABLE_2] & (1 << 10); -} - static inline int ata_id_hpa_enabled(const u16 *id) { /* Yes children, word 83 valid bits cover word 82 data */ @@ -677,15 +645,7 @@ static inline unsigned int ata_id_major_version(const u16 *id) static inline int ata_id_is_sata(const u16 *id) { - /* - * See if word 93 is 0 AND drive is at least ATA-5 compatible - * verifying that word 80 by casting it to a signed type -- - * this trick allows us to filter out the reserved values of - * 0x0000 and 0xffff along with the earlier ATA revisions... - */ - if (id[ATA_ID_HW_CONFIG] == 0 && (short)id[ATA_ID_MAJOR_VER] >= 0x0020) - return 1; - return 0; + return ata_id_major_version(id) >= 5 && id[ATA_ID_HW_CONFIG] == 0; } static inline int ata_id_has_tpm(const u16 *id) @@ -782,76 +742,6 @@ static inline int atapi_id_dmadir(const u16 *dev_id) return ata_id_major_version(dev_id) >= 7 && (dev_id[62] & 0x8000); } -/* - * ata_id_is_lba_capacity_ok() performs a sanity check on - * the claimed LBA capacity value for the device. - * - * Returns 1 if LBA capacity looks sensible, 0 otherwise. - * - * It is called only once for each device. - */ -static inline int ata_id_is_lba_capacity_ok(u16 *id) -{ - unsigned long lba_sects, chs_sects, head, tail; - - /* No non-LBA info .. so valid! */ - if (id[ATA_ID_CYLS] == 0) - return 1; - - lba_sects = ata_id_u32(id, ATA_ID_LBA_CAPACITY); - - /* - * The ATA spec tells large drives to return - * C/H/S = 16383/16/63 independent of their size. - * Some drives can be jumpered to use 15 heads instead of 16. - * Some drives can be jumpered to use 4092 cyls instead of 16383. - */ - if ((id[ATA_ID_CYLS] == 16383 || - (id[ATA_ID_CYLS] == 4092 && id[ATA_ID_CUR_CYLS] == 16383)) && - id[ATA_ID_SECTORS] == 63 && - (id[ATA_ID_HEADS] == 15 || id[ATA_ID_HEADS] == 16) && - (lba_sects >= 16383 * 63 * id[ATA_ID_HEADS])) - return 1; - - chs_sects = id[ATA_ID_CYLS] * id[ATA_ID_HEADS] * id[ATA_ID_SECTORS]; - - /* perform a rough sanity check on lba_sects: within 10% is OK */ - if (lba_sects - chs_sects < chs_sects/10) - return 1; - - /* some drives have the word order reversed */ - head = (lba_sects >> 16) & 0xffff; - tail = lba_sects & 0xffff; - lba_sects = head | (tail << 16); - - if (lba_sects - chs_sects < chs_sects/10) { - *(__le32 *)&id[ATA_ID_LBA_CAPACITY] = __cpu_to_le32(lba_sects); - return 1; /* LBA capacity is (now) good */ - } - - return 0; /* LBA capacity value may be bad */ -} - -static inline void ata_id_to_hd_driveid(u16 *id) -{ -#ifdef __BIG_ENDIAN - /* accessed in struct hd_driveid as 8-bit values */ - id[ATA_ID_MAX_MULTSECT] = __cpu_to_le16(id[ATA_ID_MAX_MULTSECT]); - id[ATA_ID_CAPABILITY] = __cpu_to_le16(id[ATA_ID_CAPABILITY]); - id[ATA_ID_OLD_PIO_MODES] = __cpu_to_le16(id[ATA_ID_OLD_PIO_MODES]); - id[ATA_ID_OLD_DMA_MODES] = __cpu_to_le16(id[ATA_ID_OLD_DMA_MODES]); - id[ATA_ID_MULTSECT] = __cpu_to_le16(id[ATA_ID_MULTSECT]); - - /* as 32-bit values */ - *(u32 *)&id[ATA_ID_LBA_CAPACITY] = ata_id_u32(id, ATA_ID_LBA_CAPACITY); - *(u32 *)&id[ATA_ID_SPG] = ata_id_u32(id, ATA_ID_SPG); - - /* as 64-bit value */ - *(u64 *)&id[ATA_ID_LBA_CAPACITY_2] = - ata_id_u64(id, ATA_ID_LBA_CAPACITY_2); -#endif -} - static inline int is_multi_taskfile(struct ata_taskfile *tf) { return (tf->command == ATA_CMD_READ_MULTI) || diff --git a/trunk/include/linux/dmar.h b/trunk/include/linux/dmar.h index c360c558e59e..56c73b847551 100644 --- a/trunk/include/linux/dmar.h +++ b/trunk/include/linux/dmar.h @@ -25,99 +25,9 @@ #include #include -#if defined(CONFIG_DMAR) || defined(CONFIG_INTR_REMAP) +#ifdef CONFIG_DMAR struct intel_iommu; -struct dmar_drhd_unit { - struct list_head list; /* list of drhd units */ - struct acpi_dmar_header *hdr; /* ACPI header */ - u64 reg_base_addr; /* register base address*/ - struct pci_dev **devices; /* target device array */ - int devices_cnt; /* target device count */ - u8 ignored:1; /* ignore drhd */ - u8 include_all:1; - struct intel_iommu *iommu; -}; - -extern struct list_head dmar_drhd_units; - -#define for_each_drhd_unit(drhd) \ - list_for_each_entry(drhd, &dmar_drhd_units, list) - -extern int dmar_table_init(void); -extern int early_dmar_detect(void); -extern int dmar_dev_scope_init(void); - -/* Intel IOMMU detection */ -extern void detect_intel_iommu(void); - - -extern int parse_ioapics_under_ir(void); -extern int alloc_iommu(struct dmar_drhd_unit *); -#else -static inline void detect_intel_iommu(void) -{ - return; -} - -static inline int dmar_table_init(void) -{ - return -ENODEV; -} -#endif /* !CONFIG_DMAR && !CONFIG_INTR_REMAP */ - -#ifdef CONFIG_INTR_REMAP -extern int intr_remapping_enabled; -extern int enable_intr_remapping(int); - -struct irte { - union { - struct { - __u64 present : 1, - fpd : 1, - dst_mode : 1, - redir_hint : 1, - trigger_mode : 1, - dlvry_mode : 3, - avail : 4, - __reserved_1 : 4, - vector : 8, - __reserved_2 : 8, - dest_id : 32; - }; - __u64 low; - }; - - union { - struct { - __u64 sid : 16, - sq : 2, - svt : 2, - __reserved_3 : 44; - }; - __u64 high; - }; -}; -extern int get_irte(int irq, struct irte *entry); -extern int modify_irte(int irq, struct irte *irte_modified); -extern int alloc_irte(struct intel_iommu *iommu, int irq, u16 count); -extern int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, - u16 sub_handle); -extern int map_irq_to_irte_handle(int irq, u16 *sub_handle); -extern int clear_irte_irq(int irq, struct intel_iommu *iommu, u16 index); -extern int flush_irte(int irq); -extern int free_irte(int irq); - -extern int irq_remapped(int irq); -extern struct intel_iommu *map_dev_to_ir(struct pci_dev *dev); -extern struct intel_iommu *map_ioapic_to_ir(int apic); -#else -#define irq_remapped(irq) (0) -#define enable_intr_remapping(mode) (-1) -#define intr_remapping_enabled (0) -#endif - -#ifdef CONFIG_DMAR extern const char *dmar_get_fault_reason(u8 fault_reason); /* Can't use the common MSI interrupt functions @@ -130,30 +40,47 @@ extern void dmar_msi_write(int irq, struct msi_msg *msg); extern int dmar_set_interrupt(struct intel_iommu *iommu); extern int arch_setup_dmar_msi(unsigned int irq); -extern int iommu_detected, no_iommu; +/* Intel IOMMU detection and initialization functions */ +extern void detect_intel_iommu(void); +extern int intel_iommu_init(void); + +extern int dmar_table_init(void); +extern int early_dmar_detect(void); + +extern struct list_head dmar_drhd_units; extern struct list_head dmar_rmrr_units; + +struct dmar_drhd_unit { + struct list_head list; /* list of drhd units */ + u64 reg_base_addr; /* register base address*/ + struct pci_dev **devices; /* target device array */ + int devices_cnt; /* target device count */ + u8 ignored:1; /* ignore drhd */ + u8 include_all:1; + struct intel_iommu *iommu; +}; + struct dmar_rmrr_unit { struct list_head list; /* list of rmrr units */ - struct acpi_dmar_header *hdr; /* ACPI header */ u64 base_address; /* reserved base address*/ u64 end_address; /* reserved end address */ struct pci_dev **devices; /* target devices */ int devices_cnt; /* target device count */ }; +#define for_each_drhd_unit(drhd) \ + list_for_each_entry(drhd, &dmar_drhd_units, list) #define for_each_rmrr_units(rmrr) \ list_for_each_entry(rmrr, &dmar_rmrr_units, list) -/* Intel DMAR initialization functions */ -extern int intel_iommu_init(void); -extern int dmar_disabled; #else +static inline void detect_intel_iommu(void) +{ + return; +} static inline int intel_iommu_init(void) { -#ifdef CONFIG_INTR_REMAP - return dmar_dev_scope_init(); -#else return -ENODEV; -#endif } + #endif /* !CONFIG_DMAR */ #endif /* __DMAR_H__ */ diff --git a/trunk/include/linux/ext3_fs.h b/trunk/include/linux/ext3_fs.h index 8120fa1bc235..80171ee89a22 100644 --- a/trunk/include/linux/ext3_fs.h +++ b/trunk/include/linux/ext3_fs.h @@ -837,8 +837,6 @@ extern void ext3_truncate (struct inode *); extern void ext3_set_inode_flags(struct inode *); extern void ext3_get_inode_flags(struct ext3_inode_info *); extern void ext3_set_aops(struct inode *inode); -extern int ext3_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, - u64 start, u64 len); /* ioctl.c */ extern int ext3_ioctl (struct inode *, struct file *, unsigned int, diff --git a/trunk/include/linux/fiemap.h b/trunk/include/linux/fiemap.h deleted file mode 100644 index 671decbd2aeb..000000000000 --- a/trunk/include/linux/fiemap.h +++ /dev/null @@ -1,64 +0,0 @@ -/* - * FS_IOC_FIEMAP ioctl infrastructure. - * - * Some portions copyright (C) 2007 Cluster File Systems, Inc - * - * Authors: Mark Fasheh - * Kalpak Shah - * Andreas Dilger - */ - -#ifndef _LINUX_FIEMAP_H -#define _LINUX_FIEMAP_H - -struct fiemap_extent { - __u64 fe_logical; /* logical offset in bytes for the start of - * the extent from the beginning of the file */ - __u64 fe_physical; /* physical offset in bytes for the start - * of the extent from the beginning of the disk */ - __u64 fe_length; /* length in bytes for this extent */ - __u64 fe_reserved64[2]; - __u32 fe_flags; /* FIEMAP_EXTENT_* flags for this extent */ - __u32 fe_reserved[3]; -}; - -struct fiemap { - __u64 fm_start; /* logical offset (inclusive) at - * which to start mapping (in) */ - __u64 fm_length; /* logical length of mapping which - * userspace wants (in) */ - __u32 fm_flags; /* FIEMAP_FLAG_* flags for request (in/out) */ - __u32 fm_mapped_extents;/* number of extents that were mapped (out) */ - __u32 fm_extent_count; /* size of fm_extents array (in) */ - __u32 fm_reserved; - struct fiemap_extent fm_extents[0]; /* array of mapped extents (out) */ -}; - -#define FIEMAP_MAX_OFFSET (~0ULL) - -#define FIEMAP_FLAG_SYNC 0x00000001 /* sync file data before map */ -#define FIEMAP_FLAG_XATTR 0x00000002 /* map extended attribute tree */ - -#define FIEMAP_FLAGS_COMPAT (FIEMAP_FLAG_SYNC | FIEMAP_FLAG_XATTR) - -#define FIEMAP_EXTENT_LAST 0x00000001 /* Last extent in file. */ -#define FIEMAP_EXTENT_UNKNOWN 0x00000002 /* Data location unknown. */ -#define FIEMAP_EXTENT_DELALLOC 0x00000004 /* Location still pending. - * Sets EXTENT_UNKNOWN. */ -#define FIEMAP_EXTENT_ENCODED 0x00000008 /* Data can not be read - * while fs is unmounted */ -#define FIEMAP_EXTENT_DATA_ENCRYPTED 0x00000080 /* Data is encrypted by fs. - * Sets EXTENT_NO_BYPASS. */ -#define FIEMAP_EXTENT_NOT_ALIGNED 0x00000100 /* Extent offsets may not be - * block aligned. */ -#define FIEMAP_EXTENT_DATA_INLINE 0x00000200 /* Data mixed with metadata. - * Sets EXTENT_NOT_ALIGNED.*/ -#define FIEMAP_EXTENT_DATA_TAIL 0x00000400 /* Multiple files in block. - * Sets EXTENT_NOT_ALIGNED.*/ -#define FIEMAP_EXTENT_UNWRITTEN 0x00000800 /* Space allocated, but - * no data (i.e. zero). */ -#define FIEMAP_EXTENT_MERGED 0x00001000 /* File does not natively - * support extents. Result - * merged for efficiency. */ - -#endif /* _LINUX_FIEMAP_H */ diff --git a/trunk/include/linux/fs.h b/trunk/include/linux/fs.h index 44e3cb2f1966..32477e8872d5 100644 --- a/trunk/include/linux/fs.h +++ b/trunk/include/linux/fs.h @@ -234,7 +234,6 @@ extern int dir_notify_enable; #define FS_IOC_SETFLAGS _IOW('f', 2, long) #define FS_IOC_GETVERSION _IOR('v', 1, long) #define FS_IOC_SETVERSION _IOW('v', 2, long) -#define FS_IOC_FIEMAP _IOWR('f', 11, struct fiemap) #define FS_IOC32_GETFLAGS _IOR('f', 1, int) #define FS_IOC32_SETFLAGS _IOW('f', 2, int) #define FS_IOC32_GETVERSION _IOR('v', 1, int) @@ -295,7 +294,6 @@ extern int dir_notify_enable; #include #include #include -#include #include #include @@ -1183,20 +1181,6 @@ extern void dentry_unhash(struct dentry *dentry); */ extern int file_permission(struct file *, int); -/* - * VFS FS_IOC_FIEMAP helper definitions. - */ -struct fiemap_extent_info { - unsigned int fi_flags; /* Flags as passed from user */ - unsigned int fi_extents_mapped; /* Number of mapped extents */ - unsigned int fi_extents_max; /* Size of fiemap_extent array */ - struct fiemap_extent *fi_extents_start; /* Start of fiemap_extent - * array */ -}; -int fiemap_fill_next_extent(struct fiemap_extent_info *info, u64 logical, - u64 phys, u64 len, u32 flags); -int fiemap_check_flags(struct fiemap_extent_info *fieinfo, u32 fs_flags); - /* * File types * @@ -1306,8 +1290,6 @@ struct inode_operations { void (*truncate_range)(struct inode *, loff_t, loff_t); long (*fallocate)(struct inode *inode, int mode, loff_t offset, loff_t len); - int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, - u64 len); }; struct seq_file; @@ -2005,9 +1987,6 @@ extern int vfs_fstat(unsigned int, struct kstat *); extern int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd, unsigned long arg); -extern int generic_block_fiemap(struct inode *inode, - struct fiemap_extent_info *fieinfo, u64 start, - u64 len, get_block_t *get_block); extern void get_filesystem(struct file_system_type *fs); extern void put_filesystem(struct file_system_type *fs); diff --git a/trunk/include/linux/ide.h b/trunk/include/linux/ide.h index a9d82d6e6bdd..6514db8fd2e4 100644 --- a/trunk/include/linux/ide.h +++ b/trunk/include/linux/ide.h @@ -8,7 +8,7 @@ #include #include -#include +#include #include #include #include @@ -17,7 +17,6 @@ #include #include #include -#include #ifdef CONFIG_BLK_DEV_IDEACPI #include #endif @@ -88,13 +87,12 @@ struct ide_io_ports { }; #define OK_STAT(stat,good,bad) (((stat)&((good)|(bad)))==(good)) +#define BAD_R_STAT (BUSY_STAT | ERR_STAT) +#define BAD_W_STAT (BAD_R_STAT | WRERR_STAT) +#define BAD_STAT (BAD_R_STAT | DRQ_STAT) +#define DRIVE_READY (READY_STAT | SEEK_STAT) -#define BAD_R_STAT (ATA_BUSY | ATA_ERR) -#define BAD_W_STAT (BAD_R_STAT | ATA_DF) -#define BAD_STAT (BAD_R_STAT | ATA_DRQ) -#define DRIVE_READY (ATA_DRDY | ATA_DSC) - -#define BAD_CRC (ATA_ABORTED | ATA_ICRC) +#define BAD_CRC (ABRT_ERR | ICRC_ERR) #define SATA_NR_PORTS (3) /* 16 possible ?? */ @@ -127,41 +125,24 @@ struct ide_io_ports { #define PARTN_BITS 6 /* number of minor dev bits for partitions */ #define MAX_DRIVES 2 /* per interface; 2 assumed by lots of code */ #define SECTOR_SIZE 512 - +#define SECTOR_WORDS (SECTOR_SIZE / 4) /* number of 32bit words per sector */ #define IDE_LARGE_SEEK(b1,b2,t) (((b1) > (b2) + (t)) || ((b2) > (b1) + (t))) /* * Timeouts for various operations: */ -enum { - /* spec allows up to 20ms */ - WAIT_DRQ = HZ / 10, /* 100ms */ - /* some laptops are very slow */ - WAIT_READY = 5 * HZ, /* 5s */ - /* should be less than 3ms (?), if all ATAPI CD is closed at boot */ - WAIT_PIDENTIFY = 10 * HZ, /* 10s */ - /* worst case when spinning up */ - WAIT_WORSTCASE = 30 * HZ, /* 30s */ - /* maximum wait for an IRQ to happen */ - WAIT_CMD = 10 * HZ, /* 10s */ - /* Some drives require a longer IRQ timeout. */ - WAIT_FLOPPY_CMD = 50 * HZ, /* 50s */ - /* - * Some drives (for example, Seagate STT3401A Travan) require a very - * long timeout, because they don't return an interrupt or clear their - * BSY bit until after the command completes (even retension commands). - */ - WAIT_TAPE_CMD = 900 * HZ, /* 900s */ - /* minimum sleep time */ - WAIT_MIN_SLEEP = HZ / 50, /* 20ms */ -}; +#define WAIT_DRQ (HZ/10) /* 100msec - spec allows up to 20ms */ +#define WAIT_READY (5*HZ) /* 5sec - some laptops are very slow */ +#define WAIT_PIDENTIFY (10*HZ) /* 10sec - should be less than 3ms (?), if all ATAPI CD is closed at boot */ +#define WAIT_WORSTCASE (30*HZ) /* 30sec - worst case when spinning up */ +#define WAIT_CMD (10*HZ) /* 10sec - maximum wait for an IRQ to happen */ +#define WAIT_MIN_SLEEP (2*HZ/100) /* 20msec - minimum sleep time */ /* * Op codes for special requests to be handled by ide_special_rq(). * Values should be in the range of 0x20 to 0x3f. */ #define REQ_DRIVE_RESET 0x20 -#define REQ_DEVSET_EXEC 0x21 /* * Check for an interrupt and acknowledge the interrupt status @@ -322,8 +303,8 @@ typedef enum { ide_started, /* a drive operation was started, handler was set */ } ide_startstop_t; -struct ide_devset; struct ide_driver_s; +struct ide_settings_s; #ifdef CONFIG_BLK_DEV_IDEACPI struct ide_acpi_drive_link; @@ -334,10 +315,10 @@ struct ide_acpi_hwif_link; enum { IDE_AFLAG_DRQ_INTERRUPT = (1 << 0), IDE_AFLAG_MEDIA_CHANGED = (1 << 1), - /* Drive cannot lock the door. */ - IDE_AFLAG_NO_DOORLOCK = (1 << 2), /* ide-cd */ + /* Drive cannot lock the door. */ + IDE_AFLAG_NO_DOORLOCK = (1 << 2), /* Drive cannot eject the disc. */ IDE_AFLAG_NO_EJECT = (1 << 3), /* Drive is a pre ATAPI 1.2 drive. */ @@ -373,25 +354,21 @@ enum { IDE_AFLAG_CLIK_DRIVE = (1 << 19), /* Requires BH algorithm for packets */ IDE_AFLAG_ZIP_DRIVE = (1 << 20), - /* Write protect */ - IDE_AFLAG_WP = (1 << 21), - /* Supports format progress report */ - IDE_AFLAG_SRFP = (1 << 22), /* ide-tape */ - IDE_AFLAG_IGNORE_DSC = (1 << 23), + IDE_AFLAG_IGNORE_DSC = (1 << 21), /* 0 When the tape position is unknown */ - IDE_AFLAG_ADDRESS_VALID = (1 << 24), + IDE_AFLAG_ADDRESS_VALID = (1 << 22), /* Device already opened */ - IDE_AFLAG_BUSY = (1 << 25), + IDE_AFLAG_BUSY = (1 << 23), /* Attempt to auto-detect the current user block size */ - IDE_AFLAG_DETECT_BS = (1 << 26), + IDE_AFLAG_DETECT_BS = (1 << 24), /* Currently on a filemark */ - IDE_AFLAG_FILEMARK = (1 << 27), + IDE_AFLAG_FILEMARK = (1 << 25), /* 0 = no tape is loaded, so we don't rewind after ejecting */ - IDE_AFLAG_MEDIUM_PRESENT = (1 << 28), + IDE_AFLAG_MEDIUM_PRESENT = (1 << 26), - IDE_AFLAG_NO_AUTOCLOSE = (1 << 29), + IDE_AFLAG_NO_AUTOCLOSE = (1 << 27), }; struct ide_drive_s { @@ -403,10 +380,10 @@ struct ide_drive_s { struct request *rq; /* current request */ struct ide_drive_s *next; /* circular list of hwgroup drives */ void *driver_data; /* extra driver data */ - u16 *id; /* identification info */ + struct hd_driveid *id; /* drive model identification info */ #ifdef CONFIG_IDE_PROC_FS struct proc_dir_entry *proc; /* /proc/ide/ directory entry */ - const struct ide_proc_devset *settings; /* /proc/ide/ drive settings */ + struct ide_settings_s *settings;/* /proc/ide/ drive settings */ #endif struct hwif_s *hwif; /* actually (ide_hwif_t *) */ @@ -418,16 +395,16 @@ struct ide_drive_s { special_t special; /* special action flags */ select_t select; /* basic drive/head select reg value */ + u8 keep_settings; /* restore settings after drive reset */ + u8 using_dma; /* disk is using dma for read/write */ u8 retry_pio; /* retrying dma capable host in pio */ u8 state; /* retry state */ u8 waiting_for_dma; /* dma currently in progress */ + u8 unmask; /* okay to unmask other irqs */ + u8 noflush; /* don't attempt flushes */ + u8 dsc_overlap; /* DSC overlap */ + u8 nice1; /* give potential excess bandwidth */ - unsigned keep_settings : 1; /* restore settings after drive reset */ - unsigned using_dma : 1; /* disk is using dma for read/write */ - unsigned unmask : 1; /* okay to unmask other irqs */ - unsigned noflush : 1; /* don't attempt flushes */ - unsigned dsc_overlap : 1; /* DSC overlap */ - unsigned nice1 : 1; /* give potential excess bandwidth */ unsigned present : 1; /* drive is physically present */ unsigned dead : 1; /* device ejected hint */ unsigned id_read : 1; /* 1=id read from disk 0 = synthetic */ @@ -437,22 +414,23 @@ struct ide_drive_s { unsigned forced_geom : 1; /* 1 if hdx=c,h,s was given at boot */ unsigned no_unmask : 1; /* disallow setting unmask bit */ unsigned no_io_32bit : 1; /* disallow enabling 32bit I/O */ + unsigned atapi_overlap : 1; /* ATAPI overlap (not supported) */ unsigned doorlocking : 1; /* for removable only: door lock/unlock works */ unsigned nodma : 1; /* disallow DMA */ + unsigned remap_0_to_1 : 1; /* 0=noremap, 1=remap 0->1 (for EZDrive) */ unsigned blocked : 1; /* 1=powermanagment told us not to do anything, so sleep nicely */ unsigned scsi : 1; /* 0=default, 1=ide-scsi emulation */ unsigned sleeping : 1; /* 1=sleeping & sleep field valid */ unsigned post_reset : 1; unsigned udma33_warned : 1; - unsigned addressing : 2; /* 0=28-bit, 1=48-bit, 2=48-bit doing 28-bit */ - unsigned wcache : 1; /* status of write cache */ - unsigned nowerr : 1; /* used for ignoring ATA_DF */ + u8 addressing; /* 0=28-bit, 1=48-bit, 2=48-bit doing 28-bit */ u8 quirk_list; /* considered quirky, set for a specific host */ u8 init_speed; /* transfer rate set at boot */ u8 current_speed; /* current transfer rate set */ u8 desired_speed; /* desired transfer rate set */ u8 dn; /* now wide spread use */ + u8 wcache; /* status of write cache */ u8 acoustic; /* acoustic management */ u8 media; /* disk, cdrom, tape, floppy, ... */ u8 ready_stat; /* min status value for drive ready */ @@ -460,7 +438,9 @@ struct ide_drive_s { u8 mult_req; /* requested multiple sector setting */ u8 tune_req; /* requested drive tuning setting */ u8 io_32bit; /* 0=16-bit, 1=32-bit, 2/3=32bit+sync */ - u8 bad_wstat; /* used for ignoring ATA_DF */ + u8 bad_wstat; /* used for ignoring WRERR_STAT */ + u8 nowerr; /* used for ignoring WRERR_STAT */ + u8 sect0; /* offset of first sector for DM6:DDO */ u8 head; /* "real" number of heads */ u8 sect; /* "real" sectors per track */ u8 bios_head; /* BIOS/fdisk/LILO number of heads */ @@ -494,6 +474,10 @@ typedef struct ide_drive_s ide_drive_t; #define to_ide_device(dev)container_of(dev, ide_drive_t, gendev) +#define IDE_CHIPSET_PCI_MASK \ + ((1<> (c)) & 1) + struct ide_task_s; struct ide_port_info; @@ -583,6 +567,7 @@ typedef struct hwif_s { u8 major; /* our major number */ u8 index; /* 0 for ide0; 1 for ide1; ... */ u8 channel; /* for dual-port chips: 0=primary, 1=secondary */ + u8 bus_state; /* power state of the IDE bus */ u32 host_flags; @@ -660,7 +645,6 @@ struct ide_host { ide_hwif_t *ports[MAX_HWIFS]; unsigned int n_ports; struct device *dev[2]; - unsigned int (*init_chipset)(struct pci_dev *); unsigned long host_flags; void *host_priv; }; @@ -708,61 +692,9 @@ typedef struct ide_driver_s ide_driver_t; extern struct mutex ide_setting_mtx; -/* - * configurable drive settings - */ - -#define DS_SYNC (1 << 0) - -struct ide_devset { - int (*get)(ide_drive_t *); - int (*set)(ide_drive_t *, int); - unsigned int flags; -}; - -#define __DEVSET(_flags, _get, _set) { \ - .flags = _flags, \ - .get = _get, \ - .set = _set, \ -} - -#define ide_devset_get(name, field) \ -static int get_##name(ide_drive_t *drive) \ -{ \ - return drive->field; \ -} - -#define ide_devset_set(name, field) \ -static int set_##name(ide_drive_t *drive, int arg) \ -{ \ - drive->field = arg; \ - return 0; \ -} - -#define __IDE_DEVSET(_name, _flags, _get, _set) \ -const struct ide_devset ide_devset_##_name = \ - __DEVSET(_flags, _get, _set) - -#define IDE_DEVSET(_name, _flags, _get, _set) \ -static __IDE_DEVSET(_name, _flags, _get, _set) - -#define ide_devset_rw(_name, _func) \ -IDE_DEVSET(_name, 0, get_##_func, set_##_func) - -#define ide_devset_w(_name, _func) \ -IDE_DEVSET(_name, 0, NULL, set_##_func) - -#define ide_devset_rw_sync(_name, _func) \ -IDE_DEVSET(_name, DS_SYNC, get_##_func, set_##_func) - -#define ide_decl_devset(_name) \ -extern const struct ide_devset ide_devset_##_name - -ide_decl_devset(io_32bit); -ide_decl_devset(keepsettings); -ide_decl_devset(pio_mode); -ide_decl_devset(unmaskirq); -ide_decl_devset(using_dma); +int set_io_32bit(ide_drive_t *, int); +int set_pio_mode(ide_drive_t *, int); +int set_using_dma(ide_drive_t *, int); /* ATAPI packet command flags */ enum { @@ -778,12 +710,6 @@ enum { PC_FLAG_TIMEDOUT = (1 << 7), }; -/* - * With each packet command, we allocate a buffer of IDE_PC_BUFFER_SIZE bytes. - * This is used for several packet commands (not for READ/WRITE commands). - */ -#define IDE_PC_BUFFER_SIZE 256 - struct ide_atapi_pc { /* actual packet bytes */ u8 c[12]; @@ -813,7 +739,7 @@ struct ide_atapi_pc { * those are more or less driver-specific and some of them are subject * to change/removal later. */ - u8 pc_buf[IDE_PC_BUFFER_SIZE]; + u8 pc_buf[256]; /* idetape only */ struct idetape_bh *bh; @@ -831,34 +757,37 @@ struct ide_atapi_pc { #ifdef CONFIG_IDE_PROC_FS /* - * /proc/ide interface + * configurable drive settings */ -#define ide_devset_rw_field(_name, _field) \ -ide_devset_get(_name, _field); \ -ide_devset_set(_name, _field); \ -IDE_DEVSET(_name, DS_SYNC, get_##_name, set_##_name) - -struct ide_proc_devset { - const char *name; - const struct ide_devset *setting; - int min, max; - int (*mulf)(ide_drive_t *); - int (*divf)(ide_drive_t *); -}; +#define TYPE_INT 0 +#define TYPE_BYTE 1 +#define TYPE_SHORT 2 -#define __IDE_PROC_DEVSET(_name, _min, _max, _mulf, _divf) { \ - .name = __stringify(_name), \ - .setting = &ide_devset_##_name, \ - .min = _min, \ - .max = _max, \ - .mulf = _mulf, \ - .divf = _divf, \ -} +#define SETTING_READ (1 << 0) +#define SETTING_WRITE (1 << 1) +#define SETTING_RW (SETTING_READ | SETTING_WRITE) -#define IDE_PROC_DEVSET(_name, _min, _max) \ -__IDE_PROC_DEVSET(_name, _min, _max, NULL, NULL) +typedef int (ide_procset_t)(ide_drive_t *, int); +typedef struct ide_settings_s { + char *name; + int rw; + int data_type; + int min; + int max; + int mul_factor; + int div_factor; + void *data; + ide_procset_t *set; + int auto_remove; + struct ide_settings_s *next; +} ide_settings_t; + +int ide_add_setting(ide_drive_t *, const char *, int, int, int, int, int, int, void *, ide_procset_t *set); +/* + * /proc/ide interface + */ typedef struct { const char *name; mode_t mode; @@ -875,6 +804,8 @@ void ide_proc_unregister_port(ide_hwif_t *); void ide_proc_register_driver(ide_drive_t *, ide_driver_t *); void ide_proc_unregister_driver(ide_drive_t *, ide_driver_t *); +void ide_add_generic_settings(ide_drive_t *); + read_proc_t proc_ide_read_capacity; read_proc_t proc_ide_read_geometry; @@ -902,6 +833,7 @@ static inline void ide_proc_unregister_device(ide_drive_t *drive) { ; } static inline void ide_proc_unregister_port(ide_hwif_t *hwif) { ; } static inline void ide_proc_register_driver(ide_drive_t *drive, ide_driver_t *driver) { ; } static inline void ide_proc_unregister_driver(ide_drive_t *drive, ide_driver_t *driver) { ; } +static inline void ide_add_generic_settings(ide_drive_t *drive) { ; } #define PROC_IDE_READ_RETURN(page,start,off,count,eof,len) return 0; #endif @@ -947,6 +879,7 @@ enum { struct ide_driver_s { const char *version; u8 media; + unsigned supports_dsc_overlap : 1; ide_startstop_t (*do_request)(ide_drive_t *, struct request *, sector_t); int (*end_request)(ide_drive_t *, int, int); ide_startstop_t (*error)(ide_drive_t *, struct request *rq, u8, u8); @@ -956,8 +889,7 @@ struct ide_driver_s { void (*resume)(ide_drive_t *); void (*shutdown)(ide_drive_t *); #ifdef CONFIG_IDE_PROC_FS - ide_proc_entry_t *proc; - const struct ide_proc_devset *settings; + ide_proc_entry_t *proc; #endif }; @@ -966,17 +898,7 @@ struct ide_driver_s { int ide_device_get(ide_drive_t *); void ide_device_put(ide_drive_t *); -struct ide_ioctl_devset { - unsigned int get_ioctl; - unsigned int set_ioctl; - const struct ide_devset *setting; -}; - -int ide_setting_ioctl(ide_drive_t *, struct block_device *, unsigned int, - unsigned long, const struct ide_ioctl_devset *); - -int generic_ide_ioctl(ide_drive_t *, struct file *, struct block_device *, - unsigned, unsigned long); +int generic_ide_ioctl(ide_drive_t *, struct file *, struct block_device *, unsigned, unsigned long); extern int ide_vlb_clk; extern int ide_pci_clk; @@ -998,19 +920,14 @@ ide_startstop_t __ide_error(ide_drive_t *, struct request *, u8, u8); ide_startstop_t ide_error (ide_drive_t *drive, const char *msg, byte stat); -void ide_fix_driveid(u16 *); +extern void ide_fix_driveid(struct hd_driveid *); extern void ide_fixstring(u8 *, const int, const int); -int ide_busy_sleep(ide_hwif_t *, unsigned long, int); - int ide_wait_stat(ide_startstop_t *, ide_drive_t *, u8, u8, unsigned long); extern ide_startstop_t ide_do_reset (ide_drive_t *); -extern int ide_devset_execute(ide_drive_t *drive, - const struct ide_devset *setting, int arg); - extern void ide_do_drive_cmd(ide_drive_t *, struct request *); extern void ide_end_drive_cmd(ide_drive_t *, u8, u8); @@ -1134,8 +1051,6 @@ void ide_tf_read(ide_drive_t *, ide_task_t *); void ide_input_data(ide_drive_t *, struct request *, void *, unsigned int); void ide_output_data(ide_drive_t *, struct request *, void *, unsigned int); -int ide_io_buffers(ide_drive_t *, struct ide_atapi_pc *, unsigned int, int); - extern void SELECT_DRIVE(ide_drive_t *); void SELECT_MASK(ide_drive_t *, int); @@ -1146,36 +1061,11 @@ extern int drive_is_ready(ide_drive_t *); void ide_pktcmd_tf_load(ide_drive_t *, u32, u16, u8); -int ide_check_atapi_device(ide_drive_t *, const char *); - -void ide_init_pc(struct ide_atapi_pc *); - -/* - * Special requests for ide-tape block device strategy routine. - * - * In order to service a character device command, we add special requests to - * the tail of our block device request queue and wait for their completion. - */ -enum { - REQ_IDETAPE_PC1 = (1 << 0), /* packet command (first stage) */ - REQ_IDETAPE_PC2 = (1 << 1), /* packet command (second stage) */ - REQ_IDETAPE_READ = (1 << 2), - REQ_IDETAPE_WRITE = (1 << 3), -}; - -void ide_queue_pc_head(ide_drive_t *, struct gendisk *, struct ide_atapi_pc *, - struct request *); -int ide_queue_pc_tail(ide_drive_t *, struct gendisk *, struct ide_atapi_pc *); - -int ide_do_test_unit_ready(ide_drive_t *, struct gendisk *); -int ide_do_start_stop(ide_drive_t *, struct gendisk *, int); -int ide_set_media_lock(ide_drive_t *, struct gendisk *, int); - ide_startstop_t ide_pc_intr(ide_drive_t *drive, struct ide_atapi_pc *pc, ide_handler_t *handler, unsigned int timeout, ide_expiry_t *expiry, void (*update_buffers)(ide_drive_t *, struct ide_atapi_pc *), void (*retry_pc)(ide_drive_t *), void (*dsc_handle)(ide_drive_t *), - int (*io_buffers)(ide_drive_t *, struct ide_atapi_pc *, unsigned int, + void (*io_buffers)(ide_drive_t *, struct ide_atapi_pc *, unsigned int, int)); ide_startstop_t ide_transfer_pc(ide_drive_t *, struct ide_atapi_pc *, ide_handler_t *, unsigned int, ide_expiry_t *); @@ -1190,6 +1080,8 @@ int ide_raw_taskfile(ide_drive_t *, ide_task_t *, u8 *, u16); int ide_no_data_taskfile(ide_drive_t *, ide_task_t *); int ide_taskfile_ioctl(ide_drive_t *, unsigned int, unsigned long); +int ide_cmd_ioctl(ide_drive_t *, unsigned int, unsigned long); +int ide_task_ioctl(ide_drive_t *, unsigned int, unsigned long); extern int ide_driveid_update(ide_drive_t *); extern int ide_config_drive_speed(ide_drive_t *, u8); @@ -1200,6 +1092,7 @@ extern int ide_wait_not_busy(ide_hwif_t *hwif, unsigned long timeout); extern void ide_stall_queue(ide_drive_t *drive, unsigned long timeout); +extern int ide_spin_wait_hwgroup(ide_drive_t *); extern void ide_timer_expiry(unsigned long); extern irqreturn_t ide_intr(int irq, void *dev_id); extern void do_ide_request(struct request_queue *); @@ -1336,14 +1229,6 @@ int ide_pci_init_two(struct pci_dev *, struct pci_dev *, const struct ide_port_info *, void *); void ide_pci_remove(struct pci_dev *); -#ifdef CONFIG_PM -int ide_pci_suspend(struct pci_dev *, pm_message_t); -int ide_pci_resume(struct pci_dev *); -#else -#define ide_pci_suspend NULL -#define ide_pci_resume NULL -#endif - void ide_map_sg(ide_drive_t *, struct request *); void ide_init_sg_cmd(ide_drive_t *, struct request *); @@ -1355,7 +1240,7 @@ struct drive_list_entry { const char *id_firmware; }; -int ide_in_drive_list(u16 *, const struct drive_list_entry *); +int ide_in_drive_list(struct hd_driveid *, const struct drive_list_entry *); #ifdef CONFIG_BLK_DEV_IDEDMA int __ide_dma_bad_drive(ide_drive_t *); @@ -1462,6 +1347,24 @@ const char *ide_xfer_verbose(u8 mode); extern void ide_toggle_bounce(ide_drive_t *drive, int on); extern int ide_set_xfer_rate(ide_drive_t *drive, u8 rate); +static inline int ide_dev_has_iordy(struct hd_driveid *id) +{ + return ((id->field_valid & 2) && (id->capability & 8)) ? 1 : 0; +} + +static inline int ide_dev_is_sata(struct hd_driveid *id) +{ + /* + * See if word 93 is 0 AND drive is at least ATA-5 compatible + * verifying that word 80 by casting it to a signed type -- + * this trick allows us to filter out the reserved values of + * 0x0000 and 0xffff along with the earlier ATA revisions... + */ + if (id->hw_config == 0 && (short)id->major_rev_num >= 0x0020) + return 1; + return 0; +} + u64 ide_get_lba_addr(struct ide_taskfile *, int); u8 ide_dump_status(ide_drive_t *, const char *, u8); @@ -1533,6 +1436,13 @@ extern struct mutex ide_cfg_mtx; extern struct bus_type ide_bus_type; extern struct class *ide_port_class; +/* check if CACHE FLUSH (EXT) command is supported (bits defined in ATA-6) */ +#define ide_id_has_flush_cache(id) ((id)->cfs_enable_2 & 0x3000) + +/* some Maxtor disks have bit 13 defined incorrectly so check bit 10 too */ +#define ide_id_has_flush_cache_ext(id) \ + (((id)->cfs_enable_2 & 0x2400) == 0x2400) + static inline void ide_dump_identify(u8 *id) { print_hex_dump(KERN_INFO, "", DUMP_PREFIX_NONE, 16, 2, id, 512, 0); @@ -1543,10 +1453,10 @@ static inline int hwif_to_node(ide_hwif_t *hwif) return hwif->dev ? dev_to_node(hwif->dev) : -1; } -static inline ide_drive_t *ide_get_pair_dev(ide_drive_t *drive) +static inline ide_drive_t *ide_get_paired_drive(ide_drive_t *drive) { - ide_drive_t *peer = &drive->hwif->drives[(drive->dn ^ 1) & 1]; + ide_hwif_t *hwif = HWIF(drive); - return peer->present ? peer : NULL; + return &hwif->drives[(drive->dn ^ 1) & 1]; } #endif /* _IDE_H */ diff --git a/trunk/include/linux/ioport.h b/trunk/include/linux/ioport.h index ee9bcc6f32b6..350033e8f4e1 100644 --- a/trunk/include/linux/ioport.h +++ b/trunk/include/linux/ioport.h @@ -108,9 +108,6 @@ extern struct resource iomem_resource; extern int request_resource(struct resource *root, struct resource *new); extern int release_resource(struct resource *new); -extern void reserve_region_with_split(struct resource *root, - resource_size_t start, resource_size_t end, - const char *name); extern int insert_resource(struct resource *parent, struct resource *new); extern void insert_resource_expand_to_fit(struct resource *root, struct resource *new); extern int allocate_resource(struct resource *root, struct resource *new, diff --git a/trunk/include/linux/irq.h b/trunk/include/linux/irq.h index 8d9411bc60f6..8ccb462ea42c 100644 --- a/trunk/include/linux/irq.h +++ b/trunk/include/linux/irq.h @@ -62,7 +62,6 @@ typedef void (*irq_flow_handler_t)(unsigned int irq, #define IRQ_MOVE_PENDING 0x00200000 /* need to re-target IRQ destination */ #define IRQ_NO_BALANCING 0x00400000 /* IRQ is excluded from balancing */ #define IRQ_SPURIOUS_DISABLED 0x00800000 /* IRQ was disabled by the spurious trap */ -#define IRQ_MOVE_PCNTXT 0x01000000 /* IRQ migration from process context */ #ifdef CONFIG_IRQ_PER_CPU # define CHECK_IRQ_PER_CPU(var) ((var) & IRQ_PER_CPU) diff --git a/trunk/include/linux/jbd2.h b/trunk/include/linux/jbd2.h index 66c3499478b5..3dd209007098 100644 --- a/trunk/include/linux/jbd2.h +++ b/trunk/include/linux/jbd2.h @@ -850,8 +850,7 @@ struct journal_s */ struct block_device *j_dev; int j_blocksize; - unsigned long long j_blk_offset; - char j_devname[BDEVNAME_SIZE+24]; + unsigned long long j_blk_offset; /* * Device which holds the client fs. For internal journal this will be diff --git a/trunk/include/linux/percpu.h b/trunk/include/linux/percpu.h index 9f2a3751873a..fac3337547eb 100644 --- a/trunk/include/linux/percpu.h +++ b/trunk/include/linux/percpu.h @@ -23,19 +23,12 @@ __attribute__((__section__(SHARED_ALIGNED_SECTION))) \ PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name \ ____cacheline_aligned_in_smp - -#define DEFINE_PER_CPU_PAGE_ALIGNED(type, name) \ - __attribute__((__section__(".data.percpu.page_aligned"))) \ - PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name #else #define DEFINE_PER_CPU(type, name) \ PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name #define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \ DEFINE_PER_CPU(type, name) - -#define DEFINE_PER_CPU_PAGE_ALIGNED(type, name) \ - DEFINE_PER_CPU(type, name) #endif #define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var) diff --git a/trunk/include/linux/percpu_counter.h b/trunk/include/linux/percpu_counter.h index 9007ccdfc112..208388835357 100644 --- a/trunk/include/linux/percpu_counter.h +++ b/trunk/include/linux/percpu_counter.h @@ -35,7 +35,7 @@ int percpu_counter_init_irq(struct percpu_counter *fbc, s64 amount); void percpu_counter_destroy(struct percpu_counter *fbc); void percpu_counter_set(struct percpu_counter *fbc, s64 amount); void __percpu_counter_add(struct percpu_counter *fbc, s64 amount, s32 batch); -s64 __percpu_counter_sum(struct percpu_counter *fbc); +s64 __percpu_counter_sum(struct percpu_counter *fbc, int set); static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount) { @@ -44,13 +44,19 @@ static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount) static inline s64 percpu_counter_sum_positive(struct percpu_counter *fbc) { - s64 ret = __percpu_counter_sum(fbc); + s64 ret = __percpu_counter_sum(fbc, 0); return ret < 0 ? 0 : ret; } +static inline s64 percpu_counter_sum_and_set(struct percpu_counter *fbc) +{ + return __percpu_counter_sum(fbc, 1); +} + + static inline s64 percpu_counter_sum(struct percpu_counter *fbc) { - return __percpu_counter_sum(fbc); + return __percpu_counter_sum(fbc, 0); } static inline s64 percpu_counter_read(struct percpu_counter *fbc) diff --git a/trunk/kernel/irq/manage.c b/trunk/kernel/irq/manage.c index 60c49e324390..0314074fa232 100644 --- a/trunk/kernel/irq/manage.c +++ b/trunk/kernel/irq/manage.c @@ -89,14 +89,7 @@ int irq_set_affinity(unsigned int irq, cpumask_t cpumask) set_balance_irq_affinity(irq, cpumask); #ifdef CONFIG_GENERIC_PENDING_IRQ - if (desc->status & IRQ_MOVE_PCNTXT) { - unsigned long flags; - - spin_lock_irqsave(&desc->lock, flags); - desc->chip->set_affinity(irq, cpumask); - spin_unlock_irqrestore(&desc->lock, flags); - } else - set_pending_irq(irq, cpumask); + set_pending_irq(irq, cpumask); #else desc->affinity = cpumask; desc->chip->set_affinity(irq, cpumask); diff --git a/trunk/kernel/resource.c b/trunk/kernel/resource.c index 414d6fc9131e..03d796c1b2e9 100644 --- a/trunk/kernel/resource.c +++ b/trunk/kernel/resource.c @@ -516,74 +516,6 @@ int adjust_resource(struct resource *res, resource_size_t start, resource_size_t return result; } -static void __init __reserve_region_with_split(struct resource *root, - resource_size_t start, resource_size_t end, - const char *name) -{ - struct resource *parent = root; - struct resource *conflict; - struct resource *res = kzalloc(sizeof(*res), GFP_KERNEL); - - if (!res) - return; - - res->name = name; - res->start = start; - res->end = end; - res->flags = IORESOURCE_BUSY; - - for (;;) { - conflict = __request_resource(parent, res); - if (!conflict) - break; - if (conflict != parent) { - parent = conflict; - if (!(conflict->flags & IORESOURCE_BUSY)) - continue; - } - - /* Uhhuh, that didn't work out.. */ - kfree(res); - res = NULL; - break; - } - - if (!res) { - printk(KERN_DEBUG " __reserve_region_with_split: (%s) [%llx, %llx], res: (%s) [%llx, %llx]\n", - conflict->name, conflict->start, conflict->end, - name, start, end); - - /* failed, split and try again */ - - /* conflict coverred whole area */ - if (conflict->start <= start && conflict->end >= end) - return; - - if (conflict->start > start) - __reserve_region_with_split(root, start, conflict->start-1, name); - if (!(conflict->flags & IORESOURCE_BUSY)) { - resource_size_t common_start, common_end; - - common_start = max(conflict->start, start); - common_end = min(conflict->end, end); - if (common_start < common_end) - __reserve_region_with_split(root, common_start, common_end, name); - } - if (conflict->end < end) - __reserve_region_with_split(root, conflict->end+1, end, name); - } - -} - -void reserve_region_with_split(struct resource *root, - resource_size_t start, resource_size_t end, - const char *name) -{ - write_lock(&resource_lock); - __reserve_region_with_split(root, start, end, name); - write_unlock(&resource_lock); -} - EXPORT_SYMBOL(adjust_resource); /** diff --git a/trunk/lib/percpu_counter.c b/trunk/lib/percpu_counter.c index a8663890a88c..4a8ba4bf5f6f 100644 --- a/trunk/lib/percpu_counter.c +++ b/trunk/lib/percpu_counter.c @@ -52,7 +52,7 @@ EXPORT_SYMBOL(__percpu_counter_add); * Add up all the per-cpu counts, return the result. This is a more accurate * but much slower version of percpu_counter_read_positive() */ -s64 __percpu_counter_sum(struct percpu_counter *fbc) +s64 __percpu_counter_sum(struct percpu_counter *fbc, int set) { s64 ret; int cpu; @@ -62,9 +62,11 @@ s64 __percpu_counter_sum(struct percpu_counter *fbc) for_each_online_cpu(cpu) { s32 *pcount = per_cpu_ptr(fbc->counters, cpu); ret += *pcount; - *pcount = 0; + if (set) + *pcount = 0; } - fbc->count = ret; + if (set) + fbc->count = ret; spin_unlock(&fbc->lock); return ret; diff --git a/trunk/net/netfilter/ipvs/Kconfig b/trunk/net/netfilter/ipvs/Kconfig index de6004de80bc..05048e403266 100644 --- a/trunk/net/netfilter/ipvs/Kconfig +++ b/trunk/net/netfilter/ipvs/Kconfig @@ -2,8 +2,8 @@ # IP Virtual Server configuration # menuconfig IP_VS - tristate "IP virtual server support (EXPERIMENTAL)" - depends on NETFILTER + tristate "IP virtual server support" + depends on NET && INET && NETFILTER ---help--- IP Virtual Server support will let you build a high-performance virtual server based on cluster of two or more real servers. This