diff --git a/[refs] b/[refs]
index 2ad239ca6e5b..12455d4b4c9f 100644
--- a/[refs]
+++ b/[refs]
@@ -1,2 +1,2 @@
 ---
-refs/heads/master: a98fe7f3425c6b4e90de16f8da63b0429a8fed08
+refs/heads/master: 1d8ce7bc4d05b4a5c04dc17f92fef26989fb5935
diff --git a/trunk/Documentation/x86/earlyprintk.txt b/trunk/Documentation/x86/earlyprintk.txt
deleted file mode 100644
index 607b1a016064..000000000000
--- a/trunk/Documentation/x86/earlyprintk.txt
+++ /dev/null
@@ -1,101 +0,0 @@
-
-Mini-HOWTO for using the earlyprintk=dbgp boot option with a
-USB2 Debug port key and a debug cable, on x86 systems.
-
-You need two computers, the 'USB debug key' special gadget and
-and two USB cables, connected like this:
-
-  [host/target] <-------> [USB debug key] <-------> [client/console]
-
-1. There are three specific hardware requirements:
-
- a.) Host/target system needs to have USB debug port capability.
-
- You can check this capability by looking at a 'Debug port' bit in
- the lspci -vvv output:
-
-  # lspci -vvv
-  ...
-  00:1d.7 USB Controller: Intel Corporation 82801H (ICH8 Family) USB2 EHCI Controller #1 (rev 03) (prog-if 20 [EHCI])
-          Subsystem: Lenovo ThinkPad T61
-          Control: I/O- Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR+ FastB2B- DisINTx-
-          Status: Cap+ 66MHz- UDF- FastB2B+ ParErr- DEVSEL=medium >TAbort- SERR- /proc/sysrq-trigger
-
- On the host/target system you should see this help line in "dmesg" output:
-
-  SysRq : HELP : loglevel(0-9) reBoot Crashdump terminate-all-tasks(E) memory-full-oom-kill(F) kill-all-tasks(I) saK show-backtrace-all-active-cpus(L) show-memory-usage(M) nice-all-RT-tasks(N) powerOff show-registers(P) show-all-timers(Q) unRaw Sync show-task-states(T) Unmount show-blocked-tasks(W) dump-ftrace-buffer(Z)
-
- On the client/console system do:
-
-  cat /dev/ttyUSB0
-
- And you should see the help line above displayed shortly after you've
- provoked it on the host system.
-
-If it does not work then please ask about it on the linux-kernel@vger.kernel.org
-mailing list or contact the x86 maintainers.
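The deleted HOWTO's workflow condenses to three commands. This is a sketch reconstructed only from fragments that survive in this extract (the earlyprintk=dbgp option named in the title, and the "cat /dev/ttyUSB0" and "/proc/sysrq-trigger" references); the "echo h" form is inferred from the SysRq help line shown above, and the full list of companion boot options lives in the deleted text, so treat this as illustrative rather than authoritative:

    # host/target: boot with early printk routed to the USB2 debug port
    earlyprintk=dbgp

    # host/target: provoke early console output, e.g. the SysRq help line
    echo h > /proc/sysrq-trigger

    # client/console: read the stream arriving through the USB debug key
    cat /dev/ttyUSB0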
diff --git a/trunk/arch/powerpc/configs/40x/virtex_defconfig b/trunk/arch/powerpc/configs/40x/virtex_defconfig
index f5698f962e58..b6888384dd74 100644
--- a/trunk/arch/powerpc/configs/40x/virtex_defconfig
+++ b/trunk/arch/powerpc/configs/40x/virtex_defconfig
@@ -686,7 +686,7 @@ CONFIG_SERIAL_UARTLITE_CONSOLE=y
 CONFIG_SERIAL_CORE=y
 CONFIG_SERIAL_CORE_CONSOLE=y
 # CONFIG_SERIAL_JSM is not set
-CONFIG_SERIAL_OF_PLATFORM=y
+# CONFIG_SERIAL_OF_PLATFORM is not set
 # CONFIG_SERIAL_OF_PLATFORM_NWPSERIAL is not set
 CONFIG_UNIX98_PTYS=y
 # CONFIG_DEVPTS_MULTIPLE_INSTANCES is not set
diff --git a/trunk/arch/powerpc/configs/44x/virtex5_defconfig b/trunk/arch/powerpc/configs/44x/virtex5_defconfig
index 1bf0a63614b1..15aab1ca6384 100644
--- a/trunk/arch/powerpc/configs/44x/virtex5_defconfig
+++ b/trunk/arch/powerpc/configs/44x/virtex5_defconfig
@@ -691,7 +691,7 @@ CONFIG_SERIAL_UARTLITE_CONSOLE=y
 CONFIG_SERIAL_CORE=y
 CONFIG_SERIAL_CORE_CONSOLE=y
 # CONFIG_SERIAL_JSM is not set
-CONFIG_SERIAL_OF_PLATFORM=y
+# CONFIG_SERIAL_OF_PLATFORM is not set
 # CONFIG_SERIAL_OF_PLATFORM_NWPSERIAL is not set
 CONFIG_UNIX98_PTYS=y
 # CONFIG_DEVPTS_MULTIPLE_INSTANCES is not set
diff --git a/trunk/arch/powerpc/configs/linkstation_defconfig b/trunk/arch/powerpc/configs/linkstation_defconfig
index 15900dcf0bfa..aa5855a156de 100644
--- a/trunk/arch/powerpc/configs/linkstation_defconfig
+++ b/trunk/arch/powerpc/configs/linkstation_defconfig
@@ -1,7 +1,7 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.29-rc6
-# Fri Mar 6 00:07:38 2009
+# Linux kernel version: 2.6.29-rc2
+# Mon Jan 26 15:35:29 2009
 #
 
 # CONFIG_PPC64 is not set
@@ -71,15 +71,6 @@ CONFIG_POSIX_MQUEUE=y
 # CONFIG_BSD_PROCESS_ACCT is not set
 # CONFIG_TASKSTATS is not set
 # CONFIG_AUDIT is not set
-
-#
-# RCU Subsystem
-#
-CONFIG_CLASSIC_RCU=y
-# CONFIG_TREE_RCU is not set
-# CONFIG_PREEMPT_RCU is not set
-# CONFIG_TREE_RCU_TRACE is not set
-# CONFIG_PREEMPT_RCU_TRACE is not set
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
 CONFIG_LOG_BUF_SHIFT=14
@@ -97,7 +88,6 @@ CONFIG_NAMESPACES=y
 # CONFIG_IPC_NS is not set
 # CONFIG_USER_NS is not set
 # CONFIG_PID_NS is not set
-# CONFIG_NET_NS is not set
 CONFIG_BLK_DEV_INITRD=y
 CONFIG_INITRAMFS_SOURCE=""
 CONFIG_CC_OPTIMIZE_FOR_SIZE=y
@@ -163,6 +153,11 @@ CONFIG_DEFAULT_AS=y
 # CONFIG_DEFAULT_CFQ is not set
 # CONFIG_DEFAULT_NOOP is not set
 CONFIG_DEFAULT_IOSCHED="anticipatory"
+CONFIG_CLASSIC_RCU=y
+# CONFIG_TREE_RCU is not set
+# CONFIG_PREEMPT_RCU is not set
+# CONFIG_TREE_RCU_TRACE is not set
+# CONFIG_PREEMPT_RCU_TRACE is not set
 # CONFIG_FREEZER is not set
 
 #
@@ -299,6 +294,7 @@ CONFIG_NET=y
 #
 # Networking options
 #
+# CONFIG_NET_NS is not set
 CONFIG_COMPAT_NET_DEV_OPS=y
 CONFIG_PACKET=y
 CONFIG_PACKET_MMAP=y
@@ -512,8 +508,8 @@ CONFIG_MTD_CONCAT=y
 CONFIG_MTD_PARTITIONS=y
 # CONFIG_MTD_TESTS is not set
 # CONFIG_MTD_REDBOOT_PARTS is not set
-CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_OF_PARTS=y
+# CONFIG_MTD_CMDLINE_PARTS is not set
+# CONFIG_MTD_OF_PARTS is not set
 # CONFIG_MTD_AR7_PARTS is not set
 
 #
@@ -591,6 +587,7 @@ CONFIG_MTD_PHYSMAP=y
 # LPDDR flash memory drivers
 #
 # CONFIG_MTD_LPDDR is not set
+# CONFIG_MTD_QINFO_PROBE is not set
 
 #
 # UBI - Unsorted block images
@@ -620,19 +617,13 @@ CONFIG_BLK_DEV_RAM_SIZE=8192
 # CONFIG_BLK_DEV_HD is not set
 CONFIG_MISC_DEVICES=y
 # CONFIG_PHANTOM is not set
+# CONFIG_EEPROM_93CX6 is not set
 # CONFIG_SGI_IOC4 is not set
 # CONFIG_TIFM_CORE is not set
 # CONFIG_ICS932S401 is not set
 # CONFIG_ENCLOSURE_SERVICES is not set
 # CONFIG_HP_ILO is not set
 # CONFIG_C2PORT is not set
-
-#
-# EEPROM support
-#
-# CONFIG_EEPROM_AT24 is not set
-CONFIG_EEPROM_LEGACY=m
-# CONFIG_EEPROM_93CX6 is not set
 CONFIG_HAVE_IDE=y
 # CONFIG_IDE is not set
@@ -848,7 +839,6 @@ CONFIG_R8169=y
 # CONFIG_QLA3XXX is not set
 # CONFIG_ATL1 is not set
 # CONFIG_ATL1E is not set
-# CONFIG_ATL1C is not set
 # CONFIG_JME is not set
 CONFIG_NETDEV_10000=y
 # CONFIG_CHELSIO_T1 is not set
@@ -1047,6 +1037,8 @@ CONFIG_I2C_MPC=y
 # Miscellaneous I2C Chip support
 #
 # CONFIG_DS1682 is not set
+# CONFIG_EEPROM_AT24 is not set
+CONFIG_EEPROM_LEGACY=m
 # CONFIG_SENSORS_PCF8574 is not set
 # CONFIG_PCF8575 is not set
 # CONFIG_SENSORS_PCA9539 is not set
diff --git a/trunk/arch/powerpc/configs/storcenter_defconfig b/trunk/arch/powerpc/configs/storcenter_defconfig
index 94903465ea12..86512c8790d1 100644
--- a/trunk/arch/powerpc/configs/storcenter_defconfig
+++ b/trunk/arch/powerpc/configs/storcenter_defconfig
@@ -1,7 +1,7 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.29-rc6
-# Fri Mar 6 00:09:08 2009
+# Linux kernel version: 2.6.29-rc2
+# Mon Jan 26 15:35:46 2009
 #
 
 # CONFIG_PPC64 is not set
@@ -71,15 +71,6 @@ CONFIG_SYSVIPC_SYSCTL=y
 # CONFIG_BSD_PROCESS_ACCT is not set
 # CONFIG_TASKSTATS is not set
 # CONFIG_AUDIT is not set
-
-#
-# RCU Subsystem
-#
-CONFIG_CLASSIC_RCU=y
-# CONFIG_TREE_RCU is not set
-# CONFIG_PREEMPT_RCU is not set
-# CONFIG_TREE_RCU_TRACE is not set
-# CONFIG_PREEMPT_RCU_TRACE is not set
 # CONFIG_IKCONFIG is not set
 CONFIG_LOG_BUF_SHIFT=14
 CONFIG_GROUP_SCHED=y
@@ -153,6 +144,11 @@ CONFIG_IOSCHED_CFQ=y
 CONFIG_DEFAULT_CFQ=y
 # CONFIG_DEFAULT_NOOP is not set
 CONFIG_DEFAULT_IOSCHED="cfq"
+CONFIG_CLASSIC_RCU=y
+# CONFIG_TREE_RCU is not set
+# CONFIG_PREEMPT_RCU is not set
+# CONFIG_TREE_RCU_TRACE is not set
+# CONFIG_PREEMPT_RCU_TRACE is not set
 # CONFIG_FREEZER is not set
 
 #
@@ -381,8 +377,8 @@ CONFIG_MTD=y
 CONFIG_MTD_PARTITIONS=y
 # CONFIG_MTD_TESTS is not set
 # CONFIG_MTD_REDBOOT_PARTS is not set
-CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_OF_PARTS=y
+# CONFIG_MTD_CMDLINE_PARTS is not set
+# CONFIG_MTD_OF_PARTS is not set
 # CONFIG_MTD_AR7_PARTS is not set
 
 #
@@ -456,6 +452,7 @@ CONFIG_MTD_PHYSMAP=y
 # LPDDR flash memory drivers
 #
 # CONFIG_MTD_LPDDR is not set
+# CONFIG_MTD_QINFO_PROBE is not set
 
 #
 # UBI - Unsorted block images
@@ -481,19 +478,13 @@ CONFIG_BLK_DEV=y
 # CONFIG_BLK_DEV_HD is not set
 CONFIG_MISC_DEVICES=y
 # CONFIG_PHANTOM is not set
+# CONFIG_EEPROM_93CX6 is not set
 # CONFIG_SGI_IOC4 is not set
 # CONFIG_TIFM_CORE is not set
 # CONFIG_ICS932S401 is not set
 # CONFIG_ENCLOSURE_SERVICES is not set
 # CONFIG_HP_ILO is not set
 # CONFIG_C2PORT is not set
-
-#
-# EEPROM support
-#
-# CONFIG_EEPROM_AT24 is not set
-# CONFIG_EEPROM_LEGACY is not set
-# CONFIG_EEPROM_93CX6 is not set
 CONFIG_HAVE_IDE=y
 CONFIG_IDE=y
@@ -686,7 +677,6 @@ CONFIG_R8169=y
 # CONFIG_QLA3XXX is not set
 # CONFIG_ATL1 is not set
 # CONFIG_ATL1E is not set
-# CONFIG_ATL1C is not set
 # CONFIG_JME is not set
 # CONFIG_NETDEV_10000 is not set
 # CONFIG_TR is not set
@@ -828,6 +818,8 @@ CONFIG_I2C_MPC=y
 # Miscellaneous I2C Chip support
 #
 # CONFIG_DS1682 is not set
+# CONFIG_EEPROM_AT24 is not set
+# CONFIG_EEPROM_LEGACY is not set
 # CONFIG_SENSORS_PCF8574 is not set
 # CONFIG_PCF8575 is not set
 # CONFIG_SENSORS_PCA9539 is not set
@@ -1167,7 +1159,6 @@ CONFIG_JFFS2_RTIME=y
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
 # CONFIG_NETWORK_FILESYSTEMS is not set
-CONFIG_EXPORTFS=m
 
 #
 # Partition Types
diff --git a/trunk/arch/powerpc/platforms/embedded6xx/linkstation.c b/trunk/arch/powerpc/platforms/embedded6xx/linkstation.c
index 244f997de791..2ca7be65c2d2 100644
--- a/trunk/arch/powerpc/platforms/embedded6xx/linkstation.c
+++ b/trunk/arch/powerpc/platforms/embedded6xx/linkstation.c
@@ -12,6 +12,7 @@
 #include
 #include
+#include
 #include
 #include
@@ -21,6 +22,39 @@
 
 #include "mpc10x.h"
 
+static struct mtd_partition linkstation_physmap_partitions[] = {
+	{
+		.name   = "mtd_firmimg",
+		.offset = 0x000000,
+		.size   = 0x300000,
+	},
+	{
+		.name   = "mtd_bootcode",
+		.offset = 0x300000,
+		.size   = 0x070000,
+	},
+	{
+		.name   = "mtd_status",
+		.offset = 0x370000,
+		.size   = 0x010000,
+	},
+	{
+		.name   = "mtd_conf",
+		.offset = 0x380000,
+		.size   = 0x080000,
+	},
+	{
+		.name   = "mtd_allflash",
+		.offset = 0x000000,
+		.size   = 0x400000,
+	},
+	{
+		.name   = "mtd_data",
+		.offset = 0x310000,
+		.size   = 0x0f0000,
+	},
+};
+
 static __initdata struct of_device_id of_bus_ids[] = {
 	{ .type = "soc", },
 	{ .compatible = "simple-bus", },
@@ -65,6 +99,10 @@ static int __init linkstation_add_bridge(struct device_node *dev)
 static void __init linkstation_setup_arch(void)
 {
 	struct device_node *np;
+#ifdef CONFIG_MTD_PHYSMAP
+	physmap_set_partitions(linkstation_physmap_partitions,
+			       ARRAY_SIZE(linkstation_physmap_partitions));
+#endif
 
 	/* Lookup PCI host bridges */
 	for_each_compatible_node(np, "pci", "mpc10x-pci")
diff --git a/trunk/arch/powerpc/platforms/embedded6xx/storcenter.c b/trunk/arch/powerpc/platforms/embedded6xx/storcenter.c
index 613070e9ddbe..8864e4884980 100644
--- a/trunk/arch/powerpc/platforms/embedded6xx/storcenter.c
+++ b/trunk/arch/powerpc/platforms/embedded6xx/storcenter.c
@@ -14,6 +14,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
@@ -25,6 +26,32 @@
 
 #include "mpc10x.h"
 
+#ifdef CONFIG_MTD_PHYSMAP
+static struct mtd_partition storcenter_physmap_partitions[] = {
+	{
+		.name   = "kernel",
+		.offset = 0x000000,
+		.size   = 0x170000,
+	},
+	{
+		.name   = "rootfs",
+		.offset = 0x170000,
+		.size   = 0x590000,
+	},
+	{
+		.name   = "uboot",
+		.offset = 0x700000,
+		.size   = 0x040000,
+	},
+	{
+		.name   = "config",
+		.offset = 0x740000,
+		.size   = 0x0c0000,
+	},
+};
+#endif
+
+
 static __initdata struct of_device_id storcenter_of_bus[] = {
 	{ .name = "soc", },
 	{},
@@ -69,6 +96,11 @@ static void __init storcenter_setup_arch(void)
 {
 	struct device_node *np;
 
+#ifdef CONFIG_MTD_PHYSMAP
+	physmap_set_partitions(storcenter_physmap_partitions,
+			       ARRAY_SIZE(storcenter_physmap_partitions));
+#endif
+
 	/* Lookup PCI host bridges */
 	for_each_compatible_node(np, "pci", "mpc10x-pci")
 		storcenter_add_bridge(np);
diff --git a/trunk/arch/x86/Kconfig b/trunk/arch/x86/Kconfig
index 7fcf85182681..f5cef3fbf9a5 100644
--- a/trunk/arch/x86/Kconfig
+++ b/trunk/arch/x86/Kconfig
@@ -783,11 +783,6 @@ config X86_MCE_AMD
 	   Additional support for AMD specific MCE features such as
 	   the DRAM Error Threshold.
 
-config X86_MCE_THRESHOLD
-	depends on X86_MCE_AMD || X86_MCE_INTEL
-	bool
-	default y
-
 config X86_MCE_NONFATAL
 	tristate "Check for non-fatal errors on AMD Athlon/Duron / Intel Pentium 4"
 	depends on X86_32 && X86_MCE
@@ -931,12 +926,6 @@ config X86_CPUID
 	  with major 203 and minors 0 to 31 for /dev/cpu/0/cpuid to
 	  /dev/cpu/31/cpuid.
 
-config X86_CPU_DEBUG
-	tristate "/sys/kernel/debug/x86/cpu/* - CPU Debug support"
-	---help---
-	  If you select this option, this will provide various x86 CPUs
-	  information through debugfs.
-
 choice
 	prompt "High Memory Support"
 	default HIGHMEM4G if !X86_NUMAQ
@@ -1437,7 +1426,7 @@ config CRASH_DUMP
 config KEXEC_JUMP
 	bool "kexec jump (EXPERIMENTAL)"
 	depends on EXPERIMENTAL
-	depends on KEXEC && HIBERNATION
+	depends on KEXEC && HIBERNATION && X86_32
 	---help---
 	  Jump between original kernel and kexeced kernel and invoke
 	  code in physical address mode via KEXEC
diff --git a/trunk/arch/x86/boot/Makefile b/trunk/arch/x86/boot/Makefile
index 57a29fecf6bb..c70eff69a1fb 100644
--- a/trunk/arch/x86/boot/Makefile
+++ b/trunk/arch/x86/boot/Makefile
@@ -6,23 +6,26 @@
 # for more details.
 #
 # Copyright (C) 1994 by Linus Torvalds
-# Changed by many, many contributors over the years.
 #
 
 # ROOT_DEV specifies the default root-device when making the image.
 # This can be either FLOPPY, CURRENT, /dev/xxxx or empty, in which case
 # the default of FLOPPY is used by 'build'.
 
-ROOT_DEV := CURRENT
+ROOT_DEV	:= CURRENT
 
 # If you want to preset the SVGA mode, uncomment the next line and
 # set SVGA_MODE to whatever number you want.
 # Set it to -DSVGA_MODE=NORMAL_VGA if you just want the EGA/VGA mode.
 # The number is the same as you would ordinarily press at bootup.
 
-SVGA_MODE := -DSVGA_MODE=NORMAL_VGA
+SVGA_MODE	:= -DSVGA_MODE=NORMAL_VGA
 
-targets		:= vmlinux.bin setup.bin setup.elf bzImage
+# If you want the RAM disk device, define this to be the size in blocks.
+
+#RAMDISK := -DRAMDISK=512
+
+targets		:= vmlinux.bin setup.bin setup.elf zImage bzImage
 subdir-		:= compressed
 
 setup-y		+= a20.o cmdline.o copy.o cpu.o cpucheck.o edd.o
@@ -68,13 +71,17 @@ KBUILD_CFLAGS	:= $(LINUXINCLUDE) -g -Os -D_SETUP -D__KERNEL__ \
 KBUILD_CFLAGS	+= $(call cc-option,-m32)
 KBUILD_AFLAGS	:= $(KBUILD_CFLAGS) -D__ASSEMBLY__
 
-$(obj)/bzImage: asflags-y := $(SVGA_MODE)
+$(obj)/zImage:  asflags-y := $(SVGA_MODE) $(RAMDISK)
+$(obj)/bzImage: ccflags-y := -D__BIG_KERNEL__
+$(obj)/bzImage: asflags-y := $(SVGA_MODE) $(RAMDISK) -D__BIG_KERNEL__
+$(obj)/bzImage: BUILDFLAGS := -b
 
 quiet_cmd_image = BUILD   $@
-cmd_image = $(obj)/tools/build $(obj)/setup.bin $(obj)/vmlinux.bin \
-	$(ROOT_DEV) > $@
+cmd_image = $(obj)/tools/build $(BUILDFLAGS) $(obj)/setup.bin \
+	$(obj)/vmlinux.bin $(ROOT_DEV) > $@
 
-$(obj)/bzImage: $(obj)/setup.bin $(obj)/vmlinux.bin $(obj)/tools/build FORCE
+$(obj)/zImage $(obj)/bzImage: $(obj)/setup.bin \
+	$(obj)/vmlinux.bin $(obj)/tools/build FORCE
 	$(call if_changed,image)
 	@echo 'Kernel: $@ is ready' ' (#'`cat .version`')'
diff --git a/trunk/arch/x86/boot/header.S b/trunk/arch/x86/boot/header.S
index 5d84d1c74e4c..7ccff4884a23 100644
--- a/trunk/arch/x86/boot/header.S
+++ b/trunk/arch/x86/boot/header.S
@@ -24,8 +24,12 @@
 #include "boot.h"
 #include "offsets.h"
 
+SETUPSECTS	= 4			/* default nr of setup-sectors */
 BOOTSEG		= 0x07C0		/* original address of boot-sector */
-SYSSEG		= 0x1000		/* historical load address >> 4 */
+SYSSEG		= DEF_SYSSEG		/* system loaded at 0x10000 (65536) */
+SYSSIZE		= DEF_SYSSIZE		/* system size: # of 16-byte clicks */
+					/* to be loaded */
+ROOT_DEV	= 0			/* ROOT_DEV is now written by "build" */
 
 #ifndef SVGA_MODE
 #define SVGA_MODE ASK_VGA
@@ -93,12 +97,12 @@ bugger_off_msg:
 	.section ".header", "a"
 	.globl	hdr
 hdr:
-setup_sects:	.byte 0			/* Filled in by build.c */
+setup_sects:	.byte SETUPSECTS
 root_flags:	.word ROOT_RDONLY
-syssize:	.long 0			/* Filled in by build.c */
-ram_size:	.word 0			/* Obsolete */
+syssize:	.long SYSSIZE
+ram_size:	.word RAMDISK
 vid_mode:	.word SVGA_MODE
-root_dev:	.word 0			/* Filled in by build.c */
+root_dev:	.word ROOT_DEV
 boot_flag:	.word 0xAA55
 
 	# offset 512, entry point
@@ -119,15 +123,14 @@ _start:
 					# or else old loadlin-1.5 will fail)
 		.globl	realmode_swtch
 realmode_swtch:	.word	0, 0		# default_switch, SETUPSEG
-start_sys_seg:	.word	SYSSEG		# obsolete and meaningless, but just
-					# in case something decided to "use" it
+start_sys_seg:	.word	SYSSEG
 		.word	kernel_version-512 # pointing to kernel version string
 					# above section of header is compatible
 					# with loadlin-1.5 (header v1.5). Don't
 					# change it.
 
-type_of_loader:	.byte	0		# 0 means ancient bootloader, newer
-					# bootloaders know to change this.
+type_of_loader:	.byte	0		# = 0, old one (LILO, Loadlin,
+					#      Bootlin, SYSLX, bootsect...)
 					# See Documentation/i386/boot.txt for
 					# assigned ids
@@ -139,7 +142,11 @@ CAN_USE_HEAP	= 0x80			# If set, the loader also has set
 					# space behind setup.S can be used for
 					# heap purposes.
 					# Only the loader knows what is free
+#ifndef __BIG_KERNEL__
+		.byte	0
+#else
 		.byte	LOADED_HIGH
+#endif
setup_move_size: .word  0x8000		# size to move, when setup is not
 					# loaded at 0x90000. We will move setup
@@ -150,7 +157,11 @@ setup_move_size: .word  0x8000		# size to move, when setup is not
 
 code32_start:				# here loaders can put a different
 					# start address for 32-bit code.
+#ifndef __BIG_KERNEL__
+		.long	0x1000		#   0x1000 = default for zImage
+#else
 		.long	0x100000	# 0x100000 = default for big kernel
+#endif
 
 ramdisk_image:	.long	0		# address of loaded ramdisk image
 					# Here the loader puts the 32-bit
diff --git a/trunk/arch/x86/boot/pm.c b/trunk/arch/x86/boot/pm.c
index 8062f8915250..85a1cd8a8ff8 100644
--- a/trunk/arch/x86/boot/pm.c
+++ b/trunk/arch/x86/boot/pm.c
@@ -32,6 +32,47 @@ static void realmode_switch_hook(void)
 	}
 }
 
+/*
+ * A zImage kernel is loaded at 0x10000 but wants to run at 0x1000.
+ * A bzImage kernel is loaded and runs at 0x100000.
+ */
+static void move_kernel_around(void)
+{
+	/* Note: rely on the compile-time option here rather than
+	   the LOADED_HIGH flag.  The Qemu kernel loader unconditionally
+	   sets the loadflags to zero. */
+#ifndef __BIG_KERNEL__
+	u16 dst_seg, src_seg;
+	u32 syssize;
+
+	dst_seg =  0x1000 >> 4;
+	src_seg = 0x10000 >> 4;
+	syssize = boot_params.hdr.syssize; /* Size in 16-byte paragraphs */
+
+	while (syssize) {
+		int paras  = (syssize >= 0x1000) ? 0x1000 : syssize;
+		int dwords = paras << 2;
+
+		asm volatile("pushw %%es ; "
+			     "pushw %%ds ; "
+			     "movw %1,%%es ; "
+			     "movw %2,%%ds ; "
+			     "xorw %%di,%%di ; "
+			     "xorw %%si,%%si ; "
+			     "rep;movsl ; "
+			     "popw %%ds ; "
+			     "popw %%es"
+			     : "+c" (dwords)
+			     : "r" (dst_seg), "r" (src_seg)
+			     : "esi", "edi");
+
+		syssize -= paras;
+		dst_seg += paras;
+		src_seg += paras;
+	}
+#endif
+}
+
 /*
  * Disable all interrupts at the legacy PIC.
  */
@@ -106,6 +147,9 @@ void go_to_protected_mode(void)
 	/* Hook before leaving real mode, also disables interrupts */
 	realmode_switch_hook();
 
+	/* Move the kernel/setup to their final resting places */
+	move_kernel_around();
+
 	/* Enable the A20 gate */
 	if (enable_a20()) {
 		puts("A20 gate not responding, unable to boot...\n");
diff --git a/trunk/arch/x86/boot/tools/build.c b/trunk/arch/x86/boot/tools/build.c
index ee3a4ea923ac..44dc1923c0e3 100644
--- a/trunk/arch/x86/boot/tools/build.c
+++ b/trunk/arch/x86/boot/tools/build.c
@@ -130,7 +130,7 @@ static void die(const char * str, ...)
 
 static void usage(void)
 {
-	die("Usage: build setup system [rootdev] [> image]");
+	die("Usage: build [-b] setup system [rootdev] [> image]");
 }
 
 int main(int argc, char ** argv)
@@ -145,6 +145,11 @@ int main(int argc, char ** argv)
 	void *kernel;
 	u32 crc = 0xffffffffUL;
 
+	if (argc > 2 && !strcmp(argv[1], "-b"))
+	{
+		is_big_kernel = 1;
+		argc--, argv++;
+	}
 	if ((argc < 3) || (argc > 4))
 		usage();
 	if (argc > 3) {
@@ -211,6 +216,8 @@ int main(int argc, char ** argv)
 		die("Unable to mmap '%s': %m", argv[2]);
 	/* Number of 16-byte paragraphs, including space for a 4-byte CRC */
 	sys_size = (sz + 15 + 4) / 16;
+	if (!is_big_kernel && sys_size > DEF_SYSSIZE)
+		die("System is too big. Try using bzImage or modules.");
 
 	/* Patch the setup code with the appropriate size parameters */
 	buf[0x1f1] = setup_sectors-1;
diff --git a/trunk/arch/x86/include/asm/apic.h b/trunk/arch/x86/include/asm/apic.h
index 394d177d721b..4ef949c1972e 100644
--- a/trunk/arch/x86/include/asm/apic.h
+++ b/trunk/arch/x86/include/asm/apic.h
@@ -379,7 +379,6 @@ static inline u32 safe_apic_wait_icr_idle(void)
 
 static inline void ack_APIC_irq(void)
 {
-#ifdef CONFIG_X86_LOCAL_APIC
 	/*
 	 * ack_APIC_irq() actually gets compiled as a single instruction
 	 * ... yummie.
@@ -387,7 +386,6 @@ static inline void ack_APIC_irq(void)
 
 	/* Docs say use 0 for future compatibility */
 	apic_write(APIC_EOI, 0);
-#endif
 }
 
 static inline unsigned default_get_apic_id(unsigned long x)
diff --git a/trunk/arch/x86/include/asm/apicdef.h b/trunk/arch/x86/include/asm/apicdef.h
index bc9514fb3b13..63134e31e8b9 100644
--- a/trunk/arch/x86/include/asm/apicdef.h
+++ b/trunk/arch/x86/include/asm/apicdef.h
@@ -53,7 +53,6 @@
 #define	APIC_ESR_SENDILL	0x00020
 #define	APIC_ESR_RECVILL	0x00040
 #define	APIC_ESR_ILLREGA	0x00080
-#define	APIC_LVTCMCI		0x2f0
 #define	APIC_ICR		0x300
 #define	APIC_DEST_SELF		0x40000
 #define	APIC_DEST_ALLINC	0x80000
diff --git a/trunk/arch/x86/include/asm/boot.h b/trunk/arch/x86/include/asm/boot.h
index 6ba23dd9fc92..6526cf08b0e4 100644
--- a/trunk/arch/x86/include/asm/boot.h
+++ b/trunk/arch/x86/include/asm/boot.h
@@ -1,6 +1,10 @@
 #ifndef _ASM_X86_BOOT_H
 #define _ASM_X86_BOOT_H
 
+/* Don't touch these, unless you really know what you're doing. */
+#define DEF_SYSSEG	0x1000
+#define DEF_SYSSIZE	0x7F00
+
 /* Internal svga startup constants */
 #define NORMAL_VGA	0xffff		/* 80x25 mode */
 #define EXTENDED_VGA	0xfffe		/* 80x50 mode */
diff --git a/trunk/arch/x86/include/asm/cpu_debug.h b/trunk/arch/x86/include/asm/cpu_debug.h
deleted file mode 100755
index d24d64fcee04..000000000000
--- a/trunk/arch/x86/include/asm/cpu_debug.h
+++ /dev/null
@@ -1,193 +0,0 @@
-#ifndef _ASM_X86_CPU_DEBUG_H
-#define _ASM_X86_CPU_DEBUG_H
-
-/*
- * CPU x86 architecture debug
- *
- * Copyright(C) 2009 Jaswinder Singh Rajput
- */
-
-/* Register flags */
-enum cpu_debug_bit {
-/* Model Specific Registers (MSRs) */
-	CPU_MC_BIT,				/* Machine Check	*/
-	CPU_MONITOR_BIT,			/* Monitor		*/
-	CPU_TIME_BIT,				/* Time			*/
-	CPU_PMC_BIT,				/* Performance Monitor	*/
-	CPU_PLATFORM_BIT,			/* Platform		*/
-	CPU_APIC_BIT,				/* APIC			*/
-	CPU_POWERON_BIT,			/* Power-on		*/
-	CPU_CONTROL_BIT,			/* Control		*/
-	CPU_FEATURES_BIT,			/* Features control	*/
-	CPU_LBRANCH_BIT,			/* Last Branch		*/
-	CPU_BIOS_BIT,				/* BIOS			*/
-	CPU_FREQ_BIT,				/* Frequency		*/
-	CPU_MTTR_BIT,				/* MTRR			*/
-	CPU_PERF_BIT,				/* Performance		*/
-	CPU_CACHE_BIT,				/* Cache		*/
-	CPU_SYSENTER_BIT,			/* Sysenter		*/
-	CPU_THERM_BIT,				/* Thermal		*/
-	CPU_MISC_BIT,				/* Miscellaneous	*/
-	CPU_DEBUG_BIT,				/* Debug		*/
-	CPU_PAT_BIT,				/* PAT			*/
-	CPU_VMX_BIT,				/* VMX			*/
-	CPU_CALL_BIT,				/* System Call		*/
-	CPU_BASE_BIT,				/* BASE Address		*/
-	CPU_SMM_BIT,				/* System mgmt mode	*/
-	CPU_SVM_BIT,				/*Secure Virtual Machine*/
-	CPU_OSVM_BIT,				/* OS-Visible Workaround*/
-/* Standard Registers */
-	CPU_TSS_BIT,				/* Task Stack Segment	*/
-	CPU_CR_BIT,				/* Control Registers	*/
-	CPU_DT_BIT,				/* Descriptor Table	*/
-/* End of Registers flags */
-	CPU_REG_ALL_BIT,			/* Select all Registers	*/
-};
-
-#define	CPU_REG_ALL		(~0)		/* Select all Registers	*/
-
-#define	CPU_MC			(1 << CPU_MC_BIT)
-#define	CPU_MONITOR		(1 << CPU_MONITOR_BIT)
-#define	CPU_TIME		(1 << CPU_TIME_BIT)
-#define	CPU_PMC			(1 << CPU_PMC_BIT)
-#define	CPU_PLATFORM		(1 << CPU_PLATFORM_BIT)
-#define	CPU_APIC		(1 << CPU_APIC_BIT)
-#define	CPU_POWERON		(1 << CPU_POWERON_BIT)
-#define	CPU_CONTROL		(1 << CPU_CONTROL_BIT)
-#define	CPU_FEATURES		(1 << CPU_FEATURES_BIT)
-#define	CPU_LBRANCH		(1 << CPU_LBRANCH_BIT)
-#define	CPU_BIOS		(1 << CPU_BIOS_BIT)
-#define	CPU_FREQ		(1 << CPU_FREQ_BIT)
-#define	CPU_MTRR		(1 << CPU_MTTR_BIT)
-#define	CPU_PERF		(1 << CPU_PERF_BIT)
-#define	CPU_CACHE		(1 << CPU_CACHE_BIT)
-#define	CPU_SYSENTER		(1 << CPU_SYSENTER_BIT)
-#define	CPU_THERM		(1 << CPU_THERM_BIT)
-#define	CPU_MISC		(1 << CPU_MISC_BIT)
-#define	CPU_DEBUG		(1 << CPU_DEBUG_BIT)
-#define	CPU_PAT			(1 << CPU_PAT_BIT)
-#define	CPU_VMX			(1 << CPU_VMX_BIT)
-#define	CPU_CALL		(1 << CPU_CALL_BIT)
-#define	CPU_BASE		(1 << CPU_BASE_BIT)
-#define	CPU_SMM			(1 << CPU_SMM_BIT)
-#define	CPU_SVM			(1 << CPU_SVM_BIT)
-#define	CPU_OSVM		(1 << CPU_OSVM_BIT)
-#define	CPU_TSS			(1 << CPU_TSS_BIT)
-#define	CPU_CR			(1 << CPU_CR_BIT)
-#define	CPU_DT			(1 << CPU_DT_BIT)
-
-/* Register file flags */
-enum cpu_file_bit {
-	CPU_INDEX_BIT,				/* index		*/
-	CPU_VALUE_BIT,				/* value		*/
-};
-
-#define	CPU_FILE_VALUE		(1 << CPU_VALUE_BIT)
-
-/*
- * DisplayFamily_DisplayModel	Processor Families/Processor Number Series
- * --------------------------	------------------------------------------
- * 05_01, 05_02, 05_04		Pentium, Pentium with MMX
- *
- * 06_01			Pentium Pro
- * 06_03, 06_05			Pentium II Xeon, Pentium II
- * 06_07, 06_08, 06_0A, 06_0B	Pentium III Xeon, Pentum III
- *
- * 06_09, 060D			Pentium M
- *
- * 06_0E			Core Duo, Core Solo
- *
- * 06_0F			Xeon 3000, 3200, 5100, 5300, 7300 series,
- *				Core 2 Quad, Core 2 Extreme, Core 2 Duo,
- *				Pentium dual-core
- * 06_17			Xeon 5200, 5400 series, Core 2 Quad Q9650
- *
- * 06_1C			Atom
- *
- * 0F_00, 0F_01, 0F_02		Xeon, Xeon MP, Pentium 4
- * 0F_03, 0F_04			Xeon, Xeon MP, Pentium 4, Pentium D
- *
- * 0F_06			Xeon 7100, 5000 Series, Xeon MP,
- *				Pentium 4, Pentium D
- */
-
-/* Register processors bits */
-enum cpu_processor_bit {
-	CPU_NONE,
-/* Intel */
-	CPU_INTEL_PENTIUM_BIT,
-	CPU_INTEL_P6_BIT,
-	CPU_INTEL_PENTIUM_M_BIT,
-	CPU_INTEL_CORE_BIT,
-	CPU_INTEL_CORE2_BIT,
-	CPU_INTEL_ATOM_BIT,
-	CPU_INTEL_XEON_P4_BIT,
-	CPU_INTEL_XEON_MP_BIT,
-};
-
-#define	CPU_ALL			(~0)		/* Select all CPUs	*/
-
-#define	CPU_INTEL_PENTIUM	(1 << CPU_INTEL_PENTIUM_BIT)
-#define	CPU_INTEL_P6		(1 << CPU_INTEL_P6_BIT)
-#define	CPU_INTEL_PENTIUM_M	(1 << CPU_INTEL_PENTIUM_M_BIT)
-#define	CPU_INTEL_CORE		(1 << CPU_INTEL_CORE_BIT)
-#define	CPU_INTEL_CORE2		(1 << CPU_INTEL_CORE2_BIT)
-#define	CPU_INTEL_ATOM		(1 << CPU_INTEL_ATOM_BIT)
-#define	CPU_INTEL_XEON_P4	(1 << CPU_INTEL_XEON_P4_BIT)
-#define	CPU_INTEL_XEON_MP	(1 << CPU_INTEL_XEON_MP_BIT)
-
-#define	CPU_INTEL_PX		(CPU_INTEL_P6 | CPU_INTEL_PENTIUM_M)
-#define	CPU_INTEL_COREX		(CPU_INTEL_CORE | CPU_INTEL_CORE2)
-#define	CPU_INTEL_XEON		(CPU_INTEL_XEON_P4 | CPU_INTEL_XEON_MP)
-#define	CPU_CO_AT		(CPU_INTEL_CORE | CPU_INTEL_ATOM)
-#define	CPU_C2_AT		(CPU_INTEL_CORE2 | CPU_INTEL_ATOM)
-#define	CPU_CX_AT		(CPU_INTEL_COREX | CPU_INTEL_ATOM)
-#define	CPU_CX_XE		(CPU_INTEL_COREX | CPU_INTEL_XEON)
-#define	CPU_P6_XE		(CPU_INTEL_P6 | CPU_INTEL_XEON)
-#define	CPU_PM_CO_AT		(CPU_INTEL_PENTIUM_M | CPU_CO_AT)
-#define	CPU_C2_AT_XE		(CPU_C2_AT | CPU_INTEL_XEON)
-#define	CPU_CX_AT_XE		(CPU_CX_AT | CPU_INTEL_XEON)
-#define	CPU_P6_CX_AT		(CPU_INTEL_P6 | CPU_CX_AT)
-#define	CPU_P6_CX_XE		(CPU_P6_XE | CPU_INTEL_COREX)
-#define	CPU_P6_CX_AT_XE		(CPU_INTEL_P6 | CPU_CX_AT_XE)
-#define	CPU_PM_CX_AT_XE		(CPU_INTEL_PENTIUM_M | CPU_CX_AT_XE)
-#define	CPU_PM_CX_AT		(CPU_INTEL_PENTIUM_M | CPU_CX_AT)
-#define	CPU_PM_CX_XE		(CPU_INTEL_PENTIUM_M | CPU_CX_XE)
-#define	CPU_PX_CX_AT		(CPU_INTEL_PX | CPU_CX_AT)
-#define	CPU_PX_CX_AT_XE		(CPU_INTEL_PX | CPU_CX_AT_XE)
-
-/* Select all Intel CPUs*/
-#define	CPU_INTEL_ALL		(CPU_INTEL_PENTIUM | CPU_PX_CX_AT_XE)
-
-#define MAX_CPU_FILES		512
-
-struct cpu_private {
-	unsigned		cpu;
-	unsigned		type;
-	unsigned		reg;
-	unsigned		file;
-};
-
-struct cpu_debug_base {
-	char			*name;		/* Register name	*/
-	unsigned		flag;		/* Register flag	*/
-};
-
-struct cpu_cpuX_base {
-	struct dentry		*dentry;	/* Register dentry	*/
-	int			init;		/* Register index file	*/
-};
-
-struct cpu_file_base {
-	char			*name;		/* Register file name	*/
-	unsigned		flag;		/* Register file flag	*/
-};
-
-struct cpu_debug_range {
-	unsigned		min;		/* Register range min	*/
-	unsigned		max;		/* Register range max	*/
-	unsigned		flag;		/* Supported flags	*/
-	unsigned		model;		/* Supported models	*/
-};
-
-#endif /* _ASM_X86_CPU_DEBUG_H */
diff --git a/trunk/arch/x86/include/asm/desc.h b/trunk/arch/x86/include/asm/desc.h
index 5623c50d67b2..dc27705f5443 100644
--- a/trunk/arch/x86/include/asm/desc.h
+++ b/trunk/arch/x86/include/asm/desc.h
@@ -91,6 +91,7 @@ static inline int desc_empty(const void *ptr)
 #define store_gdt(dtr) native_store_gdt(dtr)
 #define store_idt(dtr) native_store_idt(dtr)
 #define store_tr(tr) (tr = native_store_tr())
+#define store_ldt(ldt) asm("sldt %0":"=m" (ldt))
 
 #define load_TLS(t, cpu) native_load_tls(t, cpu)
 #define set_ldt native_set_ldt
@@ -111,8 +112,6 @@ static inline void paravirt_free_ldt(struct desc_struct *ldt, unsigned entries)
 }
 #endif	/* CONFIG_PARAVIRT */
 
-#define store_ldt(ldt) asm("sldt %0" : "=m"(ldt))
-
 static inline void native_write_idt_entry(gate_desc *idt, int entry,
 					  const gate_desc *gate)
 {
diff --git a/trunk/arch/x86/include/asm/entry_arch.h b/trunk/arch/x86/include/asm/entry_arch.h
index c2e6bedaf258..854d538ae857 100644
--- a/trunk/arch/x86/include/asm/entry_arch.h
+++ b/trunk/arch/x86/include/asm/entry_arch.h
@@ -33,8 +33,6 @@ BUILD_INTERRUPT3(invalidate_interrupt7,INVALIDATE_TLB_VECTOR_START+7,
 		 smp_invalidate_interrupt)
 #endif
 
-BUILD_INTERRUPT(generic_interrupt, GENERIC_INTERRUPT_VECTOR)
-
 /*
  * every pentium local APIC has two 'local interrupts', with a
  * soft-definable vector attached to both interrupts, one of
diff --git a/trunk/arch/x86/include/asm/fixmap.h b/trunk/arch/x86/include/asm/fixmap.h
index 63a79c77d220..dca8f03da5b2 100644
--- a/trunk/arch/x86/include/asm/fixmap.h
+++ b/trunk/arch/x86/include/asm/fixmap.h
@@ -24,6 +24,9 @@
 #include
 #else
 #include
+#ifdef CONFIG_EFI
+#include
+#endif
 #endif
 
 /*
@@ -89,6 +92,13 @@ enum fixed_addresses {
 	FIX_IO_APIC_BASE_0,
 	FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS - 1,
 #endif
+#ifdef CONFIG_X86_64
+#ifdef CONFIG_EFI
+	FIX_EFI_IO_MAP_LAST_PAGE,
+	FIX_EFI_IO_MAP_FIRST_PAGE = FIX_EFI_IO_MAP_LAST_PAGE
+				    + MAX_EFI_IO_PAGES - 1,
+#endif
+#endif
 #ifdef CONFIG_X86_VISWS_APIC
 	FIX_CO_CPU,	/* Cobalt timer */
 	FIX_CO_APIC,	/* Cobalt APIC Redirection Table */
diff --git a/trunk/arch/x86/include/asm/hardirq.h b/trunk/arch/x86/include/asm/hardirq.h
index 039db6aa8e02..176f058e7159 100644
--- a/trunk/arch/x86/include/asm/hardirq.h
+++ b/trunk/arch/x86/include/asm/hardirq.h
@@ -12,7 +12,6 @@ typedef struct {
 	unsigned int apic_timer_irqs;	/* arch dependent */
 	unsigned int irq_spurious_count;
 #endif
-	unsigned int generic_irqs;	/* arch dependent */
 #ifdef CONFIG_SMP
 	unsigned int irq_resched_count;
 	unsigned int irq_call_count;
diff --git a/trunk/arch/x86/include/asm/highmem.h b/trunk/arch/x86/include/asm/highmem.h
index 014c2b85ae45..bf9276bea660 100644
--- a/trunk/arch/x86/include/asm/highmem.h
+++ b/trunk/arch/x86/include/asm/highmem.h
@@ -63,7 +63,6 @@ void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot);
 void *kmap_atomic(struct page *page, enum km_type type);
 void kunmap_atomic(void *kvaddr, enum km_type type);
 void *kmap_atomic_pfn(unsigned long pfn, enum km_type type);
-void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot);
 struct page *kmap_atomic_to_page(void *ptr);
 
 #ifndef CONFIG_PARAVIRT
diff --git a/trunk/arch/x86/include/asm/hw_irq.h b/trunk/arch/x86/include/asm/hw_irq.h
index b762ea49bd70..370e1c83bb49 100644
--- a/trunk/arch/x86/include/asm/hw_irq.h
+++ b/trunk/arch/x86/include/asm/hw_irq.h
@@ -27,7 +27,6 @@
 
 /* Interrupt handlers registered during init_IRQ */
 extern void apic_timer_interrupt(void);
-extern void generic_interrupt(void);
 extern void error_interrupt(void);
 extern void spurious_interrupt(void);
 extern void thermal_interrupt(void);
diff --git a/trunk/arch/x86/include/asm/init.h b/trunk/arch/x86/include/asm/init.h
deleted file mode 100644
index 36fb1a6a5109..000000000000
--- a/trunk/arch/x86/include/asm/init.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef _ASM_X86_INIT_32_H
-#define _ASM_X86_INIT_32_H
-
-#ifdef CONFIG_X86_32
-extern void __init early_ioremap_page_table_range_init(void);
-#endif
-
-extern unsigned long __init
-kernel_physical_mapping_init(unsigned long start,
-			     unsigned long end,
-			     unsigned long page_size_mask);
-
-
-extern unsigned long __initdata e820_table_start;
-extern unsigned long __meminitdata e820_table_end;
-extern unsigned long __meminitdata e820_table_top;
-
-#endif /* _ASM_X86_INIT_32_H */
diff --git a/trunk/arch/x86/include/asm/irq.h b/trunk/arch/x86/include/asm/irq.h
index f38481bcd455..107eb2196691 100644
--- a/trunk/arch/x86/include/asm/irq.h
+++ b/trunk/arch/x86/include/asm/irq.h
@@ -36,7 +36,6 @@ static inline int irq_canonicalize(int irq)
 extern void fixup_irqs(void);
 #endif
 
-extern void (*generic_interrupt_extension)(void);
 extern void init_IRQ(void);
 extern void native_init_IRQ(void);
 extern bool handle_irq(unsigned irq, struct pt_regs *regs);
diff --git a/trunk/arch/x86/include/asm/irq_vectors.h b/trunk/arch/x86/include/asm/irq_vectors.h
index 3cbd79bbb47c..8a285f356f8a 100644
--- a/trunk/arch/x86/include/asm/irq_vectors.h
+++ b/trunk/arch/x86/include/asm/irq_vectors.h
@@ -111,11 +111,6 @@
  */
 #define LOCAL_PERF_VECTOR		0xee
 
-/*
- * Generic system vector for platform specific use
- */
-#define GENERIC_INTERRUPT_VECTOR	0xed
-
 /*
  * First APIC vector available to drivers: (vectors 0x30-0xee) we
  * start at 0x31(0x41) to spread out vectors evenly between priority
diff --git a/trunk/arch/x86/include/asm/kexec.h b/trunk/arch/x86/include/asm/kexec.h
index 317ff1703d0b..0ceb6d19ed30 100644
--- a/trunk/arch/x86/include/asm/kexec.h
+++ b/trunk/arch/x86/include/asm/kexec.h
@@ -9,13 +9,13 @@
 # define PAGES_NR		4
 #else
 # define PA_CONTROL_PAGE	0
-# define VA_CONTROL_PAGE	1
-# define PA_TABLE_PAGE		2
-# define PA_SWAP_PAGE		3
-# define PAGES_NR		4
+# define PA_TABLE_PAGE		1
+# define PAGES_NR		2
 #endif
 
+#ifdef CONFIG_X86_32
 # define KEXEC_CONTROL_CODE_MAX_SIZE	2048
+#endif
 
 #ifndef __ASSEMBLY__
 
@@ -136,11 +136,10 @@ relocate_kernel(unsigned long indirection_page,
 		unsigned int has_pae,
 		unsigned int preserve_context);
 #else
-unsigned long
+NORET_TYPE void
 relocate_kernel(unsigned long indirection_page,
 		unsigned long page_list,
-		unsigned long start_address,
-		unsigned int preserve_context);
+		unsigned long start_address) ATTRIB_NORET;
 #endif
 
 #define ARCH_HAS_KIMAGE_ARCH
diff --git a/trunk/arch/x86/include/asm/linkage.h b/trunk/arch/x86/include/asm/linkage.h
index 12d55e773eb6..9320e2a8a26a 100644
--- a/trunk/arch/x86/include/asm/linkage.h
+++ b/trunk/arch/x86/include/asm/linkage.h
@@ -1,11 +1,14 @@
 #ifndef _ASM_X86_LINKAGE_H
 #define _ASM_X86_LINKAGE_H
 
-#include
-
 #undef notrace
 #define notrace __attribute__((no_instrument_function))
 
+#ifdef CONFIG_X86_64
+#define __ALIGN .p2align 4,,15
+#define __ALIGN_STR ".p2align 4,,15"
+#endif
+
 #ifdef CONFIG_X86_32
 #define asmlinkage CPP_ASMLINKAGE __attribute__((regparm(0)))
 /*
@@ -47,20 +50,16 @@
 	__asmlinkage_protect_n(ret, "g" (arg1), "g" (arg2), "g" (arg3),	\
 			      "g" (arg4), "g" (arg5), "g" (arg6))
 
-#endif /* CONFIG_X86_32 */
-
-#ifdef __ASSEMBLY__
+#endif
 
 #define GLOBAL(name)	\
 	.globl name;	\
 	name:
 
-#if defined(CONFIG_X86_64) || defined(CONFIG_X86_ALIGNMENT_16)
-#define __ALIGN		.p2align 4, 0x90
-#define __ALIGN_STR	__stringify(__ALIGN)
+#ifdef CONFIG_X86_ALIGNMENT_16
+#define __ALIGN		.align 16,0x90
+#define __ALIGN_STR	".align 16,0x90"
 #endif
 
-#endif /* __ASSEMBLY__ */
-
 #endif /* _ASM_X86_LINKAGE_H */
diff --git a/trunk/arch/x86/include/asm/mce.h b/trunk/arch/x86/include/asm/mce.h
index 563933e06a35..32c6e17b960b 100644
--- a/trunk/arch/x86/include/asm/mce.h
+++ b/trunk/arch/x86/include/asm/mce.h
@@ -11,8 +11,6 @@
  */
 
 #define MCG_CTL_P	 (1UL<<8)   /* MCG_CAP register available */
-#define MCG_EXT_P	 (1ULL<<9)   /* Extended registers available */
-#define MCG_CMCI_P	 (1ULL<<10)  /* CMCI supported */
 
 #define MCG_STATUS_RIPV  (1UL<<0)   /* restart ip valid */
 #define MCG_STATUS_EIPV  (1UL<<1)   /* ip points to correct instruction */
@@ -92,29 +90,14 @@ extern int mce_disabled;
 
 #include
 
-void mce_setup(struct mce *m);
 void mce_log(struct mce *m);
 DECLARE_PER_CPU(struct sys_device, device_mce);
 extern void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu);
 
-/*
- * To support more than 128 would need to escape the predefined
- * Linux defined extended banks first.
- */
-#define MAX_NR_BANKS (MCE_EXTENDED_BANK - 1)
-
 #ifdef CONFIG_X86_MCE_INTEL
 void mce_intel_feature_init(struct cpuinfo_x86 *c);
-void cmci_clear(void);
-void cmci_reenable(void);
-void cmci_rediscover(int dying);
-void cmci_recheck(void);
 #else
 static inline void mce_intel_feature_init(struct cpuinfo_x86 *c) { }
-static inline void cmci_clear(void) {}
-static inline void cmci_reenable(void) {}
-static inline void cmci_rediscover(int dying) {}
-static inline void cmci_recheck(void) {}
 #endif
 
 #ifdef CONFIG_X86_MCE_AMD
@@ -123,23 +106,11 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c);
 static inline void mce_amd_feature_init(struct cpuinfo_x86 *c) { }
 #endif
 
-extern int mce_available(struct cpuinfo_x86 *c);
-
-void mce_log_therm_throt_event(__u64 status);
+void mce_log_therm_throt_event(unsigned int cpu, __u64 status);
 
 extern atomic_t mce_entry;
 
 extern void do_machine_check(struct pt_regs *, long);
-
-typedef DECLARE_BITMAP(mce_banks_t, MAX_NR_BANKS);
-DECLARE_PER_CPU(mce_banks_t, mce_poll_banks);
-
-enum mcp_flags {
-	MCP_TIMESTAMP = (1 << 0),	/* log time stamp */
-	MCP_UC = (1 << 1),		/* log uncorrected errors */
-};
-extern void machine_check_poll(enum mcp_flags flags, mce_banks_t *b);
-
 extern int mce_notify_user(void);
 
 #endif /* !CONFIG_X86_32 */
@@ -149,8 +120,8 @@ extern void mcheck_init(struct cpuinfo_x86 *c);
 #else
 #define mcheck_init(c) do { } while (0)
 #endif
-
-extern void (*mce_threshold_vector)(void);
+extern void stop_mce(void);
+extern void restart_mce(void);
 
 #endif /* __KERNEL__ */
 #endif /* _ASM_X86_MCE_H */
diff --git a/trunk/arch/x86/include/asm/msr-index.h b/trunk/arch/x86/include/asm/msr-index.h
index 2dbd2314139e..358acc59ae04 100644
--- a/trunk/arch/x86/include/asm/msr-index.h
+++ b/trunk/arch/x86/include/asm/msr-index.h
@@ -77,11 +77,6 @@
 #define MSR_IA32_MC0_ADDR		0x00000402
 #define MSR_IA32_MC0_MISC		0x00000403
 
-/* These are consecutive and not in the normal 4er MCE bank block */
-#define MSR_IA32_MC0_CTL2		0x00000280
-#define CMCI_EN				(1ULL << 30)
-#define CMCI_THRESHOLD_MASK		0xffffULL
-
 #define MSR_P6_PERFCTR0			0x000000c1
 #define MSR_P6_PERFCTR1			0x000000c2
 #define MSR_P6_EVNTSEL0			0x00000186
diff --git a/trunk/arch/x86/include/asm/page_types.h b/trunk/arch/x86/include/asm/page_types.h
index 826ad37006ab..2d625da6603c 100644
--- a/trunk/arch/x86/include/asm/page_types.h
+++ b/trunk/arch/x86/include/asm/page_types.h
@@ -40,8 +40,14 @@
 
 #ifndef __ASSEMBLY__
 
+struct pgprot;
+
 extern int page_is_ram(unsigned long pagenr);
 extern int devmem_is_allowed(unsigned long pagenr);
+extern void map_devmem(unsigned long pfn, unsigned long size,
+		       struct pgprot vma_prot);
+extern void unmap_devmem(unsigned long pfn, unsigned long size,
+			 struct pgprot vma_prot);
 
 extern unsigned long max_low_pfn_mapped;
 extern unsigned long max_pfn_mapped;
diff --git a/trunk/arch/x86/include/asm/pat.h b/trunk/arch/x86/include/asm/pat.h
index 2cd07b9422f4..b0e70056838e 100644
--- a/trunk/arch/x86/include/asm/pat.h
+++ b/trunk/arch/x86/include/asm/pat.h
@@ -2,7 +2,6 @@
 #define _ASM_X86_PAT_H
 
 #include
-#include
 
 #ifdef CONFIG_X86_PAT
 extern int pat_enabled;
@@ -18,9 +17,5 @@ extern int free_memtype(u64 start, u64 end);
 extern int kernel_map_sync_memtype(u64 base, unsigned long size,
 				   unsigned long flag);
 
-extern void map_devmem(unsigned long pfn, unsigned long size,
-		       struct pgprot vma_prot);
-extern void unmap_devmem(unsigned long pfn, unsigned long size,
-			 struct pgprot vma_prot);
-
 #endif /* _ASM_X86_PAT_H */
diff --git a/trunk/arch/x86/include/asm/pgtable_32_types.h b/trunk/arch/x86/include/asm/pgtable_32_types.h
index 2733fad45f98..bd8df3b2fe04 100644
--- a/trunk/arch/x86/include/asm/pgtable_32_types.h
+++ b/trunk/arch/x86/include/asm/pgtable_32_types.h
@@ -25,11 +25,6 @@
  * area for the same reason. ;)
  */
 #define VMALLOC_OFFSET	(8 * 1024 * 1024)
-
-#ifndef __ASSEMBLER__
-extern bool __vmalloc_start_set; /* set once high_memory is set */
-#endif
-
 #define VMALLOC_START	((unsigned long)high_memory + VMALLOC_OFFSET)
 #ifdef CONFIG_X86_PAE
 #define LAST_PKMAP 512
diff --git a/trunk/arch/x86/include/asm/pgtable_types.h b/trunk/arch/x86/include/asm/pgtable_types.h
index b8238dc8786d..4d258ad76a0f 100644
--- a/trunk/arch/x86/include/asm/pgtable_types.h
+++ b/trunk/arch/x86/include/asm/pgtable_types.h
@@ -273,7 +273,6 @@ typedef struct page *pgtable_t;
 
 extern pteval_t __supported_pte_mask;
 extern int nx_enabled;
-extern void set_nx(void);
 
 #define pgprot_writecombine	pgprot_writecombine
 extern pgprot_t pgprot_writecombine(pgprot_t prot);
diff --git a/trunk/arch/x86/include/asm/uv/uv_hub.h b/trunk/arch/x86/include/asm/uv/uv_hub.h
index 9f4dfba33b28..777327ef05c1 100644
--- a/trunk/arch/x86/include/asm/uv/uv_hub.h
+++ b/trunk/arch/x86/include/asm/uv/uv_hub.h
@@ -199,10 +199,6 @@ DECLARE_PER_CPU(struct uv_hub_info_s, __uv_hub_info);
 #define SCIR_CPU_ACTIVITY	0x02	/* not idle */
 #define SCIR_CPU_HB_INTERVAL	(HZ)	/* once per second */
 
-/* Loop through all installed blades */
-#define for_each_possible_blade(bid)		\
-	for ((bid) = 0; (bid) < uv_num_possible_blades(); (bid)++)
-
 /*
  * Macros for converting between kernel virtual addresses, socket local physical
  * addresses, and UV global physical addresses.
diff --git a/trunk/arch/x86/kernel/Makefile b/trunk/arch/x86/kernel/Makefile
index 339ce35648e6..95f216bbfaf1 100644
--- a/trunk/arch/x86/kernel/Makefile
+++ b/trunk/arch/x86/kernel/Makefile
@@ -111,7 +111,7 @@ obj-$(CONFIG_SWIOTLB)	+= pci-swiotlb_64.o # NB rename without _64
 ###
 # 64 bit specific files
 ifeq ($(CONFIG_X86_64),y)
-	obj-$(CONFIG_X86_UV)	+= tlb_uv.o bios_uv.o uv_irq.o uv_sysfs.o uv_time.o
+	obj-$(CONFIG_X86_UV)	+= tlb_uv.o bios_uv.o uv_irq.o uv_sysfs.o
 	obj-$(CONFIG_X86_PM_TIMER)	+= pmtimer_64.o
 	obj-$(CONFIG_AUDIT)	+= audit_64.o
diff --git a/trunk/arch/x86/kernel/alternative.c b/trunk/arch/x86/kernel/alternative.c
index 4c80f1557433..6907b8e85d52 100644
--- a/trunk/arch/x86/kernel/alternative.c
+++ b/trunk/arch/x86/kernel/alternative.c
@@ -414,17 +414,9 @@ void __init alternative_instructions(void)
 	   that might execute the to be patched code.
 	   Other CPUs are not running. */
 	stop_nmi();
-
-	/*
-	 * Don't stop machine check exceptions while patching.
-	 * MCEs only happen when something got corrupted and in this
-	 * case we must do something about the corruption.
-	 * Ignoring it is worse than a unlikely patching race.
-	 * Also machine checks tend to be broadcast and if one CPU
-	 * goes into machine check the others follow quickly, so we don't
-	 * expect a machine check to cause undue problems during to code
-	 * patching.
-	 */
+#ifdef CONFIG_X86_MCE
+	stop_mce();
+#endif
 
 	apply_alternatives(__alt_instructions, __alt_instructions_end);
 
@@ -464,6 +456,9 @@ void __init alternative_instructions(void)
 	       (unsigned long)__smp_locks_end);
 
 	restart_nmi();
+#ifdef CONFIG_X86_MCE
+	restart_mce();
+#endif
 }
 
 /**
diff --git a/trunk/arch/x86/kernel/apic/apic.c b/trunk/arch/x86/kernel/apic/apic.c
index 30909a258d0f..f9cecdfd05c5 100644
--- a/trunk/arch/x86/kernel/apic/apic.c
+++ b/trunk/arch/x86/kernel/apic/apic.c
@@ -46,7 +46,6 @@
 #include
 #include
 #include
-#include
 
 unsigned int num_processors;
 
@@ -843,14 +842,6 @@ void clear_local_APIC(void)
 		apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED);
 	}
 #endif
-#ifdef CONFIG_X86_MCE_INTEL
-	if (maxlvt >= 6) {
-		v = apic_read(APIC_LVTCMCI);
-		if (!(v & APIC_LVT_MASKED))
-			apic_write(APIC_LVTCMCI, v | APIC_LVT_MASKED);
-	}
-#endif
-
 	/*
 	 * Clean APIC state for other OSs:
 	 */
@@ -1250,12 +1241,6 @@ void __cpuinit setup_local_APIC(void)
 		apic_write(APIC_LVT1, value);
 
 	preempt_enable();
-
-#ifdef CONFIG_X86_MCE_INTEL
-	/* Recheck CMCI information after local APIC is up on CPU #0 */
-	if (smp_processor_id() == 0)
-		cmci_recheck();
-#endif
 }
 
 void __cpuinit end_local_APIC_setup(void)
diff --git a/trunk/arch/x86/kernel/cpu/Makefile b/trunk/arch/x86/kernel/cpu/Makefile
index d4356f8b7522..82db7f45e2de 100644
--- a/trunk/arch/x86/kernel/cpu/Makefile
+++ b/trunk/arch/x86/kernel/cpu/Makefile
@@ -14,8 +14,6 @@ obj-y			+= vmware.o hypervisor.o
 obj-$(CONFIG_X86_32)	+= bugs.o cmpxchg.o
 obj-$(CONFIG_X86_64)	+= bugs_64.o
 
-obj-$(CONFIG_X86_CPU_DEBUG)		+= cpu_debug.o
-
 obj-$(CONFIG_CPU_SUP_INTEL)		+= intel.o
 obj-$(CONFIG_CPU_SUP_AMD)		+= amd.o
 obj-$(CONFIG_CPU_SUP_CYRIX_32)		+= cyrix.o
diff --git a/trunk/arch/x86/kernel/cpu/amd.c b/trunk/arch/x86/kernel/cpu/amd.c
index f47df59016c5..25423a5b80ed 100644
--- a/trunk/arch/x86/kernel/cpu/amd.c
+++ b/trunk/arch/x86/kernel/cpu/amd.c
@@ -5,7 +5,6 @@
 #include
 #include
 #include
-#include
 
 #ifdef CONFIG_X86_64
 # include
@@ -142,55 +141,6 @@ static void __cpuinit init_amd_k6(struct cpuinfo_x86 *c)
 	}
 }
 
-static void __cpuinit amd_k7_smp_check(struct cpuinfo_x86 *c)
-{
-#ifdef CONFIG_SMP
-	/* calling is from identify_secondary_cpu() ? */
-	if (c->cpu_index == boot_cpu_id)
-		return;
-
-	/*
-	 * Certain Athlons might work (for various values of 'work') in SMP
-	 * but they are not certified as MP capable.
-	 */
-	/* Athlon 660/661 is valid. */
-	if ((c->x86_model == 6) && ((c->x86_mask == 0) ||
-	    (c->x86_mask == 1)))
-		goto valid_k7;
-
-	/* Duron 670 is valid */
-	if ((c->x86_model == 7) && (c->x86_mask == 0))
-		goto valid_k7;
-
-	/*
-	 * Athlon 662, Duron 671, and Athlon >model 7 have capability
-	 * bit. It's worth noting that the A5 stepping (662) of some
-	 * Athlon XP's have the MP bit set.
-	 * See http://www.heise.de/newsticker/data/jow-18.10.01-000 for
-	 * more.
-	 */
-	if (((c->x86_model == 6) && (c->x86_mask >= 2)) ||
-	    ((c->x86_model == 7) && (c->x86_mask >= 1)) ||
-	     (c->x86_model > 7))
-		if (cpu_has_mp)
-			goto valid_k7;
-
-	/* If we get here, not a certified SMP capable AMD system. */
-
-	/*
-	 * Don't taint if we are running SMP kernel on a single non-MP
-	 * approved Athlon
-	 */
-	WARN_ONCE(1, "WARNING: This combination of AMD"
-		"processors is not suitable for SMP.\n");
-	if (!test_taint(TAINT_UNSAFE_SMP))
-		add_taint(TAINT_UNSAFE_SMP);
-
-valid_k7:
-	;
-#endif
-}
-
 static void __cpuinit init_amd_k7(struct cpuinfo_x86 *c)
 {
 	u32 l, h;
@@ -225,8 +175,6 @@ static void __cpuinit init_amd_k7(struct cpuinfo_x86 *c)
 	}
 
 	set_cpu_cap(c, X86_FEATURE_K7);
-
-	amd_k7_smp_check(c);
 }
 #endif
diff --git a/trunk/arch/x86/kernel/cpu/common.c b/trunk/arch/x86/kernel/cpu/common.c
index f8869978bbb7..826d5c876278 100644
--- a/trunk/arch/x86/kernel/cpu/common.c
+++ b/trunk/arch/x86/kernel/cpu/common.c
@@ -1078,7 +1078,8 @@ void __cpuinit cpu_init(void)
 
 	atomic_inc(&init_mm.mm_count);
 	me->active_mm = &init_mm;
-	BUG_ON(me->mm);
+	if (me->mm)
+		BUG();
 	enter_lazy_tlb(&init_mm, me);
 
 	load_sp0(t, &current->thread);
@@ -1144,7 +1145,8 @@ void __cpuinit cpu_init(void)
 	 */
 	atomic_inc(&init_mm.mm_count);
 	curr->active_mm = &init_mm;
-	BUG_ON(curr->mm);
+	if (curr->mm)
+		BUG();
 	enter_lazy_tlb(&init_mm, curr);
 
 	load_sp0(t, thread);
diff --git a/trunk/arch/x86/kernel/cpu/cpu_debug.c b/trunk/arch/x86/kernel/cpu/cpu_debug.c
deleted file mode 100755
index 9abbcbd933cc..000000000000
--- a/trunk/arch/x86/kernel/cpu/cpu_debug.c
+++ /dev/null
@@ -1,785 +0,0 @@
-/*
- * CPU x86 architecture debug code
- *
- * Copyright(C) 2009 Jaswinder Singh Rajput
- *
- * For licencing details see kernel-base/COPYING
- */
-
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-
-#include
-#include
-#include
-#include
-#include
-#include
-
-static DEFINE_PER_CPU(struct cpu_cpuX_base, cpu_arr[CPU_REG_ALL_BIT]);
-static DEFINE_PER_CPU(struct cpu_private *, priv_arr[MAX_CPU_FILES]);
-static DEFINE_PER_CPU(unsigned, cpu_modelflag);
-static DEFINE_PER_CPU(int, cpu_priv_count);
-static DEFINE_PER_CPU(unsigned, cpu_model);
-
-static DEFINE_MUTEX(cpu_debug_lock);
-
-static struct dentry *cpu_debugfs_dir;
-
-static struct cpu_debug_base cpu_base[] = {
-	{ "mc",		CPU_MC },	/* Machine Check	*/
-	{ "monitor",	CPU_MONITOR },	/* Monitor		*/
-	{ "time",	CPU_TIME },	/* Time			*/
-	{ "pmc",	CPU_PMC },	/* Performance Monitor	*/
-	{ "platform",	CPU_PLATFORM },	/* Platform		*/
-	{ "apic",	CPU_APIC },	/* APIC			*/
-	{ "poweron",	CPU_POWERON },	/* Power-on		*/
-	{ "control",	CPU_CONTROL },	/* Control		*/
-	{ "features",	CPU_FEATURES },	/* Features control	*/
-	{ "lastbranch",	CPU_LBRANCH },	/* Last Branch		*/
-	{ "bios",	CPU_BIOS },	/* BIOS			*/
-	{ "freq",	CPU_FREQ },	/* Frequency		*/
-	{ "mtrr",	CPU_MTRR },	/* MTRR			*/
-	{ "perf",	CPU_PERF },	/* Performance		*/
-	{ "cache",	CPU_CACHE },	/* Cache		*/
-	{ "sysenter",	CPU_SYSENTER },	/* Sysenter		*/
-	{ "therm",	CPU_THERM },	/* Thermal		*/
-	{ "misc",	CPU_MISC },	/* Miscellaneous	*/
-	{ "debug",	CPU_DEBUG },	/* Debug		*/
-	{ "pat",	CPU_PAT },	/* PAT			*/
-	{ "vmx",	CPU_VMX },	/* VMX			*/
-	{ "call",	CPU_CALL },	/* System Call		*/
-	{ "base",	CPU_BASE },	/* BASE Address		*/
-	{ "smm",	CPU_SMM },	/* System mgmt mode	*/
-	{ "svm",	CPU_SVM },	/*Secure Virtial Machine*/
-	{ "osvm",	CPU_OSVM },	/* OS-Visible Workaround*/
-	{ "tss",	CPU_TSS },	/* Task Stack Segment	*/
-	{ "cr",		CPU_CR },	/* Control Registers	*/
-	{ "dt",		CPU_DT },	/* Descriptor Table	*/
-	{ "registers",	CPU_REG_ALL },	/* Select all Registers	*/
-};
-
-static struct cpu_file_base cpu_file[] = {
-	{ "index",	CPU_REG_ALL },	/* index		*/
-	{ "value",	CPU_REG_ALL },	/* value		*/
-};
-
-/* Intel Registers Range */
-static struct cpu_debug_range cpu_intel_range[] = {
-	{ 0x00000000, 0x00000001, CPU_MC,	CPU_INTEL_ALL },
-	{ 0x00000006, 0x00000007, CPU_MONITOR,	CPU_CX_AT_XE },
-	{ 0x00000010, 0x00000010, CPU_TIME,	CPU_INTEL_ALL },
-	{ 0x00000011, 0x00000013, CPU_PMC,	CPU_INTEL_PENTIUM },
-	{ 0x00000017, 0x00000017, CPU_PLATFORM,	CPU_PX_CX_AT_XE },
-	{ 0x0000001B, 0x0000001B, CPU_APIC,	CPU_P6_CX_AT_XE },
-
-	{ 0x0000002A, 0x0000002A, CPU_POWERON,	CPU_PX_CX_AT_XE },
-	{ 0x0000002B, 0x0000002B, CPU_POWERON,	CPU_INTEL_XEON },
-	{ 0x0000002C, 0x0000002C, CPU_FREQ,	CPU_INTEL_XEON },
-	{ 0x0000003A, 0x0000003A, CPU_CONTROL,	CPU_CX_AT_XE },
-
-	{ 0x00000040, 0x00000043, CPU_LBRANCH,	CPU_PM_CX_AT_XE },
-	{ 0x00000044, 0x00000047, CPU_LBRANCH,	CPU_PM_CO_AT },
-	{ 0x00000060, 0x00000063, CPU_LBRANCH,	CPU_C2_AT },
-	{ 0x00000064, 0x00000067, CPU_LBRANCH,	CPU_INTEL_ATOM },
-
-	{ 0x00000079, 0x00000079, CPU_BIOS,	CPU_P6_CX_AT_XE },
-	{ 0x00000088, 0x0000008A, CPU_CACHE,	CPU_INTEL_P6 },
-	{ 0x0000008B, 0x0000008B, CPU_BIOS,	CPU_P6_CX_AT_XE },
-	{ 0x0000009B, 0x0000009B, CPU_MONITOR,	CPU_INTEL_XEON },
-
-	{ 0x000000C1, 0x000000C2, CPU_PMC,	CPU_P6_CX_AT },
-	{ 0x000000CD, 0x000000CD, CPU_FREQ,	CPU_CX_AT },
-	{ 0x000000E7, 0x000000E8, CPU_PERF,	CPU_CX_AT },
-	{ 0x000000FE, 0x000000FE, CPU_MTRR,	CPU_P6_CX_XE },
-
-	{ 0x00000116, 0x00000116, CPU_CACHE,	CPU_INTEL_P6 },
-	{ 0x00000118, 0x00000118, CPU_CACHE,	CPU_INTEL_P6 },
-	{ 0x00000119, 0x00000119, CPU_CACHE,	CPU_INTEL_PX },
-	{ 0x0000011A, 0x0000011B, CPU_CACHE,	CPU_INTEL_P6 },
-	{ 0x0000011E, 0x0000011E, CPU_CACHE,	CPU_PX_CX_AT },
-
-	{ 0x00000174, 0x00000176, CPU_SYSENTER,	CPU_P6_CX_AT_XE },
-	{ 0x00000179, 0x0000017A, CPU_MC,	CPU_PX_CX_AT_XE },
-	{ 0x0000017B, 0x0000017B, CPU_MC,	CPU_P6_XE },
-	{ 0x00000186, 0x00000187, CPU_PMC,	CPU_P6_CX_AT },
-	{ 0x00000198, 0x00000199, CPU_PERF,	CPU_PM_CX_AT_XE },
-	{ 0x0000019A, 0x0000019A, CPU_TIME,	CPU_PM_CX_AT_XE },
-	{ 0x0000019B, 0x0000019D, CPU_THERM,	CPU_PM_CX_AT_XE },
-	{ 0x000001A0, 0x000001A0, CPU_MISC,	CPU_PM_CX_AT_XE },
-
-	{ 0x000001C9, 0x000001C9, CPU_LBRANCH,	CPU_PM_CX_AT },
-	{ 0x000001D7, 0x000001D8, CPU_LBRANCH,	CPU_INTEL_XEON },
-	{ 0x000001D9, 0x000001D9, CPU_DEBUG,	CPU_CX_AT_XE },
-	{ 0x000001DA, 0x000001DA, CPU_LBRANCH,	CPU_INTEL_XEON },
-	{ 0x000001DB, 0x000001DB, CPU_LBRANCH,	CPU_P6_XE },
-	{ 0x000001DC, 0x000001DC, CPU_LBRANCH,	CPU_INTEL_P6 },
-	{ 0x000001DD, 0x000001DE, CPU_LBRANCH,	CPU_PX_CX_AT_XE },
-	{ 0x000001E0, 0x000001E0, CPU_LBRANCH,	CPU_INTEL_P6 },
-
-	{ 0x00000200, 0x0000020F, CPU_MTRR,	CPU_P6_CX_XE },
-	{ 0x00000250, 0x00000250, CPU_MTRR,	CPU_P6_CX_XE },
-	{ 0x00000258, 0x00000259, CPU_MTRR,	CPU_P6_CX_XE },
-	{ 0x00000268, 0x0000026F, CPU_MTRR,	CPU_P6_CX_XE },
-	{ 0x00000277, 0x00000277, CPU_PAT,	CPU_C2_AT_XE },
-	{ 0x000002FF, 0x000002FF, CPU_MTRR,	CPU_P6_CX_XE },
-
-	{ 0x00000300, 0x00000308, CPU_PMC,	CPU_INTEL_XEON },
-	{ 0x00000309, 0x0000030B, CPU_PMC,	CPU_C2_AT_XE },
-	{ 0x0000030C, 0x00000311, CPU_PMC,	CPU_INTEL_XEON },
-	{ 0x00000345, 0x00000345, CPU_PMC,	CPU_C2_AT },
-	{ 0x00000360, 0x00000371, CPU_PMC,	CPU_INTEL_XEON },
-	{ 0x0000038D, 0x00000390, CPU_PMC,	CPU_C2_AT },
-	{ 0x000003A0, 0x000003BE, CPU_PMC,	CPU_INTEL_XEON },
-	{ 0x000003C0, 0x000003CD, CPU_PMC,	CPU_INTEL_XEON },
-	{ 0x000003E0, 0x000003E1, CPU_PMC,	CPU_INTEL_XEON },
-	{ 0x000003F0, 0x000003F0, CPU_PMC,	CPU_INTEL_XEON },
-	{ 0x000003F1, 0x000003F1, CPU_PMC,	CPU_C2_AT_XE },
-	{ 0x000003F2, 0x000003F2, CPU_PMC,	CPU_INTEL_XEON },
-
-	{ 0x00000400, 0x00000402, CPU_MC,	CPU_PM_CX_AT_XE },
-	{ 0x00000403, 0x00000403, CPU_MC,	CPU_INTEL_XEON },
-	{ 0x00000404, 0x00000406, CPU_MC,	CPU_PM_CX_AT_XE },
-	{ 0x00000407, 0x00000407, CPU_MC,	CPU_INTEL_XEON },
-	{ 0x00000408, 0x0000040A, CPU_MC,	CPU_PM_CX_AT_XE },
-	{ 0x0000040B, 0x0000040B, CPU_MC,	CPU_INTEL_XEON },
-	{ 0x0000040C, 0x0000040E, CPU_MC,	CPU_PM_CX_XE },
-	{ 0x0000040F, 0x0000040F, CPU_MC,	CPU_INTEL_XEON },
-	{ 0x00000410, 0x00000412, CPU_MC,	CPU_PM_CX_AT_XE },
-	{ 0x00000413, 0x00000417, CPU_MC,	CPU_CX_AT_XE },
-	{ 0x00000480, 0x0000048B, CPU_VMX,	CPU_CX_AT_XE },
-
-	{ 0x00000600, 0x00000600, CPU_DEBUG,	CPU_PM_CX_AT_XE },
-	{ 0x00000680, 0x0000068F, CPU_LBRANCH,	CPU_INTEL_XEON },
-	{ 0x000006C0, 0x000006CF, CPU_LBRANCH,	CPU_INTEL_XEON },
-
-	{ 0x000107CC, 0x000107D3, CPU_PMC,	CPU_INTEL_XEON_MP },
-
-	{ 0xC0000080, 0xC0000080, CPU_FEATURES,	CPU_INTEL_XEON },
-	{ 0xC0000081, 0xC0000082, CPU_CALL,	CPU_INTEL_XEON },
-	{ 0xC0000084, 0xC0000084, CPU_CALL,	CPU_INTEL_XEON },
-	{ 0xC0000100, 0xC0000102, CPU_BASE,	CPU_INTEL_XEON },
-};
-
-/* AMD Registers Range */
-static struct cpu_debug_range cpu_amd_range[] = {
-	{ 0x00000010, 0x00000010, CPU_TIME,	CPU_ALL, },
-	{ 0x0000001B, 0x0000001B, CPU_APIC,	CPU_ALL, },
-	{ 0x000000FE, 0x000000FE, CPU_MTRR,	CPU_ALL, },
-
-	{ 0x00000174, 0x00000176, CPU_SYSENTER,	CPU_ALL, },
-	{ 0x00000179, 0x0000017A, CPU_MC,	CPU_ALL, },
-	{ 0x0000017B, 0x0000017B, CPU_MC,	CPU_ALL, },
-	{ 0x000001D9, 0x000001D9, CPU_DEBUG,	CPU_ALL, },
-	{ 0x000001DB, 0x000001DE, CPU_LBRANCH,	CPU_ALL, },
-
-	{ 0x00000200, 0x0000020F, CPU_MTRR,	CPU_ALL, },
-	{ 0x00000250, 0x00000250, CPU_MTRR,	CPU_ALL, },
-	{ 0x00000258, 0x00000259, CPU_MTRR,	CPU_ALL, },
-	{ 0x00000268, 0x0000026F, CPU_MTRR,	CPU_ALL, },
-	{ 0x00000277, 0x00000277, CPU_PAT,	CPU_ALL, },
-	{ 0x000002FF, 0x000002FF, CPU_MTRR,	CPU_ALL, },
-
-	{ 0x00000400, 0x00000417, CPU_MC,	CPU_ALL, },
-
-	{ 0xC0000080, 0xC0000080, CPU_FEATURES,	CPU_ALL, },
-	{ 0xC0000081, 0xC0000084, CPU_CALL,	CPU_ALL, },
-	{ 0xC0000100, 0xC0000102, CPU_BASE,	CPU_ALL, },
-	{ 0xC0000103, 0xC0000103, CPU_TIME,	CPU_ALL, },
-
-	{ 0xC0000408, 0xC000040A, CPU_MC,	CPU_ALL, },
-
-	{ 0xc0010000, 0xc0010007, CPU_PMC,	CPU_ALL, },
-	{ 0xc0010010, 0xc0010010, CPU_MTRR,	CPU_ALL, },
-	{ 0xc0010016, 0xc001001A, CPU_MTRR,	CPU_ALL, },
-	{ 0xc001001D, 0xc001001D, CPU_MTRR,	CPU_ALL, },
-	{ 0xc0010030, 0xc0010035, CPU_BIOS,	CPU_ALL, },
-	{ 0xc0010056, 0xc0010056, CPU_SMM,	CPU_ALL, },
-	{ 0xc0010061, 0xc0010063, CPU_SMM,	CPU_ALL, },
-	{ 0xc0010074, 0xc0010074, CPU_MC,	CPU_ALL, },
-	{ 0xc0010111, 0xc0010113, CPU_SMM,	CPU_ALL, },
-	{ 0xc0010114, 0xc0010118, CPU_SVM,	CPU_ALL, },
-	{ 0xc0010119, 0xc001011A, CPU_SMM,	CPU_ALL, },
-	{ 0xc0010140, 0xc0010141, CPU_OSVM,	CPU_ALL, },
-	{ 0xc0010156, 0xc0010156, CPU_SMM,	CPU_ALL, },
-};
-
-
-static int get_cpu_modelflag(unsigned cpu)
-{
-	int flag;
-
-	switch (per_cpu(cpu_model, cpu)) {
-	/* Intel */
-	case 0x0501:
-	case 0x0502:
-	case 0x0504:
-		flag = CPU_INTEL_PENTIUM;
-		break;
-	case 0x0601:
-	case 0x0603:
-	case 0x0605:
-	case 0x0607:
-	case 0x0608:
-	case 0x060A:
-	case 0x060B:
-		flag = CPU_INTEL_P6;
-		break;
-	case 0x0609:
-	case 0x060D:
-		flag = CPU_INTEL_PENTIUM_M;
-		break;
-	case 0x060E:
-		flag = CPU_INTEL_CORE;
-		break;
-	case 0x060F:
-	case 0x0617:
-		flag = CPU_INTEL_CORE2;
-		break;
-	case 0x061C:
-		flag = CPU_INTEL_ATOM;
-		break;
-	case 0x0F00:
-	case 0x0F01:
-	case 0x0F02:
-	case 0x0F03:
-	case 0x0F04:
-		flag = CPU_INTEL_XEON_P4;
-		break;
-	case 0x0F06:
-		flag = CPU_INTEL_XEON_MP;
-		break;
-	default:
-		flag = CPU_NONE;
-		break;
-	}
-
-	return flag;
-}
-
-static int get_cpu_range_count(unsigned cpu)
-{
-	int index;
-
-	switch (per_cpu(cpu_model, cpu) >> 16) {
-	case X86_VENDOR_INTEL:
-		index = ARRAY_SIZE(cpu_intel_range);
-		break;
-	case X86_VENDOR_AMD:
-		index = ARRAY_SIZE(cpu_amd_range);
-		break;
-	default:
-		index = 0;
-		break;
-	}
-
-	return index;
-}
-
-static int is_typeflag_valid(unsigned cpu, unsigned flag)
-{
-	unsigned vendor, modelflag;
-	int i, index;
-
-	/* Standard Registers should be always valid */
-	if (flag >= CPU_TSS)
-		return 1;
-
-	modelflag = per_cpu(cpu_modelflag, cpu);
-	vendor = per_cpu(cpu_model, cpu) >> 16;
-	index = get_cpu_range_count(cpu);
-
-	for (i = 0; i < index; i++) {
-		switch (vendor) {
-		case X86_VENDOR_INTEL:
-			if ((cpu_intel_range[i].model & modelflag) &&
-			    (cpu_intel_range[i].flag & flag))
-				return 1;
-			break;
-		case X86_VENDOR_AMD:
-			if (cpu_amd_range[i].flag & flag)
-				return 1;
-			break;
-		}
-	}
-
-	/* Invalid */
-	return 0;
-}
-
-static unsigned get_cpu_range(unsigned cpu, unsigned *min, unsigned *max,
-			      int index, unsigned flag)
-{
-	unsigned modelflag;
-
-	modelflag = per_cpu(cpu_modelflag, cpu);
-	*max = 0;
-	switch (per_cpu(cpu_model, cpu) >> 16) {
-	case X86_VENDOR_INTEL:
-		if ((cpu_intel_range[index].model & modelflag) &&
-		    (cpu_intel_range[index].flag & flag)) {
-			*min = cpu_intel_range[index].min;
-			*max = cpu_intel_range[index].max;
-		}
-		break;
-	case X86_VENDOR_AMD:
-		if (cpu_amd_range[index].flag & flag) {
-			*min = cpu_amd_range[index].min;
-			*max = cpu_amd_range[index].max;
-		}
-		break;
-	}
-
-	return *max;
-}
-
-/* This function can also be called with seq = NULL for printk */
-static void print_cpu_data(struct seq_file *seq, unsigned type,
-			   u32 low, u32 high)
-{
-	struct cpu_private *priv;
-	u64 val = high;
-
-	if (seq) {
-		priv = seq->private;
-		if (priv->file) {
-			val = (val << 32) | low;
-			seq_printf(seq, "0x%llx\n", val);
-		} else
-			seq_printf(seq, " %08x: %08x_%08x\n",
-				   type, high, low);
-	} else
-		printk(KERN_INFO " %08x: %08x_%08x\n", type, high, low);
-}
-
-/* This function can also be called with seq = NULL for printk */
-static void print_msr(struct seq_file *seq, unsigned cpu, unsigned flag)
-{
-	unsigned msr, msr_min, msr_max;
-	struct cpu_private *priv;
-	u32 low, high;
-	int i, range;
-
-	if (seq) {
-		priv = seq->private;
-		if (priv->file) {
-			if (!rdmsr_safe_on_cpu(priv->cpu, priv->reg,
-					       &low, &high))
-				print_cpu_data(seq, priv->reg, low, high);
-			return;
-		}
-	}
-
-	range = get_cpu_range_count(cpu);
-
-	for (i = 0; i < range; i++) {
-		if (!get_cpu_range(cpu, &msr_min, &msr_max, i, flag))
-			continue;
-
-		for (msr = msr_min; msr <= msr_max; msr++) {
-			if (rdmsr_safe_on_cpu(cpu, msr, &low, &high))
-				continue;
-			print_cpu_data(seq, msr, low, high);
-		}
-	}
-}
-
-static void print_tss(void *arg)
-{
-	struct pt_regs *regs = task_pt_regs(current);
-	struct seq_file *seq = arg;
-	unsigned int seg;
-
-	seq_printf(seq, " RAX\t: %016lx\n", regs->ax);
-	seq_printf(seq, " RBX\t: %016lx\n", regs->bx);
-	seq_printf(seq, " RCX\t: %016lx\n", regs->cx);
-	seq_printf(seq, " RDX\t: %016lx\n", regs->dx);
-
-	seq_printf(seq, " RSI\t: %016lx\n", regs->si);
-	seq_printf(seq, " RDI\t: %016lx\n", regs->di);
-	seq_printf(seq, " RBP\t: %016lx\n", regs->bp);
-	seq_printf(seq, " ESP\t: %016lx\n", regs->sp);
-
-#ifdef CONFIG_X86_64
-	seq_printf(seq, " R08\t: %016lx\n", regs->r8);
-	seq_printf(seq, " R09\t: %016lx\n", regs->r9);
-	seq_printf(seq, " R10\t: %016lx\n", regs->r10);
-	seq_printf(seq, " R11\t: %016lx\n", regs->r11);
-	seq_printf(seq, " R12\t: %016lx\n", regs->r12);
-	seq_printf(seq, " R13\t: %016lx\n", regs->r13);
-	seq_printf(seq, " R14\t: %016lx\n", regs->r14);
-	seq_printf(seq, " R15\t: %016lx\n", regs->r15);
-#endif
-
-	asm("movl %%cs,%0" : "=r" (seg));
-	seq_printf(seq, " CS\t: %04x\n", seg);
-	asm("movl %%ds,%0" : "=r" (seg));
-	seq_printf(seq, " DS\t: %04x\n", seg);
-	seq_printf(seq, " SS\t: %04lx\n", regs->ss & 0xffff);
-	asm("movl %%es,%0" : "=r" (seg));
-	seq_printf(seq, " ES\t: %04x\n", seg);
-	asm("movl %%fs,%0" : "=r" (seg));
-	seq_printf(seq, " FS\t: %04x\n", seg);
-	asm("movl %%gs,%0" : "=r" (seg));
-	seq_printf(seq, " GS\t: %04x\n", seg);
-
-	seq_printf(seq, " EFLAGS\t: %016lx\n", regs->flags);
-
-	seq_printf(seq, " EIP\t: %016lx\n", regs->ip);
-}
-
-static void print_cr(void *arg)
-{
-	struct seq_file *seq = arg;
-
-	seq_printf(seq, " cr0\t: %016lx\n", read_cr0());
-	seq_printf(seq, " cr2\t: %016lx\n", read_cr2());
-	seq_printf(seq, " cr3\t: %016lx\n", read_cr3());
-	seq_printf(seq, " cr4\t: %016lx\n", read_cr4_safe());
-#ifdef CONFIG_X86_64
-	seq_printf(seq, " cr8\t: %016lx\n", read_cr8());
-#endif
-}
-
-static void print_desc_ptr(char *str, struct seq_file *seq, struct desc_ptr dt)
-{
-	seq_printf(seq, " %s\t: %016llx\n", str, (u64)(dt.address | dt.size));
-}
-
-static void print_dt(void *seq)
-{
-	struct desc_ptr dt;
-	unsigned long ldt;
-
-	/* IDT */
-	store_idt((struct desc_ptr *)&dt);
-	print_desc_ptr("IDT", seq, dt);
-
-	/* GDT */
-	store_gdt((struct desc_ptr *)&dt);
-	print_desc_ptr("GDT", seq, dt);
-
-	/* LDT */
-	store_ldt(ldt);
-	seq_printf(seq, " LDT\t: %016lx\n", ldt);
-
-	/* TR */
-	store_tr(ldt);
-	seq_printf(seq, " TR\t: %016lx\n", ldt);
-}
-
-static void print_dr(void *arg)
-{
-	struct seq_file *seq = arg;
-	unsigned long dr;
-	int i;
-
-	for (i = 0; i < 8; i++) {
-		/* Ignore db4, db5 */
-		if ((i == 4) || (i == 5))
-			continue;
-		get_debugreg(dr, i);
-		seq_printf(seq, " dr%d\t: %016lx\n", i, dr);
-	}
-
-	seq_printf(seq, "\n MSR\t:\n");
-}
-
-static void print_apic(void *arg)
-{
-	struct seq_file *seq = arg;
-
-#ifdef CONFIG_X86_LOCAL_APIC
-	seq_printf(seq, " LAPIC\t:\n");
-	seq_printf(seq, " ID\t\t: %08x\n",  apic_read(APIC_ID) >> 24);
-	seq_printf(seq, " LVR\t\t: %08x\n", apic_read(APIC_LVR));
-	seq_printf(seq, " TASKPRI\t: %08x\n", apic_read(APIC_TASKPRI));
-	seq_printf(seq, " ARBPRI\t\t: %08x\n", apic_read(APIC_ARBPRI));
-	seq_printf(seq, " PROCPRI\t: %08x\n", apic_read(APIC_PROCPRI));
-	seq_printf(seq, " LDR\t\t: %08x\n", apic_read(APIC_LDR));
-	seq_printf(seq, " DFR\t\t: %08x\n", apic_read(APIC_DFR));
-	seq_printf(seq, " SPIV\t\t: %08x\n", apic_read(APIC_SPIV));
-	seq_printf(seq, " ISR\t\t: %08x\n", apic_read(APIC_ISR));
-	seq_printf(seq, " ESR\t\t: %08x\n", apic_read(APIC_ESR));
-	seq_printf(seq, " ICR\t\t: %08x\n", apic_read(APIC_ICR));
-	seq_printf(seq, " ICR2\t\t: %08x\n", apic_read(APIC_ICR2));
-	seq_printf(seq, " LVTT\t\t: %08x\n", apic_read(APIC_LVTT));
-	seq_printf(seq, " LVTTHMR\t: %08x\n", apic_read(APIC_LVTTHMR));
-	seq_printf(seq, " LVTPC\t\t: %08x\n", apic_read(APIC_LVTPC));
-	seq_printf(seq, " LVT0\t\t: %08x\n", apic_read(APIC_LVT0));
-	seq_printf(seq, " LVT1\t\t: %08x\n", apic_read(APIC_LVT1));
-	seq_printf(seq, " LVTERR\t\t: %08x\n", apic_read(APIC_LVTERR));
-	seq_printf(seq, " TMICT\t\t: %08x\n", apic_read(APIC_TMICT));
-	seq_printf(seq, " TMCCT\t\t: %08x\n", apic_read(APIC_TMCCT));
-	seq_printf(seq, " TDCR\t\t: %08x\n", apic_read(APIC_TDCR));
-#endif /* CONFIG_X86_LOCAL_APIC */
-
-	seq_printf(seq, "\n MSR\t:\n");
-}
-
-static int cpu_seq_show(struct seq_file *seq, void *v)
-{
-	struct cpu_private *priv = seq->private;
-
if (priv == NULL) - return -EINVAL; - - switch (cpu_base[priv->type].flag) { - case CPU_TSS: - smp_call_function_single(priv->cpu, print_tss, seq, 1); - break; - case CPU_CR: - smp_call_function_single(priv->cpu, print_cr, seq, 1); - break; - case CPU_DT: - smp_call_function_single(priv->cpu, print_dt, seq, 1); - break; - case CPU_DEBUG: - if (priv->file == CPU_INDEX_BIT) - smp_call_function_single(priv->cpu, print_dr, seq, 1); - print_msr(seq, priv->cpu, cpu_base[priv->type].flag); - break; - case CPU_APIC: - if (priv->file == CPU_INDEX_BIT) - smp_call_function_single(priv->cpu, print_apic, seq, 1); - print_msr(seq, priv->cpu, cpu_base[priv->type].flag); - break; - - default: - print_msr(seq, priv->cpu, cpu_base[priv->type].flag); - break; - } - seq_printf(seq, "\n"); - - return 0; -} - -static void *cpu_seq_start(struct seq_file *seq, loff_t *pos) -{ - if (*pos == 0) /* One time is enough ;-) */ - return seq; - - return NULL; -} - -static void *cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos) -{ - (*pos)++; - - return cpu_seq_start(seq, pos); -} - -static void cpu_seq_stop(struct seq_file *seq, void *v) -{ -} - -static const struct seq_operations cpu_seq_ops = { - .start = cpu_seq_start, - .next = cpu_seq_next, - .stop = cpu_seq_stop, - .show = cpu_seq_show, -}; - -static int cpu_seq_open(struct inode *inode, struct file *file) -{ - struct cpu_private *priv = inode->i_private; - struct seq_file *seq; - int err; - - err = seq_open(file, &cpu_seq_ops); - if (!err) { - seq = file->private_data; - seq->private = priv; - } - - return err; -} - -static const struct file_operations cpu_fops = { - .open = cpu_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - -static int cpu_create_file(unsigned cpu, unsigned type, unsigned reg, - unsigned file, struct dentry *dentry) -{ - struct cpu_private *priv = NULL; - - /* Already intialized */ - if (file == CPU_INDEX_BIT) - if (per_cpu(cpu_arr[type].init, cpu)) - return 0; - - priv = kzalloc(sizeof(*priv), GFP_KERNEL); - if (priv == NULL) - return -ENOMEM; - - priv->cpu = cpu; - priv->type = type; - priv->reg = reg; - priv->file = file; - mutex_lock(&cpu_debug_lock); - per_cpu(priv_arr[type], cpu) = priv; - per_cpu(cpu_priv_count, cpu)++; - mutex_unlock(&cpu_debug_lock); - - if (file) - debugfs_create_file(cpu_file[file].name, S_IRUGO, - dentry, (void *)priv, &cpu_fops); - else { - debugfs_create_file(cpu_base[type].name, S_IRUGO, - per_cpu(cpu_arr[type].dentry, cpu), - (void *)priv, &cpu_fops); - mutex_lock(&cpu_debug_lock); - per_cpu(cpu_arr[type].init, cpu) = 1; - mutex_unlock(&cpu_debug_lock); - } - - return 0; -} - -static int cpu_init_regfiles(unsigned cpu, unsigned int type, unsigned reg, - struct dentry *dentry) -{ - unsigned file; - int err = 0; - - for (file = 0; file < ARRAY_SIZE(cpu_file); file++) { - err = cpu_create_file(cpu, type, reg, file, dentry); - if (err) - return err; - } - - return err; -} - -static int cpu_init_msr(unsigned cpu, unsigned type, struct dentry *dentry) -{ - struct dentry *cpu_dentry = NULL; - unsigned reg, reg_min, reg_max; - int i, range, err = 0; - char reg_dir[12]; - u32 low, high; - - range = get_cpu_range_count(cpu); - - for (i = 0; i < range; i++) { - if (!get_cpu_range(cpu, &reg_min, &reg_max, i, - cpu_base[type].flag)) - continue; - - for (reg = reg_min; reg <= reg_max; reg++) { - if (rdmsr_safe_on_cpu(cpu, reg, &low, &high)) - continue; - - sprintf(reg_dir, "0x%x", reg); - cpu_dentry = debugfs_create_dir(reg_dir, dentry); - err = cpu_init_regfiles(cpu, type, reg,
cpu_dentry); - if (err) - return err; - } - } - - return err; -} - -static int cpu_init_allreg(unsigned cpu, struct dentry *dentry) -{ - struct dentry *cpu_dentry = NULL; - unsigned type; - int err = 0; - - for (type = 0; type < ARRAY_SIZE(cpu_base) - 1; type++) { - if (!is_typeflag_valid(cpu, cpu_base[type].flag)) - continue; - cpu_dentry = debugfs_create_dir(cpu_base[type].name, dentry); - per_cpu(cpu_arr[type].dentry, cpu) = cpu_dentry; - - if (type < CPU_TSS_BIT) - err = cpu_init_msr(cpu, type, cpu_dentry); - else - err = cpu_create_file(cpu, type, 0, CPU_INDEX_BIT, - cpu_dentry); - if (err) - return err; - } - - return err; -} - -static int cpu_init_cpu(void) -{ - struct dentry *cpu_dentry = NULL; - struct cpuinfo_x86 *cpui; - char cpu_dir[12]; - unsigned cpu; - int err = 0; - - for (cpu = 0; cpu < nr_cpu_ids; cpu++) { - cpui = &cpu_data(cpu); - if (!cpu_has(cpui, X86_FEATURE_MSR)) - continue; - per_cpu(cpu_model, cpu) = ((cpui->x86_vendor << 16) | - (cpui->x86 << 8) | - (cpui->x86_model)); - per_cpu(cpu_modelflag, cpu) = get_cpu_modelflag(cpu); - - sprintf(cpu_dir, "cpu%d", cpu); - cpu_dentry = debugfs_create_dir(cpu_dir, cpu_debugfs_dir); - err = cpu_init_allreg(cpu, cpu_dentry); - - pr_info("cpu%d(%d) debug files %d\n", - cpu, nr_cpu_ids, per_cpu(cpu_priv_count, cpu)); - if (per_cpu(cpu_priv_count, cpu) > MAX_CPU_FILES) { - pr_err("Register files count %d exceeds limit %d\n", - per_cpu(cpu_priv_count, cpu), MAX_CPU_FILES); - per_cpu(cpu_priv_count, cpu) = MAX_CPU_FILES; - err = -ENFILE; - } - if (err) - return err; - } - - return err; -} - -static int __init cpu_debug_init(void) -{ - cpu_debugfs_dir = debugfs_create_dir("cpu", arch_debugfs_dir); - - return cpu_init_cpu(); -} - -static void __exit cpu_debug_exit(void) -{ - int i, cpu; - - if (cpu_debugfs_dir) - debugfs_remove_recursive(cpu_debugfs_dir); - - for (cpu = 0; cpu < nr_cpu_ids; cpu++) - for (i = 0; i < per_cpu(cpu_priv_count, cpu); i++) - kfree(per_cpu(priv_arr[i], cpu)); -} - -module_init(cpu_debug_init); -module_exit(cpu_debug_exit); - -MODULE_AUTHOR("Jaswinder Singh Rajput"); -MODULE_DESCRIPTION("CPU Debug module"); -MODULE_LICENSE("GPL"); diff --git a/trunk/arch/x86/kernel/cpu/intel.c b/trunk/arch/x86/kernel/cpu/intel.c index 191117f1ad51..25c559ba8d54 100644 --- a/trunk/arch/x86/kernel/cpu/intel.c +++ b/trunk/arch/x86/kernel/cpu/intel.c @@ -13,7 +13,6 @@ #include #include #include -#include #ifdef CONFIG_X86_64 #include @@ -111,28 +110,6 @@ static void __cpuinit trap_init_f00f_bug(void) } #endif -static void __cpuinit intel_smp_check(struct cpuinfo_x86 *c) -{ -#ifdef CONFIG_SMP - /* calling is from identify_secondary_cpu() ? 
*/ - if (c->cpu_index == boot_cpu_id) - return; - - /* - * Mask B, Pentium, but not Pentium MMX - */ - if (c->x86 == 5 && - c->x86_mask >= 1 && c->x86_mask <= 4 && - c->x86_model <= 3) { - /* - * Remember we have B step Pentia with bugs - */ - WARN_ONCE(1, "WARNING: SMP operation may be unreliable" - "with B stepping processors.\n"); - } -#endif -} - static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c) { unsigned long lo, hi; @@ -209,8 +186,6 @@ static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c) #ifdef CONFIG_X86_NUMAQ numaq_tsc_disable(); #endif - - intel_smp_check(c); } #else static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c) diff --git a/trunk/arch/x86/kernel/cpu/mcheck/Makefile b/trunk/arch/x86/kernel/cpu/mcheck/Makefile index b2f89829bbe8..d7d2323bbb69 100644 --- a/trunk/arch/x86/kernel/cpu/mcheck/Makefile +++ b/trunk/arch/x86/kernel/cpu/mcheck/Makefile @@ -4,4 +4,3 @@ obj-$(CONFIG_X86_32) += k7.o p4.o p5.o p6.o winchip.o obj-$(CONFIG_X86_MCE_INTEL) += mce_intel_64.o obj-$(CONFIG_X86_MCE_AMD) += mce_amd_64.o obj-$(CONFIG_X86_MCE_NONFATAL) += non-fatal.o -obj-$(CONFIG_X86_MCE_THRESHOLD) += threshold.o diff --git a/trunk/arch/x86/kernel/cpu/mcheck/mce_32.c b/trunk/arch/x86/kernel/cpu/mcheck/mce_32.c index 3552119b091d..dfaebce3633e 100644 --- a/trunk/arch/x86/kernel/cpu/mcheck/mce_32.c +++ b/trunk/arch/x86/kernel/cpu/mcheck/mce_32.c @@ -60,6 +60,20 @@ void mcheck_init(struct cpuinfo_x86 *c) } } +static unsigned long old_cr4 __initdata; + +void __init stop_mce(void) +{ + old_cr4 = read_cr4(); + clear_in_cr4(X86_CR4_MCE); +} + +void __init restart_mce(void) +{ + if (old_cr4 & X86_CR4_MCE) + set_in_cr4(X86_CR4_MCE); +} + static int __init mcheck_disable(char *str) { mce_disabled = 1; diff --git a/trunk/arch/x86/kernel/cpu/mcheck/mce_64.c b/trunk/arch/x86/kernel/cpu/mcheck/mce_64.c index ca14604611ec..fe79985ce0f2 100644 --- a/trunk/arch/x86/kernel/cpu/mcheck/mce_64.c +++ b/trunk/arch/x86/kernel/cpu/mcheck/mce_64.c @@ -3,8 +3,6 @@ * K8 parts Copyright 2002,2003 Andi Kleen, SuSE Labs. * Rest from unknown author(s). * 2004 Andi Kleen. Rewrote most of it. - * Copyright 2008 Intel Corporation - * Author: Andi Kleen */ #include @@ -26,9 +24,6 @@ #include #include #include -#include -#include -#include #include #include #include @@ -37,6 +32,7 @@ #include #define MISC_MCELOG_MINOR 227 +#define NR_SYSFS_BANKS 6 atomic_t mce_entry; @@ -51,7 +47,7 @@ static int mce_dont_init; */ static int tolerant = 1; static int banks; -static u64 *bank; +static unsigned long bank[NR_SYSFS_BANKS] = { [0 ... NR_SYSFS_BANKS-1] = ~0UL }; static unsigned long notify_user; static int rip_msr; static int mce_bootlog = -1; @@ -62,19 +58,6 @@ static char *trigger_argv[2] = { trigger, NULL }; static DECLARE_WAIT_QUEUE_HEAD(mce_wait); -/* MCA banks polled by the period polling timer for corrected events */ -DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = { - [0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL -}; - -/* Do initial initialization of a struct mce */ -void mce_setup(struct mce *m) -{ - memset(m, 0, sizeof(struct mce)); - m->cpu = smp_processor_id(); - rdtscll(m->tsc); -} - /* * Lockless MCE logging infrastructure. * This avoids deadlocks on printk locks without having to break locks. 
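The family/model/stepping test in intel_smp_check() above (the same check reappears in smpboot.c further down in this diff) reduces to a one-line predicate. A standalone restatement for reference, runnable in userspace:

#include <stdio.h>

/* Family-5 Pentiums at steppings 1-4, models 0-3, are the "B step"
 * parts flagged as unreliable for SMP; model 4 (Pentium MMX) is exempt. */
static int is_b_step_pentium(unsigned family, unsigned model, unsigned stepping)
{
        return family == 5 && stepping >= 1 && stepping <= 4 && model <= 3;
}

int main(void)
{
        printf("%d\n", is_b_step_pentium(5, 2, 1));     /* 1: affected part */
        printf("%d\n", is_b_step_pentium(5, 4, 2));     /* 0: Pentium MMX */
        return 0;
}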
Also @@ -136,11 +119,11 @@ static void print_mce(struct mce *m) print_symbol("{%s}", m->ip); printk("\n"); } - printk(KERN_EMERG "TSC %llx ", m->tsc); + printk(KERN_EMERG "TSC %Lx ", m->tsc); if (m->addr) - printk("ADDR %llx ", m->addr); + printk("ADDR %Lx ", m->addr); if (m->misc) - printk("MISC %llx ", m->misc); + printk("MISC %Lx ", m->misc); printk("\n"); printk(KERN_EMERG "This is not a software problem!\n"); printk(KERN_EMERG "Run through mcelog --ascii to decode " @@ -166,10 +149,8 @@ static void mce_panic(char *msg, struct mce *backup, unsigned long start) panic(msg); } -int mce_available(struct cpuinfo_x86 *c) +static int mce_available(struct cpuinfo_x86 *c) { - if (mce_dont_init) - return 0; return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA); } @@ -191,77 +172,7 @@ static inline void mce_get_rip(struct mce *m, struct pt_regs *regs) } /* - * Poll for corrected events or events that happened before reset. - * Those are just logged through /dev/mcelog. - * - * This is executed in standard interrupt context. - */ -void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) -{ - struct mce m; - int i; - - mce_setup(&m); - - rdmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus); - for (i = 0; i < banks; i++) { - if (!bank[i] || !test_bit(i, *b)) - continue; - - m.misc = 0; - m.addr = 0; - m.bank = i; - m.tsc = 0; - - barrier(); - rdmsrl(MSR_IA32_MC0_STATUS + i*4, m.status); - if (!(m.status & MCI_STATUS_VAL)) - continue; - - /* - * Uncorrected events are handled by the exception handler - * when it is enabled. But when the exception is disabled log - * everything. - * - * TBD do the same check for MCI_STATUS_EN here? - */ - if ((m.status & MCI_STATUS_UC) && !(flags & MCP_UC)) - continue; - - if (m.status & MCI_STATUS_MISCV) - rdmsrl(MSR_IA32_MC0_MISC + i*4, m.misc); - if (m.status & MCI_STATUS_ADDRV) - rdmsrl(MSR_IA32_MC0_ADDR + i*4, m.addr); - - if (!(flags & MCP_TIMESTAMP)) - m.tsc = 0; - /* - * Don't get the IP here because it's unlikely to - * have anything to do with the actual error location. - */ - - mce_log(&m); - add_taint(TAINT_MACHINE_CHECK); - - /* - * Clear state for this bank. - */ - wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); - } - - /* - * Don't clear MCG_STATUS here because it's only defined for - * exceptions. - */ -} - -/* - * The actual machine check handler. This only handles real - * exceptions when something got corrupted coming in through int 18. - * - * This is executed in NMI context not subject to normal locking rules. This - * implies that most kernel services cannot be safely used. Don't even - * think about putting a printk in there! + * The actual machine check handler */ void do_machine_check(struct pt_regs * regs, long error_code) { @@ -279,18 +190,17 @@ void do_machine_check(struct pt_regs * regs, long error_code) * error. 
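machine_check_poll() above indexes bank registers as MSR_IA32_MC0_STATUS + i*4 because each machine-check bank owns a block of four consecutive MSRs (CTL, STATUS, ADDR, MISC). A standalone illustration of that addressing, using the architectural base values:

#include <stdio.h>

#define MSR_IA32_MC0_CTL    0x400
#define MSR_IA32_MC0_STATUS 0x401
#define MSR_IA32_MC0_ADDR   0x402
#define MSR_IA32_MC0_MISC   0x403

int main(void)
{
        int i;

        /* each bank's block is 4 MSRs wide, hence the i*4 stride */
        for (i = 0; i < 3; i++)
                printf("bank %d: status MSR 0x%x, addr MSR 0x%x\n",
                       i, MSR_IA32_MC0_STATUS + i * 4,
                       MSR_IA32_MC0_ADDR + i * 4);
        return 0;
}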
*/ int kill_it = 0; - DECLARE_BITMAP(toclear, MAX_NR_BANKS); atomic_inc(&mce_entry); - if (notify_die(DIE_NMI, "machine check", regs, error_code, + if ((regs + && notify_die(DIE_NMI, "machine check", regs, error_code, 18, SIGKILL) == NOTIFY_STOP) - goto out2; - if (!banks) + || !banks) goto out2; - mce_setup(&m); - + memset(&m, 0, sizeof(struct mce)); + m.cpu = smp_processor_id(); rdmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus); /* if the restart IP is not valid, we're done for */ if (!(m.mcgstatus & MCG_STATUS_RIPV)) @@ -300,32 +210,18 @@ void do_machine_check(struct pt_regs * regs, long error_code) barrier(); for (i = 0; i < banks; i++) { - __clear_bit(i, toclear); - if (!bank[i]) + if (i < NR_SYSFS_BANKS && !bank[i]) continue; m.misc = 0; m.addr = 0; m.bank = i; + m.tsc = 0; rdmsrl(MSR_IA32_MC0_STATUS + i*4, m.status); if ((m.status & MCI_STATUS_VAL) == 0) continue; - /* - * Non uncorrected errors are handled by machine_check_poll - * Leave them alone. - */ - if ((m.status & MCI_STATUS_UC) == 0) - continue; - - /* - * Set taint even when machine check was not enabled. - */ - add_taint(TAINT_MACHINE_CHECK); - - __set_bit(i, toclear); - if (m.status & MCI_STATUS_EN) { /* if PCC was set, there's no way out */ no_way_out |= !!(m.status & MCI_STATUS_PCC); @@ -339,12 +235,6 @@ void do_machine_check(struct pt_regs * regs, long error_code) no_way_out = 1; kill_it = 1; } - } else { - /* - * Machine check event was not enabled. Clear, but - * ignore. - */ - continue; } if (m.status & MCI_STATUS_MISCV) @@ -353,7 +243,10 @@ void do_machine_check(struct pt_regs * regs, long error_code) rdmsrl(MSR_IA32_MC0_ADDR + i*4, m.addr); mce_get_rip(&m, regs); - mce_log(&m); + if (error_code >= 0) + rdtscll(m.tsc); + if (error_code != -2) + mce_log(&m); /* Did this bank cause the exception? */ /* Assume that the bank with uncorrectable errors did it, @@ -362,8 +255,14 @@ void do_machine_check(struct pt_regs * regs, long error_code) panicm = m; panicm_found = 1; } + + add_taint(TAINT_MACHINE_CHECK); } + /* Never do anything final in the polling timer */ + if (!regs) + goto out; + /* If we didn't find an uncorrectable error, pick the last one (shouldn't happen, just being safe). */ if (!panicm_found) @@ -410,11 +309,10 @@ void do_machine_check(struct pt_regs * regs, long error_code) /* notify userspace ASAP */ set_thread_flag(TIF_MCE_NOTIFY); + out: /* the last thing we do is clear state */ - for (i = 0; i < banks; i++) { - if (test_bit(i, toclear)) - wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); - } + for (i = 0; i < banks; i++) + wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); wrmsrl(MSR_IA32_MCG_STATUS, 0); out2: atomic_dec(&mce_entry); @@ -434,13 +332,15 @@ void do_machine_check(struct pt_regs * regs, long error_code) * and historically has been the register value of the * MSR_IA32_THERMAL_STATUS (Intel) msr. 
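The bank loop above keys off individual IA32_MCi_STATUS bits (VAL, UC, EN, MISCV, ADDRV, PCC). A runnable decoder over the architectural bit positions, with a made-up sample value:

#include <stdio.h>
#include <stdint.h>

#define MCI_STATUS_VAL   (1ULL << 63)  /* register holds valid data      */
#define MCI_STATUS_OVER  (1ULL << 62)  /* an earlier error was lost      */
#define MCI_STATUS_UC    (1ULL << 61)  /* error was not corrected        */
#define MCI_STATUS_EN    (1ULL << 60)  /* reporting was enabled          */
#define MCI_STATUS_MISCV (1ULL << 59)  /* MCi_MISC holds extra info      */
#define MCI_STATUS_ADDRV (1ULL << 58)  /* MCi_ADDR holds a valid address */
#define MCI_STATUS_PCC   (1ULL << 57)  /* processor context corrupt      */

int main(void)
{
        uint64_t status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_PCC;

        if (!(status & MCI_STATUS_VAL))
                return 0;               /* nothing logged in this bank */
        if (status & MCI_STATUS_PCC)
                puts("PCC set: no way out, as the handler puts it");
        if (status & MCI_STATUS_ADDRV)
                puts("would read MCi_ADDR next");
        return 0;
}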
*/ -void mce_log_therm_throt_event(__u64 status) +void mce_log_therm_throt_event(unsigned int cpu, __u64 status) { struct mce m; - mce_setup(&m); + memset(&m, 0, sizeof(m)); + m.cpu = cpu; m.bank = MCE_THERMAL_BANK; m.status = status; + rdtscll(m.tsc); mce_log(&m); } #endif /* CONFIG_X86_MCE_INTEL */ @@ -453,18 +353,18 @@ void mce_log_therm_throt_event(__u64 status) static int check_interval = 5 * 60; /* 5 minutes */ static int next_interval; /* in jiffies */ -static void mcheck_timer(unsigned long); -static DEFINE_PER_CPU(struct timer_list, mce_timer); +static void mcheck_timer(struct work_struct *work); +static DECLARE_DELAYED_WORK(mcheck_work, mcheck_timer); -static void mcheck_timer(unsigned long data) +static void mcheck_check_cpu(void *info) { - struct timer_list *t = &per_cpu(mce_timer, data); - - WARN_ON(smp_processor_id() != data); - if (mce_available(&current_cpu_data)) - machine_check_poll(MCP_TIMESTAMP, - &__get_cpu_var(mce_poll_banks)); + do_machine_check(NULL, 0); +} + +static void mcheck_timer(struct work_struct *work) +{ + on_each_cpu(mcheck_check_cpu, NULL, 1); /* * Alert userspace if needed. If we logged an MCE, reduce the @@ -477,41 +377,31 @@ static void mcheck_timer(unsigned long data) (int)round_jiffies_relative(check_interval*HZ)); } - t->expires = jiffies + next_interval; - add_timer(t); -} - -static void mce_do_trigger(struct work_struct *work) -{ - call_usermodehelper(trigger, trigger_argv, NULL, UMH_NO_WAIT); + schedule_delayed_work(&mcheck_work, next_interval); } -static DECLARE_WORK(mce_trigger_work, mce_do_trigger); - /* - * Notify the user(s) about new machine check events. - * Can be called from interrupt context, but not from machine check/NMI - * context. + * This is only called from process context. This is where we do + * anything we need to alert userspace about new MCEs. This is called + * directly from the poller and also from entry.S and idle, thanks to + * TIF_MCE_NOTIFY. */ int mce_notify_user(void) { - /* Not more than two messages every minute */ - static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2); - clear_thread_flag(TIF_MCE_NOTIFY); if (test_and_clear_bit(0, &notify_user)) { - wake_up_interruptible(&mce_wait); + static unsigned long last_print; + unsigned long now = jiffies; - /* - * There is no risk of missing notifications because - * work_pending is always cleared before the function is - * executed. - */ - if (trigger[0] && !work_pending(&mce_trigger_work)) - schedule_work(&mce_trigger_work); + wake_up_interruptible(&mce_wait); + if (trigger[0]) + call_usermodehelper(trigger, trigger_argv, NULL, + UMH_NO_WAIT); - if (__ratelimit(&ratelimit)) + if (time_after_eq(now, last_print + (check_interval*HZ))) { + last_print = now; printk(KERN_INFO "Machine check events logged\n"); + } return 1; } @@ -535,78 +425,63 @@ static struct notifier_block mce_idle_notifier = { static __init int periodic_mcheck_init(void) { - idle_notifier_register(&mce_idle_notifier); - return 0; + next_interval = check_interval * HZ; + if (next_interval) + schedule_delayed_work(&mcheck_work, + round_jiffies_relative(next_interval)); + idle_notifier_register(&mce_idle_notifier); + return 0; } __initcall(periodic_mcheck_init); + /* * Initialize Machine Checks for a CPU.
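The jiffies throttle that replaces DEFINE_RATELIMIT_STATE() in mce_notify_user() above follows a common shape: a static timestamp gates how often the message may fire. A userspace sketch of the same pattern, using wall-clock seconds instead of jiffies:

#include <stdio.h>
#include <time.h>

/* Return 1 at most once per interval; the static timestamp persists
 * across calls, just like last_print in the kernel code. */
static int should_print(unsigned interval_sec)
{
        static time_t last;
        time_t now = time(NULL);

        if (now >= last + (time_t)interval_sec) {
                last = now;
                return 1;
        }
        return 0;
}

int main(void)
{
        printf("%d %d\n", should_print(60), should_print(60));  /* 1 0 */
        return 0;
}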
*/ -static int mce_cap_init(void) +static void mce_init(void *dummy) { u64 cap; - unsigned b; + int i; rdmsrl(MSR_IA32_MCG_CAP, cap); - b = cap & 0xff; - if (b > MAX_NR_BANKS) { - printk(KERN_WARNING - "MCE: Using only %u machine check banks out of %u\n", - MAX_NR_BANKS, b); - b = MAX_NR_BANKS; + banks = cap & 0xff; + if (banks > MCE_EXTENDED_BANK) { + banks = MCE_EXTENDED_BANK; + printk(KERN_INFO "MCE: warning: using only %d banks\n", + MCE_EXTENDED_BANK); } - - /* Don't support asymmetric configurations today */ - WARN_ON(banks != 0 && b != banks); - banks = b; - if (!bank) { - bank = kmalloc(banks * sizeof(u64), GFP_KERNEL); - if (!bank) - return -ENOMEM; - memset(bank, 0xff, banks * sizeof(u64)); - } - /* Use accurate RIP reporting if available. */ if ((cap & (1<<9)) && ((cap >> 16) & 0xff) >= 9) rip_msr = MSR_IA32_MCG_EIP; - return 0; -} - -static void mce_init(void *dummy) -{ - u64 cap; - int i; - mce_banks_t all_banks; - - /* - * Log the machine checks left over from the previous reset. - */ - bitmap_fill(all_banks, MAX_NR_BANKS); - machine_check_poll(MCP_UC, &all_banks); + /* Log the machine checks left over from the previous reset. + This also clears all registers */ + do_machine_check(NULL, mce_bootlog ? -1 : -2); set_in_cr4(X86_CR4_MCE); - rdmsrl(MSR_IA32_MCG_CAP, cap); if (cap & MCG_CTL_P) wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); for (i = 0; i < banks; i++) { - wrmsrl(MSR_IA32_MC0_CTL+4*i, bank[i]); + if (i < NR_SYSFS_BANKS) + wrmsrl(MSR_IA32_MC0_CTL+4*i, bank[i]); + else + wrmsrl(MSR_IA32_MC0_CTL+4*i, ~0UL); + wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); } } /* Add per CPU specific workarounds here */ -static void mce_cpu_quirks(struct cpuinfo_x86 *c) +static void __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c) { /* This should be disabled by the BIOS, but isn't always */ if (c->x86_vendor == X86_VENDOR_AMD) { - if (c->x86 == 15 && banks > 4) + if(c->x86 == 15) /* disable GART TBL walk error reporting, which trips off incorrectly with the IOMMU & 3ware & Cerberus. */ - clear_bit(10, (unsigned long *)&bank[4]); + clear_bit(10, &bank[4]); if(c->x86 <= 17 && mce_bootlog < 0) /* Lots of broken BIOS around that don't clear them by default and leave crap in there. Don't log. */ @@ -629,38 +504,20 @@ static void mce_cpu_features(struct cpuinfo_x86 *c) } } -static void mce_init_timer(void) -{ - struct timer_list *t = &__get_cpu_var(mce_timer); - - /* data race harmless because everyone sets to the same value */ - if (!next_interval) - next_interval = check_interval * HZ; - if (!next_interval) - return; - setup_timer(t, mcheck_timer, smp_processor_id()); - t->expires = round_jiffies(jiffies + next_interval); - add_timer(t); -} - /* * Called for each booted CPU to set up machine checks. * Must be called with preempt off. 
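mce_cap_init()/mce_init() above unpack IA32_MCG_CAP: bits 7:0 give the bank count, bit 8 says MCG_CTL exists, bit 9 says extended registers exist, and bits 23:16 count them (accurate RIP reporting needs at least 9). A decoder over a made-up sample value:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        uint64_t cap = 0x0000000000090306ULL;   /* invented sample value */
        unsigned banks = cap & 0xff;            /* bits 7:0  */
        int has_ctl  = !!(cap & (1ULL << 8));   /* MCG_CTL_P */
        int ext_regs = (cap & (1ULL << 9)) ?    /* MCG_EXT_P + count */
                       (int)((cap >> 16) & 0xff) : 0;

        /* rip_msr is only trusted when >= 9 extended registers exist */
        printf("banks=%u mcg_ctl=%d rip_msr=%d\n",
               banks, has_ctl, ext_regs >= 9);
        return 0;
}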
*/ void __cpuinit mcheck_init(struct cpuinfo_x86 *c) { - if (!mce_available(c)) - return; + mce_cpu_quirks(c); - if (mce_cap_init() < 0) { - mce_dont_init = 1; + if (mce_dont_init || + !mce_available(c)) return; - } - mce_cpu_quirks(c); mce_init(NULL); mce_cpu_features(c); - mce_init_timer(); } /* @@ -716,7 +573,7 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, { unsigned long *cpu_tsc; static DEFINE_MUTEX(mce_read_mutex); - unsigned prev, next; + unsigned next; char __user *buf = ubuf; int i, err; @@ -735,32 +592,25 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, } err = 0; - prev = 0; - do { - for (i = prev; i < next; i++) { - unsigned long start = jiffies; - - while (!mcelog.entry[i].finished) { - if (time_after_eq(jiffies, start + 2)) { - memset(mcelog.entry + i, 0, - sizeof(struct mce)); - goto timeout; - } - cpu_relax(); + for (i = 0; i < next; i++) { + unsigned long start = jiffies; + + while (!mcelog.entry[i].finished) { + if (time_after_eq(jiffies, start + 2)) { + memset(mcelog.entry + i,0, sizeof(struct mce)); + goto timeout; } - smp_rmb(); - err |= copy_to_user(buf, mcelog.entry + i, - sizeof(struct mce)); - buf += sizeof(struct mce); -timeout: - ; + cpu_relax(); } + smp_rmb(); + err |= copy_to_user(buf, mcelog.entry + i, sizeof(struct mce)); + buf += sizeof(struct mce); + timeout: + ; + } - memset(mcelog.entry + prev, 0, - (next - prev) * sizeof(struct mce)); - prev = next; - next = cmpxchg(&mcelog.next, prev, 0); - } while (next != prev); + memset(mcelog.entry, 0, next * sizeof(struct mce)); + mcelog.next = 0; synchronize_sched(); @@ -830,6 +680,20 @@ static struct miscdevice mce_log_device = { &mce_chrdev_ops, }; +static unsigned long old_cr4 __initdata; + +void __init stop_mce(void) +{ + old_cr4 = read_cr4(); + clear_in_cr4(X86_CR4_MCE); +} + +void __init restart_mce(void) +{ + if (old_cr4 & X86_CR4_MCE) + set_in_cr4(X86_CR4_MCE); +} + /* * Old style boot options parsing. Only for compatibility. */ @@ -839,7 +703,8 @@ static int __init mcheck_disable(char *str) return 1; } -/* mce=off disables machine check. +/* mce=off disables machine check. Note you can re-enable it later + using sysfs. mce=TOLERANCELEVEL (number, see above) mce=bootlog Log MCEs from before booting. Disabled by default on AMD. mce=nobootlog Don't log MCEs from before booting. */ @@ -863,29 +728,6 @@ __setup("mce=", mcheck_enable); * Sysfs support */ -/* - * Disable machine checks on suspend and shutdown. We can't really handle - * them later. - */ -static int mce_disable(void) -{ - int i; - - for (i = 0; i < banks; i++) - wrmsrl(MSR_IA32_MC0_CTL + i*4, 0); - return 0; -} - -static int mce_suspend(struct sys_device *dev, pm_message_t state) -{ - return mce_disable(); -} - -static int mce_shutdown(struct sys_device *dev) -{ - return mce_disable(); -} - /* On resume clear all MCE state. Don't want to see leftovers from the BIOS. Only one CPU is active at this time, the others get readded later using CPU hotplug. 
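The reader loop being removed from mce_read() above drains the log until a cmpxchg() of mcelog.next back to zero proves no producer raced in between the copy and the reset. A userspace sketch of that drain pattern, using a GCC atomic builtin in place of the kernel's cmpxchg():

#include <stdio.h>

static unsigned next_idx;       /* producers bump this atomically */

static void drain(void)
{
        unsigned prev = 0, next = next_idx;

        do {
                unsigned i;

                for (i = prev; i < next; i++)
                        printf("consume entry %u\n", i);
                prev = next;
                /* reset the index to 0 only if no new entry raced in;
                 * otherwise the returned value tells us how far to go */
                next = __sync_val_compare_and_swap(&next_idx, prev, 0);
        } while (next != prev);
}

int main(void)
{
        next_idx = 3;           /* pretend three entries were logged */
        drain();
        return 0;
}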
*/ @@ -896,24 +738,20 @@ static int mce_resume(struct sys_device *dev) return 0; } -static void mce_cpu_restart(void *data) -{ - del_timer_sync(&__get_cpu_var(mce_timer)); - if (mce_available(&current_cpu_data)) - mce_init(NULL); - mce_init_timer(); -} - /* Reinit MCEs after user configuration changes */ static void mce_restart(void) { + if (next_interval) + cancel_delayed_work(&mcheck_work); + /* Timer race is harmless here */ + on_each_cpu(mce_init, NULL, 1); next_interval = check_interval * HZ; - on_each_cpu(mce_cpu_restart, NULL, 1); + if (next_interval) + schedule_delayed_work(&mcheck_work, + round_jiffies_relative(next_interval)); } static struct sysdev_class mce_sysclass = { - .suspend = mce_suspend, - .shutdown = mce_shutdown, .resume = mce_resume, .name = "machinecheck", }; @@ -940,26 +778,16 @@ void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu) __cpuinit } \ static SYSDEV_ATTR(name, 0644, show_ ## name, set_ ## name); -static struct sysdev_attribute *bank_attrs; - -static ssize_t show_bank(struct sys_device *s, struct sysdev_attribute *attr, - char *buf) -{ - u64 b = bank[attr - bank_attrs]; - return sprintf(buf, "%llx\n", b); -} - -static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr, - const char *buf, size_t siz) -{ - char *end; - u64 new = simple_strtoull(buf, &end, 0); - if (end == buf) - return -EINVAL; - bank[attr - bank_attrs] = new; - mce_restart(); - return end-buf; -} +/* + * TBD should generate these dynamically based on number of available banks. + * Have only 6 contol banks in /sysfs until then. + */ +ACCESSOR(bank0ctl,bank[0],mce_restart()) +ACCESSOR(bank1ctl,bank[1],mce_restart()) +ACCESSOR(bank2ctl,bank[2],mce_restart()) +ACCESSOR(bank3ctl,bank[3],mce_restart()) +ACCESSOR(bank4ctl,bank[4],mce_restart()) +ACCESSOR(bank5ctl,bank[5],mce_restart()) static ssize_t show_trigger(struct sys_device *s, struct sysdev_attribute *attr, char *buf) @@ -986,6 +814,8 @@ static SYSDEV_ATTR(trigger, 0644, show_trigger, set_trigger); static SYSDEV_INT_ATTR(tolerant, 0644, tolerant); ACCESSOR(check_interval,check_interval,mce_restart()) static struct sysdev_attribute *mce_attributes[] = { + &attr_bank0ctl, &attr_bank1ctl, &attr_bank2ctl, + &attr_bank3ctl, &attr_bank4ctl, &attr_bank5ctl, &attr_tolerant.attr, &attr_check_interval, &attr_trigger, NULL }; @@ -1015,22 +845,11 @@ static __cpuinit int mce_create_device(unsigned int cpu) if (err) goto error; } - for (i = 0; i < banks; i++) { - err = sysdev_create_file(&per_cpu(device_mce, cpu), - &bank_attrs[i]); - if (err) - goto error2; - } cpu_set(cpu, mce_device_initialized); return 0; -error2: - while (--i >= 0) { - sysdev_remove_file(&per_cpu(device_mce, cpu), - &bank_attrs[i]); - } error: - while (--i >= 0) { + while (i--) { sysdev_remove_file(&per_cpu(device_mce,cpu), mce_attributes[i]); } @@ -1049,46 +868,15 @@ static __cpuinit void mce_remove_device(unsigned int cpu) for (i = 0; mce_attributes[i]; i++) sysdev_remove_file(&per_cpu(device_mce,cpu), mce_attributes[i]); - for (i = 0; i < banks; i++) - sysdev_remove_file(&per_cpu(device_mce, cpu), - &bank_attrs[i]); sysdev_unregister(&per_cpu(device_mce,cpu)); cpu_clear(cpu, mce_device_initialized); } -/* Make sure there are no machine checks on offlined CPUs.
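The show_bank()/set_bank() pair above recovers the bank number purely by pointer arithmetic: every attribute is an element of one contiguous array, so (attr - bank_attrs) is its index. A minimal standalone analogue:

#include <stdio.h>

struct attr { const char *name; };

static struct attr attrs[4];
static unsigned long banks[4] = { ~0UL, ~0UL, ~0UL, ~0UL };

/* index = element offset within the array the attribute lives in */
static unsigned long show(const struct attr *a)
{
        return banks[a - attrs];
}

int main(void)
{
        printf("bank2 ctl = %lx\n", show(&attrs[2]));
        return 0;
}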
*/ -static void mce_disable_cpu(void *h) -{ - int i; - unsigned long action = *(unsigned long *)h; - - if (!mce_available(&current_cpu_data)) - return; - if (!(action & CPU_TASKS_FROZEN)) - cmci_clear(); - for (i = 0; i < banks; i++) - wrmsrl(MSR_IA32_MC0_CTL + i*4, 0); -} - -static void mce_reenable_cpu(void *h) -{ - int i; - unsigned long action = *(unsigned long *)h; - - if (!mce_available(&current_cpu_data)) - return; - if (!(action & CPU_TASKS_FROZEN)) - cmci_reenable(); - for (i = 0; i < banks; i++) - wrmsrl(MSR_IA32_MC0_CTL + i*4, bank[i]); -} - /* Get notified when a cpu comes on/off. Be hotplug friendly. */ static int __cpuinit mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { unsigned int cpu = (unsigned long)hcpu; - struct timer_list *t = &per_cpu(mce_timer, cpu); switch (action) { case CPU_ONLINE: @@ -1103,21 +891,6 @@ static int __cpuinit mce_cpu_callback(struct notifier_block *nfb, threshold_cpu_callback(action, cpu); mce_remove_device(cpu); break; - case CPU_DOWN_PREPARE: - case CPU_DOWN_PREPARE_FROZEN: - del_timer_sync(t); - smp_call_function_single(cpu, mce_disable_cpu, &action, 1); - break; - case CPU_DOWN_FAILED: - case CPU_DOWN_FAILED_FROZEN: - t->expires = round_jiffies(jiffies + next_interval); - add_timer_on(t, cpu); - smp_call_function_single(cpu, mce_reenable_cpu, &action, 1); - break; - case CPU_POST_DEAD: - /* intentionally ignoring frozen here */ - cmci_rediscover(cpu); - break; - } return NOTIFY_OK; } @@ -1126,34 +899,6 @@ static struct notifier_block mce_cpu_notifier __cpuinitdata = { .notifier_call = mce_cpu_callback, }; -static __init int mce_init_banks(void) -{ - int i; - - bank_attrs = kzalloc(sizeof(struct sysdev_attribute) * banks, - GFP_KERNEL); - if (!bank_attrs) - return -ENOMEM; - - for (i = 0; i < banks; i++) { - struct sysdev_attribute *a = &bank_attrs[i]; - a->attr.name = kasprintf(GFP_KERNEL, "bank%d", i); - if (!a->attr.name) - goto nomem; - a->attr.mode = 0644; - a->show = show_bank; - a->store = set_bank; - } - return 0; - -nomem: - while (--i >= 0) - kfree(bank_attrs[i].attr.name); - kfree(bank_attrs); - bank_attrs = NULL; - return -ENOMEM; -} - static __init int mce_init_device(void) { int err; @@ -1161,11 +906,6 @@ static __init int mce_init_device(void) if (!mce_available(&boot_cpu_data)) return -EIO; - - err = mce_init_banks(); - if (err) - return err; - err = sysdev_class_register(&mce_sysclass); if (err) return err; diff --git a/trunk/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/trunk/arch/x86/kernel/cpu/mcheck/mce_amd_64.c index c5a32f92d07e..9817506dd469 100644 --- a/trunk/arch/x86/kernel/cpu/mcheck/mce_amd_64.c +++ b/trunk/arch/x86/kernel/cpu/mcheck/mce_amd_64.c @@ -79,8 +79,6 @@ static unsigned char shared_bank[NR_BANKS] = { static DEFINE_PER_CPU(unsigned char, bank_map); /* see which banks are on */ -static void amd_threshold_interrupt(void); - /* * CPU Initialization */ @@ -176,8 +174,6 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c) tr.reset = 0; tr.old_limit = 0; threshold_restart_bank(&tr); - - mce_threshold_vector = amd_threshold_interrupt; } } } @@ -191,13 +187,19 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c) * the interrupt goes off when error_count reaches threshold_limit. * the handler will simply log mcelog w/ software defined bank number.
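mce_init_banks() above uses the classic mid-loop unwind: on an allocation failure, walk back over everything that already succeeded before freeing the array itself. The same shape in userspace, with strdup() standing in for kasprintf():

#include <stdlib.h>
#include <string.h>

struct attr { char *name; };

/* Allocate n named attributes; on failure, free names 0..i-1 and the
 * array, mirroring the kernel's nomem: label. */
static struct attr *make_attrs(int n)
{
        struct attr *a = calloc(n, sizeof(*a));
        int i;

        if (!a)
                return NULL;
        for (i = 0; i < n; i++) {
                a[i].name = strdup("bankN");
                if (!a[i].name)
                        goto nomem;
        }
        return a;
nomem:
        while (--i >= 0)
                free(a[i].name);
        free(a);
        return NULL;
}

int main(void)
{
        struct attr *a = make_attrs(6);

        return a ? 0 : 1;       /* sketch: real code would free a later */
}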
*/ -static void amd_threshold_interrupt(void) +asmlinkage void mce_threshold_interrupt(void) { unsigned int bank, block; struct mce m; u32 low = 0, high = 0, address = 0; - mce_setup(&m); + ack_APIC_irq(); + exit_idle(); + irq_enter(); + + memset(&m, 0, sizeof(m)); + rdtscll(m.tsc); + m.cpu = smp_processor_id(); /* assume first bank caused it */ for (bank = 0; bank < NR_BANKS; ++bank) { @@ -231,8 +233,7 @@ static void amd_threshold_interrupt(void) /* Log the machine check that caused the threshold event. */ - machine_check_poll(MCP_TIMESTAMP, - &__get_cpu_var(mce_poll_banks)); + do_machine_check(NULL, 0); if (high & MASK_OVERFLOW_HI) { rdmsrl(address, m.misc); @@ -242,10 +243,13 @@ static void amd_threshold_interrupt(void) + bank * NR_BLOCKS + block; mce_log(&m); - return; + goto out; } } } +out: + inc_irq_stat(irq_threshold_count); + irq_exit(); } /* diff --git a/trunk/arch/x86/kernel/cpu/mcheck/mce_intel_64.c b/trunk/arch/x86/kernel/cpu/mcheck/mce_intel_64.c index aaa7d9730938..aa5e287c98e0 100644 --- a/trunk/arch/x86/kernel/cpu/mcheck/mce_intel_64.c +++ b/trunk/arch/x86/kernel/cpu/mcheck/mce_intel_64.c @@ -1,8 +1,6 @@ /* * Intel specific MCE features. * Copyright 2004 Zwane Mwaikambo - * Copyright (C) 2008, 2009 Intel Corporation - * Author: Andi Kleen */ #include @@ -15,7 +13,6 @@ #include #include #include -#include asmlinkage void smp_thermal_interrupt(void) { @@ -28,7 +25,7 @@ asmlinkage void smp_thermal_interrupt(void) rdmsrl(MSR_IA32_THERM_STATUS, msr_val); if (therm_throt_process(msr_val & 1)) - mce_log_therm_throt_event(msr_val); + mce_log_therm_throt_event(smp_processor_id(), msr_val); inc_irq_stat(irq_thermal_count); irq_exit(); @@ -88,209 +85,7 @@ static void intel_init_thermal(struct cpuinfo_x86 *c) return; } -/* - * Support for Intel Correct Machine Check Interrupts. This allows - * the CPU to raise an interrupt when a corrected machine check happened. - * Normally we pick those up using a regular polling timer. - * Also supports reliable discovery of shared banks. - */ - -static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned); - -/* - * cmci_discover_lock protects against parallel discovery attempts - * which could race against each other. - */ -static DEFINE_SPINLOCK(cmci_discover_lock); - -#define CMCI_THRESHOLD 1 - -static int cmci_supported(int *banks) -{ - u64 cap; - - /* - * Vendor check is not strictly needed, but the initial - * initialization is vendor keyed and this - * makes sure none of the backdoors are entered otherwise. - */ - if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) - return 0; - if (!cpu_has_apic || lapic_get_maxlvt() < 6) - return 0; - rdmsrl(MSR_IA32_MCG_CAP, cap); - *banks = min_t(unsigned, MAX_NR_BANKS, cap & 0xff); - return !!(cap & MCG_CMCI_P); -} - -/* - * The interrupt handler. This is called on every event. - * Just call the poller directly to log any events. - * This could in theory increase the threshold under high load, - * but doesn't for now. - */ -static void intel_threshold_interrupt(void) -{ - machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned)); - mce_notify_user(); -} - -static void print_update(char *type, int *hdr, int num) -{ - if (*hdr == 0) - printk(KERN_INFO "CPU %d MCA banks", smp_processor_id()); - *hdr = 1; - printk(KERN_CONT " %s:%d", type, num); -} - -/* - * Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks - * on this CPU. Use the algorithm recommended in the SDM to discover shared - * banks. 
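The software bank number amd_threshold_interrupt() logs above linearizes a (bank, block) pair past a fixed base, which a log reader can undo with div/mod. The constants below are illustrative stand-ins, not the kernel's actual values:

#include <stdio.h>

#define BASE      64    /* illustrative stand-in for the threshold base */
#define NR_BLOCKS 9

int main(void)
{
        unsigned bank = 3, block = 2;
        unsigned id = BASE + bank * NR_BLOCKS + block;

        printf("id=%u -> bank=%u block=%u\n",
               id, (id - BASE) / NR_BLOCKS, (id - BASE) % NR_BLOCKS);
        return 0;
}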
- */ -static void cmci_discover(int banks, int boot) -{ - unsigned long *owned = (void *)&__get_cpu_var(mce_banks_owned); - int hdr = 0; - int i; - - spin_lock(&cmci_discover_lock); - for (i = 0; i < banks; i++) { - u64 val; - - if (test_bit(i, owned)) - continue; - - rdmsrl(MSR_IA32_MC0_CTL2 + i, val); - - /* Already owned by someone else? */ - if (val & CMCI_EN) { - if (test_and_clear_bit(i, owned) || boot) - print_update("SHD", &hdr, i); - __clear_bit(i, __get_cpu_var(mce_poll_banks)); - continue; - } - - val |= CMCI_EN | CMCI_THRESHOLD; - wrmsrl(MSR_IA32_MC0_CTL2 + i, val); - rdmsrl(MSR_IA32_MC0_CTL2 + i, val); - - /* Did the enable bit stick? -- the bank supports CMCI */ - if (val & CMCI_EN) { - if (!test_and_set_bit(i, owned) || boot) - print_update("CMCI", &hdr, i); - __clear_bit(i, __get_cpu_var(mce_poll_banks)); - } else { - WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks))); - } - } - spin_unlock(&cmci_discover_lock); - if (hdr) - printk(KERN_CONT "\n"); -} - -/* - * Just in case we missed an event during initialization check - * all the CMCI owned banks. - */ -void cmci_recheck(void) -{ - unsigned long flags; - int banks; - - if (!mce_available(&current_cpu_data) || !cmci_supported(&banks)) - return; - local_irq_save(flags); - machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned)); - local_irq_restore(flags); -} - -/* - * Disable CMCI on this CPU for all banks it owns when it goes down. - * This allows other CPUs to claim the banks on rediscovery. - */ -void cmci_clear(void) -{ - int i; - int banks; - u64 val; - - if (!cmci_supported(&banks)) - return; - spin_lock(&cmci_discover_lock); - for (i = 0; i < banks; i++) { - if (!test_bit(i, __get_cpu_var(mce_banks_owned))) - continue; - /* Disable CMCI */ - rdmsrl(MSR_IA32_MC0_CTL2 + i, val); - val &= ~(CMCI_EN|CMCI_THRESHOLD_MASK); - wrmsrl(MSR_IA32_MC0_CTL2 + i, val); - __clear_bit(i, __get_cpu_var(mce_banks_owned)); - } - spin_unlock(&cmci_discover_lock); -} - -/* - * After a CPU went down cycle through all the others and rediscover - * Must run in process context. - */ -void cmci_rediscover(int dying) -{ - int banks; - int cpu; - cpumask_var_t old; - - if (!cmci_supported(&banks)) - return; - if (!alloc_cpumask_var(&old, GFP_KERNEL)) - return; - cpumask_copy(old, &current->cpus_allowed); - - for_each_online_cpu (cpu) { - if (cpu == dying) - continue; - if (set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu))) - continue; - /* Recheck banks in case CPUs don't all have the same */ - if (cmci_supported(&banks)) - cmci_discover(banks, 0); - } - - set_cpus_allowed_ptr(current, old); - free_cpumask_var(old); -} - -/* - * Reenable CMCI on this CPU in case a CPU down failed. - */ -void cmci_reenable(void) -{ - int banks; - if (cmci_supported(&banks)) - cmci_discover(banks, 0); -} - -static __cpuinit void intel_init_cmci(void) -{ - int banks; - - if (!cmci_supported(&banks)) - return; - - mce_threshold_vector = intel_threshold_interrupt; - cmci_discover(banks, 1); - /* - * For CPU #0 this runs with still disabled APIC, but that's - * ok because only the vector is set up. We still do another - * check for the banks later for CPU #0 just to make sure - * to not miss any events.
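cmci_discover() above claims a bank by writing CMCI_EN and reading it back: the bit sticks only where the hardware implements CMCI. A runnable sketch of that write/read-back probe, with stubbed MSR accessors whose behavior is invented for illustration:

#include <stdio.h>
#include <stdint.h>

#define CMCI_EN        (1ULL << 30)
#define CMCI_THRESHOLD 1

static uint64_t fake_msr[2];    /* stands in for the MCi_CTL2 MSRs */

static void rdmsrl(unsigned idx, uint64_t *val) { *val = fake_msr[idx]; }

static void wrmsrl(unsigned idx, uint64_t val)
{
        if (idx == 0)                   /* bank 0 "supports" CMCI */
                fake_msr[idx] = val;
        else                            /* bank 1: enable bit won't stick */
                fake_msr[idx] = val & ~CMCI_EN;
}

static int bank_supports_cmci(unsigned idx)
{
        uint64_t val;

        rdmsrl(idx, &val);
        if (val & CMCI_EN)
                return 0;               /* already owned by another CPU */
        wrmsrl(idx, val | CMCI_EN | CMCI_THRESHOLD);
        rdmsrl(idx, &val);              /* did the enable bit stick? */
        return !!(val & CMCI_EN);
}

int main(void)
{
        printf("bank0: %d, bank1: %d\n",
               bank_supports_cmci(0), bank_supports_cmci(1));
        return 0;
}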
- */ - apic_write(APIC_LVTCMCI, THRESHOLD_APIC_VECTOR|APIC_DM_FIXED); - cmci_recheck(); -} - void mce_intel_feature_init(struct cpuinfo_x86 *c) { intel_init_thermal(c); - intel_init_cmci(); } diff --git a/trunk/arch/x86/kernel/cpu/mcheck/threshold.c b/trunk/arch/x86/kernel/cpu/mcheck/threshold.c deleted file mode 100644 index 23ee9e730f78..000000000000 --- a/trunk/arch/x86/kernel/cpu/mcheck/threshold.c +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Common corrected MCE threshold handler code: - */ -#include -#include - -#include -#include -#include -#include - -static void default_threshold_interrupt(void) -{ - printk(KERN_ERR "Unexpected threshold interrupt at vector %x\n", - THRESHOLD_APIC_VECTOR); -} - -void (*mce_threshold_vector)(void) = default_threshold_interrupt; - -asmlinkage void mce_threshold_interrupt(void) -{ - exit_idle(); - irq_enter(); - inc_irq_stat(irq_threshold_count); - mce_threshold_vector(); - irq_exit(); - /* Ack only at the end to avoid potential reentry */ - ack_APIC_irq(); -} diff --git a/trunk/arch/x86/kernel/entry_64.S b/trunk/arch/x86/kernel/entry_64.S index 7ba4621c0dfa..83d1836b9467 100644 --- a/trunk/arch/x86/kernel/entry_64.S +++ b/trunk/arch/x86/kernel/entry_64.S @@ -984,8 +984,6 @@ apicinterrupt UV_BAU_MESSAGE \ #endif apicinterrupt LOCAL_TIMER_VECTOR \ apic_timer_interrupt smp_apic_timer_interrupt -apicinterrupt GENERIC_INTERRUPT_VECTOR \ - generic_interrupt smp_generic_interrupt #ifdef CONFIG_SMP apicinterrupt INVALIDATE_TLB_VECTOR_START+0 \ diff --git a/trunk/arch/x86/kernel/irq.c b/trunk/arch/x86/kernel/irq.c index b864341dcc45..f13ca1650aaf 100644 --- a/trunk/arch/x86/kernel/irq.c +++ b/trunk/arch/x86/kernel/irq.c @@ -15,9 +15,6 @@ atomic_t irq_err_count; -/* Function pointer for generic interrupt vector handling */ -void (*generic_interrupt_extension)(void) = NULL; - /* * 'what should we do if we get a hw irq event on an illegal vector'. * each architecture has to answer this themselves. @@ -59,12 +56,6 @@ static int show_other_interrupts(struct seq_file *p) seq_printf(p, "%10u ", irq_stats(j)->apic_timer_irqs); seq_printf(p, " Local timer interrupts\n"); #endif - if (generic_interrupt_extension) { - seq_printf(p, "PLT: "); - for_each_online_cpu(j) - seq_printf(p, "%10u ", irq_stats(j)->generic_irqs); - seq_printf(p, " Platform interrupts\n"); - } #ifdef CONFIG_SMP seq_printf(p, "RES: "); for_each_online_cpu(j) @@ -172,8 +163,6 @@ u64 arch_irq_stat_cpu(unsigned int cpu) #ifdef CONFIG_X86_LOCAL_APIC sum += irq_stats(cpu)->apic_timer_irqs; #endif - if (generic_interrupt_extension) - sum += irq_stats(cpu)->generic_irqs; #ifdef CONFIG_SMP sum += irq_stats(cpu)->irq_resched_count; sum += irq_stats(cpu)->irq_call_count; @@ -237,27 +226,4 @@ unsigned int __irq_entry do_IRQ(struct pt_regs *regs) return 1; } -/* - * Handler for GENERIC_INTERRUPT_VECTOR. 
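The deleted threshold.c centers on one idiom: a function-pointer vector preloaded with a default complaint handler and overridden by whichever vendor driver (AMD threshold or Intel CMCI) initializes. A standalone sketch of that dispatch pattern:

#include <stdio.h>

static void default_handler(void)
{
        puts("Unexpected threshold interrupt");
}

/* the vector starts out pointing at the complainer */
static void (*threshold_vector)(void) = default_handler;

static void intel_cmci_handler(void) { puts("poll CMCI-owned banks"); }

int main(void)
{
        threshold_vector();                     /* default path */
        threshold_vector = intel_cmci_handler;  /* vendor init overrides */
        threshold_vector();
        return 0;
}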
- */ -void smp_generic_interrupt(struct pt_regs *regs) -{ - struct pt_regs *old_regs = set_irq_regs(regs); - - ack_APIC_irq(); - - exit_idle(); - - irq_enter(); - - inc_irq_stat(generic_irqs); - - if (generic_interrupt_extension) - generic_interrupt_extension(); - - irq_exit(); - - set_irq_regs(old_regs); -} - EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq); diff --git a/trunk/arch/x86/kernel/irqinit_32.c b/trunk/arch/x86/kernel/irqinit_32.c index bc1326105448..50b8c3a3006c 100644 --- a/trunk/arch/x86/kernel/irqinit_32.c +++ b/trunk/arch/x86/kernel/irqinit_32.c @@ -175,9 +175,6 @@ void __init native_init_IRQ(void) /* self generated IPI for local APIC timer */ alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt); - /* generic IPI for platform specific use */ - alloc_intr_gate(GENERIC_INTERRUPT_VECTOR, generic_interrupt); - /* IPI vectors for APIC spurious and error interrupts */ alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt); diff --git a/trunk/arch/x86/kernel/irqinit_64.c b/trunk/arch/x86/kernel/irqinit_64.c index c7a49e0ffbfb..da481a1e3f30 100644 --- a/trunk/arch/x86/kernel/irqinit_64.c +++ b/trunk/arch/x86/kernel/irqinit_64.c @@ -147,9 +147,6 @@ static void __init apic_intr_init(void) /* self generated IPI for local APIC timer */ alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt); - /* generic IPI for platform specific use */ - alloc_intr_gate(GENERIC_INTERRUPT_VECTOR, generic_interrupt); - /* IPI vectors for APIC spurious and error interrupts */ alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt); diff --git a/trunk/arch/x86/kernel/machine_kexec_32.c b/trunk/arch/x86/kernel/machine_kexec_32.c index e7368c1da01d..f5fc8c781a62 100644 --- a/trunk/arch/x86/kernel/machine_kexec_32.c +++ b/trunk/arch/x86/kernel/machine_kexec_32.c @@ -14,12 +14,12 @@ #include #include #include -#include #include #include #include #include +#include #include #include #include @@ -63,7 +63,7 @@ static void load_segments(void) "\tmovl %%eax,%%fs\n" "\tmovl %%eax,%%gs\n" "\tmovl %%eax,%%ss\n" - : : : "eax", "memory"); + ::: "eax", "memory"); #undef STR #undef __STR } @@ -205,8 +205,7 @@ void machine_kexec(struct kimage *image) if (image->preserve_context) { #ifdef CONFIG_X86_IO_APIC - /* - * We need to put APICs in legacy mode so that we can + /* We need to put APICs in legacy mode so that we can * get timer interrupts in second kernel. kexec/kdump * paths already have calls to disable_IO_APIC() in * one form or other. kexec jump path also need @@ -228,8 +227,7 @@ void machine_kexec(struct kimage *image) page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page) << PAGE_SHIFT); - /* - * The segment registers are funny things, they have both a + /* The segment registers are funny things, they have both a * visible and an invisible part. Whenever the visible part is * set to a specific selector, the invisible part is loaded * with from a table in memory. At no other time is the @@ -239,12 +237,11 @@ void machine_kexec(struct kimage *image) * segments, before I zap the gdt with an invalid value. */ load_segments(); - /* - * The gdt & idt are now invalid. + /* The gdt & idt are now invalid. * If you want to load them you must set up your own idt & gdt. 
*/ - set_gdt(phys_to_virt(0), 0); - set_idt(phys_to_virt(0), 0); + set_gdt(phys_to_virt(0),0); + set_idt(phys_to_virt(0),0); /* now call it */ image->start = relocate_kernel_ptr((unsigned long)image->head, diff --git a/trunk/arch/x86/kernel/machine_kexec_64.c b/trunk/arch/x86/kernel/machine_kexec_64.c index 89cea4d44679..6993d51b7fd8 100644 --- a/trunk/arch/x86/kernel/machine_kexec_64.c +++ b/trunk/arch/x86/kernel/machine_kexec_64.c @@ -12,47 +12,11 @@ #include #include #include -#include -#include #include #include #include - -static int init_one_level2_page(struct kimage *image, pgd_t *pgd, - unsigned long addr) -{ - pud_t *pud; - pmd_t *pmd; - struct page *page; - int result = -ENOMEM; - - addr &= PMD_MASK; - pgd += pgd_index(addr); - if (!pgd_present(*pgd)) { - page = kimage_alloc_control_pages(image, 0); - if (!page) - goto out; - pud = (pud_t *)page_address(page); - memset(pud, 0, PAGE_SIZE); - set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE)); - } - pud = pud_offset(pgd, addr); - if (!pud_present(*pud)) { - page = kimage_alloc_control_pages(image, 0); - if (!page) - goto out; - pmd = (pmd_t *)page_address(page); - memset(pmd, 0, PAGE_SIZE); - set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE)); - } - pmd = pmd_offset(pud, addr); - if (!pmd_present(*pmd)) - set_pmd(pmd, __pmd(addr | __PAGE_KERNEL_LARGE_EXEC)); - result = 0; -out: - return result; -} +#include static void init_level2_page(pmd_t *level2p, unsigned long addr) { @@ -119,8 +83,9 @@ static int init_level4_page(struct kimage *image, pgd_t *level4p, } level3p = (pud_t *)page_address(page); result = init_level3_page(image, level3p, addr, last_addr); - if (result) + if (result) { goto out; + } set_pgd(level4p++, __pgd(__pa(level3p) | _KERNPG_TABLE)); addr += PGDIR_SIZE; } @@ -189,13 +154,6 @@ static int init_pgtable(struct kimage *image, unsigned long start_pgtable) int result; level4p = (pgd_t *)__va(start_pgtable); result = init_level4_page(image, level4p, 0, max_pfn << PAGE_SHIFT); - if (result) - return result; - /* - * image->start may be outside 0 ~ max_pfn, for example when - * jump back to original kernel from kexeced kernel - */ - result = init_one_level2_page(image, level4p, image->start); if (result) return result; return init_transition_pgtable(image, level4p); @@ -271,45 +229,20 @@ void machine_kexec(struct kimage *image) { unsigned long page_list[PAGES_NR]; void *control_page; - int save_ftrace_enabled; -#ifdef CONFIG_KEXEC_JUMP - if (kexec_image->preserve_context) - save_processor_state(); -#endif - - save_ftrace_enabled = __ftrace_enabled_save(); + tracer_disable(); /* Interrupts aren't acceptable while we reboot */ local_irq_disable(); - if (image->preserve_context) { -#ifdef CONFIG_X86_IO_APIC - /* - * We need to put APICs in legacy mode so that we can - * get timer interrupts in second kernel. kexec/kdump - * paths already have calls to disable_IO_APIC() in - * one form or other. kexec jump path also need - * one. 
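init_one_level2_page() above walks pgd -> pud -> pmd, allocating any missing intermediate level before installing a large-page entry. A software analogue over heap-allocated tables; the shift values and the 2 MB leaf granularity mirror x86-64 but the structure here is purely illustrative:

#include <stdlib.h>

#define ENTRIES 512

typedef struct level { struct level *slot[ENTRIES]; } level_t;

static int map_large_page(level_t *top, unsigned long addr)
{
        unsigned i3 = (addr >> 39) & 511, i2 = (addr >> 30) & 511;
        level_t *mid, *leaf;

        if (!top->slot[i3] &&
            !(top->slot[i3] = calloc(1, sizeof(level_t))))
                return -1;              /* -ENOMEM in the kernel version */
        mid = top->slot[i3];

        if (!mid->slot[i2] &&
            !(mid->slot[i2] = calloc(1, sizeof(level_t))))
                return -1;
        leaf = mid->slot[i2];

        /* the kernel would set_pmd() a 2 MB mapping at this point;
         * here the aligned base is stored as a fake leaf entry */
        leaf->slot[(addr >> 21) & 511] =
                (level_t *)(addr & ~((1UL << 21) - 1));
        return 0;
}

int main(void)
{
        level_t top = { { 0 } };

        return map_large_page(&top, 0x40000000UL);
}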
- */ - disable_IO_APIC(); -#endif - } - control_page = page_address(image->control_code_page) + PAGE_SIZE; - memcpy(control_page, relocate_kernel, KEXEC_CONTROL_CODE_MAX_SIZE); + memcpy(control_page, relocate_kernel, PAGE_SIZE); page_list[PA_CONTROL_PAGE] = virt_to_phys(control_page); - page_list[VA_CONTROL_PAGE] = (unsigned long)control_page; page_list[PA_TABLE_PAGE] = (unsigned long)__pa(page_address(image->control_code_page)); - if (image->type == KEXEC_TYPE_DEFAULT) - page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page) - << PAGE_SHIFT); - - /* - * The segment registers are funny things, they have both a + /* The segment registers are funny things, they have both a * visible and an invisible part. Whenever the visible part is * set to a specific selector, the invisible part is loaded * with from a table in memory. At no other time is the @@ -319,25 +252,15 @@ void machine_kexec(struct kimage *image) * segments, before I zap the gdt with an invalid value. */ load_segments(); - /* - * The gdt & idt are now invalid. + /* The gdt & idt are now invalid. * If you want to load them you must set up your own idt & gdt. */ - set_gdt(phys_to_virt(0), 0); - set_idt(phys_to_virt(0), 0); + set_gdt(phys_to_virt(0),0); + set_idt(phys_to_virt(0),0); /* now call it */ - image->start = relocate_kernel((unsigned long)image->head, - (unsigned long)page_list, - image->start, - image->preserve_context); - -#ifdef CONFIG_KEXEC_JUMP - if (kexec_image->preserve_context) - restore_processor_state(); -#endif - - __ftrace_enabled_restore(save_ftrace_enabled); + relocate_kernel((unsigned long)image->head, (unsigned long)page_list, + image->start); } void arch_crash_save_vmcoreinfo(void) diff --git a/trunk/arch/x86/kernel/mpparse.c b/trunk/arch/x86/kernel/mpparse.c index e8192401da47..37cb1bda1baf 100644 --- a/trunk/arch/x86/kernel/mpparse.c +++ b/trunk/arch/x86/kernel/mpparse.c @@ -558,19 +558,6 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type) static struct mpf_intel *mpf_found; -static unsigned long __init get_mpc_size(unsigned long physptr) -{ - struct mpc_table *mpc; - unsigned long size; - - mpc = early_ioremap(physptr, PAGE_SIZE); - size = mpc->length; - early_iounmap(mpc, PAGE_SIZE); - apic_printk(APIC_VERBOSE, " mpc: %lx-%lx\n", physptr, physptr + size); - - return size; -} - /* * Scan the memory blocks for an SMP configuration block. */ @@ -624,16 +611,12 @@ static void __init __get_smp_config(unsigned int early) construct_default_ISA_mptable(mpf->feature1); } else if (mpf->physptr) { - struct mpc_table *mpc; - unsigned long size; - size = get_mpc_size(mpf->physptr); - mpc = early_ioremap(mpf->physptr, size); /* * Read the physical hardware table. Anything here will * override the defaults. */ - if (!smp_read_mpc(mpc, early)) { + if (!smp_read_mpc(phys_to_virt(mpf->physptr), early)) { #ifdef CONFIG_X86_LOCAL_APIC smp_found_config = 0; #endif @@ -641,10 +624,8 @@ static void __init __get_smp_config(unsigned int early) "BIOS bug, MP table errors detected!...\n"); printk(KERN_ERR "... disabling SMP support. 
" "(tell your hw vendor)\n"); - early_iounmap(mpc, size); return; } - early_iounmap(mpc, size); if (early) return; @@ -716,10 +697,10 @@ static int __init smp_scan_config(unsigned long base, unsigned long length, if (!reserve) return 1; - reserve_bootmem_generic(virt_to_phys(mpf), sizeof(*mpf), + reserve_bootmem_generic(virt_to_phys(mpf), PAGE_SIZE, BOOTMEM_DEFAULT); if (mpf->physptr) { - unsigned long size = get_mpc_size(mpf->physptr); + unsigned long size = PAGE_SIZE; #ifdef CONFIG_X86_32 /* * We cannot access to MPC table to compute diff --git a/trunk/arch/x86/kernel/quirks.c b/trunk/arch/x86/kernel/quirks.c index 6a5a2970f4c5..309949e9e1c1 100644 --- a/trunk/arch/x86/kernel/quirks.c +++ b/trunk/arch/x86/kernel/quirks.c @@ -74,7 +74,8 @@ static void ich_force_hpet_resume(void) if (!force_hpet_address) return; - BUG_ON(rcba_base == NULL); + if (rcba_base == NULL) + BUG(); /* read the Function Disable register, dword mode only */ val = readl(rcba_base + 0x3404); diff --git a/trunk/arch/x86/kernel/relocate_kernel_32.S b/trunk/arch/x86/kernel/relocate_kernel_32.S index 41235531b11c..2064d0aa8d28 100644 --- a/trunk/arch/x86/kernel/relocate_kernel_32.S +++ b/trunk/arch/x86/kernel/relocate_kernel_32.S @@ -17,8 +17,7 @@ #define PTR(x) (x << 2) -/* - * control_page + KEXEC_CONTROL_CODE_MAX_SIZE +/* control_page + KEXEC_CONTROL_CODE_MAX_SIZE * ~ control_page + PAGE_SIZE are used as data storage and stack for * jumping back */ @@ -77,10 +76,8 @@ relocate_kernel: movl %eax, CP_PA_SWAP_PAGE(%edi) movl %ebx, CP_PA_BACKUP_PAGES_MAP(%edi) - /* - * get physical address of control page now - * this is impossible after page table switch - */ + /* get physical address of control page now */ + /* this is impossible after page table switch */ movl PTR(PA_CONTROL_PAGE)(%ebp), %edi /* switch to new set of page tables */ @@ -100,8 +97,7 @@ identity_mapped: /* store the start address on the stack */ pushl %edx - /* - * Set cr0 to a known state: + /* Set cr0 to a known state: * - Paging disabled * - Alignment check disabled * - Write protect disabled @@ -117,8 +113,7 @@ identity_mapped: /* clear cr4 if applicable */ testl %ecx, %ecx jz 1f - /* - * Set cr4 to a known state: + /* Set cr4 to a known state: * Setting everything to zero seems safe. */ xorl %eax, %eax @@ -137,18 +132,15 @@ identity_mapped: call swap_pages addl $8, %esp - /* - * To be certain of avoiding problems with self-modifying code + /* To be certain of avoiding problems with self-modifying code * I need to execute a serializing instruction here. * So I flush the TLB, it's handy, and not processor dependent. 
*/ xorl %eax, %eax movl %eax, %cr3 - /* - * set all of the registers to known values - * leave %esp alone - */ + /* set all of the registers to known values */ + /* leave %esp alone */ testl %esi, %esi jnz 1f diff --git a/trunk/arch/x86/kernel/relocate_kernel_64.S b/trunk/arch/x86/kernel/relocate_kernel_64.S index 4de8f5b3d476..d32cfb27a479 100644 --- a/trunk/arch/x86/kernel/relocate_kernel_64.S +++ b/trunk/arch/x86/kernel/relocate_kernel_64.S @@ -19,77 +19,29 @@ #define PTR(x) (x << 3) #define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) -/* - * control_page + KEXEC_CONTROL_CODE_MAX_SIZE - * ~ control_page + PAGE_SIZE are used as data storage and stack for - * jumping back - */ -#define DATA(offset) (KEXEC_CONTROL_CODE_MAX_SIZE+(offset)) - -/* Minimal CPU state */ -#define RSP DATA(0x0) -#define CR0 DATA(0x8) -#define CR3 DATA(0x10) -#define CR4 DATA(0x18) - -/* other data */ -#define CP_PA_TABLE_PAGE DATA(0x20) -#define CP_PA_SWAP_PAGE DATA(0x28) -#define CP_PA_BACKUP_PAGES_MAP DATA(0x30) - .text .align PAGE_SIZE .code64 .globl relocate_kernel relocate_kernel: - /* - * %rdi indirection_page + /* %rdi indirection_page * %rsi page_list * %rdx start address - * %rcx preserve_context */ - /* Save the CPU context, used for jumping back */ - pushq %rbx - pushq %rbp - pushq %r12 - pushq %r13 - pushq %r14 - pushq %r15 - pushf - - movq PTR(VA_CONTROL_PAGE)(%rsi), %r11 - movq %rsp, RSP(%r11) - movq %cr0, %rax - movq %rax, CR0(%r11) - movq %cr3, %rax - movq %rax, CR3(%r11) - movq %cr4, %rax - movq %rax, CR4(%r11) - /* zero out flags, and disable interrupts */ pushq $0 popfq - /* - * get physical address of control page now - * this is impossible after page table switch - */ + /* get physical address of control page now */ + /* this is impossible after page table switch */ movq PTR(PA_CONTROL_PAGE)(%rsi), %r8 /* get physical address of page table now too */ - movq PTR(PA_TABLE_PAGE)(%rsi), %r9 - - /* get physical address of swap page now */ - movq PTR(PA_SWAP_PAGE)(%rsi), %r10 - - /* save some information for jumping back */ - movq %r9, CP_PA_TABLE_PAGE(%r11) - movq %r10, CP_PA_SWAP_PAGE(%r11) - movq %rdi, CP_PA_BACKUP_PAGES_MAP(%r11) + movq PTR(PA_TABLE_PAGE)(%rsi), %rcx /* Switch to the identity mapped page tables */ - movq %r9, %cr3 + movq %rcx, %cr3 /* setup a new stack at the end of the physical control page */ lea PAGE_SIZE(%r8), %rsp @@ -103,8 +55,7 @@ identity_mapped: /* store the start address on the stack */ pushq %rdx - /* - * Set cr0 to a known state: + /* Set cr0 to a known state: * - Paging enabled * - Alignment check disabled * - Write protect disabled @@ -117,8 +68,7 @@ identity_mapped: orl $(X86_CR0_PG | X86_CR0_PE), %eax movq %rax, %cr0 - /* - * Set cr4 to a known state: + /* Set cr4 to a known state: * - physical address extension enabled */ movq $X86_CR4_PAE, %rax @@ -128,87 +78,9 @@ identity_mapped: 1: /* Flush the TLB (needed?) */ - movq %r9, %cr3 - - movq %rcx, %r11 - call swap_pages - - /* - * To be certain of avoiding problems with self-modifying code - * I need to execute a serializing instruction here. - * So I flush the TLB by reloading %cr3 here, it's handy, - * and not processor dependent. 
- */ - movq %cr3, %rax - movq %rax, %cr3 - - /* - * set all of the registers to known values - * leave %rsp alone - */ - - testq %r11, %r11 - jnz 1f - xorq %rax, %rax - xorq %rbx, %rbx - xorq %rcx, %rcx - xorq %rdx, %rdx - xorq %rsi, %rsi - xorq %rdi, %rdi - xorq %rbp, %rbp - xorq %r8, %r8 - xorq %r9, %r9 - xorq %r10, %r9 - xorq %r11, %r11 - xorq %r12, %r12 - xorq %r13, %r13 - xorq %r14, %r14 - xorq %r15, %r15 - - ret - -1: - popq %rdx - leaq PAGE_SIZE(%r10), %rsp - call *%rdx - - /* get the re-entry point of the peer system */ - movq 0(%rsp), %rbp - call 1f -1: - popq %r8 - subq $(1b - relocate_kernel), %r8 - movq CP_PA_SWAP_PAGE(%r8), %r10 - movq CP_PA_BACKUP_PAGES_MAP(%r8), %rdi - movq CP_PA_TABLE_PAGE(%r8), %rax - movq %rax, %cr3 - lea PAGE_SIZE(%r8), %rsp - call swap_pages - movq $virtual_mapped, %rax - pushq %rax - ret - -virtual_mapped: - movq RSP(%r8), %rsp - movq CR4(%r8), %rax - movq %rax, %cr4 - movq CR3(%r8), %rax - movq CR0(%r8), %r8 - movq %rax, %cr3 - movq %r8, %cr0 - movq %rbp, %rax - - popf - popq %r15 - popq %r14 - popq %r13 - popq %r12 - popq %rbp - popq %rbx - ret + movq %rcx, %cr3 /* Do the copies */ -swap_pages: movq %rdi, %rcx /* Put the page_list in %rcx */ xorq %rdi, %rdi xorq %rsi, %rsi @@ -240,27 +112,36 @@ swap_pages: movq %rcx, %rsi /* For ever source page do a copy */ andq $0xfffffffffffff000, %rsi - movq %rdi, %rdx - movq %rsi, %rax - - movq %r10, %rdi movq $512, %rcx rep ; movsq + jmp 0b +3: - movq %rax, %rdi - movq %rdx, %rsi - movq $512, %rcx - rep ; movsq + /* To be certain of avoiding problems with self-modifying code + * I need to execute a serializing instruction here. + * So I flush the TLB by reloading %cr3 here, it's handy, + * and not processor dependent. + */ + movq %cr3, %rax + movq %rax, %cr3 - movq %rdx, %rdi - movq %r10, %rsi - movq $512, %rcx - rep ; movsq + /* set all of the registers to known values */ + /* leave %rsp alone */ - lea PAGE_SIZE(%rax), %rsi - jmp 0b -3: - ret + xorq %rax, %rax + xorq %rbx, %rbx + xorq %rcx, %rcx + xorq %rdx, %rdx + xorq %rsi, %rsi + xorq %rdi, %rdi + xorq %rbp, %rbp + xorq %r8, %r8 + xorq %r9, %r9 + xorq %r10, %r9 + xorq %r11, %r11 + xorq %r12, %r12 + xorq %r13, %r13 + xorq %r14, %r14 + xorq %r15, %r15 - .globl kexec_control_code_size -.set kexec_control_code_size, . - relocate_kernel + ret diff --git a/trunk/arch/x86/kernel/setup.c b/trunk/arch/x86/kernel/setup.c index f28c56e6bf94..b746deb9ebc6 100644 --- a/trunk/arch/x86/kernel/setup.c +++ b/trunk/arch/x86/kernel/setup.c @@ -202,9 +202,7 @@ struct ist_info ist_info; #endif #else -struct cpuinfo_x86 boot_cpu_data __read_mostly = { - .x86_phys_bits = MAX_PHYSMEM_BITS, -}; +struct cpuinfo_x86 boot_cpu_data __read_mostly; EXPORT_SYMBOL(boot_cpu_data); #endif diff --git a/trunk/arch/x86/kernel/smpboot.c b/trunk/arch/x86/kernel/smpboot.c index ef7d10170c30..249334f5080a 100644 --- a/trunk/arch/x86/kernel/smpboot.c +++ b/trunk/arch/x86/kernel/smpboot.c @@ -114,6 +114,10 @@ EXPORT_PER_CPU_SYMBOL(cpu_info); atomic_t init_deasserted; + +/* Set if we find a B stepping CPU */ +static int __cpuinitdata smp_b_stepping; + #if defined(CONFIG_NUMA) && defined(CONFIG_X86_32) /* which logical CPUs are on which nodes */ @@ -267,6 +271,8 @@ static void __cpuinit smp_callin(void) cpumask_set_cpu(cpuid, cpu_callin_mask); } +static int __cpuinitdata unsafe_smp; + /* * Activate a secondary processor. 
*/ @@ -334,6 +340,76 @@ notrace static void __cpuinit start_secondary(void *unused) cpu_idle(); } +static void __cpuinit smp_apply_quirks(struct cpuinfo_x86 *c) +{ + /* + * Mask B, Pentium, but not Pentium MMX + */ + if (c->x86_vendor == X86_VENDOR_INTEL && + c->x86 == 5 && + c->x86_mask >= 1 && c->x86_mask <= 4 && + c->x86_model <= 3) + /* + * Remember we have B step Pentia with bugs + */ + smp_b_stepping = 1; + + /* + * Certain Athlons might work (for various values of 'work') in SMP + * but they are not certified as MP capable. + */ + if ((c->x86_vendor == X86_VENDOR_AMD) && (c->x86 == 6)) { + + if (num_possible_cpus() == 1) + goto valid_k7; + + /* Athlon 660/661 is valid. */ + if ((c->x86_model == 6) && ((c->x86_mask == 0) || + (c->x86_mask == 1))) + goto valid_k7; + + /* Duron 670 is valid */ + if ((c->x86_model == 7) && (c->x86_mask == 0)) + goto valid_k7; + + /* + * Athlon 662, Duron 671, and Athlon >model 7 have capability + * bit. It's worth noting that the A5 stepping (662) of some + * Athlon XP's have the MP bit set. + * See http://www.heise.de/newsticker/data/jow-18.10.01-000 for + * more. + */ + if (((c->x86_model == 6) && (c->x86_mask >= 2)) || + ((c->x86_model == 7) && (c->x86_mask >= 1)) || + (c->x86_model > 7)) + if (cpu_has_mp) + goto valid_k7; + + /* If we get here, not a certified SMP capable AMD system. */ + unsafe_smp = 1; + } + +valid_k7: + ; +} + +static void __cpuinit smp_checks(void) +{ + if (smp_b_stepping) + printk(KERN_WARNING "WARNING: SMP operation may be unreliable" + "with B stepping processors.\n"); + + /* + * Don't taint if we are running SMP kernel on a single non-MP + * approved Athlon + */ + if (unsafe_smp && num_online_cpus() > 1) { + printk(KERN_INFO "WARNING: This combination of AMD" + "processors is not suitable for SMP.\n"); + add_taint(TAINT_UNSAFE_SMP); + } +} + /* * The bootstrap kernel entry code has set these up. Save them for * a given CPU @@ -347,6 +423,7 @@ void __cpuinit smp_store_cpu_info(int id) c->cpu_index = id; if (id != 0) identify_secondary_cpu(c); + smp_apply_quirks(c); } @@ -1116,6 +1193,7 @@ void __init native_smp_cpus_done(unsigned int max_cpus) pr_debug("Boot done.\n"); impress_friends(); + smp_checks(); #ifdef CONFIG_X86_IO_APIC setup_ioapic_dest(); #endif diff --git a/trunk/arch/x86/kernel/uv_time.c b/trunk/arch/x86/kernel/uv_time.c deleted file mode 100644 index 2ffb6c53326e..000000000000 --- a/trunk/arch/x86/kernel/uv_time.c +++ /dev/null @@ -1,393 +0,0 @@ -/* - * SGI RTC clock/timer routines. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * Copyright (c) 2009 Silicon Graphics, Inc. All Rights Reserved. 
- * Copyright (c) Dimitri Sivanich - */ -#include - -#include -#include -#include -#include -#include -#include - -#define RTC_NAME "sgi_rtc" - -static cycle_t uv_read_rtc(void); -static int uv_rtc_next_event(unsigned long, struct clock_event_device *); -static void uv_rtc_timer_setup(enum clock_event_mode, - struct clock_event_device *); - -static struct clocksource clocksource_uv = { - .name = RTC_NAME, - .rating = 400, - .read = uv_read_rtc, - .mask = (cycle_t)UVH_RTC_REAL_TIME_CLOCK_MASK, - .shift = 10, - .flags = CLOCK_SOURCE_IS_CONTINUOUS, -}; - -static struct clock_event_device clock_event_device_uv = { - .name = RTC_NAME, - .features = CLOCK_EVT_FEAT_ONESHOT, - .shift = 20, - .rating = 400, - .irq = -1, - .set_next_event = uv_rtc_next_event, - .set_mode = uv_rtc_timer_setup, - .event_handler = NULL, -}; - -static DEFINE_PER_CPU(struct clock_event_device, cpu_ced); - -/* There is one of these allocated per node */ -struct uv_rtc_timer_head { - spinlock_t lock; - /* next cpu waiting for timer, local node relative: */ - int next_cpu; - /* number of cpus on this node: */ - int ncpus; - struct { - int lcpu; /* systemwide logical cpu number */ - u64 expires; /* next timer expiration for this cpu */ - } cpu[1]; -}; - -/* - * Access to uv_rtc_timer_head via blade id. - */ -static struct uv_rtc_timer_head **blade_info __read_mostly; - -static int uv_rtc_enable; - -/* - * Hardware interface routines - */ - -/* Send IPIs to another node */ -static void uv_rtc_send_IPI(int cpu) -{ - unsigned long apicid, val; - int pnode; - - apicid = cpu_physical_id(cpu); - pnode = uv_apicid_to_pnode(apicid); - val = (1UL << UVH_IPI_INT_SEND_SHFT) | - (apicid << UVH_IPI_INT_APIC_ID_SHFT) | - (GENERIC_INTERRUPT_VECTOR << UVH_IPI_INT_VECTOR_SHFT); - - uv_write_global_mmr64(pnode, UVH_IPI_INT, val); -} - -/* Check for an RTC interrupt pending */ -static int uv_intr_pending(int pnode) -{ - return uv_read_global_mmr64(pnode, UVH_EVENT_OCCURRED0) & - UVH_EVENT_OCCURRED0_RTC1_MASK; -} - -/* Setup interrupt and return non-zero if early expiration occurred. */ -static int uv_setup_intr(int cpu, u64 expires) -{ - u64 val; - int pnode = uv_cpu_to_pnode(cpu); - - uv_write_global_mmr64(pnode, UVH_RTC1_INT_CONFIG, - UVH_RTC1_INT_CONFIG_M_MASK); - uv_write_global_mmr64(pnode, UVH_INT_CMPB, -1L); - - uv_write_global_mmr64(pnode, UVH_EVENT_OCCURRED0_ALIAS, - UVH_EVENT_OCCURRED0_RTC1_MASK); - - val = (GENERIC_INTERRUPT_VECTOR << UVH_RTC1_INT_CONFIG_VECTOR_SHFT) | - ((u64)cpu_physical_id(cpu) << UVH_RTC1_INT_CONFIG_APIC_ID_SHFT); - - /* Set configuration */ - uv_write_global_mmr64(pnode, UVH_RTC1_INT_CONFIG, val); - /* Initialize comparator value */ - uv_write_global_mmr64(pnode, UVH_INT_CMPB, expires); - - return (expires < uv_read_rtc() && !uv_intr_pending(pnode)); -} - -/* - * Per-cpu timer tracking routines - */ - -static __init void uv_rtc_deallocate_timers(void) -{ - int bid; - - for_each_possible_blade(bid) { - kfree(blade_info[bid]); - } - kfree(blade_info); -} - -/* Allocate per-node list of cpu timer expiration times. 
*/ -static __init int uv_rtc_allocate_timers(void) -{ - int cpu; - - blade_info = kmalloc(uv_possible_blades * sizeof(void *), GFP_KERNEL); - if (!blade_info) - return -ENOMEM; - memset(blade_info, 0, uv_possible_blades * sizeof(void *)); - - for_each_present_cpu(cpu) { - int nid = cpu_to_node(cpu); - int bid = uv_cpu_to_blade_id(cpu); - int bcpu = uv_cpu_hub_info(cpu)->blade_processor_id; - struct uv_rtc_timer_head *head = blade_info[bid]; - - if (!head) { - head = kmalloc_node(sizeof(struct uv_rtc_timer_head) + - (uv_blade_nr_possible_cpus(bid) * - 2 * sizeof(u64)), - GFP_KERNEL, nid); - if (!head) { - uv_rtc_deallocate_timers(); - return -ENOMEM; - } - spin_lock_init(&head->lock); - head->ncpus = uv_blade_nr_possible_cpus(bid); - head->next_cpu = -1; - blade_info[bid] = head; - } - - head->cpu[bcpu].lcpu = cpu; - head->cpu[bcpu].expires = ULLONG_MAX; - } - - return 0; -} - -/* Find and set the next expiring timer. */ -static void uv_rtc_find_next_timer(struct uv_rtc_timer_head *head, int pnode) -{ - u64 lowest = ULLONG_MAX; - int c, bcpu = -1; - - head->next_cpu = -1; - for (c = 0; c < head->ncpus; c++) { - u64 exp = head->cpu[c].expires; - if (exp < lowest) { - bcpu = c; - lowest = exp; - } - } - if (bcpu >= 0) { - head->next_cpu = bcpu; - c = head->cpu[bcpu].lcpu; - if (uv_setup_intr(c, lowest)) - /* If we didn't set it up in time, trigger */ - uv_rtc_send_IPI(c); - } else { - uv_write_global_mmr64(pnode, UVH_RTC1_INT_CONFIG, - UVH_RTC1_INT_CONFIG_M_MASK); - } -} - -/* - * Set expiration time for current cpu. - * - * Returns 1 if we missed the expiration time. - */ -static int uv_rtc_set_timer(int cpu, u64 expires) -{ - int pnode = uv_cpu_to_pnode(cpu); - int bid = uv_cpu_to_blade_id(cpu); - struct uv_rtc_timer_head *head = blade_info[bid]; - int bcpu = uv_cpu_hub_info(cpu)->blade_processor_id; - u64 *t = &head->cpu[bcpu].expires; - unsigned long flags; - int next_cpu; - - spin_lock_irqsave(&head->lock, flags); - - next_cpu = head->next_cpu; - *t = expires; - /* Will this one be next to go off? */ - if (next_cpu < 0 || bcpu == next_cpu || - expires < head->cpu[next_cpu].expires) { - head->next_cpu = bcpu; - if (uv_setup_intr(cpu, expires)) { - *t = ULLONG_MAX; - uv_rtc_find_next_timer(head, pnode); - spin_unlock_irqrestore(&head->lock, flags); - return 1; - } - } - - spin_unlock_irqrestore(&head->lock, flags); - return 0; -} - -/* - * Unset expiration time for current cpu. - * - * Returns 1 if this timer was pending. - */ -static int uv_rtc_unset_timer(int cpu) -{ - int pnode = uv_cpu_to_pnode(cpu); - int bid = uv_cpu_to_blade_id(cpu); - struct uv_rtc_timer_head *head = blade_info[bid]; - int bcpu = uv_cpu_hub_info(cpu)->blade_processor_id; - u64 *t = &head->cpu[bcpu].expires; - unsigned long flags; - int rc = 0; - - spin_lock_irqsave(&head->lock, flags); - - if (head->next_cpu == bcpu && uv_read_rtc() >= *t) - rc = 1; - - *t = ULLONG_MAX; - - /* Was the hardware setup for this timer? */ - if (head->next_cpu == bcpu) - uv_rtc_find_next_timer(head, pnode); - - spin_unlock_irqrestore(&head->lock, flags); - - return rc; -} - - -/* - * Kernel interface routines. - */ - -/* - * Read the RTC. 
- */ -static cycle_t uv_read_rtc(void) -{ - return (cycle_t)uv_read_local_mmr(UVH_RTC); -} - -/* - * Program the next event, relative to now - */ -static int uv_rtc_next_event(unsigned long delta, - struct clock_event_device *ced) -{ - int ced_cpu = cpumask_first(ced->cpumask); - - return uv_rtc_set_timer(ced_cpu, delta + uv_read_rtc()); -} - -/* - * Setup the RTC timer in oneshot mode - */ -static void uv_rtc_timer_setup(enum clock_event_mode mode, - struct clock_event_device *evt) -{ - int ced_cpu = cpumask_first(evt->cpumask); - - switch (mode) { - case CLOCK_EVT_MODE_PERIODIC: - case CLOCK_EVT_MODE_ONESHOT: - case CLOCK_EVT_MODE_RESUME: - /* Nothing to do here yet */ - break; - case CLOCK_EVT_MODE_UNUSED: - case CLOCK_EVT_MODE_SHUTDOWN: - uv_rtc_unset_timer(ced_cpu); - break; - } -} - -static void uv_rtc_interrupt(void) -{ - struct clock_event_device *ced = &__get_cpu_var(cpu_ced); - int cpu = smp_processor_id(); - - if (!ced || !ced->event_handler) - return; - - if (uv_rtc_unset_timer(cpu) != 1) - return; - - ced->event_handler(ced); -} - -static int __init uv_enable_rtc(char *str) -{ - uv_rtc_enable = 1; - - return 1; -} -__setup("uvrtc", uv_enable_rtc); - -static __init void uv_rtc_register_clockevents(struct work_struct *dummy) -{ - struct clock_event_device *ced = &__get_cpu_var(cpu_ced); - - *ced = clock_event_device_uv; - ced->cpumask = cpumask_of(smp_processor_id()); - clockevents_register_device(ced); -} - -static __init int uv_rtc_setup_clock(void) -{ - int rc; - - if (!uv_rtc_enable || !is_uv_system() || generic_interrupt_extension) - return -ENODEV; - - generic_interrupt_extension = uv_rtc_interrupt; - - clocksource_uv.mult = clocksource_hz2mult(sn_rtc_cycles_per_second, - clocksource_uv.shift); - - rc = clocksource_register(&clocksource_uv); - if (rc) { - generic_interrupt_extension = NULL; - return rc; - } - - /* Setup and register clockevents */ - rc = uv_rtc_allocate_timers(); - if (rc) { - clocksource_unregister(&clocksource_uv); - generic_interrupt_extension = NULL; - return rc; - } - - clock_event_device_uv.mult = div_sc(sn_rtc_cycles_per_second, - NSEC_PER_SEC, clock_event_device_uv.shift); - - clock_event_device_uv.min_delta_ns = NSEC_PER_SEC / - sn_rtc_cycles_per_second; - - clock_event_device_uv.max_delta_ns = clocksource_uv.mask * - (NSEC_PER_SEC / sn_rtc_cycles_per_second); - - rc = schedule_on_each_cpu(uv_rtc_register_clockevents); - if (rc) { - clocksource_unregister(&clocksource_uv); - generic_interrupt_extension = NULL; - uv_rtc_deallocate_timers(); - } - - return rc; -} -arch_initcall(uv_rtc_setup_clock); diff --git a/trunk/arch/x86/kernel/visws_quirks.c b/trunk/arch/x86/kernel/visws_quirks.c index 31ffc24eec4d..191a876e9e87 100644 --- a/trunk/arch/x86/kernel/visws_quirks.c +++ b/trunk/arch/x86/kernel/visws_quirks.c @@ -578,7 +578,7 @@ static struct irq_chip piix4_virtual_irq_type = { static irqreturn_t piix4_master_intr(int irq, void *dev_id) { int realirq; - struct irq_desc *desc; + irq_desc_t *desc; unsigned long flags; spin_lock_irqsave(&i8259A_lock, flags); diff --git a/trunk/arch/x86/kernel/vmlinux_64.lds.S b/trunk/arch/x86/kernel/vmlinux_64.lds.S index 5bf54e40c6ef..fbfced6f6800 100644 --- a/trunk/arch/x86/kernel/vmlinux_64.lds.S +++ b/trunk/arch/x86/kernel/vmlinux_64.lds.S @@ -275,10 +275,3 @@ ASSERT((_end - _text <= KERNEL_IMAGE_SIZE), ASSERT((per_cpu__irq_stack_union == 0), "irq_stack_union is not at start of per-cpu area"); #endif - -#ifdef CONFIG_KEXEC -#include - -ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE, - "kexec control 
code size is too big") -#endif diff --git a/trunk/arch/x86/mm/highmem_32.c b/trunk/arch/x86/mm/highmem_32.c index f256e73542d7..00f127c80b0e 100644 --- a/trunk/arch/x86/mm/highmem_32.c +++ b/trunk/arch/x86/mm/highmem_32.c @@ -121,30 +121,23 @@ void kunmap_atomic(void *kvaddr, enum km_type type) pagefault_enable(); } -void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot) +/* This is the same as kmap_atomic() but can map memory that doesn't + * have a struct page associated with it. + */ +void *kmap_atomic_pfn(unsigned long pfn, enum km_type type) { enum fixed_addresses idx; unsigned long vaddr; pagefault_disable(); - debug_kmap_atomic_prot(type); - - idx = type + KM_TYPE_NR * smp_processor_id(); + idx = type + KM_TYPE_NR*smp_processor_id(); vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); - set_pte(kmap_pte - idx, pfn_pte(pfn, prot)); + set_pte(kmap_pte-idx, pfn_pte(pfn, kmap_prot)); arch_flush_lazy_mmu_mode(); return (void*) vaddr; } - -/* This is the same as kmap_atomic() but can map memory that doesn't - * have a struct page associated with it. - */ -void *kmap_atomic_pfn(unsigned long pfn, enum km_type type) -{ - return kmap_atomic_prot_pfn(pfn, type, kmap_prot); -} EXPORT_SYMBOL_GPL(kmap_atomic_pfn); /* temporarily in use by i915 GEM until vmap */ struct page *kmap_atomic_to_page(void *ptr) @@ -165,6 +158,7 @@ EXPORT_SYMBOL(kunmap); EXPORT_SYMBOL(kmap_atomic); EXPORT_SYMBOL(kunmap_atomic); +#ifdef CONFIG_NUMA void __init set_highmem_pages_init(void) { struct zone *zone; @@ -188,3 +182,11 @@ void __init set_highmem_pages_init(void) } totalram_pages += totalhigh_pages; } +#else +void __init set_highmem_pages_init(void) +{ + add_highpages_with_active_regions(0, highstart_pfn, highend_pfn); + + totalram_pages += totalhigh_pages; +} +#endif /* CONFIG_NUMA */ diff --git a/trunk/arch/x86/mm/init.c b/trunk/arch/x86/mm/init.c index 15219e0d1243..ce6a722587d8 100644 --- a/trunk/arch/x86/mm/init.c +++ b/trunk/arch/x86/mm/init.c @@ -1,345 +1,8 @@ -#include #include - #include -#include -#include #include -#include #include #include -#include - -unsigned long __initdata e820_table_start; -unsigned long __meminitdata e820_table_end; -unsigned long __meminitdata e820_table_top; - -int after_bootmem; - -int direct_gbpages -#ifdef CONFIG_DIRECT_GBPAGES - = 1 -#endif -; - -static void __init find_early_table_space(unsigned long end, int use_pse, - int use_gbpages) -{ - unsigned long puds, pmds, ptes, tables, start; - - puds = (end + PUD_SIZE - 1) >> PUD_SHIFT; - tables = roundup(puds * sizeof(pud_t), PAGE_SIZE); - - if (use_gbpages) { - unsigned long extra; - - extra = end - ((end>>PUD_SHIFT) << PUD_SHIFT); - pmds = (extra + PMD_SIZE - 1) >> PMD_SHIFT; - } else - pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT; - - tables += roundup(pmds * sizeof(pmd_t), PAGE_SIZE); - - if (use_pse) { - unsigned long extra; - - extra = end - ((end>>PMD_SHIFT) << PMD_SHIFT); -#ifdef CONFIG_X86_32 - extra += PMD_SIZE; -#endif - ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT; - } else - ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT; - - tables += roundup(ptes * sizeof(pte_t), PAGE_SIZE); - -#ifdef CONFIG_X86_32 - /* for fixmap */ - tables += roundup(__end_of_fixed_addresses * sizeof(pte_t), PAGE_SIZE); -#endif - - /* - * RED-PEN putting page tables only on node 0 could - * cause a hotspot and fill up ZONE_DMA. The page tables - * need roughly 0.5KB per GB. 
- */ -#ifdef CONFIG_X86_32 - start = 0x7000; - e820_table_start = find_e820_area(start, max_pfn_mapped<>= PAGE_SHIFT; - e820_table_end = e820_table_start; - e820_table_top = e820_table_start + (tables >> PAGE_SHIFT); - - printk(KERN_DEBUG "kernel direct mapping tables up to %lx @ %lx-%lx\n", - end, e820_table_start << PAGE_SHIFT, e820_table_top << PAGE_SHIFT); -} - -struct map_range { - unsigned long start; - unsigned long end; - unsigned page_size_mask; -}; - -#ifdef CONFIG_X86_32 -#define NR_RANGE_MR 3 -#else /* CONFIG_X86_64 */ -#define NR_RANGE_MR 5 -#endif - -static int save_mr(struct map_range *mr, int nr_range, - unsigned long start_pfn, unsigned long end_pfn, - unsigned long page_size_mask) -{ - if (start_pfn < end_pfn) { - if (nr_range >= NR_RANGE_MR) - panic("run out of range for init_memory_mapping\n"); - mr[nr_range].start = start_pfn<> PAGE_SHIFT; - pos = start_pfn << PAGE_SHIFT; -#ifdef CONFIG_X86_32 - /* - * Don't use a large page for the first 2/4MB of memory - * because there are often fixed size MTRRs in there - * and overlapping MTRRs into large pages can cause - * slowdowns. - */ - if (pos == 0) - end_pfn = 1<<(PMD_SHIFT - PAGE_SHIFT); - else - end_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT) - << (PMD_SHIFT - PAGE_SHIFT); -#else /* CONFIG_X86_64 */ - end_pfn = ((pos + (PMD_SIZE - 1)) >> PMD_SHIFT) - << (PMD_SHIFT - PAGE_SHIFT); -#endif - if (end_pfn > (end >> PAGE_SHIFT)) - end_pfn = end >> PAGE_SHIFT; - if (start_pfn < end_pfn) { - nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0); - pos = end_pfn << PAGE_SHIFT; - } - - /* big page (2M) range */ - start_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT) - << (PMD_SHIFT - PAGE_SHIFT); -#ifdef CONFIG_X86_32 - end_pfn = (end>>PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT); -#else /* CONFIG_X86_64 */ - end_pfn = ((pos + (PUD_SIZE - 1))>>PUD_SHIFT) - << (PUD_SHIFT - PAGE_SHIFT); - if (end_pfn > ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT))) - end_pfn = ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT)); -#endif - - if (start_pfn < end_pfn) { - nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, - page_size_mask & (1<>PUD_SHIFT) - << (PUD_SHIFT - PAGE_SHIFT); - end_pfn = (end >> PUD_SHIFT) << (PUD_SHIFT - PAGE_SHIFT); - if (start_pfn < end_pfn) { - nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, - page_size_mask & - ((1<>PMD_SHIFT) - << (PMD_SHIFT - PAGE_SHIFT); - end_pfn = (end >> PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT); - if (start_pfn < end_pfn) { - nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, - page_size_mask & (1<>PAGE_SHIFT; - end_pfn = end>>PAGE_SHIFT; - nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0); - - /* try to merge same page size and continuous */ - for (i = 0; nr_range > 1 && i < nr_range - 1; i++) { - unsigned long old_start; - if (mr[i].end != mr[i+1].start || - mr[i].page_size_mask != mr[i+1].page_size_mask) - continue; - /* move it */ - old_start = mr[i].start; - memmove(&mr[i], &mr[i+1], - (nr_range - 1 - i) * sizeof(struct map_range)); - mr[i--].start = old_start; - nr_range--; - } - - for (i = 0; i < nr_range; i++) - printk(KERN_DEBUG " %010lx - %010lx page %s\n", - mr[i].start, mr[i].end, - (mr[i].page_size_mask & (1< e820_table_start) - reserve_early(e820_table_start << PAGE_SHIFT, - e820_table_end << PAGE_SHIFT, "PGTABLE"); - - if (!after_bootmem) - early_memtest(start, end); - - return ret >> PAGE_SHIFT; -} - - -/* - * devmem_is_allowed() checks to see if /dev/mem access to a certain address - * is valid. The argument is a physical page number. 
- * - * - * On x86, access has to be given to the first megabyte of ram because that area - * contains bios code and data regions used by X and dosemu and similar apps. - * Access has to be given to non-kernel-ram areas as well, these contain the PCI - * mmio resources as well as potential bios/acpi data regions. - */ -int devmem_is_allowed(unsigned long pagenr) -{ - if (pagenr <= 256) - return 1; - if (iomem_is_exclusive(pagenr << PAGE_SHIFT)) - return 0; - if (!page_is_ram(pagenr)) - return 1; - return 0; -} void free_init_pages(char *what, unsigned long begin, unsigned long end) { @@ -384,10 +47,3 @@ void free_initmem(void) (unsigned long)(&__init_begin), (unsigned long)(&__init_end)); } - -#ifdef CONFIG_BLK_DEV_INITRD -void free_initrd_mem(unsigned long start, unsigned long end) -{ - free_init_pages("initrd memory", start, end); -} -#endif diff --git a/trunk/arch/x86/mm/init_32.c b/trunk/arch/x86/mm/init_32.c index db81e9a8556b..47df0e1bbeb9 100644 --- a/trunk/arch/x86/mm/init_32.c +++ b/trunk/arch/x86/mm/init_32.c @@ -49,7 +49,6 @@ #include #include #include -#include unsigned long max_low_pfn_mapped; unsigned long max_pfn_mapped; @@ -59,14 +58,19 @@ unsigned long highstart_pfn, highend_pfn; static noinline int do_test_wp_bit(void); -bool __read_mostly __vmalloc_start_set = false; + +static unsigned long __initdata table_start; +static unsigned long __meminitdata table_end; +static unsigned long __meminitdata table_top; + +static int __initdata after_init_bootmem; static __init void *alloc_low_page(void) { - unsigned long pfn = e820_table_end++; + unsigned long pfn = table_end++; void *adr; - if (pfn >= e820_table_top) + if (pfn >= table_top) panic("alloc_low_page: ran out of memory"); adr = __va(pfn * PAGE_SIZE); @@ -86,7 +90,7 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd) #ifdef CONFIG_X86_PAE if (!(pgd_val(*pgd) & _PAGE_PRESENT)) { - if (after_bootmem) + if (after_init_bootmem) pmd_table = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE); else pmd_table = (pmd_t *)alloc_low_page(); @@ -113,7 +117,7 @@ static pte_t * __init one_page_table_init(pmd_t *pmd) if (!(pmd_val(*pmd) & _PAGE_PRESENT)) { pte_t *page_table = NULL; - if (after_bootmem) { + if (after_init_bootmem) { #ifdef CONFIG_DEBUG_PAGEALLOC page_table = (pte_t *) alloc_bootmem_pages(PAGE_SIZE); #endif @@ -164,12 +168,12 @@ static pte_t *__init page_table_kmap_check(pte_t *pte, pmd_t *pmd, if (pmd_idx_kmap_begin != pmd_idx_kmap_end && (vaddr >> PMD_SHIFT) >= pmd_idx_kmap_begin && (vaddr >> PMD_SHIFT) <= pmd_idx_kmap_end - && ((__pa(pte) >> PAGE_SHIFT) < e820_table_start - || (__pa(pte) >> PAGE_SHIFT) >= e820_table_end)) { + && ((__pa(pte) >> PAGE_SHIFT) < table_start + || (__pa(pte) >> PAGE_SHIFT) >= table_end)) { pte_t *newpte; int i; - BUG_ON(after_bootmem); + BUG_ON(after_init_bootmem); newpte = alloc_low_page(); for (i = 0; i < PTRS_PER_PTE; i++) set_pte(newpte + i, pte[i]); @@ -238,14 +242,11 @@ static inline int is_kernel_text(unsigned long addr) * of max_low_pfn pages, by creating page tables starting from address * PAGE_OFFSET: */ -unsigned long __init -kernel_physical_mapping_init(unsigned long start, - unsigned long end, - unsigned long page_size_mask) +static void __init kernel_physical_mapping_init(pgd_t *pgd_base, + unsigned long start_pfn, + unsigned long end_pfn, + int use_pse) { - int use_pse = page_size_mask == (1<> PAGE_SHIFT; - end_pfn = end >> PAGE_SHIFT; - /* * First iteration will setup identity mapping using large/small pages * based on use_pse, with other attributes same as set by @@ -371,6 
+369,26 @@ kernel_physical_mapping_init(unsigned long start, mapping_iter = 2; goto repeat; } +} + +/* + * devmem_is_allowed() checks to see if /dev/mem access to a certain address + * is valid. The argument is a physical page number. + * + * + * On x86, access has to be given to the first megabyte of ram because that area + * contains bios code and data regions used by X and dosemu and similar apps. + * Access has to be given to non-kernel-ram areas as well, these contain the PCI + * mmio resources as well as potential bios/acpi data regions. + */ +int devmem_is_allowed(unsigned long pagenr) +{ + if (pagenr <= 256) + return 1; + if (iomem_is_exclusive(pagenr << PAGE_SHIFT)) + return 0; + if (!page_is_ram(pagenr)) + return 1; return 0; } @@ -527,9 +545,8 @@ void __init native_pagetable_setup_done(pgd_t *base) * be partially populated, and so it avoids stomping on any existing * mappings. */ -void __init early_ioremap_page_table_range_init(void) +static void __init early_ioremap_page_table_range_init(pgd_t *pgd_base) { - pgd_t *pgd_base = swapper_pg_dir; unsigned long vaddr, end; /* @@ -624,7 +641,7 @@ static int __init noexec_setup(char *str) } early_param("noexec", noexec_setup); -void __init set_nx(void) +static void __init set_nx(void) { unsigned int v[4], l, h; @@ -776,8 +793,6 @@ void __init initmem_init(unsigned long start_pfn, #ifdef CONFIG_FLATMEM max_mapnr = num_physpages; #endif - __vmalloc_start_set = true; - printk(KERN_NOTICE "%ldMB LOWMEM available.\n", pages_to_mb(max_low_pfn)); @@ -799,66 +814,176 @@ static void __init zone_sizes_init(void) free_area_init_nodes(max_zone_pfns); } -static unsigned long __init setup_node_bootmem(int nodeid, - unsigned long start_pfn, - unsigned long end_pfn, - unsigned long bootmap) -{ - unsigned long bootmap_size; - - /* don't touch min_low_pfn */ - bootmap_size = init_bootmem_node(NODE_DATA(nodeid), - bootmap >> PAGE_SHIFT, - start_pfn, end_pfn); - printk(KERN_INFO " node %d low ram: %08lx - %08lx\n", - nodeid, start_pfn<> PAGE_SHIFT, + min_low_pfn, max_low_pfn); printk(KERN_INFO " mapped low ram: 0 - %08lx\n", max_pfn_mapped< max_low_pfn) - continue; - if (end_pfn > max_low_pfn) - end_pfn = max_low_pfn; + puds = (end + PUD_SIZE - 1) >> PUD_SHIFT; + tables = roundup(puds * sizeof(pud_t), PAGE_SIZE); + + pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT; + tables += roundup(pmds * sizeof(pmd_t), PAGE_SIZE); + + if (use_pse) { + unsigned long extra; + + extra = end - ((end>>PMD_SHIFT) << PMD_SHIFT); + extra += PMD_SIZE; + ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT; + } else + ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT; + + tables += roundup(ptes * sizeof(pte_t), PAGE_SIZE); + + /* for fixmap */ + tables += roundup(__end_of_fixed_addresses * sizeof(pte_t), PAGE_SIZE); + + /* + * RED-PEN putting page tables only on node 0 could + * cause a hotspot and fill up ZONE_DMA. The page tables + * need roughly 0.5KB per GB. + */ + start = 0x7000; + table_start = find_e820_area(start, max_pfn_mapped<<PAGE_SHIFT, + tables, PAGE_SIZE); + if (table_start == -1UL) + panic("Cannot find space for the kernel page tables"); + + table_start >>= PAGE_SHIFT; + table_end = table_start; + table_top = table_start + (tables>>PAGE_SHIFT); + + printk(KERN_DEBUG "kernel direct mapping tables up to %lx @ %lx-%lx\n", + end, table_start << PAGE_SHIFT, + (table_start << PAGE_SHIFT) + tables); +} + +unsigned long __init_refok init_memory_mapping(unsigned long start, + unsigned long end) +{ + pgd_t *pgd_base = swapper_pg_dir; + unsigned long start_pfn, end_pfn; + unsigned long big_page_start; +#ifdef CONFIG_DEBUG_PAGEALLOC + /* + * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages. 
+ * This will simplify cpa(), which otherwise needs to support splitting + * large pages into small in interrupt context, etc. + */ + int use_pse = 0; #else - start_pfn = 0; - end_pfn = max_low_pfn; + int use_pse = cpu_has_pse; #endif - bootmap = setup_node_bootmem(nodeid, start_pfn, end_pfn, - bootmap); + + /* + * Find space for the kernel direct mapping tables. + */ + if (!after_init_bootmem) + find_early_table_space(end, use_pse); + +#ifdef CONFIG_X86_PAE + set_nx(); + if (nx_enabled) + printk(KERN_INFO "NX (Execute Disable) protection: active\n"); +#endif + + /* Enable PSE if available */ + if (cpu_has_pse) + set_in_cr4(X86_CR4_PSE); + + /* Enable PGE if available */ + if (cpu_has_pge) { + set_in_cr4(X86_CR4_PGE); + __supported_pte_mask |= _PAGE_GLOBAL; + } + + /* + * Don't use a large page for the first 2/4MB of memory + * because there are often fixed size MTRRs in there + * and overlapping MTRRs into large pages can cause + * slowdowns. + */ + big_page_start = PMD_SIZE; + + if (start < big_page_start) { + start_pfn = start >> PAGE_SHIFT; + end_pfn = min(big_page_start>>PAGE_SHIFT, end>>PAGE_SHIFT); + } else { + /* head is not big page alignment ? */ + start_pfn = start >> PAGE_SHIFT; + end_pfn = ((start + (PMD_SIZE - 1))>>PMD_SHIFT) + << (PMD_SHIFT - PAGE_SHIFT); } + if (start_pfn < end_pfn) + kernel_physical_mapping_init(pgd_base, start_pfn, end_pfn, 0); + + /* big page range */ + start_pfn = ((start + (PMD_SIZE - 1))>>PMD_SHIFT) + << (PMD_SHIFT - PAGE_SHIFT); + if (start_pfn < (big_page_start >> PAGE_SHIFT)) + start_pfn = big_page_start >> PAGE_SHIFT; + end_pfn = (end>>PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT); + if (start_pfn < end_pfn) + kernel_physical_mapping_init(pgd_base, start_pfn, end_pfn, + use_pse); + + /* tail is not big page alignment ? */ + start_pfn = end_pfn; + if (start_pfn > (big_page_start>>PAGE_SHIFT)) { + end_pfn = end >> PAGE_SHIFT; + if (start_pfn < end_pfn) + kernel_physical_mapping_init(pgd_base, start_pfn, + end_pfn, 0); + } + + early_ioremap_page_table_range_init(pgd_base); - after_bootmem = 1; + load_cr3(swapper_pg_dir); + + __flush_tlb_all(); + + if (!after_init_bootmem) + reserve_early(table_start << PAGE_SHIFT, + table_end << PAGE_SHIFT, "PGTABLE"); + + if (!after_init_bootmem) + early_memtest(start, end); + + return end >> PAGE_SHIFT; } + /* * paging_init() sets up the page tables - note that the first 8MB are * already mapped by head.S. @@ -1092,6 +1217,13 @@ void mark_rodata_ro(void) } #endif +#ifdef CONFIG_BLK_DEV_INITRD +void free_initrd_mem(unsigned long start, unsigned long end) +{ + free_init_pages("initrd memory", start, end); +} +#endif + int __init reserve_bootmem_generic(unsigned long phys, unsigned long len, int flags) { diff --git a/trunk/arch/x86/mm/init_64.c b/trunk/arch/x86/mm/init_64.c index 54efa57d1c03..07f44d491df1 100644 --- a/trunk/arch/x86/mm/init_64.c +++ b/trunk/arch/x86/mm/init_64.c @@ -48,7 +48,6 @@ #include #include #include -#include /* * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries. @@ -62,6 +61,12 @@ static unsigned long dma_reserve __initdata; DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); +int direct_gbpages +#ifdef CONFIG_DIRECT_GBPAGES + = 1 +#endif +; + static int __init parse_direct_gbpages_off(char *arg) { direct_gbpages = 0; @@ -82,10 +87,12 @@ early_param("gbpages", parse_direct_gbpages_on); * around without checking the pgd every time. 
*/ +int after_bootmem; + pteval_t __supported_pte_mask __read_mostly = ~_PAGE_IOMAP; EXPORT_SYMBOL_GPL(__supported_pte_mask); -static int disable_nx __cpuinitdata; +static int do_not_nx __cpuinitdata; /* * noexec=on|off @@ -100,9 +107,9 @@ static int __init nonx_setup(char *str) return -EINVAL; if (!strncmp(str, "on", 2)) { __supported_pte_mask |= _PAGE_NX; - disable_nx = 0; + do_not_nx = 0; } else if (!strncmp(str, "off", 3)) { - disable_nx = 1; + do_not_nx = 1; __supported_pte_mask &= ~_PAGE_NX; } return 0; @@ -114,7 +121,7 @@ void __cpuinit check_efer(void) unsigned long efer; rdmsrl(MSR_EFER, efer); - if (!(efer & EFER_NX) || disable_nx) + if (!(efer & EFER_NX) || do_not_nx) __supported_pte_mask &= ~_PAGE_NX; } @@ -318,9 +325,13 @@ void __init cleanup_highmap(void) } } +static unsigned long __initdata table_start; +static unsigned long __meminitdata table_end; +static unsigned long __meminitdata table_top; + static __ref void *alloc_low_page(unsigned long *phys) { - unsigned long pfn = e820_table_end++; + unsigned long pfn = table_end++; void *adr; if (after_bootmem) { @@ -330,7 +341,7 @@ static __ref void *alloc_low_page(unsigned long *phys) return adr; } - if (pfn >= e820_table_top) + if (pfn >= table_top) panic("alloc_low_page: ran out of memory"); adr = early_memremap(pfn * PAGE_SIZE, PAGE_SIZE); @@ -570,10 +581,58 @@ phys_pud_update(pgd_t *pgd, unsigned long addr, unsigned long end, return phys_pud_init(pud, addr, end, page_size_mask); } -unsigned long __init -kernel_physical_mapping_init(unsigned long start, - unsigned long end, - unsigned long page_size_mask) +static void __init find_early_table_space(unsigned long end, int use_pse, + int use_gbpages) +{ + unsigned long puds, pmds, ptes, tables, start; + + puds = (end + PUD_SIZE - 1) >> PUD_SHIFT; + tables = roundup(puds * sizeof(pud_t), PAGE_SIZE); + if (use_gbpages) { + unsigned long extra; + extra = end - ((end>>PUD_SHIFT) << PUD_SHIFT); + pmds = (extra + PMD_SIZE - 1) >> PMD_SHIFT; + } else + pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT; + tables += roundup(pmds * sizeof(pmd_t), PAGE_SIZE); + + if (use_pse) { + unsigned long extra; + extra = end - ((end>>PMD_SHIFT) << PMD_SHIFT); + ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT; + } else + ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT; + tables += roundup(ptes * sizeof(pte_t), PAGE_SIZE); + + /* + * RED-PEN putting page tables only on node 0 could + * cause a hotspot and fill up ZONE_DMA. The page tables + * need roughly 0.5KB per GB. 
+ */ + start = 0x8000; + table_start = find_e820_area(start, end, tables, PAGE_SIZE); + if (table_start == -1UL) + panic("Cannot find space for the kernel page tables"); + + table_start >>= PAGE_SHIFT; + table_end = table_start; + table_top = table_start + (tables >> PAGE_SHIFT); + + printk(KERN_DEBUG "kernel direct mapping tables up to %lx @ %lx-%lx\n", + end, table_start << PAGE_SHIFT, table_top << PAGE_SHIFT); +} + +static void __init init_gbpages(void) +{ + if (direct_gbpages && cpu_has_gbpages) + printk(KERN_INFO "Using GB pages for direct mapping\n"); + else + direct_gbpages = 0; +} + +static unsigned long __meminit kernel_physical_mapping_init(unsigned long start, + unsigned long end, + unsigned long page_size_mask) { unsigned long next, last_map_addr = end; @@ -610,6 +669,176 @@ kernel_physical_mapping_init(unsigned long start, return last_map_addr; } +struct map_range { + unsigned long start; + unsigned long end; + unsigned page_size_mask; +}; + +#define NR_RANGE_MR 5 + +static int save_mr(struct map_range *mr, int nr_range, + unsigned long start_pfn, unsigned long end_pfn, + unsigned long page_size_mask) +{ + + if (start_pfn < end_pfn) { + if (nr_range >= NR_RANGE_MR) + panic("run out of range for init_memory_mapping\n"); + mr[nr_range].start = start_pfn<> PAGE_SHIFT; + pos = start_pfn << PAGE_SHIFT; + end_pfn = ((pos + (PMD_SIZE - 1)) >> PMD_SHIFT) + << (PMD_SHIFT - PAGE_SHIFT); + if (end_pfn > (end >> PAGE_SHIFT)) + end_pfn = end >> PAGE_SHIFT; + if (start_pfn < end_pfn) { + nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0); + pos = end_pfn << PAGE_SHIFT; + } + + /* big page (2M) range*/ + start_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT) + << (PMD_SHIFT - PAGE_SHIFT); + end_pfn = ((pos + (PUD_SIZE - 1))>>PUD_SHIFT) + << (PUD_SHIFT - PAGE_SHIFT); + if (end_pfn > ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT))) + end_pfn = ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT)); + if (start_pfn < end_pfn) { + nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, + page_size_mask & (1<>PUD_SHIFT) + << (PUD_SHIFT - PAGE_SHIFT); + end_pfn = (end >> PUD_SHIFT) << (PUD_SHIFT - PAGE_SHIFT); + if (start_pfn < end_pfn) { + nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, + page_size_mask & + ((1<>PMD_SHIFT) + << (PMD_SHIFT - PAGE_SHIFT); + end_pfn = (end >> PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT); + if (start_pfn < end_pfn) { + nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, + page_size_mask & (1<>PAGE_SHIFT; + end_pfn = end>>PAGE_SHIFT; + nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0); + + /* try to merge same page size and continuous */ + for (i = 0; nr_range > 1 && i < nr_range - 1; i++) { + unsigned long old_start; + if (mr[i].end != mr[i+1].start || + mr[i].page_size_mask != mr[i+1].page_size_mask) + continue; + /* move it */ + old_start = mr[i].start; + memmove(&mr[i], &mr[i+1], + (nr_range - 1 - i) * sizeof (struct map_range)); + mr[i--].start = old_start; + nr_range--; + } + + for (i = 0; i < nr_range; i++) + printk(KERN_DEBUG " %010lx - %010lx page %s\n", + mr[i].start, mr[i].end, + (mr[i].page_size_mask & (1< table_start) + reserve_early(table_start << PAGE_SHIFT, + table_end << PAGE_SHIFT, "PGTABLE"); + + printk(KERN_INFO "last_map_addr: %lx end: %lx\n", + last_map_addr, end); + + if (!after_bootmem) + early_memtest(start, end); + + return last_map_addr >> PAGE_SHIFT; +} + #ifndef CONFIG_NUMA void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn) { @@ -681,6 +910,28 @@ EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); #endif /* CONFIG_MEMORY_HOTPLUG */ 
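The find_early_table_space() hunks above (and their init_32.c counterpart earlier) size a bootstrap pool for the direct-mapping page tables before any memory allocator exists: one pud_t per 1GB region, one pmd_t per 2MB region, and, when a range is mapped with 4KB pages, one pte_t per page. The standalone C sketch below reproduces that arithmetic in simplified form; the shift constants and the 8-byte entry size are x86_64 assumptions, and the PTE accounting is deliberately cruder than the kernel's "extra" computation, so this is an illustration of the sizing idea, not a copy of the kernel code.

/*
 * Simplified sketch of the worst-case page-table sizing shown in the
 * find_early_table_space() hunks.  Assumes x86_64 constants and 8-byte
 * table entries; illustrative only.
 */
#include <stdio.h>

#define PAGE_SHIFT	12
#define PMD_SHIFT	21
#define PUD_SHIFT	30
#define PAGE_SIZE	(1UL << PAGE_SHIFT)
#define PMD_SIZE	(1UL << PMD_SHIFT)
#define PUD_SIZE	(1UL << PUD_SHIFT)
#define ENTRY_SIZE	8UL	/* sizeof(pud_t/pmd_t/pte_t) on x86_64 */

static unsigned long roundup_page(unsigned long x)
{
	return (x + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1);
}

/* Bytes of page-table memory needed to direct-map [0, end). */
static unsigned long table_space(unsigned long end, int use_pse)
{
	unsigned long puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
	unsigned long pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
	unsigned long ptes = use_pse ? 0 : (end + PAGE_SIZE - 1) >> PAGE_SHIFT;

	return roundup_page(puds * ENTRY_SIZE) +
	       roundup_page(pmds * ENTRY_SIZE) +
	       roundup_page(ptes * ENTRY_SIZE);
}

int main(void)
{
	unsigned long end = 4UL << 30;	/* direct-map the first 4GB */

	printf("2MB pages: %lu KB of page tables\n",
	       table_space(end, 1) >> 10);
	printf("4KB pages: %lu KB of page tables\n",
	       table_space(end, 0) >> 10);
	return 0;
}

For 4GB this prints about 20 KB of tables with 2MB pages versus roughly 8 MB with 4KB pages, which is why the direct mapping prefers large pages wherever MTRR layout allows.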
+/* + * devmem_is_allowed() checks to see if /dev/mem access to a certain address + * is valid. The argument is a physical page number. + * + * + * On x86, access has to be given to the first megabyte of ram because that area + * contains bios code and data regions used by X and dosemu and similar apps. + * Access has to be given to non-kernel-ram areas as well, these contain the PCI + * mmio resources as well as potential bios/acpi data regions. + */ +int devmem_is_allowed(unsigned long pagenr) +{ + if (pagenr <= 256) + return 1; + if (iomem_is_exclusive(pagenr << PAGE_SHIFT)) + return 0; + if (!page_is_ram(pagenr)) + return 1; + return 0; +} + + static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, kcore_modules, kcore_vsyscall; @@ -768,6 +1019,13 @@ void mark_rodata_ro(void) #endif +#ifdef CONFIG_BLK_DEV_INITRD +void free_initrd_mem(unsigned long start, unsigned long end) +{ + free_init_pages("initrd memory", start, end); +} +#endif + int __init reserve_bootmem_generic(unsigned long phys, unsigned long len, int flags) { diff --git a/trunk/arch/x86/mm/iomap_32.c b/trunk/arch/x86/mm/iomap_32.c index 592984e5496b..04102d42ff42 100644 --- a/trunk/arch/x86/mm/iomap_32.c +++ b/trunk/arch/x86/mm/iomap_32.c @@ -18,7 +18,6 @@ #include #include -#include #include int is_io_mapping_possible(resource_size_t base, unsigned long size) @@ -37,6 +36,11 @@ EXPORT_SYMBOL_GPL(is_io_mapping_possible); void * iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot) { + enum fixed_addresses idx; + unsigned long vaddr; + + pagefault_disable(); + /* * For non-PAT systems, promote PAGE_KERNEL_WC to PAGE_KERNEL_UC_MINUS. * PAGE_KERNEL_WC maps to PWT, which translates to uncached if the @@ -46,7 +50,12 @@ iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot) if (!pat_enabled && pgprot_val(prot) == pgprot_val(PAGE_KERNEL_WC)) prot = PAGE_KERNEL_UC_MINUS; - return kmap_atomic_prot_pfn(pfn, type, prot); + idx = type + KM_TYPE_NR*smp_processor_id(); + vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); + set_pte(kmap_pte-idx, pfn_pte(pfn, prot)); + arch_flush_lazy_mmu_mode(); + + return (void*) vaddr; } EXPORT_SYMBOL_GPL(iomap_atomic_prot_pfn); diff --git a/trunk/arch/x86/mm/ioremap.c b/trunk/arch/x86/mm/ioremap.c index aca924a30ee6..433f7bd4648a 100644 --- a/trunk/arch/x86/mm/ioremap.c +++ b/trunk/arch/x86/mm/ioremap.c @@ -38,7 +38,8 @@ unsigned long __phys_addr(unsigned long x) } else { VIRTUAL_BUG_ON(x < PAGE_OFFSET); x -= PAGE_OFFSET; - VIRTUAL_BUG_ON(!phys_addr_valid(x)); + VIRTUAL_BUG_ON(system_state == SYSTEM_BOOTING ? x > MAXMEM : + !phys_addr_valid(x)); } return x; } @@ -55,8 +56,10 @@ bool __virt_addr_valid(unsigned long x) if (x < PAGE_OFFSET) return false; x -= PAGE_OFFSET; - if (!phys_addr_valid(x)) + if (system_state == SYSTEM_BOOTING ? 
+ x > MAXMEM : !phys_addr_valid(x)) { return false; + } } return pfn_valid(x >> PAGE_SHIFT); @@ -73,9 +76,10 @@ static inline int phys_addr_valid(unsigned long addr) #ifdef CONFIG_DEBUG_VIRTUAL unsigned long __phys_addr(unsigned long x) { - /* VMALLOC_* aren't constants */ + /* VMALLOC_* aren't constants; not available at the boot time */ VIRTUAL_BUG_ON(x < PAGE_OFFSET); - VIRTUAL_BUG_ON(__vmalloc_start_set && is_vmalloc_addr((void *) x)); + VIRTUAL_BUG_ON(system_state != SYSTEM_BOOTING && + is_vmalloc_addr((void *) x)); return x - PAGE_OFFSET; } EXPORT_SYMBOL(__phys_addr); @@ -85,9 +89,7 @@ bool __virt_addr_valid(unsigned long x) { if (x < PAGE_OFFSET) return false; - if (__vmalloc_start_set && is_vmalloc_addr((void *) x)) - return false; - if (x >= FIXADDR_START) + if (system_state != SYSTEM_BOOTING && is_vmalloc_addr((void *) x)) return false; return pfn_valid((x - PAGE_OFFSET) >> PAGE_SHIFT); } @@ -506,19 +508,13 @@ static inline pte_t * __init early_ioremap_pte(unsigned long addr) return &bm_pte[pte_index(addr)]; } -static unsigned long slot_virt[FIX_BTMAPS_SLOTS] __initdata; - void __init early_ioremap_init(void) { pmd_t *pmd; - int i; if (early_ioremap_debug) printk(KERN_INFO "early_ioremap_init()\n"); - for (i = 0; i < FIX_BTMAPS_SLOTS; i++) - slot_virt[i] = fix_to_virt(FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*i); - pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)); memset(bm_pte, 0, sizeof(bm_pte)); pmd_populate_kernel(&init_mm, pmd, bm_pte); @@ -585,7 +581,6 @@ static inline void __init early_clear_fixmap(enum fixed_addresses idx) static void __iomem *prev_map[FIX_BTMAPS_SLOTS] __initdata; static unsigned long prev_size[FIX_BTMAPS_SLOTS] __initdata; - static int __init check_early_ioremap_leak(void) { int count = 0; @@ -607,8 +602,7 @@ static int __init check_early_ioremap_leak(void) } late_initcall(check_early_ioremap_leak); -static void __init __iomem * -__early_ioremap(unsigned long phys_addr, unsigned long size, pgprot_t prot) +static void __init __iomem *__early_ioremap(unsigned long phys_addr, unsigned long size, pgprot_t prot) { unsigned long offset, last_addr; unsigned int nrpages; @@ -674,9 +668,9 @@ __early_ioremap(unsigned long phys_addr, unsigned long size, pgprot_t prot) --nrpages; } if (early_ioremap_debug) - printk(KERN_CONT "%08lx + %08lx\n", offset, slot_virt[slot]); + printk(KERN_CONT "%08lx + %08lx\n", offset, fix_to_virt(idx0)); - prev_map[slot] = (void __iomem *)(offset + slot_virt[slot]); + prev_map[slot] = (void __iomem *)(offset + fix_to_virt(idx0)); return prev_map[slot]; } @@ -744,3 +738,8 @@ void __init early_iounmap(void __iomem *addr, unsigned long size) } prev_map[slot] = NULL; } + +void __this_fixmap_does_not_exist(void) +{ + WARN_ON(1); +} diff --git a/trunk/arch/x86/mm/kmmio.c b/trunk/arch/x86/mm/kmmio.c index 4f115e00486b..6a518dd08a36 100644 --- a/trunk/arch/x86/mm/kmmio.c +++ b/trunk/arch/x86/mm/kmmio.c @@ -310,7 +310,7 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs) struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx); if (!ctx->active) { - pr_debug("kmmio: spurious debug trap on CPU %d.\n", + pr_warning("kmmio: spurious debug trap on CPU %d.\n", smp_processor_id()); goto out; } diff --git a/trunk/arch/x86/mm/memtest.c b/trunk/arch/x86/mm/memtest.c index 605c8be06217..0bcd7883d036 100644 --- a/trunk/arch/x86/mm/memtest.c +++ b/trunk/arch/x86/mm/memtest.c @@ -100,9 +100,6 @@ static int __init parse_memtest(char *arg) { if (arg) memtest_pattern = simple_strtoul(arg, NULL, 0); - else - memtest_pattern = 
ARRAY_SIZE(patterns); - return 0; } diff --git a/trunk/arch/x86/mm/numa_32.c b/trunk/arch/x86/mm/numa_32.c index 3daefa04ace5..451fe95a0352 100644 --- a/trunk/arch/x86/mm/numa_32.c +++ b/trunk/arch/x86/mm/numa_32.c @@ -416,11 +416,10 @@ void __init initmem_init(unsigned long start_pfn, for_each_online_node(nid) propagate_e820_map_node(nid); - for_each_online_node(nid) { + for_each_online_node(nid) memset(NODE_DATA(nid), 0, sizeof(struct pglist_data)); - NODE_DATA(nid)->bdata = &bootmem_node_data[nid]; - } + NODE_DATA(0)->bdata = &bootmem_node_data[0]; setup_bootmem_allocator(); } diff --git a/trunk/arch/x86/mm/pageattr.c b/trunk/arch/x86/mm/pageattr.c index 9c4294986af7..8253bc97587e 100644 --- a/trunk/arch/x86/mm/pageattr.c +++ b/trunk/arch/x86/mm/pageattr.c @@ -522,17 +522,6 @@ static int split_large_page(pte_t *kpte, unsigned long address) * primary protection behavior: */ __set_pmd_pte(kpte, address, mk_pte(base, __pgprot(_KERNPG_TABLE))); - - /* - * Intel Atom errata AAH41 workaround. - * - * The real fix should be in hw or in a microcode update, but - * we also probabilistically try to reduce the window of having - * a large TLB mixed with 4K TLBs while instruction fetches are - * going on. - */ - __flush_tlb_all(); - base = NULL; out_unlock: diff --git a/trunk/drivers/char/hvcs.c b/trunk/drivers/char/hvcs.c index c76bccf5354d..6e6eb445d374 100644 --- a/trunk/drivers/char/hvcs.c +++ b/trunk/drivers/char/hvcs.c @@ -1139,6 +1139,15 @@ static int hvcs_open(struct tty_struct *tty, struct file *filp) hvcsd->tty = tty; tty->driver_data = hvcsd; + /* + * Set this driver to low latency so that we actually have a chance at + * catching a throttled TTY after we flip_buffer_push. Otherwise the + * flush_to_async may not execute until after the kernel_thread has + * yielded and resumed the next flip_buffer_push resulting in data + * loss. + */ + tty->low_latency = 1; + memset(&hvcsd->buffer[0], 0x00, HVCS_BUFF_LEN); /* diff --git a/trunk/drivers/char/hvsi.c b/trunk/drivers/char/hvsi.c index 2989056a9e39..406f8742a260 100644 --- a/trunk/drivers/char/hvsi.c +++ b/trunk/drivers/char/hvsi.c @@ -810,6 +810,7 @@ static int hvsi_open(struct tty_struct *tty, struct file *filp) hp = &hvsi_ports[line]; tty->driver_data = hp; + tty->low_latency = 1; /* avoid throttle/tty_flip_buffer_push race */ mb(); if (hp->state == HVSI_FSP_DIED) diff --git a/trunk/drivers/gpu/drm/i915/i915_dma.c b/trunk/drivers/gpu/drm/i915/i915_dma.c index 6d21b9e48b89..6dab63bdc4c1 100644 --- a/trunk/drivers/gpu/drm/i915/i915_dma.c +++ b/trunk/drivers/gpu/drm/i915/i915_dma.c @@ -1105,7 +1105,7 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) 1024 * 1024, MTRR_TYPE_WRCOMB, 1); if (dev_priv->mm.gtt_mtrr < 0) { - DRM_INFO("MTRR allocation failed. Graphics " + DRM_INFO("MTRR allocation failed\n. Graphics " "performance may suffer.\n"); } diff --git a/trunk/drivers/gpu/drm/i915/i915_drv.h b/trunk/drivers/gpu/drm/i915/i915_drv.h index d6cc9861e0a1..17fa40858d26 100644 --- a/trunk/drivers/gpu/drm/i915/i915_drv.h +++ b/trunk/drivers/gpu/drm/i915/i915_drv.h @@ -279,6 +279,7 @@ typedef struct drm_i915_private { u8 saveAR_INDEX; u8 saveAR[21]; u8 saveDACMASK; + u8 saveDACDATA[256*3]; /* 256 3-byte colors */ u8 saveCR[37]; struct { @@ -456,12 +457,6 @@ struct drm_i915_gem_object { /** for phy allocated objects */ struct drm_i915_gem_phys_object *phys_obj; - - /** - * Used for checking the object doesn't appear more than once - * in an execbuffer object list. 
- */ - int in_execbuffer; }; /** diff --git a/trunk/drivers/gpu/drm/i915/i915_gem.c b/trunk/drivers/gpu/drm/i915/i915_gem.c index 37427e4016cb..85685bfd12da 100644 --- a/trunk/drivers/gpu/drm/i915/i915_gem.c +++ b/trunk/drivers/gpu/drm/i915/i915_gem.c @@ -1476,7 +1476,7 @@ static void i915_write_fence_reg(struct drm_i915_fence_reg *reg) struct drm_i915_gem_object *obj_priv = obj->driver_private; int regnum = obj_priv->fence_reg; int tile_width; - uint32_t fence_reg, val; + uint32_t val; uint32_t pitch_val; if ((obj_priv->gtt_offset & ~I915_FENCE_START_MASK) || @@ -1503,11 +1503,7 @@ static void i915_write_fence_reg(struct drm_i915_fence_reg *reg) val |= pitch_val << I830_FENCE_PITCH_SHIFT; val |= I830_FENCE_REG_VALID; - if (regnum < 8) - fence_reg = FENCE_REG_830_0 + (regnum * 4); - else - fence_reg = FENCE_REG_945_8 + ((regnum - 8) * 4); - I915_WRITE(fence_reg, val); + I915_WRITE(FENCE_REG_830_0 + (regnum * 4), val); } static void i830_write_fence_reg(struct drm_i915_fence_reg *reg) @@ -1561,8 +1557,7 @@ i915_gem_object_get_fence_reg(struct drm_gem_object *obj, bool write) struct drm_i915_private *dev_priv = dev->dev_private; struct drm_i915_gem_object *obj_priv = obj->driver_private; struct drm_i915_fence_reg *reg = NULL; - struct drm_i915_gem_object *old_obj_priv = NULL; - int i, ret, avail; + int i, ret; switch (obj_priv->tiling_mode) { case I915_TILING_NONE: @@ -1585,46 +1580,25 @@ i915_gem_object_get_fence_reg(struct drm_gem_object *obj, bool write) } /* First try to find a free reg */ -try_again: - avail = 0; for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) { reg = &dev_priv->fence_regs[i]; if (!reg->obj) break; - - old_obj_priv = reg->obj->driver_private; - if (!old_obj_priv->pin_count) - avail++; } /* None available, try to steal one or wait for a user to finish */ if (i == dev_priv->num_fence_regs) { - uint32_t seqno = dev_priv->mm.next_gem_seqno; + struct drm_i915_gem_object *old_obj_priv = NULL; loff_t offset; - if (avail == 0) - return -ENOMEM; - +try_again: + /* Could try to use LRU here instead... */ for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) { - uint32_t this_seqno; - reg = &dev_priv->fence_regs[i]; old_obj_priv = reg->obj->driver_private; - - if (old_obj_priv->pin_count) - continue; - - /* i915 uses fences for GPU access to tiled buffers */ - if (IS_I965G(dev) || !old_obj_priv->active) + if (!old_obj_priv->pin_count) break; - - /* find the seqno of the first available fence */ - this_seqno = old_obj_priv->last_rendering_seqno; - if (this_seqno != 0 && - reg->obj->write_domain == 0 && - i915_seqno_passed(seqno, this_seqno)) - seqno = this_seqno; } /* @@ -1632,25 +1606,15 @@ i915_gem_object_get_fence_reg(struct drm_gem_object *obj, bool write) * objects to finish before trying again. */ if (i == dev_priv->num_fence_regs) { - if (seqno == dev_priv->mm.next_gem_seqno) { - i915_gem_flush(dev, - I915_GEM_GPU_DOMAINS, - I915_GEM_GPU_DOMAINS); - seqno = i915_add_request(dev, - I915_GEM_GPU_DOMAINS); - if (seqno == 0) - return -ENOMEM; - } - - ret = i915_wait_request(dev, seqno); - if (ret) + ret = i915_gem_object_set_to_gtt_domain(reg->obj, 0); + if (ret) { + WARN(ret != -ERESTARTSYS, + "switch to GTT domain failed: %d\n", ret); return ret; + } goto try_again; } - BUG_ON(old_obj_priv->active || - (reg->obj->write_domain & I915_GEM_GPU_DOMAINS)); - /* * Zap this virtual mapping so we can set up a fence again * for this object next time we need it. 
@@ -1691,17 +1655,8 @@ i915_gem_clear_fence_reg(struct drm_gem_object *obj) if (IS_I965G(dev)) I915_WRITE64(FENCE_REG_965_0 + (obj_priv->fence_reg * 8), 0); - else { - uint32_t fence_reg; - - if (obj_priv->fence_reg < 8) - fence_reg = FENCE_REG_830_0 + obj_priv->fence_reg * 4; - else - fence_reg = FENCE_REG_945_8 + (obj_priv->fence_reg - - 8) * 4; - - I915_WRITE(fence_reg, 0); - } + else + I915_WRITE(FENCE_REG_830_0 + (obj_priv->fence_reg * 4), 0); dev_priv->fence_regs[obj_priv->fence_reg].obj = NULL; obj_priv->fence_reg = I915_FENCE_REG_NONE; @@ -2514,7 +2469,6 @@ i915_gem_execbuffer(struct drm_device *dev, void *data, struct drm_i915_gem_exec_object *exec_list = NULL; struct drm_gem_object **object_list = NULL; struct drm_gem_object *batch_obj; - struct drm_i915_gem_object *obj_priv; int ret, i, pinned = 0; uint64_t exec_offset; uint32_t seqno, flush_domains; @@ -2579,15 +2533,6 @@ i915_gem_execbuffer(struct drm_device *dev, void *data, ret = -EBADF; goto err; } - - obj_priv = object_list[i]->driver_private; - if (obj_priv->in_execbuffer) { - DRM_ERROR("Object %p appears more than once in object list\n", - object_list[i]); - ret = -EBADF; - goto err; - } - obj_priv->in_execbuffer = true; } /* Pin and relocate */ @@ -2729,13 +2674,8 @@ i915_gem_execbuffer(struct drm_device *dev, void *data, for (i = 0; i < pinned; i++) i915_gem_object_unpin(object_list[i]); - for (i = 0; i < args->buffer_count; i++) { - if (object_list[i]) { - obj_priv = object_list[i]->driver_private; - obj_priv->in_execbuffer = false; - } + for (i = 0; i < args->buffer_count; i++) drm_gem_object_unreference(object_list[i]); - } mutex_unlock(&dev->struct_mutex); @@ -2772,24 +2712,17 @@ i915_gem_object_pin(struct drm_gem_object *obj, uint32_t alignment) ret = i915_gem_object_bind_to_gtt(obj, alignment); if (ret != 0) { if (ret != -EBUSY && ret != -ERESTARTSYS) - DRM_ERROR("Failure to bind: %d\n", ret); - return ret; - } - } - /* - * Pre-965 chips need a fence register set up in order to - * properly handle tiled surfaces. - */ - if (!IS_I965G(dev) && - obj_priv->fence_reg == I915_FENCE_REG_NONE && - obj_priv->tiling_mode != I915_TILING_NONE) { - ret = i915_gem_object_get_fence_reg(obj, true); - if (ret != 0) { - if (ret != -EBUSY && ret != -ERESTARTSYS) - DRM_ERROR("Failure to install fence: %d\n", - ret); + DRM_ERROR("Failure to bind: %d", ret); return ret; } + /* + * Pre-965 chips need a fence register set up in order to + * properly handle tiled surfaces. 
+ */ + if (!IS_I965G(dev) && + obj_priv->fence_reg == I915_FENCE_REG_NONE && + obj_priv->tiling_mode != I915_TILING_NONE) + i915_gem_object_get_fence_reg(obj, true); } obj_priv->pin_count++; diff --git a/trunk/drivers/gpu/drm/i915/i915_reg.h b/trunk/drivers/gpu/drm/i915/i915_reg.h index 90600d899413..9d6539a868b3 100644 --- a/trunk/drivers/gpu/drm/i915/i915_reg.h +++ b/trunk/drivers/gpu/drm/i915/i915_reg.h @@ -184,7 +184,6 @@ * Fence registers */ #define FENCE_REG_830_0 0x2000 -#define FENCE_REG_945_8 0x3000 #define I830_FENCE_START_MASK 0x07f80000 #define I830_FENCE_TILING_Y_SHIFT 12 #define I830_FENCE_SIZE_BITS(size) ((ffs((size) >> 19) - 1) << 8) diff --git a/trunk/drivers/gpu/drm/i915/i915_suspend.c b/trunk/drivers/gpu/drm/i915/i915_suspend.c index d669cc2b42c0..5d84027ee8f3 100644 --- a/trunk/drivers/gpu/drm/i915/i915_suspend.c +++ b/trunk/drivers/gpu/drm/i915/i915_suspend.c @@ -119,6 +119,11 @@ static void i915_save_vga(struct drm_device *dev) /* VGA color palette registers */ dev_priv->saveDACMASK = I915_READ8(VGA_DACMASK); + /* DACCRX automatically increments during read */ + I915_WRITE8(VGA_DACRX, 0); + /* Read 3 bytes of color data from each index */ + for (i = 0; i < 256 * 3; i++) + dev_priv->saveDACDATA[i] = I915_READ8(VGA_DACDATA); /* MSR bits */ dev_priv->saveMSR = I915_READ8(VGA_MSR_READ); @@ -220,6 +225,12 @@ static void i915_restore_vga(struct drm_device *dev) /* VGA color palette registers */ I915_WRITE8(VGA_DACMASK, dev_priv->saveDACMASK); + /* DACCRX automatically increments during read */ + I915_WRITE8(VGA_DACWX, 0); + /* Read 3 bytes of color data from each index */ + for (i = 0; i < 256 * 3; i++) + I915_WRITE8(VGA_DACDATA, dev_priv->saveDACDATA[i]); + } int i915_save_state(struct drm_device *dev) diff --git a/trunk/drivers/video/aty/aty128fb.c b/trunk/drivers/video/aty/aty128fb.c index 35e8eb02b9e9..2181ce4d7ebd 100644 --- a/trunk/drivers/video/aty/aty128fb.c +++ b/trunk/drivers/video/aty/aty128fb.c @@ -1853,14 +1853,13 @@ static void aty128_bl_exit(struct backlight_device *bd) * Initialisation */ -#ifdef CONFIG_PPC_PMAC__disabled +#ifdef CONFIG_PPC_PMAC static void aty128_early_resume(void *data) { struct aty128fb_par *par = data; if (try_acquire_console_sem()) return; - pci_restore_state(par->pdev); aty128_do_resume(par->pdev); release_console_sem(); } @@ -1908,14 +1907,7 @@ static int __devinit aty128_init(struct pci_dev *pdev, const struct pci_device_i /* Indicate sleep capability */ if (par->chip_gen == rage_M3) { pmac_call_feature(PMAC_FTR_DEVICE_CAN_WAKE, NULL, 0, 1); -#if 0 /* Disable the early video resume hack for now as it's causing problems, among - * others we now rely on the PCI core restoring the config space for us, which - * isn't the case with that hack, and that code path causes various things to - * be called with interrupts off while they shouldn't. 
I'm leaving the code in - * as it can be useful for debugging purposes - */ pmac_set_early_video_resume(aty128_early_resume, par); -#endif } /* Find default mode */ diff --git a/trunk/drivers/video/aty/radeon_pm.c b/trunk/drivers/video/aty/radeon_pm.c index 81603f85e17e..ca5f0dc28546 100644 --- a/trunk/drivers/video/aty/radeon_pm.c +++ b/trunk/drivers/video/aty/radeon_pm.c @@ -2762,13 +2762,12 @@ int radeonfb_pci_resume(struct pci_dev *pdev) return rc; } -#ifdef CONFIG_PPC_OF__disabled +#ifdef CONFIG_PPC_OF static void radeonfb_early_resume(void *data) { struct radeonfb_info *rinfo = data; rinfo->no_schedule = 1; - pci_restore_state(rinfo->pdev); radeonfb_pci_resume(rinfo->pdev); rinfo->no_schedule = 0; } @@ -2835,14 +2834,7 @@ void radeonfb_pm_init(struct radeonfb_info *rinfo, int dynclk, int ignore_devlis */ if (rinfo->pm_mode != radeon_pm_none) { pmac_call_feature(PMAC_FTR_DEVICE_CAN_WAKE, rinfo->of_node, 0, 1); -#if 0 /* Disable the early video resume hack for now as it's causing problems, among - * others we now rely on the PCI core restoring the config space for us, which - * isn't the case with that hack, and that code path causes various things to - * be called with interrupts off while they shouldn't. I'm leaving the code in - * as it can be useful for debugging purposes - */ pmac_set_early_video_resume(radeonfb_early_resume, rinfo); -#endif } #if 0 diff --git a/trunk/fs/fat/inode.c b/trunk/fs/fat/inode.c index de0004fe6e00..6b74d09adbe5 100644 --- a/trunk/fs/fat/inode.c +++ b/trunk/fs/fat/inode.c @@ -202,9 +202,9 @@ static sector_t _fat_bmap(struct address_space *mapping, sector_t block) sector_t blocknr; /* fat_get_cluster() assumes the requested blocknr isn't truncated. */ - down_read(&mapping->host->i_alloc_sem); + mutex_lock(&mapping->host->i_mutex); blocknr = generic_block_bmap(mapping, block, fat_get_block); - up_read(&mapping->host->i_alloc_sem); + mutex_unlock(&mapping->host->i_mutex); return blocknr; } diff --git a/trunk/fs/proc/page.c b/trunk/fs/proc/page.c index e9983837d08d..2d1345112a42 100644 --- a/trunk/fs/proc/page.c +++ b/trunk/fs/proc/page.c @@ -80,7 +80,7 @@ static const struct file_operations proc_kpagecount_operations = { #define KPF_RECLAIM 9 #define KPF_BUDDY 10 -#define kpf_copy_bit(flags, dstpos, srcpos) (((flags >> srcpos) & 1) << dstpos) +#define kpf_copy_bit(flags, srcpos, dstpos) (((flags >> srcpos) & 1) << dstpos) static ssize_t kpageflags_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) diff --git a/trunk/include/drm/drm_edid.h b/trunk/include/drm/drm_edid.h index a11cc9d32591..ff8d27af4786 100644 --- a/trunk/include/drm/drm_edid.h +++ b/trunk/include/drm/drm_edid.h @@ -69,8 +69,8 @@ struct detailed_pixel_timing { u8 hborder; u8 vborder; u8 unknown0:1; - u8 hsync_positive:1; u8 vsync_positive:1; + u8 hsync_positive:1; u8 separate_sync:2; u8 stereo:1; u8 unknown6:1;
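The closing drm_edid.h hunk swaps two adjacent 1-bit bitfields (hsync_positive and vsync_positive), which silently changes which bit of the raw EDID flag byte each name decodes: on the little-endian targets this struct is overlaid on EDID data, GCC allocates bitfields starting from the least significant bit, so declaration order is bit order. A minimal sketch of the effect follows; the struct and variable names are invented for illustration, not the DRM ones, and the layout assumes GCC-style LSB-first allocation on a little-endian machine.

/*
 * Demonstrates that bitfield declaration order decides bit assignment.
 * Assumes GCC-style LSB-first allocation on little-endian; illustrative
 * only, not DRM code.
 */
#include <stdio.h>
#include <string.h>

struct flags_a {		/* unknown0 -> bit 0, hsync -> bit 1, vsync -> bit 2 */
	unsigned char unknown0:1;
	unsigned char hsync_positive:1;
	unsigned char vsync_positive:1;
};

struct flags_b {		/* same byte with hsync and vsync swapped */
	unsigned char unknown0:1;
	unsigned char vsync_positive:1;
	unsigned char hsync_positive:1;
};

int main(void)
{
	unsigned char raw = 0x02;	/* only bit 1 of the flag byte set */
	struct flags_a a;
	struct flags_b b;

	memcpy(&a, &raw, 1);
	memcpy(&b, &raw, 1);
	/* The same raw byte decodes differently under the two layouts. */
	printf("layout a: hsync=%u vsync=%u\n", a.hsync_positive, a.vsync_positive);
	printf("layout b: hsync=%u vsync=%u\n", b.hsync_positive, b.vsync_positive);
	return 0;
}

Compiled with GCC on x86, the same 0x02 byte reports hsync=1/vsync=0 under the first layout and hsync=0/vsync=1 under the second, which is exactly the kind of decode change this hunk reverts.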