diff --git a/[refs] b/[refs] index 0dcf932b834a..8bf3a29ba7f8 100644 --- a/[refs] +++ b/[refs] @@ -1,2 +1,2 @@ --- -refs/heads/master: b9791184e7cc817ea5810405b49ead0b585e7e2d +refs/heads/master: 8d3ef7461c2460c1bc02c63279b638fae80bba37 diff --git a/trunk/Documentation/kernel-parameters.txt b/trunk/Documentation/kernel-parameters.txt index db2603ceabba..7086f0a90d14 100644 --- a/trunk/Documentation/kernel-parameters.txt +++ b/trunk/Documentation/kernel-parameters.txt @@ -164,6 +164,15 @@ running once the system is up. over-ride platform specific driver. See also Documentation/acpi-hotkey.txt. + enable_timer_pin_1 [i386,x86-64] + Enable PIN 1 of APIC timer + Can be useful to work around chipset bugs (in particular on some ATI chipsets) + The kernel tries to set a reasonable default. + + disable_timer_pin_1 [i386,x86-64] + Disable PIN 1 of APIC timer + Can be useful to work around chipset bugs. + ad1816= [HW,OSS] Format: ,,, See also Documentation/sound/oss/AD1816. diff --git a/trunk/Documentation/x86_64/boot-options.txt b/trunk/Documentation/x86_64/boot-options.txt index 678e8f192db2..ffe1c062088b 100644 --- a/trunk/Documentation/x86_64/boot-options.txt +++ b/trunk/Documentation/x86_64/boot-options.txt @@ -11,6 +11,11 @@ Machine check If your BIOS doesn't do that it's a good idea to enable though to make sure you log even machine check events that result in a reboot. + mce=tolerancelevel (number) + 0: always panic, 1: panic if deadlock possible, + 2: try to avoid panic, 3: never panic or exit (for testing) + default is 1 + Can be also set using sysfs which is preferable. nomce (for compatibility with i386): same as mce=off diff --git a/trunk/arch/i386/kernel/acpi/earlyquirk.c b/trunk/arch/i386/kernel/acpi/earlyquirk.c index f1b9d2a46dab..1ae2aeeda18b 100644 --- a/trunk/arch/i386/kernel/acpi/earlyquirk.c +++ b/trunk/arch/i386/kernel/acpi/earlyquirk.c @@ -7,6 +7,7 @@ #include #include #include +#include static int __init check_bridge(int vendor, int device) { @@ -15,6 +16,15 @@ static int __init check_bridge(int vendor, int device) if (vendor == PCI_VENDOR_ID_NVIDIA) { acpi_skip_timer_override = 1; } +#ifdef CONFIG_X86_LOCAL_APIC + /* + * ATI IXP chipsets get double timer interrupts. + * For now just do this for all ATI chipsets. + * FIXME: this needs to be checked for the non ACPI case too. + */ + if (vendor == PCI_VENDOR_ID_ATI) + disable_timer_pin_1 = 1; +#endif return 0; } diff --git a/trunk/arch/i386/kernel/io_apic.c b/trunk/arch/i386/kernel/io_apic.c index 35d3ce26a544..378313b0cce9 100644 --- a/trunk/arch/i386/kernel/io_apic.c +++ b/trunk/arch/i386/kernel/io_apic.c @@ -60,6 +60,8 @@ int sis_apic_bug = -1; */ int nr_ioapic_registers[MAX_IO_APICS]; +int disable_timer_pin_1 __initdata; + /* * Rough estimation of how many shared IRQs there are, can * be changed anytime. @@ -2211,6 +2213,8 @@ static inline void check_timer(void) setup_nmi(); enable_8259A_irq(0); } + if (disable_timer_pin_1 > 0) + clear_IO_APIC_pin(0, pin1); return; } clear_IO_APIC_pin(0, pin1); diff --git a/trunk/arch/i386/kernel/setup.c b/trunk/arch/i386/kernel/setup.c index f3d808451d25..dc39ca6a7eca 100644 --- a/trunk/arch/i386/kernel/setup.c +++ b/trunk/arch/i386/kernel/setup.c @@ -851,6 +851,11 @@ static void __init parse_cmdline_early (char ** cmdline_p) #endif #ifdef CONFIG_X86_LOCAL_APIC + if (!memcmp(from, "disable_timer_pin_1", 19)) + disable_timer_pin_1 = 1; + if (!memcmp(from, "enable_timer_pin_1", 18)) + disable_timer_pin_1 = -1; + /* disable IO-APIC */ else if (!memcmp(from, "noapic", 6)) disable_ioapic_setup(); diff --git a/trunk/arch/i386/kernel/srat.c b/trunk/arch/i386/kernel/srat.c index 7b3b27d64409..516bf5653b02 100644 --- a/trunk/arch/i386/kernel/srat.c +++ b/trunk/arch/i386/kernel/srat.c @@ -213,12 +213,18 @@ static __init void node_read_chunk(int nid, struct node_memory_chunk_s *memory_c node_end_pfn[nid] = memory_chunk->end_pfn; } +static u8 pxm_to_nid_map[MAX_PXM_DOMAINS];/* _PXM to logical node ID map */ + +int pxm_to_node(int pxm) +{ + return pxm_to_nid_map[pxm]; +} + /* Parse the ACPI Static Resource Affinity Table */ static int __init acpi20_parse_srat(struct acpi_table_srat *sratp) { u8 *start, *end, *p; int i, j, nid; - u8 pxm_to_nid_map[MAX_PXM_DOMAINS];/* _PXM to logical node ID map */ u8 nid_to_pxm_map[MAX_NUMNODES];/* logical node ID to _PXM map */ start = (u8 *)(&(sratp->reserved) + 1); /* skip header */ diff --git a/trunk/arch/i386/pci/acpi.c b/trunk/arch/i386/pci/acpi.c index 42913f43feb0..2941674f35eb 100644 --- a/trunk/arch/i386/pci/acpi.c +++ b/trunk/arch/i386/pci/acpi.c @@ -3,16 +3,31 @@ #include #include #include +#include #include "pci.h" struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int domain, int busnum) { + struct pci_bus *bus; + if (domain != 0) { printk(KERN_WARNING "PCI: Multiple domains not supported\n"); return NULL; } - return pcibios_scan_root(busnum); + bus = pcibios_scan_root(busnum); +#ifdef CONFIG_ACPI_NUMA + if (bus != NULL) { + int pxm = acpi_get_pxm(device->handle); + if (pxm >= 0) { + bus->sysdata = (void *)(unsigned long)pxm_to_node(pxm); + printk("bus %d -> pxm %d -> node %ld\n", + busnum, pxm, (long)(bus->sysdata)); + } + } +#endif + + return bus; } extern int pci_routeirq; diff --git a/trunk/arch/i386/pci/mmconfig.c b/trunk/arch/i386/pci/mmconfig.c index 60f0e7a1162a..dfbf80cff834 100644 --- a/trunk/arch/i386/pci/mmconfig.c +++ b/trunk/arch/i386/pci/mmconfig.c @@ -127,13 +127,6 @@ static int __init pci_mmcfg_init(void) (pci_mmcfg_config[0].base_address == 0)) goto out; - /* Kludge for now. Don't use mmconfig on AMD systems because - those have some busses where mmconfig doesn't work, - and we don't parse ACPI MCFG well enough to handle that. - Remove when proper handling is added. */ - if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) - goto out; - printk(KERN_INFO "PCI: Using MMCONFIG\n"); raw_pci_ops = &pci_mmcfg; pci_probe = (pci_probe & ~PCI_PROBE_MASK) | PCI_PROBE_MMCONF; diff --git a/trunk/arch/ia64/ia32/sys_ia32.c b/trunk/arch/ia64/ia32/sys_ia32.c index e29a8a55486a..3fa67ecebc83 100644 --- a/trunk/arch/ia64/ia32/sys_ia32.c +++ b/trunk/arch/ia64/ia32/sys_ia32.c @@ -2327,7 +2327,7 @@ sys32_sendfile (int out_fd, int in_fd, int __user *offset, unsigned int count) ret = sys_sendfile(out_fd, in_fd, offset ? (off_t __user *) &of : NULL, count); set_fs(old_fs); - if (!ret && offset && put_user(of, offset)) + if (offset && put_user(of, offset)) return -EFAULT; return ret; diff --git a/trunk/arch/s390/kernel/compat_linux.c b/trunk/arch/s390/kernel/compat_linux.c index 18610cea03a2..ed877d0f27e6 100644 --- a/trunk/arch/s390/kernel/compat_linux.c +++ b/trunk/arch/s390/kernel/compat_linux.c @@ -678,7 +678,7 @@ asmlinkage long sys32_sendfile(int out_fd, int in_fd, compat_off_t *offset, size ret = sys_sendfile(out_fd, in_fd, offset ? &of : NULL, count); set_fs(old_fs); - if (!ret && offset && put_user(of, offset)) + if (offset && put_user(of, offset)) return -EFAULT; return ret; diff --git a/trunk/arch/x86_64/boot/Makefile b/trunk/arch/x86_64/boot/Makefile index f4399c701b77..18c6e915d69b 100644 --- a/trunk/arch/x86_64/boot/Makefile +++ b/trunk/arch/x86_64/boot/Makefile @@ -46,7 +46,7 @@ cmd_image = $(obj)/tools/build $(BUILDFLAGS) $(obj)/bootsect $(obj)/setup \ $(obj)/bzImage: $(obj)/bootsect $(obj)/setup \ $(obj)/vmlinux.bin $(obj)/tools/build FORCE $(call if_changed,image) - @echo 'Kernel: $@ is ready' + @echo 'Kernel: $@ is ready' ' (#'`cat .version`')' $(obj)/vmlinux.bin: $(obj)/compressed/vmlinux FORCE $(call if_changed,objcopy) diff --git a/trunk/arch/x86_64/boot/compressed/misc.c b/trunk/arch/x86_64/boot/compressed/misc.c index b38d5b8b5fb8..0e10fd84c7cc 100644 --- a/trunk/arch/x86_64/boot/compressed/misc.c +++ b/trunk/arch/x86_64/boot/compressed/misc.c @@ -83,7 +83,7 @@ static unsigned char *real_mode; /* Pointer to real-mode data */ #endif #define SCREEN_INFO (*(struct screen_info *)(real_mode+0)) -extern char input_data[]; +extern unsigned char input_data[]; extern int input_len; static long bytes_out = 0; @@ -288,7 +288,7 @@ void setup_normal_output_buffer(void) #else if ((ALT_MEM_K > EXT_MEM_K ? ALT_MEM_K : EXT_MEM_K) < 1024) error("Less than 2MB of memory"); #endif - output_data = (char *)__PHYSICAL_START; /* Normally Points to 1M */ + output_data = (unsigned char *)__PHYSICAL_START; /* Normally Points to 1M */ free_mem_end_ptr = (long)real_mode; } @@ -305,7 +305,7 @@ void setup_output_buffer_if_we_run_high(struct moveparams *mv) #else if ((ALT_MEM_K > EXT_MEM_K ? ALT_MEM_K : EXT_MEM_K) < (3*1024)) error("Less than 4MB of memory"); #endif - mv->low_buffer_start = output_data = (char *)LOW_BUFFER_START; + mv->low_buffer_start = output_data = (unsigned char *)LOW_BUFFER_START; low_buffer_end = ((unsigned int)real_mode > LOW_BUFFER_MAX ? LOW_BUFFER_MAX : (unsigned int)real_mode) & ~0xfff; low_buffer_size = low_buffer_end - LOW_BUFFER_START; diff --git a/trunk/arch/x86_64/defconfig b/trunk/arch/x86_64/defconfig index bf57e2362bf4..f8db7e500fbf 100644 --- a/trunk/arch/x86_64/defconfig +++ b/trunk/arch/x86_64/defconfig @@ -1,11 +1,12 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.13-rc6-git3 -# Fri Aug 12 16:40:34 2005 +# Linux kernel version: 2.6.13-git11 +# Mon Sep 12 16:16:16 2005 # CONFIG_X86_64=y CONFIG_64BIT=y CONFIG_X86=y +CONFIG_SEMAPHORE_SLEEPERS=y CONFIG_MMU=y CONFIG_RWSEM_GENERIC_SPINLOCK=y CONFIG_GENERIC_CALIBRATE_DELAY=y @@ -13,6 +14,7 @@ CONFIG_X86_CMPXCHG=y CONFIG_EARLY_PRINTK=y CONFIG_GENERIC_ISA_DMA=y CONFIG_GENERIC_IOMAP=y +CONFIG_ARCH_MAY_HAVE_PC_FDC=y # # Code maturity level options @@ -26,6 +28,7 @@ CONFIG_INIT_ENV_ARG_LIMIT=32 # General setup # CONFIG_LOCALVERSION="" +CONFIG_LOCALVERSION_AUTO=y CONFIG_SWAP=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y @@ -37,6 +40,7 @@ CONFIG_KOBJECT_UEVENT=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y # CONFIG_CPUSETS is not set +CONFIG_INITRAMFS_SOURCE="" # CONFIG_EMBEDDED is not set CONFIG_KALLSYMS=y CONFIG_KALLSYMS_ALL=y @@ -102,6 +106,7 @@ CONFIG_DISCONTIGMEM_MANUAL=y CONFIG_DISCONTIGMEM=y CONFIG_FLAT_NODE_MEM_MAP=y CONFIG_NEED_MULTIPLE_NODES=y +# CONFIG_SPARSEMEM_STATIC is not set CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID=y CONFIG_HAVE_DEC_LOCK=y CONFIG_NR_CPUS=32 @@ -122,6 +127,7 @@ CONFIG_HZ=250 CONFIG_GENERIC_HARDIRQS=y CONFIG_GENERIC_IRQ_PROBE=y CONFIG_ISA_DMA_API=y +CONFIG_GENERIC_PENDING_IRQ=y # # Power management options @@ -194,7 +200,6 @@ CONFIG_UNORDERED_IO=y # CONFIG_PCIEPORTBUS is not set CONFIG_PCI_MSI=y # CONFIG_PCI_LEGACY_PROC is not set -# CONFIG_PCI_NAMES is not set # CONFIG_PCI_DEBUG is not set # @@ -234,7 +239,10 @@ CONFIG_INET=y CONFIG_IP_MULTICAST=y # CONFIG_IP_ADVANCED_ROUTER is not set CONFIG_IP_FIB_HASH=y -# CONFIG_IP_PNP is not set +CONFIG_IP_PNP=y +CONFIG_IP_PNP_DHCP=y +# CONFIG_IP_PNP_BOOTP is not set +# CONFIG_IP_PNP_RARP is not set # CONFIG_NET_IPIP is not set # CONFIG_NET_IPGRE is not set # CONFIG_IP_MROUTE is not set @@ -244,8 +252,8 @@ CONFIG_IP_FIB_HASH=y # CONFIG_INET_ESP is not set # CONFIG_INET_IPCOMP is not set # CONFIG_INET_TUNNEL is not set -CONFIG_IP_TCPDIAG=y -CONFIG_IP_TCPDIAG_IPV6=y +CONFIG_INET_DIAG=y +CONFIG_INET_TCP_DIAG=y # CONFIG_TCP_CONG_ADVANCED is not set CONFIG_TCP_CONG_BIC=y CONFIG_IPV6=y @@ -257,6 +265,11 @@ CONFIG_IPV6=y # CONFIG_IPV6_TUNNEL is not set # CONFIG_NETFILTER is not set +# +# DCCP Configuration (EXPERIMENTAL) +# +# CONFIG_IP_DCCP is not set + # # SCTP Configuration (EXPERIMENTAL) # @@ -280,9 +293,11 @@ CONFIG_IPV6=y # Network testing # # CONFIG_NET_PKTGEN is not set +# CONFIG_NETFILTER_NETLINK is not set # CONFIG_HAMRADIO is not set # CONFIG_IRDA is not set # CONFIG_BT is not set +# CONFIG_IEEE80211 is not set # # Device Drivers @@ -329,7 +344,6 @@ CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_COUNT=16 CONFIG_BLK_DEV_RAM_SIZE=4096 CONFIG_BLK_DEV_INITRD=y -CONFIG_INITRAMFS_SOURCE="" CONFIG_LBD=y # CONFIG_CDROM_PKTCDVD is not set @@ -409,6 +423,7 @@ CONFIG_IDEDMA_AUTO=y # # SCSI device support # +# CONFIG_RAID_ATTRS is not set CONFIG_SCSI=y # CONFIG_SCSI_PROC_FS is not set @@ -432,7 +447,7 @@ CONFIG_BLK_DEV_SD=y # # SCSI Transport Attributes # -# CONFIG_SCSI_SPI_ATTRS is not set +CONFIG_SCSI_SPI_ATTRS=y # CONFIG_SCSI_FC_ATTRS is not set # CONFIG_SCSI_ISCSI_ATTRS is not set @@ -458,6 +473,7 @@ CONFIG_SCSI_SATA=y # CONFIG_SCSI_SATA_AHCI is not set # CONFIG_SCSI_SATA_SVW is not set CONFIG_SCSI_ATA_PIIX=y +# CONFIG_SCSI_SATA_MV is not set # CONFIG_SCSI_SATA_NV is not set # CONFIG_SCSI_SATA_PROMISE is not set # CONFIG_SCSI_SATA_QSTOR is not set @@ -536,6 +552,11 @@ CONFIG_TUN=y # # CONFIG_ARCNET is not set +# +# PHY device support +# +# CONFIG_PHYLIB is not set + # # Ethernet (10 or 100Mbit) # @@ -586,6 +607,7 @@ CONFIG_E1000=y # CONFIG_HAMACHI is not set # CONFIG_YELLOWFIN is not set # CONFIG_R8169 is not set +# CONFIG_SIS190 is not set # CONFIG_SKGE is not set # CONFIG_SK98LIN is not set # CONFIG_VIA_VELOCITY is not set @@ -595,6 +617,7 @@ CONFIG_TIGON3=y # # Ethernet (10000 Mbit) # +# CONFIG_CHELSIO_T1 is not set # CONFIG_IXGB is not set CONFIG_S2IO=m # CONFIG_S2IO_NAPI is not set @@ -749,7 +772,6 @@ CONFIG_MAX_RAW_DEVS=256 # I2C support # # CONFIG_I2C is not set -# CONFIG_I2C_SENSOR is not set # # Dallas's 1-wire bus @@ -760,6 +782,7 @@ CONFIG_MAX_RAW_DEVS=256 # Hardware Monitoring support # CONFIG_HWMON=y +# CONFIG_HWMON_VID is not set # CONFIG_HWMON_DEBUG_CHIP is not set # @@ -767,6 +790,10 @@ CONFIG_HWMON=y # # CONFIG_IBM_ASM is not set +# +# Multimedia Capabilities Port drivers +# + # # Multimedia devices # @@ -858,9 +885,8 @@ CONFIG_USB_UHCI_HCD=y # # USB Device Class drivers # -# CONFIG_USB_AUDIO is not set +# CONFIG_OBSOLETE_OSS_USB_DRIVER is not set # CONFIG_USB_BLUETOOTH_TTY is not set -# CONFIG_USB_MIDI is not set # CONFIG_USB_ACM is not set CONFIG_USB_PRINTER=y @@ -877,6 +903,7 @@ CONFIG_USB_STORAGE=y # CONFIG_USB_STORAGE_SDDR09 is not set # CONFIG_USB_STORAGE_SDDR55 is not set # CONFIG_USB_STORAGE_JUMPSHOT is not set +# CONFIG_USB_STORAGE_ONETOUCH is not set # # USB Input Devices @@ -893,6 +920,7 @@ CONFIG_USB_HIDINPUT=y # CONFIG_USB_MTOUCH is not set # CONFIG_USB_ITMTOUCH is not set # CONFIG_USB_EGALAX is not set +# CONFIG_USB_YEALINK is not set # CONFIG_USB_XPAD is not set # CONFIG_USB_ATI_REMOTE is not set # CONFIG_USB_KEYSPAN_REMOTE is not set @@ -976,6 +1004,8 @@ CONFIG_USB_MON=y # Firmware Drivers # # CONFIG_EDD is not set +# CONFIG_DELL_RBU is not set +CONFIG_DCDBAS=m # # File systems @@ -1000,10 +1030,6 @@ CONFIG_REISERFS_FS_POSIX_ACL=y # CONFIG_REISERFS_FS_SECURITY is not set # CONFIG_JFS_FS is not set CONFIG_FS_POSIX_ACL=y - -# -# XFS support -# # CONFIG_XFS_FS is not set # CONFIG_MINIX_FS is not set # CONFIG_ROMFS_FS is not set @@ -1012,6 +1038,7 @@ CONFIG_INOTIFY=y CONFIG_DNOTIFY=y CONFIG_AUTOFS_FS=y # CONFIG_AUTOFS4_FS is not set +# CONFIG_FUSE_FS is not set # # CD-ROM/DVD Filesystems @@ -1037,12 +1064,11 @@ CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1" CONFIG_PROC_FS=y CONFIG_PROC_KCORE=y CONFIG_SYSFS=y -# CONFIG_DEVPTS_FS_XATTR is not set CONFIG_TMPFS=y -# CONFIG_TMPFS_XATTR is not set CONFIG_HUGETLBFS=y CONFIG_HUGETLB_PAGE=y CONFIG_RAMFS=y +# CONFIG_RELAYFS_FS is not set # # Miscellaneous filesystems @@ -1074,6 +1100,7 @@ CONFIG_NFSD_V3=y # CONFIG_NFSD_V3_ACL is not set # CONFIG_NFSD_V4 is not set CONFIG_NFSD_TCP=y +CONFIG_ROOT_NFS=y CONFIG_LOCKD=y CONFIG_LOCKD_V4=y CONFIG_EXPORTFS=y @@ -1086,6 +1113,7 @@ CONFIG_SUNRPC=y # CONFIG_NCP_FS is not set # CONFIG_CODA_FS is not set # CONFIG_AFS_FS is not set +# CONFIG_9P_FS is not set # # Partition Types @@ -1150,6 +1178,7 @@ CONFIG_OPROFILE=y CONFIG_DEBUG_KERNEL=y CONFIG_MAGIC_SYSRQ=y CONFIG_LOG_BUF_SHIFT=18 +CONFIG_DETECT_SOFTLOCKUP=y # CONFIG_SCHEDSTATS is not set # CONFIG_DEBUG_SLAB is not set # CONFIG_DEBUG_SPINLOCK is not set @@ -1157,6 +1186,7 @@ CONFIG_LOG_BUF_SHIFT=18 # CONFIG_DEBUG_KOBJECT is not set # CONFIG_DEBUG_INFO is not set CONFIG_DEBUG_FS=y +# CONFIG_FRAME_POINTER is not set CONFIG_INIT_DEBUG=y # CONFIG_IOMMU_DEBUG is not set CONFIG_KPROBES=y @@ -1180,5 +1210,6 @@ CONFIG_KPROBES=y # Library routines # # CONFIG_CRC_CCITT is not set +# CONFIG_CRC16 is not set CONFIG_CRC32=y # CONFIG_LIBCRC32C is not set diff --git a/trunk/arch/x86_64/ia32/ia32entry.S b/trunk/arch/x86_64/ia32/ia32entry.S index 5244f803203d..e0eb0c712fe9 100644 --- a/trunk/arch/x86_64/ia32/ia32entry.S +++ b/trunk/arch/x86_64/ia32/ia32entry.S @@ -55,20 +55,34 @@ * with the int 0x80 path. */ ENTRY(ia32_sysenter_target) - CFI_STARTPROC + CFI_STARTPROC simple + CFI_DEF_CFA rsp,0 + CFI_REGISTER rsp,rbp swapgs movq %gs:pda_kernelstack, %rsp addq $(PDA_STACKOFFSET),%rsp sti movl %ebp,%ebp /* zero extension */ pushq $__USER32_DS + CFI_ADJUST_CFA_OFFSET 8 + /*CFI_REL_OFFSET ss,0*/ pushq %rbp + CFI_ADJUST_CFA_OFFSET 8 + CFI_REL_OFFSET rsp,0 pushfq + CFI_ADJUST_CFA_OFFSET 8 + /*CFI_REL_OFFSET rflags,0*/ movl $VSYSCALL32_SYSEXIT, %r10d + CFI_REGISTER rip,r10 pushq $__USER32_CS + CFI_ADJUST_CFA_OFFSET 8 + /*CFI_REL_OFFSET cs,0*/ movl %eax, %eax pushq %r10 + CFI_ADJUST_CFA_OFFSET 8 + CFI_REL_OFFSET rip,0 pushq %rax + CFI_ADJUST_CFA_OFFSET 8 cld SAVE_ARGS 0,0,1 /* no need to do an access_ok check here because rbp has been @@ -79,6 +93,7 @@ ENTRY(ia32_sysenter_target) .previous GET_THREAD_INFO(%r10) testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%r10) + CFI_REMEMBER_STATE jnz sysenter_tracesys sysenter_do_call: cmpl $(IA32_NR_syscalls),%eax @@ -94,14 +109,20 @@ sysenter_do_call: andl $~0x200,EFLAGS-R11(%rsp) RESTORE_ARGS 1,24,1,1,1,1 popfq + CFI_ADJUST_CFA_OFFSET -8 + /*CFI_RESTORE rflags*/ popq %rcx /* User %esp */ + CFI_ADJUST_CFA_OFFSET -8 + CFI_REGISTER rsp,rcx movl $VSYSCALL32_SYSEXIT,%edx /* User %eip */ + CFI_REGISTER rip,rdx swapgs sti /* sti only takes effect after the next instruction */ /* sysexit */ .byte 0xf, 0x35 sysenter_tracesys: + CFI_RESTORE_STATE SAVE_REST CLEAR_RREGS movq $-ENOSYS,RAX(%rsp) /* really needed? */ @@ -140,21 +161,28 @@ sysenter_tracesys: * with the int 0x80 path. */ ENTRY(ia32_cstar_target) - CFI_STARTPROC + CFI_STARTPROC simple + CFI_DEF_CFA rsp,0 + CFI_REGISTER rip,rcx + /*CFI_REGISTER rflags,r11*/ swapgs movl %esp,%r8d + CFI_REGISTER rsp,r8 movq %gs:pda_kernelstack,%rsp sti SAVE_ARGS 8,1,1 movl %eax,%eax /* zero extension */ movq %rax,ORIG_RAX-ARGOFFSET(%rsp) movq %rcx,RIP-ARGOFFSET(%rsp) + CFI_REL_OFFSET rip,RIP-ARGOFFSET movq %rbp,RCX-ARGOFFSET(%rsp) /* this lies slightly to ptrace */ movl %ebp,%ecx movq $__USER32_CS,CS-ARGOFFSET(%rsp) movq $__USER32_DS,SS-ARGOFFSET(%rsp) movq %r11,EFLAGS-ARGOFFSET(%rsp) + /*CFI_REL_OFFSET rflags,EFLAGS-ARGOFFSET*/ movq %r8,RSP-ARGOFFSET(%rsp) + CFI_REL_OFFSET rsp,RSP-ARGOFFSET /* no need to do an access_ok check here because r8 has been 32bit zero extended */ /* hardware stack frame is complete now */ @@ -164,6 +192,7 @@ ENTRY(ia32_cstar_target) .previous GET_THREAD_INFO(%r10) testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%r10) + CFI_REMEMBER_STATE jnz cstar_tracesys cstar_do_call: cmpl $IA32_NR_syscalls,%eax @@ -177,12 +206,16 @@ cstar_do_call: jnz int_ret_from_sys_call RESTORE_ARGS 1,-ARG_SKIP,1,1,1 movl RIP-ARGOFFSET(%rsp),%ecx + CFI_REGISTER rip,rcx movl EFLAGS-ARGOFFSET(%rsp),%r11d + /*CFI_REGISTER rflags,r11*/ movl RSP-ARGOFFSET(%rsp),%esp + CFI_RESTORE rsp swapgs sysretl cstar_tracesys: + CFI_RESTORE_STATE SAVE_REST CLEAR_RREGS movq $-ENOSYS,RAX(%rsp) /* really needed? */ @@ -226,11 +259,18 @@ ia32_badarg: */ ENTRY(ia32_syscall) - CFI_STARTPROC + CFI_STARTPROC simple + CFI_DEF_CFA rsp,SS+8-RIP + /*CFI_REL_OFFSET ss,SS-RIP*/ + CFI_REL_OFFSET rsp,RSP-RIP + /*CFI_REL_OFFSET rflags,EFLAGS-RIP*/ + /*CFI_REL_OFFSET cs,CS-RIP*/ + CFI_REL_OFFSET rip,RIP-RIP swapgs sti movl %eax,%eax pushq %rax + CFI_ADJUST_CFA_OFFSET 8 cld /* note the registers are not zero extended to the sf. this could be a problem. */ @@ -278,6 +318,8 @@ quiet_ni_syscall: jmp ia32_ptregs_common .endm + CFI_STARTPROC + PTREGSCALL stub32_rt_sigreturn, sys32_rt_sigreturn, %rdi PTREGSCALL stub32_sigreturn, sys32_sigreturn, %rdi PTREGSCALL stub32_sigaltstack, sys32_sigaltstack, %rdx @@ -290,8 +332,9 @@ quiet_ni_syscall: PTREGSCALL stub32_rt_sigsuspend, sys_rt_sigsuspend, %rdx ENTRY(ia32_ptregs_common) - CFI_STARTPROC popq %r11 + CFI_ADJUST_CFA_OFFSET -8 + CFI_REGISTER rip, r11 SAVE_REST call *%rax RESTORE_REST diff --git a/trunk/arch/x86_64/ia32/sys_ia32.c b/trunk/arch/x86_64/ia32/sys_ia32.c index 04d80406ce4f..5389df610e78 100644 --- a/trunk/arch/x86_64/ia32/sys_ia32.c +++ b/trunk/arch/x86_64/ia32/sys_ia32.c @@ -751,7 +751,7 @@ sys32_sendfile(int out_fd, int in_fd, compat_off_t __user *offset, s32 count) ret = sys_sendfile(out_fd, in_fd, offset ? &of : NULL, count); set_fs(old_fs); - if (!ret && offset && put_user(of, offset)) + if (offset && put_user(of, offset)) return -EFAULT; return ret; diff --git a/trunk/arch/x86_64/kernel/Makefile b/trunk/arch/x86_64/kernel/Makefile index 1579bdd0adcd..bcdd0a805fe7 100644 --- a/trunk/arch/x86_64/kernel/Makefile +++ b/trunk/arch/x86_64/kernel/Makefile @@ -46,3 +46,4 @@ microcode-$(subst m,y,$(CONFIG_MICROCODE)) += ../../i386/kernel/microcode.o intel_cacheinfo-y += ../../i386/kernel/cpu/intel_cacheinfo.o quirks-y += ../../i386/kernel/quirks.o i8237-y += ../../i386/kernel/i8237.o +msr-$(subst m,y,$(CONFIG_X86_MSR)) += ../../i386/kernel/msr.o diff --git a/trunk/arch/x86_64/kernel/acpi/sleep.c b/trunk/arch/x86_64/kernel/acpi/sleep.c index 148f6f7ea315..867a0ebee177 100644 --- a/trunk/arch/x86_64/kernel/acpi/sleep.c +++ b/trunk/arch/x86_64/kernel/acpi/sleep.c @@ -34,7 +34,6 @@ #include #include #include -#include #include #include #include diff --git a/trunk/arch/x86_64/kernel/aperture.c b/trunk/arch/x86_64/kernel/aperture.c index c9a6b812e926..962ad4823b6a 100644 --- a/trunk/arch/x86_64/kernel/aperture.c +++ b/trunk/arch/x86_64/kernel/aperture.c @@ -245,6 +245,8 @@ void __init iommu_hole_init(void) if (aper_alloc) { /* Got the aperture from the AGP bridge */ + } else if (swiotlb && !valid_agp) { + /* Do nothing */ } else if ((!no_iommu && end_pfn >= 0xffffffff>>PAGE_SHIFT) || force_iommu || valid_agp || diff --git a/trunk/arch/x86_64/kernel/apic.c b/trunk/arch/x86_64/kernel/apic.c index 375d369570ca..b6e7715d877f 100644 --- a/trunk/arch/x86_64/kernel/apic.c +++ b/trunk/arch/x86_64/kernel/apic.c @@ -18,7 +18,6 @@ #include #include -#include #include #include #include @@ -109,11 +108,8 @@ void clear_local_APIC(void) if (maxlvt >= 4) apic_write_around(APIC_LVTPC, APIC_LVT_MASKED); v = GET_APIC_VERSION(apic_read(APIC_LVR)); - if (APIC_INTEGRATED(v)) { /* !82489DX */ - if (maxlvt > 3) /* Due to Pentium errata 3AP and 11AP. */ - apic_write(APIC_ESR, 0); - apic_read(APIC_ESR); - } + apic_write(APIC_ESR, 0); + apic_read(APIC_ESR); } void __init connect_bsp_APIC(void) @@ -316,8 +312,6 @@ void __init init_bsp_APIC(void) */ apic_write_around(APIC_LVT0, APIC_DM_EXTINT); value = APIC_DM_NMI; - if (!APIC_INTEGRATED(ver)) /* 82489DX */ - value |= APIC_LVT_LEVEL_TRIGGER; apic_write_around(APIC_LVT1, value); } @@ -325,14 +319,6 @@ void __cpuinit setup_local_APIC (void) { unsigned int value, ver, maxlvt; - /* Pound the ESR really hard over the head with a big hammer - mbligh */ - if (esr_disable) { - apic_write(APIC_ESR, 0); - apic_write(APIC_ESR, 0); - apic_write(APIC_ESR, 0); - apic_write(APIC_ESR, 0); - } - value = apic_read(APIC_LVR); ver = GET_APIC_VERSION(value); @@ -430,15 +416,11 @@ void __cpuinit setup_local_APIC (void) value = APIC_DM_NMI; else value = APIC_DM_NMI | APIC_LVT_MASKED; - if (!APIC_INTEGRATED(ver)) /* 82489DX */ - value |= APIC_LVT_LEVEL_TRIGGER; apic_write_around(APIC_LVT1, value); - if (APIC_INTEGRATED(ver) && !esr_disable) { /* !82489DX */ + { unsigned oldvalue; maxlvt = get_maxlvt(); - if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ - apic_write(APIC_ESR, 0); oldvalue = apic_read(APIC_ESR); value = ERROR_APIC_VECTOR; // enables sending errors apic_write_around(APIC_LVTERR, value); @@ -452,17 +434,6 @@ void __cpuinit setup_local_APIC (void) apic_printk(APIC_VERBOSE, "ESR value after enabling vector: %08x, after %08x\n", oldvalue, value); - } else { - if (esr_disable) - /* - * Something untraceble is creating bad interrupts on - * secondary quads ... for the moment, just leave the - * ESR disabled - we can't do anything useful with the - * errors anyway - mbligh - */ - apic_printk(APIC_DEBUG, "Leaving ESR disabled.\n"); - else - apic_printk(APIC_DEBUG, "No ESR for 82489DX.\n"); } nmi_watchdog_default(); @@ -650,8 +621,7 @@ void __init init_apic_mappings(void) * Fetch the APIC ID of the BSP in case we have a * default configuration (or the MP table is broken). */ - if (boot_cpu_id == -1U) - boot_cpu_id = GET_APIC_ID(apic_read(APIC_ID)); + boot_cpu_id = GET_APIC_ID(apic_read(APIC_ID)); #ifdef CONFIG_X86_IO_APIC { @@ -693,8 +663,6 @@ static void __setup_APIC_LVTT(unsigned int clocks) ver = GET_APIC_VERSION(apic_read(APIC_LVR)); lvtt_value = APIC_LVT_TIMER_PERIODIC | LOCAL_TIMER_VECTOR; - if (!APIC_INTEGRATED(ver)) - lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV); apic_write_around(APIC_LVTT, lvtt_value); /* @@ -1081,7 +1049,7 @@ int __init APIC_init_uniprocessor (void) connect_bsp_APIC(); - phys_cpu_present_map = physid_mask_of_physid(0); + phys_cpu_present_map = physid_mask_of_physid(boot_cpu_id); apic_write_around(APIC_ID, boot_cpu_id); setup_local_APIC(); diff --git a/trunk/arch/x86_64/kernel/asm-offsets.c b/trunk/arch/x86_64/kernel/asm-offsets.c index 35b4c3fcbb37..aaa6d3833517 100644 --- a/trunk/arch/x86_64/kernel/asm-offsets.c +++ b/trunk/arch/x86_64/kernel/asm-offsets.c @@ -39,7 +39,6 @@ int main(void) ENTRY(kernelstack); ENTRY(oldrsp); ENTRY(pcurrent); - ENTRY(irqrsp); ENTRY(irqcount); ENTRY(cpunumber); ENTRY(irqstackptr); diff --git a/trunk/arch/x86_64/kernel/crash.c b/trunk/arch/x86_64/kernel/crash.c index d7fa4248501c..535e04466079 100644 --- a/trunk/arch/x86_64/kernel/crash.c +++ b/trunk/arch/x86_64/kernel/crash.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include diff --git a/trunk/arch/x86_64/kernel/e820.c b/trunk/arch/x86_64/kernel/e820.c index bb0ae18ec02b..eb7929eea7b3 100644 --- a/trunk/arch/x86_64/kernel/e820.c +++ b/trunk/arch/x86_64/kernel/e820.c @@ -131,7 +131,7 @@ void __init e820_bootmem_free(pg_data_t *pgdat, unsigned long start,unsigned lon if (ei->type != E820_RAM || ei->addr+ei->size <= start || - ei->addr > end) + ei->addr >= end) continue; addr = round_up(ei->addr, PAGE_SIZE); diff --git a/trunk/arch/x86_64/kernel/early_printk.c b/trunk/arch/x86_64/kernel/early_printk.c index 9631c747c5e3..9cd968dd0f5a 100644 --- a/trunk/arch/x86_64/kernel/early_printk.c +++ b/trunk/arch/x86_64/kernel/early_printk.c @@ -5,6 +5,7 @@ #include #include #include +#include /* Simple VGA output */ @@ -158,6 +159,47 @@ static struct console early_serial_console = { .index = -1, }; +/* Console interface to a host file on AMD's SimNow! */ + +static int simnow_fd; + +enum { + MAGIC1 = 0xBACCD00A, + MAGIC2 = 0xCA110000, + XOPEN = 5, + XWRITE = 4, +}; + +static noinline long simnow(long cmd, long a, long b, long c) +{ + long ret; + asm volatile("cpuid" : + "=a" (ret) : + "b" (a), "c" (b), "d" (c), "0" (MAGIC1), "D" (cmd + MAGIC2)); + return ret; +} + +void __init simnow_init(char *str) +{ + char *fn = "klog"; + if (*str == '=') + fn = ++str; + /* error ignored */ + simnow_fd = simnow(XOPEN, (unsigned long)fn, O_WRONLY|O_APPEND|O_CREAT, 0644); +} + +static void simnow_write(struct console *con, const char *s, unsigned n) +{ + simnow(XWRITE, simnow_fd, (unsigned long)s, n); +} + +static struct console simnow_console = { + .name = "simnow", + .write = simnow_write, + .flags = CON_PRINTBUFFER, + .index = -1, +}; + /* Direct interface for emergencies */ struct console *early_console = &early_vga_console; static int early_console_initialized = 0; @@ -205,6 +247,10 @@ int __init setup_early_printk(char *opt) max_xpos = SCREEN_INFO.orig_video_cols; max_ypos = SCREEN_INFO.orig_video_lines; early_console = &early_vga_console; + } else if (!strncmp(buf, "simnow", 6)) { + simnow_init(buf + 6); + early_console = &simnow_console; + keep_early = 1; } early_console_initialized = 1; register_console(early_console); diff --git a/trunk/arch/x86_64/kernel/entry.S b/trunk/arch/x86_64/kernel/entry.S index 3620508c8bd9..7937971d1853 100644 --- a/trunk/arch/x86_64/kernel/entry.S +++ b/trunk/arch/x86_64/kernel/entry.S @@ -79,16 +79,19 @@ xorl %eax, %eax pushq %rax /* ss */ CFI_ADJUST_CFA_OFFSET 8 + /*CFI_REL_OFFSET ss,0*/ pushq %rax /* rsp */ CFI_ADJUST_CFA_OFFSET 8 - CFI_OFFSET rip,0 + CFI_REL_OFFSET rsp,0 pushq $(1<<9) /* eflags - interrupts on */ CFI_ADJUST_CFA_OFFSET 8 + /*CFI_REL_OFFSET rflags,0*/ pushq $__KERNEL_CS /* cs */ CFI_ADJUST_CFA_OFFSET 8 + /*CFI_REL_OFFSET cs,0*/ pushq \child_rip /* rip */ CFI_ADJUST_CFA_OFFSET 8 - CFI_OFFSET rip,0 + CFI_REL_OFFSET rip,0 pushq %rax /* orig rax */ CFI_ADJUST_CFA_OFFSET 8 .endm @@ -98,32 +101,39 @@ CFI_ADJUST_CFA_OFFSET -(6*8) .endm - .macro CFI_DEFAULT_STACK - CFI_ADJUST_CFA_OFFSET (SS) - CFI_OFFSET r15,R15-SS - CFI_OFFSET r14,R14-SS - CFI_OFFSET r13,R13-SS - CFI_OFFSET r12,R12-SS - CFI_OFFSET rbp,RBP-SS - CFI_OFFSET rbx,RBX-SS - CFI_OFFSET r11,R11-SS - CFI_OFFSET r10,R10-SS - CFI_OFFSET r9,R9-SS - CFI_OFFSET r8,R8-SS - CFI_OFFSET rax,RAX-SS - CFI_OFFSET rcx,RCX-SS - CFI_OFFSET rdx,RDX-SS - CFI_OFFSET rsi,RSI-SS - CFI_OFFSET rdi,RDI-SS - CFI_OFFSET rsp,RSP-SS - CFI_OFFSET rip,RIP-SS + .macro CFI_DEFAULT_STACK start=1 + .if \start + CFI_STARTPROC simple + CFI_DEF_CFA rsp,SS+8 + .else + CFI_DEF_CFA_OFFSET SS+8 + .endif + CFI_REL_OFFSET r15,R15 + CFI_REL_OFFSET r14,R14 + CFI_REL_OFFSET r13,R13 + CFI_REL_OFFSET r12,R12 + CFI_REL_OFFSET rbp,RBP + CFI_REL_OFFSET rbx,RBX + CFI_REL_OFFSET r11,R11 + CFI_REL_OFFSET r10,R10 + CFI_REL_OFFSET r9,R9 + CFI_REL_OFFSET r8,R8 + CFI_REL_OFFSET rax,RAX + CFI_REL_OFFSET rcx,RCX + CFI_REL_OFFSET rdx,RDX + CFI_REL_OFFSET rsi,RSI + CFI_REL_OFFSET rdi,RDI + CFI_REL_OFFSET rip,RIP + /*CFI_REL_OFFSET cs,CS*/ + /*CFI_REL_OFFSET rflags,EFLAGS*/ + CFI_REL_OFFSET rsp,RSP + /*CFI_REL_OFFSET ss,SS*/ .endm /* * A newly forked process directly context switches into this. */ /* rdi: prev */ ENTRY(ret_from_fork) - CFI_STARTPROC CFI_DEFAULT_STACK call schedule_tail GET_THREAD_INFO(%rcx) @@ -172,16 +182,21 @@ rff_trace: */ ENTRY(system_call) - CFI_STARTPROC + CFI_STARTPROC simple + CFI_DEF_CFA rsp,0 + CFI_REGISTER rip,rcx + /*CFI_REGISTER rflags,r11*/ swapgs movq %rsp,%gs:pda_oldrsp movq %gs:pda_kernelstack,%rsp sti SAVE_ARGS 8,1 movq %rax,ORIG_RAX-ARGOFFSET(%rsp) - movq %rcx,RIP-ARGOFFSET(%rsp) + movq %rcx,RIP-ARGOFFSET(%rsp) + CFI_REL_OFFSET rip,RIP-ARGOFFSET GET_THREAD_INFO(%rcx) testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx) + CFI_REMEMBER_STATE jnz tracesys cmpq $__NR_syscall_max,%rax ja badsys @@ -201,9 +216,12 @@ sysret_check: cli movl threadinfo_flags(%rcx),%edx andl %edi,%edx + CFI_REMEMBER_STATE jnz sysret_careful movq RIP-ARGOFFSET(%rsp),%rcx + CFI_REGISTER rip,rcx RESTORE_ARGS 0,-ARG_SKIP,1 + /*CFI_REGISTER rflags,r11*/ movq %gs:pda_oldrsp,%rsp swapgs sysretq @@ -211,12 +229,15 @@ sysret_check: /* Handle reschedules */ /* edx: work, edi: workmask */ sysret_careful: + CFI_RESTORE_STATE bt $TIF_NEED_RESCHED,%edx jnc sysret_signal sti pushq %rdi + CFI_ADJUST_CFA_OFFSET 8 call schedule popq %rdi + CFI_ADJUST_CFA_OFFSET -8 jmp sysret_check /* Handle a signal */ @@ -234,8 +255,13 @@ sysret_signal: 1: movl $_TIF_NEED_RESCHED,%edi jmp sysret_check +badsys: + movq $-ENOSYS,RAX-ARGOFFSET(%rsp) + jmp ret_from_sys_call + /* Do syscall tracing */ tracesys: + CFI_RESTORE_STATE SAVE_REST movq $-ENOSYS,RAX(%rsp) FIXUP_TOP_OF_STACK %rdi @@ -254,16 +280,29 @@ tracesys: RESTORE_TOP_OF_STACK %rbx RESTORE_REST jmp ret_from_sys_call + CFI_ENDPROC -badsys: - movq $-ENOSYS,RAX-ARGOFFSET(%rsp) - jmp ret_from_sys_call - /* * Syscall return path ending with IRET. * Has correct top of stack, but partial stack frame. */ -ENTRY(int_ret_from_sys_call) +ENTRY(int_ret_from_sys_call) + CFI_STARTPROC simple + CFI_DEF_CFA rsp,SS+8-ARGOFFSET + /*CFI_REL_OFFSET ss,SS-ARGOFFSET*/ + CFI_REL_OFFSET rsp,RSP-ARGOFFSET + /*CFI_REL_OFFSET rflags,EFLAGS-ARGOFFSET*/ + /*CFI_REL_OFFSET cs,CS-ARGOFFSET*/ + CFI_REL_OFFSET rip,RIP-ARGOFFSET + CFI_REL_OFFSET rdx,RDX-ARGOFFSET + CFI_REL_OFFSET rcx,RCX-ARGOFFSET + CFI_REL_OFFSET rax,RAX-ARGOFFSET + CFI_REL_OFFSET rdi,RDI-ARGOFFSET + CFI_REL_OFFSET rsi,RSI-ARGOFFSET + CFI_REL_OFFSET r8,R8-ARGOFFSET + CFI_REL_OFFSET r9,R9-ARGOFFSET + CFI_REL_OFFSET r10,R10-ARGOFFSET + CFI_REL_OFFSET r11,R11-ARGOFFSET cli testl $3,CS-ARGOFFSET(%rsp) je retint_restore_args @@ -284,8 +323,10 @@ int_careful: jnc int_very_careful sti pushq %rdi + CFI_ADJUST_CFA_OFFSET 8 call schedule popq %rdi + CFI_ADJUST_CFA_OFFSET -8 cli jmp int_with_check @@ -297,9 +338,11 @@ int_very_careful: testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edx jz int_signal pushq %rdi + CFI_ADJUST_CFA_OFFSET 8 leaq 8(%rsp),%rdi # &ptregs -> arg1 call syscall_trace_leave popq %rdi + CFI_ADJUST_CFA_OFFSET -8 andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi cli jmp int_restore_rest @@ -329,6 +372,8 @@ int_restore_rest: jmp ptregscall_common .endm + CFI_STARTPROC + PTREGSCALL stub_clone, sys_clone, %r8 PTREGSCALL stub_fork, sys_fork, %rdi PTREGSCALL stub_vfork, sys_vfork, %rdi @@ -337,40 +382,49 @@ int_restore_rest: PTREGSCALL stub_iopl, sys_iopl, %rsi ENTRY(ptregscall_common) - CFI_STARTPROC popq %r11 - CFI_ADJUST_CFA_OFFSET -8 + CFI_ADJUST_CFA_OFFSET -8 + CFI_REGISTER rip, r11 SAVE_REST movq %r11, %r15 + CFI_REGISTER rip, r15 FIXUP_TOP_OF_STACK %r11 call *%rax RESTORE_TOP_OF_STACK %r11 movq %r15, %r11 + CFI_REGISTER rip, r11 RESTORE_REST pushq %r11 - CFI_ADJUST_CFA_OFFSET 8 + CFI_ADJUST_CFA_OFFSET 8 + CFI_REL_OFFSET rip, 0 ret CFI_ENDPROC ENTRY(stub_execve) CFI_STARTPROC popq %r11 - CFI_ADJUST_CFA_OFFSET -8 + CFI_ADJUST_CFA_OFFSET -8 + CFI_REGISTER rip, r11 SAVE_REST movq %r11, %r15 + CFI_REGISTER rip, r15 FIXUP_TOP_OF_STACK %r11 call sys_execve GET_THREAD_INFO(%rcx) bt $TIF_IA32,threadinfo_flags(%rcx) + CFI_REMEMBER_STATE jc exec_32bit RESTORE_TOP_OF_STACK %r11 movq %r15, %r11 + CFI_REGISTER rip, r11 RESTORE_REST - push %r11 + pushq %r11 + CFI_ADJUST_CFA_OFFSET 8 + CFI_REL_OFFSET rip, 0 ret exec_32bit: - CFI_ADJUST_CFA_OFFSET REST_SKIP + CFI_RESTORE_STATE movq %rax,RAX(%rsp) RESTORE_REST jmp int_ret_from_sys_call @@ -382,7 +436,8 @@ exec_32bit: */ ENTRY(stub_rt_sigreturn) CFI_STARTPROC - addq $8, %rsp + addq $8, %rsp + CFI_ADJUST_CFA_OFFSET -8 SAVE_REST movq %rsp,%rdi FIXUP_TOP_OF_STACK %r11 @@ -392,6 +447,25 @@ ENTRY(stub_rt_sigreturn) jmp int_ret_from_sys_call CFI_ENDPROC +/* + * initial frame state for interrupts and exceptions + */ + .macro _frame ref + CFI_STARTPROC simple + CFI_DEF_CFA rsp,SS+8-\ref + /*CFI_REL_OFFSET ss,SS-\ref*/ + CFI_REL_OFFSET rsp,RSP-\ref + /*CFI_REL_OFFSET rflags,EFLAGS-\ref*/ + /*CFI_REL_OFFSET cs,CS-\ref*/ + CFI_REL_OFFSET rip,RIP-\ref + .endm + +/* initial frame state for interrupts (and exceptions without error code) */ +#define INTR_FRAME _frame RIP +/* initial frame state for exceptions with error code (and interrupts with + vector already pushed) */ +#define XCPT_FRAME _frame ORIG_RAX + /* * Interrupt entry/exit. * @@ -402,10 +476,6 @@ ENTRY(stub_rt_sigreturn) /* 0(%rsp): interrupt number */ .macro interrupt func - CFI_STARTPROC simple - CFI_DEF_CFA rsp,(SS-RDI) - CFI_REL_OFFSET rsp,(RSP-ORIG_RAX) - CFI_REL_OFFSET rip,(RIP-ORIG_RAX) cld #ifdef CONFIG_DEBUG_INFO SAVE_ALL @@ -425,23 +495,27 @@ ENTRY(stub_rt_sigreturn) swapgs 1: incl %gs:pda_irqcount # RED-PEN should check preempt count movq %gs:pda_irqstackptr,%rax - cmoveq %rax,%rsp + cmoveq %rax,%rsp /*todo This needs CFI annotation! */ pushq %rdi # save old stack + CFI_ADJUST_CFA_OFFSET 8 call \func .endm ENTRY(common_interrupt) + XCPT_FRAME interrupt do_IRQ /* 0(%rsp): oldrsp-ARGOFFSET */ -ret_from_intr: +ret_from_intr: popq %rdi + CFI_ADJUST_CFA_OFFSET -8 cli decl %gs:pda_irqcount #ifdef CONFIG_DEBUG_INFO movq RBP(%rdi),%rbp + CFI_DEF_CFA_REGISTER rsp #endif - leaq ARGOFFSET(%rdi),%rsp -exit_intr: + leaq ARGOFFSET(%rdi),%rsp /*todo This needs CFI annotation! */ +exit_intr: GET_THREAD_INFO(%rcx) testl $3,CS-ARGOFFSET(%rsp) je retint_kernel @@ -453,9 +527,10 @@ exit_intr: */ retint_with_reschedule: movl $_TIF_WORK_MASK,%edi -retint_check: +retint_check: movl threadinfo_flags(%rcx),%edx andl %edi,%edx + CFI_REMEMBER_STATE jnz retint_careful retint_swapgs: swapgs @@ -476,14 +551,17 @@ bad_iret: jmp do_exit .previous - /* edi: workmask, edx: work */ + /* edi: workmask, edx: work */ retint_careful: + CFI_RESTORE_STATE bt $TIF_NEED_RESCHED,%edx jnc retint_signal sti pushq %rdi + CFI_ADJUST_CFA_OFFSET 8 call schedule popq %rdi + CFI_ADJUST_CFA_OFFSET -8 GET_THREAD_INFO(%rcx) cli jmp retint_check @@ -523,7 +601,9 @@ retint_kernel: * APIC interrupts. */ .macro apicinterrupt num,func + INTR_FRAME pushq $\num-256 + CFI_ADJUST_CFA_OFFSET 8 interrupt \func jmp ret_from_intr CFI_ENDPROC @@ -536,8 +616,19 @@ ENTRY(thermal_interrupt) ENTRY(reschedule_interrupt) apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt -ENTRY(invalidate_interrupt) - apicinterrupt INVALIDATE_TLB_VECTOR,smp_invalidate_interrupt + .macro INVALIDATE_ENTRY num +ENTRY(invalidate_interrupt\num) + apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt + .endm + + INVALIDATE_ENTRY 0 + INVALIDATE_ENTRY 1 + INVALIDATE_ENTRY 2 + INVALIDATE_ENTRY 3 + INVALIDATE_ENTRY 4 + INVALIDATE_ENTRY 5 + INVALIDATE_ENTRY 6 + INVALIDATE_ENTRY 7 ENTRY(call_function_interrupt) apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt @@ -558,16 +649,23 @@ ENTRY(spurious_interrupt) * Exception entry points. */ .macro zeroentry sym + INTR_FRAME pushq $0 /* push error code/oldrax */ + CFI_ADJUST_CFA_OFFSET 8 pushq %rax /* push real oldrax to the rdi slot */ + CFI_ADJUST_CFA_OFFSET 8 leaq \sym(%rip),%rax jmp error_entry + CFI_ENDPROC .endm .macro errorentry sym + XCPT_FRAME pushq %rax + CFI_ADJUST_CFA_OFFSET 8 leaq \sym(%rip),%rax jmp error_entry + CFI_ENDPROC .endm /* error code is on the stack already */ @@ -594,10 +692,7 @@ ENTRY(spurious_interrupt) * and the exception handler in %rax. */ ENTRY(error_entry) - CFI_STARTPROC simple - CFI_DEF_CFA rsp,(SS-RDI) - CFI_REL_OFFSET rsp,(RSP-RDI) - CFI_REL_OFFSET rip,(RIP-RDI) + _frame RDI /* rdi slot contains rax, oldrax contains error code */ cld subq $14*8,%rsp @@ -679,7 +774,9 @@ error_kernelspace: /* Reload gs selector with exception handling */ /* edi: new selector */ ENTRY(load_gs_index) + CFI_STARTPROC pushf + CFI_ADJUST_CFA_OFFSET 8 cli swapgs gs_change: @@ -687,7 +784,9 @@ gs_change: 2: mfence /* workaround */ swapgs popf + CFI_ADJUST_CFA_OFFSET -8 ret + CFI_ENDPROC .section __ex_table,"a" .align 8 @@ -799,7 +898,7 @@ ENTRY(device_not_available) /* runs on exception stack */ KPROBE_ENTRY(debug) - CFI_STARTPROC + INTR_FRAME pushq $0 CFI_ADJUST_CFA_OFFSET 8 paranoidentry do_debug @@ -809,9 +908,9 @@ KPROBE_ENTRY(debug) /* runs on exception stack */ ENTRY(nmi) - CFI_STARTPROC + INTR_FRAME pushq $-1 - CFI_ADJUST_CFA_OFFSET 8 + CFI_ADJUST_CFA_OFFSET 8 paranoidentry do_nmi /* * "Paranoid" exit path from exception stack. @@ -877,7 +976,7 @@ ENTRY(reserved) /* runs on exception stack */ ENTRY(double_fault) - CFI_STARTPROC + XCPT_FRAME paranoidentry do_double_fault jmp paranoid_exit CFI_ENDPROC @@ -890,7 +989,7 @@ ENTRY(segment_not_present) /* runs on exception stack */ ENTRY(stack_segment) - CFI_STARTPROC + XCPT_FRAME paranoidentry do_stack_segment jmp paranoid_exit CFI_ENDPROC @@ -911,7 +1010,7 @@ ENTRY(spurious_interrupt_bug) #ifdef CONFIG_X86_MCE /* runs on exception stack */ ENTRY(machine_check) - CFI_STARTPROC + INTR_FRAME pushq $0 CFI_ADJUST_CFA_OFFSET 8 paranoidentry do_machine_check @@ -923,14 +1022,19 @@ ENTRY(call_debug) zeroentry do_call_debug ENTRY(call_softirq) + CFI_STARTPROC movq %gs:pda_irqstackptr,%rax pushq %r15 + CFI_ADJUST_CFA_OFFSET 8 movq %rsp,%r15 + CFI_DEF_CFA_REGISTER r15 incl %gs:pda_irqcount cmove %rax,%rsp call __do_softirq movq %r15,%rsp + CFI_DEF_CFA_REGISTER rsp decl %gs:pda_irqcount popq %r15 + CFI_ADJUST_CFA_OFFSET -8 ret - + CFI_ENDPROC diff --git a/trunk/arch/x86_64/kernel/genapic.c b/trunk/arch/x86_64/kernel/genapic.c index b1c144f73149..7a64ea181788 100644 --- a/trunk/arch/x86_64/kernel/genapic.c +++ b/trunk/arch/x86_64/kernel/genapic.c @@ -45,7 +45,7 @@ void __init clustered_apic_check(void) u8 clusters, max_cluster; u8 id; u8 cluster_cnt[NUM_APIC_CLUSTERS]; - int num_cpus = 0; + int max_apic = 0; #if defined(CONFIG_ACPI) /* @@ -64,14 +64,15 @@ void __init clustered_apic_check(void) id = bios_cpu_apicid[i]; if (id == BAD_APICID) continue; - num_cpus++; + if (id > max_apic) + max_apic = id; cluster_cnt[APIC_CLUSTERID(id)]++; } /* Don't use clustered mode on AMD platforms. */ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { genapic = &apic_physflat; -#ifndef CONFIG_CPU_HOTPLUG +#ifndef CONFIG_HOTPLUG_CPU /* In the CPU hotplug case we cannot use broadcast mode because that opens a race when a CPU is removed. Stay at physflat mode in this case. @@ -79,7 +80,7 @@ void __init clustered_apic_check(void) we have ACPI platform support for CPU hotplug we should detect hotplug capablity from ACPI tables and only do this when really needed. -AK */ - if (num_cpus <= 8) + if (max_apic <= 8) genapic = &apic_flat; #endif goto print; @@ -103,9 +104,14 @@ void __init clustered_apic_check(void) * (We don't use lowest priority delivery + HW APIC IRQ steering, so * can ignore the clustered logical case and go straight to physical.) */ - if (clusters <= 1 && max_cluster <= 8 && cluster_cnt[0] == max_cluster) + if (clusters <= 1 && max_cluster <= 8 && cluster_cnt[0] == max_cluster) { +#ifdef CONFIG_HOTPLUG_CPU + /* Don't use APIC shortcuts in CPU hotplug to avoid races */ + genapic = &apic_physflat; +#else genapic = &apic_flat; - else +#endif + } else genapic = &apic_cluster; print: diff --git a/trunk/arch/x86_64/kernel/genapic_cluster.c b/trunk/arch/x86_64/kernel/genapic_cluster.c index f6523dd1bc09..a472d62f899a 100644 --- a/trunk/arch/x86_64/kernel/genapic_cluster.c +++ b/trunk/arch/x86_64/kernel/genapic_cluster.c @@ -51,10 +51,10 @@ static void cluster_init_apic_ldr(void) count = 3; id = my_cluster | (1UL << count); x86_cpu_to_log_apicid[smp_processor_id()] = id; - apic_write_around(APIC_DFR, APIC_DFR_CLUSTER); + apic_write(APIC_DFR, APIC_DFR_CLUSTER); val = apic_read(APIC_LDR) & ~APIC_LDR_MASK; val |= SET_APIC_LOGICAL_ID(id); - apic_write_around(APIC_LDR, val); + apic_write(APIC_LDR, val); } /* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ diff --git a/trunk/arch/x86_64/kernel/genapic_flat.c b/trunk/arch/x86_64/kernel/genapic_flat.c index 6d57da96bf8c..9da3edb799ea 100644 --- a/trunk/arch/x86_64/kernel/genapic_flat.c +++ b/trunk/arch/x86_64/kernel/genapic_flat.c @@ -38,10 +38,10 @@ static void flat_init_apic_ldr(void) num = smp_processor_id(); id = 1UL << num; x86_cpu_to_log_apicid[num] = id; - apic_write_around(APIC_DFR, APIC_DFR_FLAT); + apic_write(APIC_DFR, APIC_DFR_FLAT); val = apic_read(APIC_LDR) & ~APIC_LDR_MASK; val |= SET_APIC_LOGICAL_ID(id); - apic_write_around(APIC_LDR, val); + apic_write(APIC_LDR, val); } static void flat_send_IPI_mask(cpumask_t cpumask, int vector) @@ -62,7 +62,7 @@ static void flat_send_IPI_mask(cpumask_t cpumask, int vector) * prepare target chip field */ cfg = __prepare_ICR2(mask); - apic_write_around(APIC_ICR2, cfg); + apic_write(APIC_ICR2, cfg); /* * program the ICR @@ -72,7 +72,7 @@ static void flat_send_IPI_mask(cpumask_t cpumask, int vector) /* * Send the IPI. The write to APIC_ICR fires this off. */ - apic_write_around(APIC_ICR, cfg); + apic_write(APIC_ICR, cfg); local_irq_restore(flags); } @@ -177,9 +177,9 @@ static unsigned int physflat_cpu_mask_to_apicid(cpumask_t cpumask) struct genapic apic_physflat = { .name = "physical flat", - .int_delivery_mode = dest_LowestPrio, + .int_delivery_mode = dest_Fixed, .int_dest_mode = (APIC_DEST_PHYSICAL != 0), - .int_delivery_dest = APIC_DEST_PHYSICAL | APIC_DM_LOWEST, + .int_delivery_dest = APIC_DEST_PHYSICAL | APIC_DM_FIXED, .target_cpus = physflat_target_cpus, .apic_id_registered = flat_apic_id_registered, .init_apic_ldr = flat_init_apic_ldr,/*not needed, but shouldn't hurt*/ diff --git a/trunk/arch/x86_64/kernel/head.S b/trunk/arch/x86_64/kernel/head.S index 98ff5eb32b9a..4592bf21fcaf 100644 --- a/trunk/arch/x86_64/kernel/head.S +++ b/trunk/arch/x86_64/kernel/head.S @@ -270,7 +270,7 @@ ENTRY(level3_kernel_pgt) .org 0x4000 ENTRY(level2_ident_pgt) /* 40MB for bootup. */ - .quad 0x0000000000000283 + .quad 0x0000000000000183 .quad 0x0000000000200183 .quad 0x0000000000400183 .quad 0x0000000000600183 diff --git a/trunk/arch/x86_64/kernel/i8259.c b/trunk/arch/x86_64/kernel/i8259.c index a89169095129..b2a238b5a17e 100644 --- a/trunk/arch/x86_64/kernel/i8259.c +++ b/trunk/arch/x86_64/kernel/i8259.c @@ -18,15 +18,12 @@ #include #include #include -#include #include #include #include #include #include -#include - /* * Common place to define all x86 IRQ vectors * @@ -486,7 +483,14 @@ void spurious_interrupt(void); void error_interrupt(void); void reschedule_interrupt(void); void call_function_interrupt(void); -void invalidate_interrupt(void); +void invalidate_interrupt0(void); +void invalidate_interrupt1(void); +void invalidate_interrupt2(void); +void invalidate_interrupt3(void); +void invalidate_interrupt4(void); +void invalidate_interrupt5(void); +void invalidate_interrupt6(void); +void invalidate_interrupt7(void); void thermal_interrupt(void); void i8254_timer_resume(void); @@ -562,8 +566,15 @@ void __init init_IRQ(void) */ set_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt); - /* IPI for invalidation */ - set_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt); + /* IPIs for invalidation */ + set_intr_gate(INVALIDATE_TLB_VECTOR_START+0, invalidate_interrupt0); + set_intr_gate(INVALIDATE_TLB_VECTOR_START+1, invalidate_interrupt1); + set_intr_gate(INVALIDATE_TLB_VECTOR_START+2, invalidate_interrupt2); + set_intr_gate(INVALIDATE_TLB_VECTOR_START+3, invalidate_interrupt3); + set_intr_gate(INVALIDATE_TLB_VECTOR_START+4, invalidate_interrupt4); + set_intr_gate(INVALIDATE_TLB_VECTOR_START+5, invalidate_interrupt5); + set_intr_gate(INVALIDATE_TLB_VECTOR_START+6, invalidate_interrupt6); + set_intr_gate(INVALIDATE_TLB_VECTOR_START+7, invalidate_interrupt7); /* IPI for generic function call */ set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); diff --git a/trunk/arch/x86_64/kernel/init_task.c b/trunk/arch/x86_64/kernel/init_task.c index c4dc91491157..e0ba5c1043fd 100644 --- a/trunk/arch/x86_64/kernel/init_task.c +++ b/trunk/arch/x86_64/kernel/init_task.c @@ -44,6 +44,6 @@ EXPORT_SYMBOL(init_task); * section. Since TSS's are completely CPU-local, we want them * on exact cacheline boundaries, to eliminate cacheline ping-pong. */ -DEFINE_PER_CPU(struct tss_struct, init_tss) ____cacheline_maxaligned_in_smp; +DEFINE_PER_CPU(struct tss_struct, init_tss) ____cacheline_maxaligned_in_smp = INIT_TSS; #define ALIGN_TO_4K __attribute__((section(".data.init_task"))) diff --git a/trunk/arch/x86_64/kernel/io_apic.c b/trunk/arch/x86_64/kernel/io_apic.c index 5f1529be1237..522944a000ad 100644 --- a/trunk/arch/x86_64/kernel/io_apic.c +++ b/trunk/arch/x86_64/kernel/io_apic.c @@ -21,7 +21,6 @@ */ #include -#include #include #include #include @@ -45,6 +44,8 @@ int sis_apic_bug; /* not actually supported, dummy for compile */ static int no_timer_check; +int disable_timer_pin_1 __initdata; + static DEFINE_SPINLOCK(ioapic_lock); /* @@ -298,6 +299,15 @@ void __init check_ioapic(void) #endif /* RED-PEN skip them on mptables too? */ return; + case PCI_VENDOR_ID_ATI: + /* All timer interrupts on atiixp + are doubled. Disable one. */ + if (disable_timer_pin_1 == 0) { + disable_timer_pin_1 = 1; + printk(KERN_INFO + "ATI board detected. Disabling timer pin 1.\n"); + } + return; } /* No multi-function device? */ @@ -1022,13 +1032,11 @@ void __apicdebuginit print_local_APIC(void * dummy) v = apic_read(APIC_TASKPRI); printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK); - if (APIC_INTEGRATED(ver)) { /* !82489DX */ - v = apic_read(APIC_ARBPRI); - printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v, - v & APIC_ARBPRI_MASK); - v = apic_read(APIC_PROCPRI); - printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v); - } + v = apic_read(APIC_ARBPRI); + printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v, + v & APIC_ARBPRI_MASK); + v = apic_read(APIC_PROCPRI); + printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v); v = apic_read(APIC_EOI); printk(KERN_DEBUG "... APIC EOI: %08x\n", v); @@ -1048,12 +1056,8 @@ void __apicdebuginit print_local_APIC(void * dummy) printk(KERN_DEBUG "... APIC IRR field:\n"); print_APIC_bitfield(APIC_IRR); - if (APIC_INTEGRATED(ver)) { /* !82489DX */ - if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ - apic_write(APIC_ESR, 0); - v = apic_read(APIC_ESR); - printk(KERN_DEBUG "... APIC ESR: %08x\n", v); - } + v = apic_read(APIC_ESR); + printk(KERN_DEBUG "... APIC ESR: %08x\n", v); v = apic_read(APIC_ICR); printk(KERN_DEBUG "... APIC ICR: %08x\n", v); @@ -1665,6 +1669,8 @@ static inline void check_timer(void) setup_nmi(); enable_8259A_irq(0); } + if (disable_timer_pin_1 > 0) + clear_IO_APIC_pin(0, pin1); return; } clear_IO_APIC_pin(0, pin1); diff --git a/trunk/arch/x86_64/kernel/irq.c b/trunk/arch/x86_64/kernel/irq.c index 849a20aec7ca..d6a04a8320a3 100644 --- a/trunk/arch/x86_64/kernel/irq.c +++ b/trunk/arch/x86_64/kernel/irq.c @@ -99,7 +99,6 @@ asmlinkage unsigned int do_IRQ(struct pt_regs *regs) unsigned irq = regs->orig_rax & 0xff; irq_enter(); - BUG_ON(irq > 256); __do_IRQ(irq, regs); irq_exit(); diff --git a/trunk/arch/x86_64/kernel/mce.c b/trunk/arch/x86_64/kernel/mce.c index 8aa56736cde3..969365c0771b 100644 --- a/trunk/arch/x86_64/kernel/mce.c +++ b/trunk/arch/x86_64/kernel/mce.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -56,15 +57,19 @@ void mce_log(struct mce *mce) smp_wmb(); for (;;) { entry = rcu_dereference(mcelog.next); - /* When the buffer fills up discard new entries. Assume - that the earlier errors are the more interesting. */ - if (entry >= MCE_LOG_LEN) { - set_bit(MCE_OVERFLOW, &mcelog.flags); - return; + for (;;) { + /* When the buffer fills up discard new entries. Assume + that the earlier errors are the more interesting. */ + if (entry >= MCE_LOG_LEN) { + set_bit(MCE_OVERFLOW, &mcelog.flags); + return; + } + /* Old left over entry. Skip. */ + if (mcelog.entry[entry].finished) { + entry++; + continue; + } } - /* Old left over entry. Skip. */ - if (mcelog.entry[entry].finished) - continue; smp_rmb(); next = entry + 1; if (cmpxchg(&mcelog.next, entry, next) == entry) @@ -404,9 +409,15 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, loff } err = 0; - for (i = 0; i < next; i++) { - if (!mcelog.entry[i].finished) - continue; + for (i = 0; i < next; i++) { + unsigned long start = jiffies; + while (!mcelog.entry[i].finished) { + if (!time_before(jiffies, start + 2)) { + memset(mcelog.entry + i,0, sizeof(struct mce)); + continue; + } + cpu_relax(); + } smp_rmb(); err |= copy_to_user(buf, mcelog.entry + i, sizeof(struct mce)); buf += sizeof(struct mce); @@ -479,6 +490,7 @@ static int __init mcheck_disable(char *str) /* mce=off disables machine check. Note you can reenable it later using sysfs. + mce=TOLERANCELEVEL (number, see above) mce=bootlog Log MCEs from before booting. Disabled by default to work around buggy BIOS that leave bogus MCEs. */ static int __init mcheck_enable(char *str) @@ -489,6 +501,8 @@ static int __init mcheck_enable(char *str) mce_dont_init = 1; else if (!strcmp(str, "bootlog")) mce_bootlog = 1; + else if (isdigit(str[0])) + get_option(&str, &tolerant); else printk("mce= argument %s ignored. Please use /sys", str); return 0; @@ -501,10 +515,12 @@ __setup("mce", mcheck_enable); * Sysfs support */ -/* On resume clear all MCE state. Don't want to see leftovers from the BIOS. */ +/* On resume clear all MCE state. Don't want to see leftovers from the BIOS. + Only one CPU is active at this time, the others get readded later using + CPU hotplug. */ static int mce_resume(struct sys_device *dev) { - on_each_cpu(mce_init, NULL, 1, 1); + mce_init(NULL); return 0; } diff --git a/trunk/arch/x86_64/kernel/mpparse.c b/trunk/arch/x86_64/kernel/mpparse.c index 8d8ed6ae1d0c..f16d38d09daf 100644 --- a/trunk/arch/x86_64/kernel/mpparse.c +++ b/trunk/arch/x86_64/kernel/mpparse.c @@ -14,7 +14,6 @@ */ #include -#include #include #include #include @@ -46,8 +45,6 @@ int acpi_found_madt; int apic_version [MAX_APICS]; unsigned char mp_bus_id_to_type [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 }; int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 }; -unsigned char pci_bus_to_node [256]; -EXPORT_SYMBOL(pci_bus_to_node); static int mp_current_pci_id = 0; /* I/O APIC entries */ @@ -705,7 +702,7 @@ void __init mp_register_lapic ( processor.mpc_type = MP_PROCESSOR; processor.mpc_apicid = id; - processor.mpc_apicver = 0x10; /* TBD: lapic version */ + processor.mpc_apicver = GET_APIC_VERSION(apic_read(APIC_LVR)); processor.mpc_cpuflag = (enabled ? CPU_ENABLED : 0); processor.mpc_cpuflag |= (boot_cpu ? CPU_BOOTPROCESSOR : 0); processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) | diff --git a/trunk/arch/x86_64/kernel/msr.c b/trunk/arch/x86_64/kernel/msr.c deleted file mode 100644 index 598953ab0154..000000000000 --- a/trunk/arch/x86_64/kernel/msr.c +++ /dev/null @@ -1,279 +0,0 @@ -/* ----------------------------------------------------------------------- * - * - * Copyright 2000 H. Peter Anvin - All Rights Reserved - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, - * USA; either version 2 of the License, or (at your option) any later - * version; incorporated herein by reference. - * - * ----------------------------------------------------------------------- */ - -/* - * msr.c - * - * x86 MSR access device - * - * This device is accessed by lseek() to the appropriate register number - * and then read/write in chunks of 8 bytes. A larger size means multiple - * reads or writes of the same register. - * - * This driver uses /dev/cpu/%d/msr where %d is the minor number, and on - * an SMP box will direct the access to CPU %d. - */ - -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -/* Note: "err" is handled in a funny way below. Otherwise one version - of gcc or another breaks. */ - -static inline int wrmsr_eio(u32 reg, u32 eax, u32 edx) -{ - int err; - - asm volatile ("1: wrmsr\n" - "2:\n" - ".section .fixup,\"ax\"\n" - "3: movl %4,%0\n" - " jmp 2b\n" - ".previous\n" - ".section __ex_table,\"a\"\n" - " .align 8\n" " .quad 1b,3b\n" ".previous":"=&bDS" (err) - :"a"(eax), "d"(edx), "c"(reg), "i"(-EIO), "0"(0)); - - return err; -} - -static inline int rdmsr_eio(u32 reg, u32 *eax, u32 *edx) -{ - int err; - - asm volatile ("1: rdmsr\n" - "2:\n" - ".section .fixup,\"ax\"\n" - "3: movl %4,%0\n" - " jmp 2b\n" - ".previous\n" - ".section __ex_table,\"a\"\n" - " .align 8\n" - " .quad 1b,3b\n" - ".previous":"=&bDS" (err), "=a"(*eax), "=d"(*edx) - :"c"(reg), "i"(-EIO), "0"(0)); - - return err; -} - -#ifdef CONFIG_SMP - -struct msr_command { - int cpu; - int err; - u32 reg; - u32 data[2]; -}; - -static void msr_smp_wrmsr(void *cmd_block) -{ - struct msr_command *cmd = (struct msr_command *)cmd_block; - - if (cmd->cpu == smp_processor_id()) - cmd->err = wrmsr_eio(cmd->reg, cmd->data[0], cmd->data[1]); -} - -static void msr_smp_rdmsr(void *cmd_block) -{ - struct msr_command *cmd = (struct msr_command *)cmd_block; - - if (cmd->cpu == smp_processor_id()) - cmd->err = rdmsr_eio(cmd->reg, &cmd->data[0], &cmd->data[1]); -} - -static inline int do_wrmsr(int cpu, u32 reg, u32 eax, u32 edx) -{ - struct msr_command cmd; - int ret; - - preempt_disable(); - if (cpu == smp_processor_id()) { - ret = wrmsr_eio(reg, eax, edx); - } else { - cmd.cpu = cpu; - cmd.reg = reg; - cmd.data[0] = eax; - cmd.data[1] = edx; - - smp_call_function(msr_smp_wrmsr, &cmd, 1, 1); - ret = cmd.err; - } - preempt_enable(); - return ret; -} - -static inline int do_rdmsr(int cpu, u32 reg, u32 * eax, u32 * edx) -{ - struct msr_command cmd; - int ret; - - preempt_disable(); - if (cpu == smp_processor_id()) { - ret = rdmsr_eio(reg, eax, edx); - } else { - cmd.cpu = cpu; - cmd.reg = reg; - - smp_call_function(msr_smp_rdmsr, &cmd, 1, 1); - - *eax = cmd.data[0]; - *edx = cmd.data[1]; - - ret = cmd.err; - } - preempt_enable(); - return ret; -} - -#else /* ! CONFIG_SMP */ - -static inline int do_wrmsr(int cpu, u32 reg, u32 eax, u32 edx) -{ - return wrmsr_eio(reg, eax, edx); -} - -static inline int do_rdmsr(int cpu, u32 reg, u32 *eax, u32 *edx) -{ - return rdmsr_eio(reg, eax, edx); -} - -#endif /* ! CONFIG_SMP */ - -static loff_t msr_seek(struct file *file, loff_t offset, int orig) -{ - loff_t ret = -EINVAL; - - lock_kernel(); - switch (orig) { - case 0: - file->f_pos = offset; - ret = file->f_pos; - break; - case 1: - file->f_pos += offset; - ret = file->f_pos; - } - unlock_kernel(); - return ret; -} - -static ssize_t msr_read(struct file *file, char __user * buf, - size_t count, loff_t * ppos) -{ - u32 __user *tmp = (u32 __user *) buf; - u32 data[2]; - size_t rv; - u32 reg = *ppos; - int cpu = iminor(file->f_dentry->d_inode); - int err; - - if (count % 8) - return -EINVAL; /* Invalid chunk size */ - - for (rv = 0; count; count -= 8) { - err = do_rdmsr(cpu, reg, &data[0], &data[1]); - if (err) - return err; - if (copy_to_user(tmp, &data, 8)) - return -EFAULT; - tmp += 2; - } - - return ((char __user *)tmp) - buf; -} - -static ssize_t msr_write(struct file *file, const char __user *buf, - size_t count, loff_t *ppos) -{ - const u32 __user *tmp = (const u32 __user *)buf; - u32 data[2]; - size_t rv; - u32 reg = *ppos; - int cpu = iminor(file->f_dentry->d_inode); - int err; - - if (count % 8) - return -EINVAL; /* Invalid chunk size */ - - for (rv = 0; count; count -= 8) { - if (copy_from_user(&data, tmp, 8)) - return -EFAULT; - err = do_wrmsr(cpu, reg, data[0], data[1]); - if (err) - return err; - tmp += 2; - } - - return ((char __user *)tmp) - buf; -} - -static int msr_open(struct inode *inode, struct file *file) -{ - unsigned int cpu = iminor(file->f_dentry->d_inode); - struct cpuinfo_x86 *c = &(cpu_data)[cpu]; - - if (cpu >= NR_CPUS || !cpu_online(cpu)) - return -ENXIO; /* No such CPU */ - if (!cpu_has(c, X86_FEATURE_MSR)) - return -EIO; /* MSR not supported */ - - return 0; -} - -/* - * File operations we support - */ -static struct file_operations msr_fops = { - .owner = THIS_MODULE, - .llseek = msr_seek, - .read = msr_read, - .write = msr_write, - .open = msr_open, -}; - -static int __init msr_init(void) -{ - if (register_chrdev(MSR_MAJOR, "cpu/msr", &msr_fops)) { - printk(KERN_ERR "msr: unable to get major %d for msr\n", - MSR_MAJOR); - return -EBUSY; - } - - return 0; -} - -static void __exit msr_exit(void) -{ - unregister_chrdev(MSR_MAJOR, "cpu/msr"); -} - -module_init(msr_init); -module_exit(msr_exit) - -MODULE_AUTHOR("H. Peter Anvin "); -MODULE_DESCRIPTION("x86 generic MSR driver"); -MODULE_LICENSE("GPL"); diff --git a/trunk/arch/x86_64/kernel/nmi.c b/trunk/arch/x86_64/kernel/nmi.c index caf164959e19..4388b8a5bae7 100644 --- a/trunk/arch/x86_64/kernel/nmi.c +++ b/trunk/arch/x86_64/kernel/nmi.c @@ -14,7 +14,6 @@ #include #include -#include #include #include #include @@ -488,8 +487,8 @@ void nmi_watchdog_tick (struct pt_regs * regs, unsigned reason) == NOTIFY_STOP) { local_set(&__get_cpu_var(alert_counter), 0); return; - } - die_nmi("NMI Watchdog detected LOCKUP on CPU%d", regs); + } + die_nmi("NMI Watchdog detected LOCKUP on CPU %d\n", regs); } } else { __get_cpu_var(last_irq_sum) = sum; diff --git a/trunk/arch/x86_64/kernel/pci-gart.c b/trunk/arch/x86_64/kernel/pci-gart.c index 57f35c68aa34..cf0a0315d586 100644 --- a/trunk/arch/x86_64/kernel/pci-gart.c +++ b/trunk/arch/x86_64/kernel/pci-gart.c @@ -191,11 +191,9 @@ static void *dma_alloc_pages(struct device *dev, unsigned gfp, unsigned order) { struct page *page; int node; - if (dev->bus == &pci_bus_type) { - cpumask_t mask; - mask = pcibus_to_cpumask(to_pci_dev(dev)->bus); - node = cpu_to_node(first_cpu(mask)); - } else + if (dev->bus == &pci_bus_type) + node = pcibus_to_node(to_pci_dev(dev)->bus); + else node = numa_node_id(); page = alloc_pages_node(node, gfp, order); return page ? page_address(page) : NULL; diff --git a/trunk/arch/x86_64/kernel/process.c b/trunk/arch/x86_64/kernel/process.c index 8661f82ac70b..b5a89c0bdf59 100644 --- a/trunk/arch/x86_64/kernel/process.c +++ b/trunk/arch/x86_64/kernel/process.c @@ -32,7 +32,6 @@ #include #include #include -#include #include #include #include @@ -123,6 +122,7 @@ static void poll_idle (void) : : "i" (_TIF_NEED_RESCHED), "m" (current_thread_info()->flags)); + clear_thread_flag(TIF_POLLING_NRFLAG); } else { set_need_resched(); } @@ -271,8 +271,11 @@ void __show_regs(struct pt_regs * regs) printk("\n"); print_modules(); - printk("Pid: %d, comm: %.20s %s %s\n", - current->pid, current->comm, print_tainted(), system_utsname.release); + printk("Pid: %d, comm: %.20s %s %s %.*s\n", + current->pid, current->comm, print_tainted(), + system_utsname.release, + (int)strcspn(system_utsname.version, " "), + system_utsname.version); printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->rip); printk_address(regs->rip); printk("\nRSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->rsp, regs->eflags); @@ -482,33 +485,6 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long rsp, return err; } -/* - * This function selects if the context switch from prev to next - * has to tweak the TSC disable bit in the cr4. - */ -static inline void disable_tsc(struct task_struct *prev_p, - struct task_struct *next_p) -{ - struct thread_info *prev, *next; - - /* - * gcc should eliminate the ->thread_info dereference if - * has_secure_computing returns 0 at compile time (SECCOMP=n). - */ - prev = prev_p->thread_info; - next = next_p->thread_info; - - if (has_secure_computing(prev) || has_secure_computing(next)) { - /* slow path here */ - if (has_secure_computing(prev) && - !has_secure_computing(next)) { - write_cr4(read_cr4() & ~X86_CR4_TSD); - } else if (!has_secure_computing(prev) && - has_secure_computing(next)) - write_cr4(read_cr4() | X86_CR4_TSD); - } -} - /* * This special macro can be used to load a debugging register */ @@ -627,8 +603,6 @@ struct task_struct *__switch_to(struct task_struct *prev_p, struct task_struct * } } - disable_tsc(prev_p, next_p); - return prev_p; } diff --git a/trunk/arch/x86_64/kernel/setup.c b/trunk/arch/x86_64/kernel/setup.c index 9aec524be3eb..351d8d64c2fb 100644 --- a/trunk/arch/x86_64/kernel/setup.c +++ b/trunk/arch/x86_64/kernel/setup.c @@ -336,6 +336,11 @@ static __init void parse_cmdline_early (char ** cmdline_p) #endif #endif + if (!memcmp(from, "disable_timer_pin_1", 19)) + disable_timer_pin_1 = 1; + if (!memcmp(from, "enable_timer_pin_1", 18)) + disable_timer_pin_1 = -1; + if (!memcmp(from, "nolapic", 7) || !memcmp(from, "disableapic", 11)) disable_apic = 1; @@ -755,6 +760,24 @@ static void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) } } +#ifdef CONFIG_NUMA +static int nearby_node(int apicid) +{ + int i; + for (i = apicid - 1; i >= 0; i--) { + int node = apicid_to_node[i]; + if (node != NUMA_NO_NODE && node_online(node)) + return node; + } + for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) { + int node = apicid_to_node[i]; + if (node != NUMA_NO_NODE && node_online(node)) + return node; + } + return first_node(node_online_map); /* Shouldn't happen */ +} +#endif + /* * On a AMD dual core setup the lower bits of the APIC id distingush the cores. * Assumes number of cores is a power of two. @@ -763,8 +786,11 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c) { #ifdef CONFIG_SMP int cpu = smp_processor_id(); - int node = 0; unsigned bits; +#ifdef CONFIG_NUMA + int node = 0; + unsigned apicid = phys_proc_id[cpu]; +#endif bits = 0; while ((1 << bits) < c->x86_num_cores) @@ -776,20 +802,32 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c) phys_proc_id[cpu] >>= bits; #ifdef CONFIG_NUMA - /* When an ACPI SRAT table is available use the mappings from SRAT - instead. */ - if (acpi_numa <= 0) { - node = phys_proc_id[cpu]; - if (!node_online(node)) - node = first_node(node_online_map); - cpu_to_node[cpu] = node; - } else { - node = cpu_to_node[cpu]; - } + node = phys_proc_id[cpu]; + if (apicid_to_node[apicid] != NUMA_NO_NODE) + node = apicid_to_node[apicid]; + if (!node_online(node)) { + /* Two possibilities here: + - The CPU is missing memory and no node was created. + In that case try picking one from a nearby CPU + - The APIC IDs differ from the HyperTransport node IDs + which the K8 northbridge parsing fills in. + Assume they are all increased by a constant offset, + but in the same order as the HT nodeids. + If that doesn't result in a usable node fall back to the + path for the previous case. */ + int ht_nodeid = apicid - (phys_proc_id[0] << bits); + if (ht_nodeid >= 0 && + apicid_to_node[ht_nodeid] != NUMA_NO_NODE) + node = apicid_to_node[ht_nodeid]; + /* Pick a nearby node */ + if (!node_online(node)) + node = nearby_node(apicid); + } + cpu_to_node[cpu] = node; + + printk(KERN_INFO "CPU %d(%d) -> Node %d -> Core %d\n", + cpu, c->x86_num_cores, node, cpu_core_id[cpu]); #endif - - printk(KERN_INFO "CPU %d(%d) -> Node %d -> Core %d\n", - cpu, c->x86_num_cores, node, cpu_core_id[cpu]); #endif } @@ -909,6 +947,25 @@ static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c) return 1; } +static void srat_detect_node(void) +{ +#ifdef CONFIG_NUMA + unsigned apicid, node; + int cpu = smp_processor_id(); + + /* Don't do the funky fallback heuristics the AMD version employs + for now. */ + apicid = phys_proc_id[cpu]; + node = apicid_to_node[apicid]; + if (node == NUMA_NO_NODE) + node = 0; + cpu_to_node[cpu] = node; + + if (acpi_numa > 0) + printk(KERN_INFO "CPU %d -> Node %d\n", cpu, node); +#endif +} + static void __cpuinit init_intel(struct cpuinfo_x86 *c) { /* Cache sizes */ @@ -927,6 +984,8 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) if (c->x86 >= 15) set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability); c->x86_num_cores = intel_num_cpu_cores(c); + + srat_detect_node(); } static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c) diff --git a/trunk/arch/x86_64/kernel/setup64.c b/trunk/arch/x86_64/kernel/setup64.c index e3ffcacc8c90..bd33be24a386 100644 --- a/trunk/arch/x86_64/kernel/setup64.c +++ b/trunk/arch/x86_64/kernel/setup64.c @@ -94,7 +94,7 @@ void __init setup_per_cpu_areas(void) size = PERCPU_ENOUGH_ROOM; #endif - for (i = 0; i < NR_CPUS; i++) { + for_each_cpu_mask (i, cpu_possible_map) { char *ptr; if (!NODE_DATA(cpu_to_node(i))) { @@ -119,7 +119,6 @@ void pda_init(int cpu) asm volatile("movl %0,%%fs ; movl %0,%%gs" :: "r" (0)); wrmsrl(MSR_GS_BASE, cpu_pda + cpu); - pda->me = pda; pda->cpunumber = cpu; pda->irqcount = -1; pda->kernelstack = diff --git a/trunk/arch/x86_64/kernel/smp.c b/trunk/arch/x86_64/kernel/smp.c index e5958220d6b8..9db9dda161b4 100644 --- a/trunk/arch/x86_64/kernel/smp.c +++ b/trunk/arch/x86_64/kernel/smp.c @@ -12,7 +12,6 @@ #include #include -#include #include #include #include @@ -29,6 +28,8 @@ #include #include +#define __cpuinit __init + /* * Smarter SMP flushing macros. * c/o Linus Torvalds. @@ -37,19 +38,41 @@ * writing to user space from interrupts. (Its not allowed anyway). * * Optimizations Manfred Spraul + * + * More scalable flush, from Andi Kleen + * + * To avoid global state use 8 different call vectors. + * Each CPU uses a specific vector to trigger flushes on other + * CPUs. Depending on the received vector the target CPUs look into + * the right per cpu variable for the flush data. + * + * With more than 8 CPUs they are hashed to the 8 available + * vectors. The limited global vector space forces us to this right now. + * In future when interrupts are split into per CPU domains this could be + * fixed, at the cost of triggering multiple IPIs in some cases. */ -static cpumask_t flush_cpumask; -static struct mm_struct * flush_mm; -static unsigned long flush_va; -static DEFINE_SPINLOCK(tlbstate_lock); +union smp_flush_state { + struct { + cpumask_t flush_cpumask; + struct mm_struct *flush_mm; + unsigned long flush_va; #define FLUSH_ALL -1ULL + spinlock_t tlbstate_lock; + }; + char pad[SMP_CACHE_BYTES]; +} ____cacheline_aligned; + +/* State is put into the per CPU data section, but padded + to a full cache line because other CPUs can access it and we don't + want false sharing in the per cpu data segment. */ +static DEFINE_PER_CPU(union smp_flush_state, flush_state); /* * We cannot call mmdrop() because we are in interrupt context, * instead update mm->cpu_vm_mask. */ -static inline void leave_mm (unsigned long cpu) +static inline void leave_mm(int cpu) { if (read_pda(mmu_state) == TLBSTATE_OK) BUG(); @@ -101,15 +124,25 @@ static inline void leave_mm (unsigned long cpu) * * 1) Flush the tlb entries if the cpu uses the mm that's being flushed. * 2) Leave the mm if we are in the lazy tlb mode. + * + * Interrupts are disabled. */ -asmlinkage void smp_invalidate_interrupt (void) +asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs) { - unsigned long cpu; + int cpu; + int sender; + union smp_flush_state *f; - cpu = get_cpu(); + cpu = smp_processor_id(); + /* + * orig_rax contains the interrupt vector - 256. + * Use that to determine where the sender put the data. + */ + sender = regs->orig_rax + 256 - INVALIDATE_TLB_VECTOR_START; + f = &per_cpu(flush_state, sender); - if (!cpu_isset(cpu, flush_cpumask)) + if (!cpu_isset(cpu, f->flush_cpumask)) goto out; /* * This was a BUG() but until someone can quote me the @@ -120,64 +153,63 @@ asmlinkage void smp_invalidate_interrupt (void) * BUG(); */ - if (flush_mm == read_pda(active_mm)) { + if (f->flush_mm == read_pda(active_mm)) { if (read_pda(mmu_state) == TLBSTATE_OK) { - if (flush_va == FLUSH_ALL) + if (f->flush_va == FLUSH_ALL) local_flush_tlb(); else - __flush_tlb_one(flush_va); + __flush_tlb_one(f->flush_va); } else leave_mm(cpu); } out: ack_APIC_irq(); - cpu_clear(cpu, flush_cpumask); - put_cpu_no_resched(); + cpu_clear(cpu, f->flush_cpumask); } static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm, unsigned long va) { - cpumask_t tmp; - /* - * A couple of (to be removed) sanity checks: - * - * - we do not send IPIs to not-yet booted CPUs. - * - current CPU must not be in mask - * - mask must exist :) - */ - BUG_ON(cpus_empty(cpumask)); - cpus_and(tmp, cpumask, cpu_online_map); - BUG_ON(!cpus_equal(tmp, cpumask)); - BUG_ON(cpu_isset(smp_processor_id(), cpumask)); - if (!mm) - BUG(); + int sender; + union smp_flush_state *f; - /* - * I'm not happy about this global shared spinlock in the - * MM hot path, but we'll see how contended it is. - * Temporarily this turns IRQs off, so that lockups are - * detected by the NMI watchdog. - */ - spin_lock(&tlbstate_lock); - - flush_mm = mm; - flush_va = va; - cpus_or(flush_cpumask, cpumask, flush_cpumask); + /* Caller has disabled preemption */ + sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS; + f = &per_cpu(flush_state, sender); + + /* Could avoid this lock when + num_online_cpus() <= NUM_INVALIDATE_TLB_VECTORS, but it is + probably not worth checking this for a cache-hot lock. */ + spin_lock(&f->tlbstate_lock); + + f->flush_mm = mm; + f->flush_va = va; + cpus_or(f->flush_cpumask, cpumask, f->flush_cpumask); /* * We have to send the IPI only to * CPUs affected. */ - send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR); + send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR_START + sender); + + while (!cpus_empty(f->flush_cpumask)) + cpu_relax(); - while (!cpus_empty(flush_cpumask)) - mb(); /* nothing. lockup detection does not belong here */; + f->flush_mm = NULL; + f->flush_va = 0; + spin_unlock(&f->tlbstate_lock); +} - flush_mm = NULL; - flush_va = 0; - spin_unlock(&tlbstate_lock); +int __cpuinit init_smp_flush(void) +{ + int i; + for_each_cpu_mask(i, cpu_possible_map) { + spin_lock_init(&per_cpu(flush_state.tlbstate_lock, i)); + } + return 0; } + +core_initcall(init_smp_flush); void flush_tlb_current_task(void) { @@ -295,8 +327,11 @@ void unlock_ipi_call_lock(void) /* * this function sends a 'generic call function' IPI to one other CPU * in the system. + * + * cpu is a standard Linux logical CPU number. */ -static void __smp_call_function_single (int cpu, void (*func) (void *info), void *info, +static void +__smp_call_function_single(int cpu, void (*func) (void *info), void *info, int nonatomic, int wait) { struct call_data_struct data; diff --git a/trunk/arch/x86_64/kernel/smpboot.c b/trunk/arch/x86_64/kernel/smpboot.c index 4fb34b5cb1f9..e12d7baeb33e 100644 --- a/trunk/arch/x86_64/kernel/smpboot.c +++ b/trunk/arch/x86_64/kernel/smpboot.c @@ -44,7 +44,6 @@ #include #include #include -#include #include #include #include @@ -58,6 +57,8 @@ #include #include #include +#include +#include /* Number of siblings per CPU package */ int smp_num_siblings = 1; @@ -413,8 +414,13 @@ void __cpuinit smp_callin(void) /* * Get our bogomips. + * + * Need to enable IRQs because it can take longer and then + * the NMI watchdog might kill us. */ + local_irq_enable(); calibrate_delay(); + local_irq_disable(); Dprintk("Stack at about %p\n",&cpuid); disable_APIC_timer(); @@ -540,8 +546,8 @@ static void inquire_remote_apic(int apicid) */ apic_wait_icr_idle(); - apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid)); - apic_write_around(APIC_ICR, APIC_DM_REMRD | regs[i]); + apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(apicid)); + apic_write(APIC_ICR, APIC_DM_REMRD | regs[i]); timeout = 0; do { @@ -574,12 +580,12 @@ static int __cpuinit wakeup_secondary_via_INIT(int phys_apicid, unsigned int sta /* * Turn INIT on target chip */ - apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); + apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); /* * Send IPI */ - apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT + apic_write(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT | APIC_DM_INIT); Dprintk("Waiting for send to finish...\n"); @@ -595,10 +601,10 @@ static int __cpuinit wakeup_secondary_via_INIT(int phys_apicid, unsigned int sta Dprintk("Deasserting INIT.\n"); /* Target chip */ - apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); + apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); /* Send IPI */ - apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT); + apic_write(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT); Dprintk("Waiting for send to finish...\n"); timeout = 0; @@ -610,16 +616,7 @@ static int __cpuinit wakeup_secondary_via_INIT(int phys_apicid, unsigned int sta atomic_set(&init_deasserted, 1); - /* - * Should we send STARTUP IPIs ? - * - * Determine this based on the APIC version. - * If we don't have an integrated APIC, don't send the STARTUP IPIs. - */ - if (APIC_INTEGRATED(apic_version[phys_apicid])) - num_starts = 2; - else - num_starts = 0; + num_starts = 2; /* * Run STARTUP IPI loop. @@ -640,12 +637,11 @@ static int __cpuinit wakeup_secondary_via_INIT(int phys_apicid, unsigned int sta */ /* Target chip */ - apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); + apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); /* Boot on the stack */ /* Kick the second */ - apic_write_around(APIC_ICR, APIC_DM_STARTUP - | (start_rip >> 12)); + apic_write(APIC_ICR, APIC_DM_STARTUP | (start_rip >> 12)); /* * Give the other CPU some time to accept the IPI. @@ -864,17 +860,6 @@ static __cpuinit void smp_cleanup_boot(void) * Reset trampoline flag */ *((volatile int *) phys_to_virt(0x467)) = 0; - -#ifndef CONFIG_HOTPLUG_CPU - /* - * Free pages reserved for SMP bootup. - * When you add hotplug CPU support later remove this - * Note there is more work to be done for later CPU bootup. - */ - - free_page((unsigned long) __va(PAGE_SIZE)); - free_page((unsigned long) __va(SMP_TRAMPOLINE_BASE)); -#endif } /* diff --git a/trunk/arch/x86_64/kernel/suspend.c b/trunk/arch/x86_64/kernel/suspend.c index 0612640d91b1..ebb9abf3ce6d 100644 --- a/trunk/arch/x86_64/kernel/suspend.c +++ b/trunk/arch/x86_64/kernel/suspend.c @@ -8,23 +8,8 @@ */ #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include #include -#include -#include -#include -#include #include struct saved_context saved_context; diff --git a/trunk/arch/x86_64/kernel/time.c b/trunk/arch/x86_64/kernel/time.c index 7b6abe058253..2373cb8b8625 100644 --- a/trunk/arch/x86_64/kernel/time.c +++ b/trunk/arch/x86_64/kernel/time.c @@ -18,7 +18,6 @@ #include #include #include -#include #include #include #include @@ -937,7 +936,6 @@ void __init time_init(void) vxtime.mode = VXTIME_TSC; vxtime.quot = (1000000L << 32) / vxtime_hz; vxtime.tsc_quot = (1000L << 32) / cpu_khz; - vxtime.hz = vxtime_hz; rdtscll_sync(&vxtime.last_tsc); setup_irq(0, &irq0); diff --git a/trunk/arch/x86_64/kernel/traps.c b/trunk/arch/x86_64/kernel/traps.c index f238d6078a5a..b5e09e6b5536 100644 --- a/trunk/arch/x86_64/kernel/traps.c +++ b/trunk/arch/x86_64/kernel/traps.c @@ -47,9 +47,6 @@ #include #include -#include - - extern struct gate_struct idt_table[256]; asmlinkage void divide_error(void); @@ -324,13 +321,13 @@ void handle_BUG(struct pt_regs *regs) if (__copy_from_user(&f, (struct bug_frame *) regs->rip, sizeof(struct bug_frame))) return; - if ((unsigned long)f.filename < __PAGE_OFFSET || + if (f.filename >= 0 || f.ud2[0] != 0x0f || f.ud2[1] != 0x0b) return; - if (__get_user(tmp, f.filename)) - f.filename = "unmapped filename"; + if (__get_user(tmp, (char *)(long)f.filename)) + f.filename = (int)(long)"unmapped filename"; printk("----------- [cut here ] --------- [please bite here ] ---------\n"); - printk(KERN_ALERT "Kernel BUG at %.50s:%d\n", f.filename, f.line); + printk(KERN_ALERT "Kernel BUG at %.50s:%d\n", (char *)(long)f.filename, f.line); } #ifdef CONFIG_BUG @@ -343,30 +340,33 @@ void out_of_line_bug(void) static DEFINE_SPINLOCK(die_lock); static int die_owner = -1; -void oops_begin(void) +unsigned long oops_begin(void) { - int cpu = safe_smp_processor_id(); - /* racy, but better than risking deadlock. */ - local_irq_disable(); + int cpu = safe_smp_processor_id(); + unsigned long flags; + + /* racy, but better than risking deadlock. */ + local_irq_save(flags); if (!spin_trylock(&die_lock)) { if (cpu == die_owner) /* nested oops. should stop eventually */; else - spin_lock(&die_lock); + spin_lock(&die_lock); } - die_owner = cpu; + die_owner = cpu; console_verbose(); - bust_spinlocks(1); + bust_spinlocks(1); + return flags; } -void oops_end(void) +void oops_end(unsigned long flags) { die_owner = -1; - bust_spinlocks(0); - spin_unlock(&die_lock); + bust_spinlocks(0); + spin_unlock_irqrestore(&die_lock, flags); if (panic_on_oops) - panic("Oops"); -} + panic("Oops"); +} void __die(const char * str, struct pt_regs * regs, long err) { @@ -392,10 +392,11 @@ void __die(const char * str, struct pt_regs * regs, long err) void die(const char * str, struct pt_regs * regs, long err) { - oops_begin(); + unsigned long flags = oops_begin(); + handle_BUG(regs); __die(str, regs, err); - oops_end(); + oops_end(flags); do_exit(SIGSEGV); } static inline void die_if_kernel(const char * str, struct pt_regs * regs, long err) @@ -406,7 +407,8 @@ static inline void die_if_kernel(const char * str, struct pt_regs * regs, long e void die_nmi(char *str, struct pt_regs *regs) { - oops_begin(); + unsigned long flags = oops_begin(); + /* * We are in trouble anyway, lets at least try * to get a message out. @@ -416,7 +418,7 @@ void die_nmi(char *str, struct pt_regs *regs) if (panic_on_timeout || panic_on_oops) panic("nmi watchdog"); printk("console shuts up ...\n"); - oops_end(); + oops_end(flags); do_exit(SIGSEGV); } @@ -790,13 +792,16 @@ asmlinkage void do_coprocessor_error(struct pt_regs *regs) */ cwd = get_fpu_cwd(task); swd = get_fpu_swd(task); - switch (((~cwd) & swd & 0x3f) | (swd & 0x240)) { + switch (swd & ~cwd & 0x3f) { case 0x000: default: break; case 0x001: /* Invalid Op */ - case 0x041: /* Stack Fault */ - case 0x241: /* Stack Fault | Direction */ + /* + * swd & 0x240 == 0x040: Stack Underflow + * swd & 0x240 == 0x240: Stack Overflow + * User must clear the SF bit (0x40) if set + */ info.si_code = FPE_FLTINV; break; case 0x002: /* Denormalize */ diff --git a/trunk/arch/x86_64/kernel/vsyscall.c b/trunk/arch/x86_64/kernel/vsyscall.c index 2e5734425949..70a0bd16085f 100644 --- a/trunk/arch/x86_64/kernel/vsyscall.c +++ b/trunk/arch/x86_64/kernel/vsyscall.c @@ -107,7 +107,7 @@ static force_inline long time_syscall(long *t) return secs; } -static int __vsyscall(0) vgettimeofday(struct timeval * tv, struct timezone * tz) +int __vsyscall(0) vgettimeofday(struct timeval * tv, struct timezone * tz) { if (unlikely(!__sysctl_vsyscall)) return gettimeofday(tv,tz); @@ -120,7 +120,7 @@ static int __vsyscall(0) vgettimeofday(struct timeval * tv, struct timezone * tz /* This will break when the xtime seconds get inaccurate, but that is * unlikely */ -static time_t __vsyscall(1) vtime(time_t *t) +time_t __vsyscall(1) vtime(time_t *t) { if (unlikely(!__sysctl_vsyscall)) return time_syscall(t); @@ -129,12 +129,12 @@ static time_t __vsyscall(1) vtime(time_t *t) return __xtime.tv_sec; } -static long __vsyscall(2) venosys_0(void) +long __vsyscall(2) venosys_0(void) { return -ENOSYS; } -static long __vsyscall(3) venosys_1(void) +long __vsyscall(3) venosys_1(void) { return -ENOSYS; } diff --git a/trunk/arch/x86_64/mm/fault.c b/trunk/arch/x86_64/mm/fault.c index 816732d8858c..b75b872ec154 100644 --- a/trunk/arch/x86_64/mm/fault.c +++ b/trunk/arch/x86_64/mm/fault.c @@ -221,12 +221,13 @@ int unhandled_signal(struct task_struct *tsk, int sig) static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs, unsigned long error_code) { - oops_begin(); + unsigned long flags = oops_begin(); + printk(KERN_ALERT "%s: Corrupted page table at address %lx\n", current->comm, address); dump_pagetable(address); __die("Bad pagetable", regs, error_code); - oops_end(); + oops_end(flags); do_exit(SIGKILL); } @@ -304,6 +305,7 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, unsigned long address; const struct exception_table_entry *fixup; int write; + unsigned long flags; siginfo_t info; #ifdef CONFIG_CHECKING @@ -521,7 +523,7 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, * terminate things with extreme prejudice. */ - oops_begin(); + flags = oops_begin(); if (address < PAGE_SIZE) printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference"); @@ -534,7 +536,7 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, __die("Oops", regs, error_code); /* Executive summary in case the body of the oops scrolled away */ printk(KERN_EMERG "CR2: %016lx\n", address); - oops_end(); + oops_end(flags); do_exit(SIGKILL); /* diff --git a/trunk/arch/x86_64/mm/init.c b/trunk/arch/x86_64/mm/init.c index aa4a5189ecee..e60a1a848de8 100644 --- a/trunk/arch/x86_64/mm/init.c +++ b/trunk/arch/x86_64/mm/init.c @@ -57,31 +57,31 @@ DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); void show_mem(void) { - int i, total = 0, reserved = 0; - int shared = 0, cached = 0; + long i, total = 0, reserved = 0; + long shared = 0, cached = 0; pg_data_t *pgdat; struct page *page; - printk("Mem-info:\n"); + printk(KERN_INFO "Mem-info:\n"); show_free_areas(); - printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); + printk(KERN_INFO "Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); for_each_pgdat(pgdat) { for (i = 0; i < pgdat->node_spanned_pages; ++i) { page = pfn_to_page(pgdat->node_start_pfn + i); total++; - if (PageReserved(page)) - reserved++; - else if (PageSwapCache(page)) - cached++; - else if (page_count(page)) - shared += page_count(page) - 1; + if (PageReserved(page)) + reserved++; + else if (PageSwapCache(page)) + cached++; + else if (page_count(page)) + shared += page_count(page) - 1; } } - printk("%d pages of RAM\n", total); - printk("%d reserved pages\n",reserved); - printk("%d pages shared\n",shared); - printk("%d pages swap cached\n",cached); + printk(KERN_INFO "%lu pages of RAM\n", total); + printk(KERN_INFO "%lu reserved pages\n",reserved); + printk(KERN_INFO "%lu pages shared\n",shared); + printk(KERN_INFO "%lu pages swap cached\n",cached); } /* References to section boundaries */ @@ -381,41 +381,14 @@ void __init clear_kernel_mapping(unsigned long address, unsigned long size) __flush_tlb_all(); } -static inline int page_is_ram (unsigned long pagenr) -{ - int i; - - for (i = 0; i < e820.nr_map; i++) { - unsigned long addr, end; - - if (e820.map[i].type != E820_RAM) /* not usable memory */ - continue; - /* - * !!!FIXME!!! Some BIOSen report areas as RAM that - * are not. Notably the 640->1Mb area. We need a sanity - * check here. - */ - addr = (e820.map[i].addr+PAGE_SIZE-1) >> PAGE_SHIFT; - end = (e820.map[i].addr+e820.map[i].size) >> PAGE_SHIFT; - if ((pagenr >= addr) && (pagenr < end)) - return 1; - } - return 0; -} - -extern int swiotlb_force; - static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, kcore_modules, kcore_vsyscall; void __init mem_init(void) { - int codesize, reservedpages, datasize, initsize; - int tmp; + long codesize, reservedpages, datasize, initsize; #ifdef CONFIG_SWIOTLB - if (swiotlb_force) - swiotlb = 1; if (!iommu_aperture && (end_pfn >= 0xffffffff>>PAGE_SHIFT || force_iommu)) swiotlb = 1; @@ -436,25 +409,11 @@ void __init mem_init(void) /* this will put all low memory onto the freelists */ #ifdef CONFIG_NUMA - totalram_pages += numa_free_all_bootmem(); - tmp = 0; - /* should count reserved pages here for all nodes */ + totalram_pages = numa_free_all_bootmem(); #else - -#ifdef CONFIG_FLATMEM - max_mapnr = end_pfn; - if (!mem_map) BUG(); -#endif - - totalram_pages += free_all_bootmem(); - - for (tmp = 0; tmp < end_pfn; tmp++) - /* - * Only count reserved RAM pages - */ - if (page_is_ram(tmp) && PageReserved(pfn_to_page(tmp))) - reservedpages++; + totalram_pages = free_all_bootmem(); #endif + reservedpages = end_pfn - totalram_pages - e820_hole_size(0, end_pfn); after_bootmem = 1; @@ -471,7 +430,7 @@ void __init mem_init(void) kclist_add(&kcore_vsyscall, (void *)VSYSCALL_START, VSYSCALL_END - VSYSCALL_START); - printk("Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, %dk init)\n", + printk("Memory: %luk/%luk available (%ldk kernel code, %ldk reserved, %ldk data, %ldk init)\n", (unsigned long) nr_free_pages() << (PAGE_SHIFT-10), end_pfn << (PAGE_SHIFT-10), codesize >> 10, diff --git a/trunk/arch/x86_64/mm/k8topology.c b/trunk/arch/x86_64/mm/k8topology.c index ec35747aacd7..65417b040c1b 100644 --- a/trunk/arch/x86_64/mm/k8topology.c +++ b/trunk/arch/x86_64/mm/k8topology.c @@ -45,10 +45,12 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end) unsigned long prevbase; struct node nodes[8]; int nodeid, i, nb; + unsigned char nodeids[8]; int found = 0; u32 reg; unsigned numnodes; nodemask_t nodes_parsed; + unsigned dualcore = 0; nodes_clear(nodes_parsed); @@ -67,11 +69,15 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end) prevbase = 0; for (i = 0; i < 8; i++) { unsigned long base,limit; - + u32 nodeid; + + /* Undefined before E stepping, but hopefully 0 */ + dualcore |= ((read_pci_config(0, nb, 3, 0xe8) >> 12) & 3) == 1; base = read_pci_config(0, nb, 1, 0x40 + i*8); limit = read_pci_config(0, nb, 1, 0x44 + i*8); nodeid = limit & 7; + nodeids[i] = nodeid; if ((base & 3) == 0) { if (i < numnodes) printk("Skipping disabled node %d\n", i); @@ -157,8 +163,9 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end) for (i = 0; i < 8; i++) { if (nodes[i].start != nodes[i].end) { - /* assume 1:1 NODE:CPU */ - cpu_to_node[i] = i; + nodeid = nodeids[i]; + apicid_to_node[nodeid << dualcore] = i; + apicid_to_node[(nodeid << dualcore) + dualcore] = i; setup_node_bootmem(i, nodes[i].start, nodes[i].end); } } diff --git a/trunk/arch/x86_64/mm/numa.c b/trunk/arch/x86_64/mm/numa.c index 04f7a33e144c..80a49d9bd8a7 100644 --- a/trunk/arch/x86_64/mm/numa.c +++ b/trunk/arch/x86_64/mm/numa.c @@ -28,8 +28,13 @@ bootmem_data_t plat_node_bdata[MAX_NUMNODES]; int memnode_shift; u8 memnodemap[NODEMAPSIZE]; -unsigned char cpu_to_node[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = NUMA_NO_NODE }; -cpumask_t node_to_cpumask[MAX_NUMNODES] __read_mostly; +unsigned char cpu_to_node[NR_CPUS] __read_mostly = { + [0 ... NR_CPUS-1] = NUMA_NO_NODE +}; +unsigned char apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = { + [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE +}; +cpumask_t node_to_cpumask[MAX_NUMNODES] __read_mostly; int numa_off __initdata; diff --git a/trunk/arch/x86_64/mm/srat.c b/trunk/arch/x86_64/mm/srat.c index 8e3d097a9ddd..4b2e844c15a7 100644 --- a/trunk/arch/x86_64/mm/srat.c +++ b/trunk/arch/x86_64/mm/srat.c @@ -20,14 +20,20 @@ static struct acpi_table_slit *acpi_slit; -/* Internal processor count */ -static unsigned int __initdata num_processors = 0; - static nodemask_t nodes_parsed __initdata; static nodemask_t nodes_found __initdata; static struct node nodes[MAX_NUMNODES] __initdata; static __u8 pxm2node[256] = { [0 ... 255] = 0xff }; +static int node_to_pxm(int n); + +int pxm_to_node(int pxm) +{ + if ((unsigned)pxm >= 256) + return 0; + return pxm2node[pxm]; +} + static __init int setup_node(int pxm) { unsigned node = pxm2node[pxm]; @@ -44,14 +50,14 @@ static __init int setup_node(int pxm) static __init int conflicting_nodes(unsigned long start, unsigned long end) { int i; - for_each_online_node(i) { + for_each_node_mask(i, nodes_parsed) { struct node *nd = &nodes[i]; if (nd->start == nd->end) continue; if (nd->end > start && nd->start < end) - return 1; + return i; if (nd->end == end && nd->start == start) - return 1; + return i; } return -1; } @@ -75,8 +81,11 @@ static __init void cutoff_node(int i, unsigned long start, unsigned long end) static __init void bad_srat(void) { + int i; printk(KERN_ERR "SRAT: SRAT not used.\n"); acpi_numa = -1; + for (i = 0; i < MAX_LOCAL_APIC; i++) + apicid_to_node[i] = NUMA_NO_NODE; } static __init inline int srat_disabled(void) @@ -104,18 +113,10 @@ acpi_numa_processor_affinity_init(struct acpi_table_processor_affinity *pa) bad_srat(); return; } - if (num_processors >= NR_CPUS) { - printk(KERN_ERR "SRAT: Processor #%d (lapic %u) INVALID. (Max ID: %d).\n", - num_processors, pa->apic_id, NR_CPUS); - bad_srat(); - return; - } - cpu_to_node[num_processors] = node; + apicid_to_node[pa->apic_id] = node; acpi_numa = 1; - printk(KERN_INFO "SRAT: PXM %u -> APIC %u -> CPU %u -> Node %u\n", - pxm, pa->apic_id, num_processors, node); - - num_processors++; + printk(KERN_INFO "SRAT: PXM %u -> APIC %u -> Node %u\n", + pxm, pa->apic_id, node); } /* Callback for parsing of the Proximity Domain <-> Memory Area mappings */ @@ -143,10 +144,15 @@ acpi_numa_memory_affinity_init(struct acpi_table_memory_affinity *ma) printk(KERN_INFO "SRAT: hot plug zone found %lx - %lx \n", start, end); i = conflicting_nodes(start, end); - if (i >= 0) { + if (i == node) { + printk(KERN_WARNING + "SRAT: Warning: PXM %d (%lx-%lx) overlaps with itself (%Lx-%Lx)\n", + pxm, start, end, nodes[i].start, nodes[i].end); + } else if (i >= 0) { printk(KERN_ERR - "SRAT: pxm %d overlap %lx-%lx with node %d(%Lx-%Lx)\n", - pxm, start, end, i, nodes[i].start, nodes[i].end); + "SRAT: PXM %d (%lx-%lx) overlaps with PXM %d (%Lx-%Lx)\n", + pxm, start, end, node_to_pxm(i), + nodes[i].start, nodes[i].end); bad_srat(); return; } @@ -174,6 +180,14 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end) int i; if (acpi_numa <= 0) return -1; + + /* First clean up the node list */ + for_each_node_mask(i, nodes_parsed) { + cutoff_node(i, start, end); + if (nodes[i].start == nodes[i].end) + node_clear(i, nodes_parsed); + } + memnode_shift = compute_hash_shift(nodes, nodes_weight(nodes_parsed)); if (memnode_shift < 0) { printk(KERN_ERR @@ -181,16 +195,10 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end) bad_srat(); return -1; } - for (i = 0; i < MAX_NUMNODES; i++) { - if (!node_isset(i, nodes_parsed)) - continue; - cutoff_node(i, start, end); - if (nodes[i].start == nodes[i].end) { - node_clear(i, nodes_parsed); - continue; - } + + /* Finally register nodes */ + for_each_node_mask(i, nodes_parsed) setup_node_bootmem(i, nodes[i].start, nodes[i].end); - } for (i = 0; i < NR_CPUS; i++) { if (cpu_to_node[i] == NUMA_NO_NODE) continue; @@ -201,7 +209,7 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end) return 0; } -int node_to_pxm(int n) +static int node_to_pxm(int n) { int i; if (pxm2node[n] == n) diff --git a/trunk/arch/x86_64/pci/k8-bus.c b/trunk/arch/x86_64/pci/k8-bus.c index d80c323669e0..3acf60ded2a0 100644 --- a/trunk/arch/x86_64/pci/k8-bus.c +++ b/trunk/arch/x86_64/pci/k8-bus.c @@ -58,10 +58,16 @@ fill_mp_bus_to_cpumask(void) for (j = SECONDARY_LDT_BUS_NUMBER(ldtbus); j <= SUBORDINATE_LDT_BUS_NUMBER(ldtbus); j++) { - int node = NODE_ID(nid); + struct pci_bus *bus; + long node = NODE_ID(nid); + /* Algorithm a bit dumb, but + it shouldn't matter here */ + bus = pci_find_bus(0, j); + if (!bus) + continue; if (!node_online(node)) node = 0; - pci_bus_to_node[j] = node; + bus->sysdata = (void *)node; } } } diff --git a/trunk/arch/x86_64/pci/mmconfig.c b/trunk/arch/x86_64/pci/mmconfig.c index 657e88aa0902..a0838c4a94e4 100644 --- a/trunk/arch/x86_64/pci/mmconfig.c +++ b/trunk/arch/x86_64/pci/mmconfig.c @@ -111,13 +111,6 @@ static int __init pci_mmcfg_init(void) (pci_mmcfg_config[0].base_address == 0)) return 0; - /* Kludge for now. Don't use mmconfig on AMD systems because - those have some busses where mmconfig doesn't work, - and we don't parse ACPI MCFG well enough to handle that. - Remove when proper handling is added. */ - if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) - return 0; - /* RED-PEN i386 doesn't do _nocache right now */ pci_mmcfg_virt = kmalloc(sizeof(*pci_mmcfg_virt) * pci_mmcfg_config_num, GFP_KERNEL); if (pci_mmcfg_virt == NULL) { diff --git a/trunk/include/asm-generic/tlb.h b/trunk/include/asm-generic/tlb.h index faff403e1061..7d0298347ee7 100644 --- a/trunk/include/asm-generic/tlb.h +++ b/trunk/include/asm-generic/tlb.h @@ -23,7 +23,11 @@ * and page free order so much.. */ #ifdef CONFIG_SMP - #define FREE_PTE_NR 506 + #ifdef ARCH_FREE_PTR_NR + #define FREE_PTR_NR ARCH_FREE_PTR_NR + #else + #define FREE_PTE_NR 506 + #endif #define tlb_fast_mode(tlb) ((tlb)->nr == ~0U) #else #define FREE_PTE_NR 1 diff --git a/trunk/include/asm-i386/apic.h b/trunk/include/asm-i386/apic.h index 6a1b1882285c..8c454aa58ac6 100644 --- a/trunk/include/asm-i386/apic.h +++ b/trunk/include/asm-i386/apic.h @@ -130,6 +130,8 @@ extern unsigned int nmi_watchdog; #define NMI_LOCAL_APIC 2 #define NMI_INVALID 3 +extern int disable_timer_pin_1; + #else /* !CONFIG_X86_LOCAL_APIC */ static inline void lapic_shutdown(void) { } diff --git a/trunk/include/asm-i386/numa.h b/trunk/include/asm-i386/numa.h new file mode 100644 index 000000000000..96fcb157db1d --- /dev/null +++ b/trunk/include/asm-i386/numa.h @@ -0,0 +1,3 @@ + +int pxm_to_nid(int pxm); + diff --git a/trunk/include/asm-i386/topology.h b/trunk/include/asm-i386/topology.h index 2461b731781e..0ec27c9e8e45 100644 --- a/trunk/include/asm-i386/topology.h +++ b/trunk/include/asm-i386/topology.h @@ -60,7 +60,7 @@ static inline int node_to_first_cpu(int node) return first_cpu(mask); } -#define pcibus_to_node(bus) mp_bus_id_to_node[(bus)->number] +#define pcibus_to_node(bus) ((long) (bus)->sysdata) #define pcibus_to_cpumask(bus) node_to_cpumask(pcibus_to_node(bus)) /* sched_domains SD_NODE_INIT for NUMAQ machines */ diff --git a/trunk/include/asm-i386/unistd.h b/trunk/include/asm-i386/unistd.h index a7cb377745bf..fbaf90a3968c 100644 --- a/trunk/include/asm-i386/unistd.h +++ b/trunk/include/asm-i386/unistd.h @@ -332,7 +332,7 @@ type name(type1 arg1) \ long __res; \ __asm__ volatile ("int $0x80" \ : "=a" (__res) \ - : "0" (__NR_##name),"b" ((long)(arg1))); \ + : "0" (__NR_##name),"b" ((long)(arg1)) : "memory"); \ __syscall_return(type,__res); \ } @@ -342,7 +342,7 @@ type name(type1 arg1,type2 arg2) \ long __res; \ __asm__ volatile ("int $0x80" \ : "=a" (__res) \ - : "0" (__NR_##name),"b" ((long)(arg1)),"c" ((long)(arg2))); \ + : "0" (__NR_##name),"b" ((long)(arg1)),"c" ((long)(arg2)) : "memory"); \ __syscall_return(type,__res); \ } @@ -353,7 +353,7 @@ long __res; \ __asm__ volatile ("int $0x80" \ : "=a" (__res) \ : "0" (__NR_##name),"b" ((long)(arg1)),"c" ((long)(arg2)), \ - "d" ((long)(arg3))); \ + "d" ((long)(arg3)) : "memory"); \ __syscall_return(type,__res); \ } @@ -364,7 +364,7 @@ long __res; \ __asm__ volatile ("int $0x80" \ : "=a" (__res) \ : "0" (__NR_##name),"b" ((long)(arg1)),"c" ((long)(arg2)), \ - "d" ((long)(arg3)),"S" ((long)(arg4))); \ + "d" ((long)(arg3)),"S" ((long)(arg4)) : "memory"); \ __syscall_return(type,__res); \ } @@ -376,7 +376,7 @@ long __res; \ __asm__ volatile ("int $0x80" \ : "=a" (__res) \ : "0" (__NR_##name),"b" ((long)(arg1)),"c" ((long)(arg2)), \ - "d" ((long)(arg3)),"S" ((long)(arg4)),"D" ((long)(arg5))); \ + "d" ((long)(arg3)),"S" ((long)(arg4)),"D" ((long)(arg5)) : "memory"); \ __syscall_return(type,__res); \ } @@ -389,7 +389,7 @@ __asm__ volatile ("push %%ebp ; movl %%eax,%%ebp ; movl %1,%%eax ; int $0x80 ; p : "=a" (__res) \ : "i" (__NR_##name),"b" ((long)(arg1)),"c" ((long)(arg2)), \ "d" ((long)(arg3)),"S" ((long)(arg4)),"D" ((long)(arg5)), \ - "0" ((long)(arg6))); \ + "0" ((long)(arg6)) : "memory"); \ __syscall_return(type,__res); \ } diff --git a/trunk/include/asm-x86_64/apic.h b/trunk/include/asm-x86_64/apic.h index 16ec82e16b21..6c5d5ca8383a 100644 --- a/trunk/include/asm-x86_64/apic.h +++ b/trunk/include/asm-x86_64/apic.h @@ -109,9 +109,10 @@ extern unsigned int nmi_watchdog; #define NMI_LOCAL_APIC 2 #define NMI_INVALID 3 +extern int disable_timer_pin_1; + #endif /* CONFIG_X86_LOCAL_APIC */ -#define esr_disable 0 extern unsigned boot_cpu_id; #endif /* __ASM_APIC_H */ diff --git a/trunk/include/asm-x86_64/apicdef.h b/trunk/include/asm-x86_64/apicdef.h index 9388062c4f6e..fb1c99ac669f 100644 --- a/trunk/include/asm-x86_64/apicdef.h +++ b/trunk/include/asm-x86_64/apicdef.h @@ -113,6 +113,7 @@ #define APIC_BASE (fix_to_virt(FIX_APIC_BASE)) #define MAX_IO_APICS 128 +#define MAX_LOCAL_APIC 256 /* * All x86-64 systems are xAPIC compatible. diff --git a/trunk/include/asm-x86_64/bug.h b/trunk/include/asm-x86_64/bug.h index eed785667289..80ac1fe966ac 100644 --- a/trunk/include/asm-x86_64/bug.h +++ b/trunk/include/asm-x86_64/bug.h @@ -9,10 +9,8 @@ */ struct bug_frame { unsigned char ud2[2]; - unsigned char mov; - /* should use 32bit offset instead, but the assembler doesn't - like it */ - char *filename; + unsigned char push; + signed int filename; unsigned char ret; unsigned short line; } __attribute__((packed)); @@ -25,8 +23,8 @@ struct bug_frame { The magic numbers generate mov $64bitimm,%eax ; ret $offset. */ #define BUG() \ asm volatile( \ - "ud2 ; .byte 0xa3 ; .quad %c1 ; .byte 0xc2 ; .short %c0" :: \ - "i"(__LINE__), "i" (__stringify(__FILE__))) + "ud2 ; pushq $%c1 ; ret $%c0" :: \ + "i"(__LINE__), "i" (__FILE__)) void out_of_line_bug(void); #else static inline void out_of_line_bug(void) { } diff --git a/trunk/include/asm-x86_64/calling.h b/trunk/include/asm-x86_64/calling.h index 0bc12655fa5b..fc2c5a6c262a 100644 --- a/trunk/include/asm-x86_64/calling.h +++ b/trunk/include/asm-x86_64/calling.h @@ -65,27 +65,36 @@ .if \skipr11 .else movq (%rsp),%r11 + CFI_RESTORE r11 .endif .if \skipr8910 .else movq 1*8(%rsp),%r10 + CFI_RESTORE r10 movq 2*8(%rsp),%r9 + CFI_RESTORE r9 movq 3*8(%rsp),%r8 + CFI_RESTORE r8 .endif .if \skiprax .else movq 4*8(%rsp),%rax + CFI_RESTORE rax .endif .if \skiprcx .else movq 5*8(%rsp),%rcx + CFI_RESTORE rcx .endif .if \skiprdx .else movq 6*8(%rsp),%rdx + CFI_RESTORE rdx .endif movq 7*8(%rsp),%rsi + CFI_RESTORE rsi movq 8*8(%rsp),%rdi + CFI_RESTORE rdi .if ARG_SKIP+\addskip > 0 addq $ARG_SKIP+\addskip,%rsp CFI_ADJUST_CFA_OFFSET -(ARG_SKIP+\addskip) @@ -124,11 +133,17 @@ .macro RESTORE_REST movq (%rsp),%r15 + CFI_RESTORE r15 movq 1*8(%rsp),%r14 + CFI_RESTORE r14 movq 2*8(%rsp),%r13 + CFI_RESTORE r13 movq 3*8(%rsp),%r12 + CFI_RESTORE r12 movq 4*8(%rsp),%rbp + CFI_RESTORE rbp movq 5*8(%rsp),%rbx + CFI_RESTORE rbx addq $REST_SKIP,%rsp CFI_ADJUST_CFA_OFFSET -(REST_SKIP) .endm @@ -146,11 +161,3 @@ .macro icebp .byte 0xf1 .endm - -#ifdef CONFIG_FRAME_POINTER -#define ENTER enter -#define LEAVE leave -#else -#define ENTER -#define LEAVE -#endif diff --git a/trunk/include/asm-x86_64/desc.h b/trunk/include/asm-x86_64/desc.h index c89b58bebee2..594e610f4a1e 100644 --- a/trunk/include/asm-x86_64/desc.h +++ b/trunk/include/asm-x86_64/desc.h @@ -191,7 +191,7 @@ static inline void load_TLS(struct thread_struct *t, unsigned int cpu) /* * load one particular LDT into the current CPU */ -extern inline void load_LDT_nolock (mm_context_t *pc, int cpu) +static inline void load_LDT_nolock (mm_context_t *pc, int cpu) { int count = pc->size; diff --git a/trunk/include/asm-x86_64/dma-mapping.h b/trunk/include/asm-x86_64/dma-mapping.h index a416dc31634a..e784fdc524f1 100644 --- a/trunk/include/asm-x86_64/dma-mapping.h +++ b/trunk/include/asm-x86_64/dma-mapping.h @@ -85,6 +85,11 @@ static inline void dma_sync_single_for_device(struct device *hwdev, flush_write_buffers(); } +#define dma_sync_single_range_for_cpu(dev, dma_handle, offset, size, dir) \ + dma_sync_single_for_cpu(dev, dma_handle, size, dir) +#define dma_sync_single_range_for_device(dev, dma_handle, offset, size, dir) \ + dma_sync_single_for_device(dev, dma_handle, size, dir) + static inline void dma_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg, int nelems, int direction) diff --git a/trunk/include/asm-x86_64/dwarf2.h b/trunk/include/asm-x86_64/dwarf2.h index afd4212e860b..582757fc0365 100644 --- a/trunk/include/asm-x86_64/dwarf2.h +++ b/trunk/include/asm-x86_64/dwarf2.h @@ -24,6 +24,10 @@ #define CFI_ADJUST_CFA_OFFSET .cfi_adjust_cfa_offset #define CFI_OFFSET .cfi_offset #define CFI_REL_OFFSET .cfi_rel_offset +#define CFI_REGISTER .cfi_register +#define CFI_RESTORE .cfi_restore +#define CFI_REMEMBER_STATE .cfi_remember_state +#define CFI_RESTORE_STATE .cfi_restore_state #else @@ -36,6 +40,10 @@ #define CFI_ADJUST_CFA_OFFSET # #define CFI_OFFSET # #define CFI_REL_OFFSET # +#define CFI_REGISTER # +#define CFI_RESTORE # +#define CFI_REMEMBER_STATE # +#define CFI_RESTORE_STATE # #endif diff --git a/trunk/include/asm-x86_64/fixmap.h b/trunk/include/asm-x86_64/fixmap.h index cf8b16cbe8db..a582cfcf2231 100644 --- a/trunk/include/asm-x86_64/fixmap.h +++ b/trunk/include/asm-x86_64/fixmap.h @@ -76,7 +76,7 @@ extern void __this_fixmap_does_not_exist(void); * directly without translation, we catch the bug with a NULL-deference * kernel oops. Illegal ranges of incoming indices are caught too. */ -extern inline unsigned long fix_to_virt(const unsigned int idx) +static inline unsigned long fix_to_virt(const unsigned int idx) { /* * this branch gets completely eliminated after inlining, diff --git a/trunk/include/asm-x86_64/hardirq.h b/trunk/include/asm-x86_64/hardirq.h index 27c381fa1c9d..8661b476fb40 100644 --- a/trunk/include/asm-x86_64/hardirq.h +++ b/trunk/include/asm-x86_64/hardirq.h @@ -9,11 +9,12 @@ #define __ARCH_IRQ_STAT 1 -/* Generate a lvalue for a pda member. Should fix softirq.c instead to use - special access macros. This would generate better code. */ -#define __IRQ_STAT(cpu,member) (read_pda(me)->member) +#define local_softirq_pending() read_pda(__softirq_pending) -#include /* Standard mappings for irq_cpustat_t above */ +#define __ARCH_SET_SOFTIRQ_PENDING 1 + +#define set_softirq_pending(x) write_pda(__softirq_pending, (x)) +#define or_softirq_pending(x) or_pda(__softirq_pending, (x)) /* * 'what should we do if we get a hw irq event on an illegal vector'. diff --git a/trunk/include/asm-x86_64/hw_irq.h b/trunk/include/asm-x86_64/hw_irq.h index 2b5cb2865d21..dc97668ea0f9 100644 --- a/trunk/include/asm-x86_64/hw_irq.h +++ b/trunk/include/asm-x86_64/hw_irq.h @@ -26,6 +26,7 @@ struct hw_interrupt_type; #endif +#define NMI_VECTOR 0x02 /* * IDT vectors usable for external interrupt sources start * at 0x20: @@ -50,14 +51,15 @@ struct hw_interrupt_type; */ #define SPURIOUS_APIC_VECTOR 0xff #define ERROR_APIC_VECTOR 0xfe -#define INVALIDATE_TLB_VECTOR 0xfd -#define RESCHEDULE_VECTOR 0xfc -#define TASK_MIGRATION_VECTOR 0xfb -#define CALL_FUNCTION_VECTOR 0xfa -#define KDB_VECTOR 0xf9 - -#define THERMAL_APIC_VECTOR 0xf0 - +#define RESCHEDULE_VECTOR 0xfd +#define CALL_FUNCTION_VECTOR 0xfc +#define KDB_VECTOR 0xfb /* reserved for KDB */ +#define THERMAL_APIC_VECTOR 0xfa +/* 0xf9 free */ +#define INVALIDATE_TLB_VECTOR_END 0xf8 +#define INVALIDATE_TLB_VECTOR_START 0xf0 /* f0-f8 used for TLB flush */ + +#define NUM_INVALIDATE_TLB_VECTORS 8 /* * Local APIC timer IRQ vector is on a different priority level, diff --git a/trunk/include/asm-x86_64/io.h b/trunk/include/asm-x86_64/io.h index 37fc3f149a5a..52ff269fe054 100644 --- a/trunk/include/asm-x86_64/io.h +++ b/trunk/include/asm-x86_64/io.h @@ -48,7 +48,7 @@ * Talk about misusing macros.. */ #define __OUT1(s,x) \ -extern inline void out##s(unsigned x value, unsigned short port) { +static inline void out##s(unsigned x value, unsigned short port) { #define __OUT2(s,s1,s2) \ __asm__ __volatile__ ("out" #s " %" s1 "0,%" s2 "1" @@ -58,7 +58,7 @@ __OUT1(s,x) __OUT2(s,s1,"w") : : "a" (value), "Nd" (port)); } \ __OUT1(s##_p,x) __OUT2(s,s1,"w") __FULL_SLOW_DOWN_IO : : "a" (value), "Nd" (port));} \ #define __IN1(s) \ -extern inline RETURN_TYPE in##s(unsigned short port) { RETURN_TYPE _v; +static inline RETURN_TYPE in##s(unsigned short port) { RETURN_TYPE _v; #define __IN2(s,s1,s2) \ __asm__ __volatile__ ("in" #s " %" s2 "1,%" s1 "0" @@ -68,12 +68,12 @@ __IN1(s) __IN2(s,s1,"w") : "=a" (_v) : "Nd" (port) ,##i ); return _v; } \ __IN1(s##_p) __IN2(s,s1,"w") __FULL_SLOW_DOWN_IO : "=a" (_v) : "Nd" (port) ,##i ); return _v; } \ #define __INS(s) \ -extern inline void ins##s(unsigned short port, void * addr, unsigned long count) \ +static inline void ins##s(unsigned short port, void * addr, unsigned long count) \ { __asm__ __volatile__ ("rep ; ins" #s \ : "=D" (addr), "=c" (count) : "d" (port),"0" (addr),"1" (count)); } #define __OUTS(s) \ -extern inline void outs##s(unsigned short port, const void * addr, unsigned long count) \ +static inline void outs##s(unsigned short port, const void * addr, unsigned long count) \ { __asm__ __volatile__ ("rep ; outs" #s \ : "=S" (addr), "=c" (count) : "d" (port),"0" (addr),"1" (count)); } @@ -110,12 +110,12 @@ __OUTS(l) * Change virtual addresses to physical addresses and vv. * These are pretty trivial */ -extern inline unsigned long virt_to_phys(volatile void * address) +static inline unsigned long virt_to_phys(volatile void * address) { return __pa(address); } -extern inline void * phys_to_virt(unsigned long address) +static inline void * phys_to_virt(unsigned long address) { return __va(address); } @@ -130,7 +130,7 @@ extern inline void * phys_to_virt(unsigned long address) extern void __iomem *__ioremap(unsigned long offset, unsigned long size, unsigned long flags); -extern inline void __iomem * ioremap (unsigned long offset, unsigned long size) +static inline void __iomem * ioremap (unsigned long offset, unsigned long size) { return __ioremap(offset, size, 0); } diff --git a/trunk/include/asm-x86_64/ipi.h b/trunk/include/asm-x86_64/ipi.h index 5e166b9d3bde..022e9d340ad7 100644 --- a/trunk/include/asm-x86_64/ipi.h +++ b/trunk/include/asm-x86_64/ipi.h @@ -31,9 +31,20 @@ static inline unsigned int __prepare_ICR (unsigned int shortcut, int vector, unsigned int dest) { - unsigned int icr = APIC_DM_FIXED | shortcut | vector | dest; - if (vector == KDB_VECTOR) - icr = (icr & (~APIC_VECTOR_MASK)) | APIC_DM_NMI; + unsigned int icr = shortcut | dest; + + switch (vector) { + default: + icr |= APIC_DM_FIXED | vector; + break; + case NMI_VECTOR: + /* + * Setup KDB IPI to be delivered as an NMI + */ + case KDB_VECTOR: + icr |= APIC_DM_NMI; + break; + } return icr; } @@ -66,7 +77,7 @@ static inline void __send_IPI_shortcut(unsigned int shortcut, int vector, unsign /* * Send the IPI. The write to APIC_ICR fires this off. */ - apic_write_around(APIC_ICR, cfg); + apic_write(APIC_ICR, cfg); } @@ -92,7 +103,7 @@ static inline void send_IPI_mask_sequence(cpumask_t mask, int vector) * prepare target chip field */ cfg = __prepare_ICR2(x86_cpu_to_apicid[query_cpu]); - apic_write_around(APIC_ICR2, cfg); + apic_write(APIC_ICR2, cfg); /* * program the ICR @@ -102,7 +113,7 @@ static inline void send_IPI_mask_sequence(cpumask_t mask, int vector) /* * Send the IPI. The write to APIC_ICR fires this off. */ - apic_write_around(APIC_ICR, cfg); + apic_write(APIC_ICR, cfg); } local_irq_restore(flags); } diff --git a/trunk/include/asm-x86_64/kdebug.h b/trunk/include/asm-x86_64/kdebug.h index b90341994d80..f604e84c5303 100644 --- a/trunk/include/asm-x86_64/kdebug.h +++ b/trunk/include/asm-x86_64/kdebug.h @@ -46,7 +46,7 @@ extern void die(const char *,struct pt_regs *,long); extern void __die(const char *,struct pt_regs *,long); extern void show_registers(struct pt_regs *regs); extern void dump_pagetable(unsigned long); -extern void oops_begin(void); -extern void oops_end(void); +extern unsigned long oops_begin(void); +extern void oops_end(unsigned long); #endif diff --git a/trunk/include/asm-x86_64/local.h b/trunk/include/asm-x86_64/local.h index c954f15c1a75..3e72c41727c5 100644 --- a/trunk/include/asm-x86_64/local.h +++ b/trunk/include/asm-x86_64/local.h @@ -29,7 +29,7 @@ static __inline__ void local_dec(local_t *v) :"m" (v->counter)); } -static __inline__ void local_add(unsigned long i, local_t *v) +static __inline__ void local_add(unsigned int i, local_t *v) { __asm__ __volatile__( "addl %1,%0" @@ -37,7 +37,7 @@ static __inline__ void local_add(unsigned long i, local_t *v) :"ir" (i), "m" (v->counter)); } -static __inline__ void local_sub(unsigned long i, local_t *v) +static __inline__ void local_sub(unsigned int i, local_t *v) { __asm__ __volatile__( "subl %1,%0" diff --git a/trunk/include/asm-x86_64/mmzone.h b/trunk/include/asm-x86_64/mmzone.h index 768413751b34..b40c661f111e 100644 --- a/trunk/include/asm-x86_64/mmzone.h +++ b/trunk/include/asm-x86_64/mmzone.h @@ -12,7 +12,7 @@ #include -#define NODEMAPSIZE 0xff +#define NODEMAPSIZE 0xfff /* Simple perfect hash to map physical addresses to node numbers */ extern int memnode_shift; @@ -54,7 +54,7 @@ static inline __attribute__((pure)) int phys_to_nid(unsigned long addr) #define pfn_valid(pfn) ((pfn) >= num_physpages ? 0 : \ ({ u8 nid__ = pfn_to_nid(pfn); \ - nid__ != 0xff && (pfn) >= node_start_pfn(nid__) && (pfn) <= node_end_pfn(nid__); })) + nid__ != 0xff && (pfn) >= node_start_pfn(nid__) && (pfn) < node_end_pfn(nid__); })) #endif #define local_mapnr(kvaddr) \ diff --git a/trunk/include/asm-x86_64/msr.h b/trunk/include/asm-x86_64/msr.h index ba15279a79d0..4d727f3f5550 100644 --- a/trunk/include/asm-x86_64/msr.h +++ b/trunk/include/asm-x86_64/msr.h @@ -29,22 +29,37 @@ #define wrmsrl(msr,val) wrmsr(msr,(__u32)((__u64)(val)),((__u64)(val))>>32) /* wrmsr with exception handling */ -#define wrmsr_safe(msr,a,b) ({ int ret__; \ - asm volatile("2: wrmsr ; xorl %0,%0\n" \ - "1:\n\t" \ - ".section .fixup,\"ax\"\n\t" \ - "3: movl %4,%0 ; jmp 1b\n\t" \ - ".previous\n\t" \ - ".section __ex_table,\"a\"\n" \ - " .align 8\n\t" \ - " .quad 2b,3b\n\t" \ - ".previous" \ - : "=a" (ret__) \ - : "c" (msr), "0" (a), "d" (b), "i" (-EFAULT));\ +#define wrmsr_safe(msr,a,b) ({ int ret__; \ + asm volatile("2: wrmsr ; xorl %0,%0\n" \ + "1:\n\t" \ + ".section .fixup,\"ax\"\n\t" \ + "3: movl %4,%0 ; jmp 1b\n\t" \ + ".previous\n\t" \ + ".section __ex_table,\"a\"\n" \ + " .align 8\n\t" \ + " .quad 2b,3b\n\t" \ + ".previous" \ + : "=a" (ret__) \ + : "c" (msr), "0" (a), "d" (b), "i" (-EFAULT)); \ ret__; }) #define checking_wrmsrl(msr,val) wrmsr_safe(msr,(u32)(val),(u32)((val)>>32)) +#define rdmsr_safe(msr,a,b) \ + ({ int ret__; \ + asm volatile ("1: rdmsr\n" \ + "2:\n" \ + ".section .fixup,\"ax\"\n" \ + "3: movl %4,%0\n" \ + " jmp 2b\n" \ + ".previous\n" \ + ".section __ex_table,\"a\"\n" \ + " .align 8\n" \ + " .quad 1b,3b\n" \ + ".previous":"=&bDS" (ret__), "=a"(a), "=d"(b)\ + :"c"(msr), "i"(-EIO), "0"(0)); \ + ret__; }) + #define rdtsc(low,high) \ __asm__ __volatile__("rdtsc" : "=a" (low), "=d" (high)) @@ -64,7 +79,7 @@ : "=a" (low), "=d" (high) \ : "c" (counter)) -extern inline void cpuid(int op, unsigned int *eax, unsigned int *ebx, +static inline void cpuid(int op, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx) { __asm__("cpuid" @@ -90,7 +105,7 @@ static inline void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx, /* * CPUID functions returning a single datum */ -extern inline unsigned int cpuid_eax(unsigned int op) +static inline unsigned int cpuid_eax(unsigned int op) { unsigned int eax; @@ -100,7 +115,7 @@ extern inline unsigned int cpuid_eax(unsigned int op) : "bx", "cx", "dx"); return eax; } -extern inline unsigned int cpuid_ebx(unsigned int op) +static inline unsigned int cpuid_ebx(unsigned int op) { unsigned int eax, ebx; @@ -110,7 +125,7 @@ extern inline unsigned int cpuid_ebx(unsigned int op) : "cx", "dx" ); return ebx; } -extern inline unsigned int cpuid_ecx(unsigned int op) +static inline unsigned int cpuid_ecx(unsigned int op) { unsigned int eax, ecx; @@ -120,7 +135,7 @@ extern inline unsigned int cpuid_ecx(unsigned int op) : "bx", "dx" ); return ecx; } -extern inline unsigned int cpuid_edx(unsigned int op) +static inline unsigned int cpuid_edx(unsigned int op) { unsigned int eax, edx; diff --git a/trunk/include/asm-x86_64/numa.h b/trunk/include/asm-x86_64/numa.h index 5c363a1482e4..bcf55c3f7f7f 100644 --- a/trunk/include/asm-x86_64/numa.h +++ b/trunk/include/asm-x86_64/numa.h @@ -9,6 +9,7 @@ struct node { }; extern int compute_hash_shift(struct node *nodes, int numnodes); +extern int pxm_to_node(int nid); #define ZONE_ALIGN (1UL << (MAX_ORDER+PAGE_SHIFT)) @@ -16,6 +17,8 @@ extern void numa_add_cpu(int cpu); extern void numa_init_array(void); extern int numa_off; +extern unsigned char apicid_to_node[256]; + #define NUMA_NO_NODE 0xff #endif diff --git a/trunk/include/asm-x86_64/page.h b/trunk/include/asm-x86_64/page.h index 135ffaa0393b..e5ab4d231f2c 100644 --- a/trunk/include/asm-x86_64/page.h +++ b/trunk/include/asm-x86_64/page.h @@ -32,6 +32,8 @@ #ifdef __KERNEL__ #ifndef __ASSEMBLY__ +extern unsigned long end_pfn; + void clear_page(void *); void copy_page(void *, void *); @@ -111,7 +113,7 @@ typedef struct { unsigned long pgprot; } pgprot_t; #ifdef CONFIG_FLATMEM #define pfn_to_page(pfn) (mem_map + (pfn)) #define page_to_pfn(page) ((unsigned long)((page) - mem_map)) -#define pfn_valid(pfn) ((pfn) < max_mapnr) +#define pfn_valid(pfn) ((pfn) < end_pfn) #endif #define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT) diff --git a/trunk/include/asm-x86_64/pci.h b/trunk/include/asm-x86_64/pci.h index eeb3088a1c9e..5a82a6762c21 100644 --- a/trunk/include/asm-x86_64/pci.h +++ b/trunk/include/asm-x86_64/pci.h @@ -50,10 +50,10 @@ extern int iommu_setup(char *opt); * address space. The networking and block device layers use * this boolean for bounce buffer decisions * - * On AMD64 it mostly equals, but we set it to zero to tell some subsystems - * that an IOMMU is available. + * On x86-64 it mostly equals, but we set it to zero to tell some subsystems + * that an hard or soft IOMMU is available. */ -#define PCI_DMA_BUS_IS_PHYS (no_iommu ? 1 : 0) +#define PCI_DMA_BUS_IS_PHYS 0 /* * x86-64 always supports DAC, but sometimes it is useful to force diff --git a/trunk/include/asm-x86_64/pda.h b/trunk/include/asm-x86_64/pda.h index 36b766cfc4d5..bbf89aa8a1af 100644 --- a/trunk/include/asm-x86_64/pda.h +++ b/trunk/include/asm-x86_64/pda.h @@ -10,10 +10,8 @@ struct x8664_pda { struct task_struct *pcurrent; /* Current process */ unsigned long data_offset; /* Per cpu data offset from linker address */ - struct x8664_pda *me; /* Pointer to itself */ unsigned long kernelstack; /* top of kernel stack for current */ unsigned long oldrsp; /* user rsp for system call */ - unsigned long irqrsp; /* Old rsp for interrupts. */ int irqcount; /* Irq nesting counter. Starts with -1 */ int cpunumber; /* Logical CPU number */ char *irqstackptr; /* top of irqstack */ @@ -22,7 +20,7 @@ struct x8664_pda { struct mm_struct *active_mm; int mmu_state; unsigned apic_timer_irqs; -} ____cacheline_aligned; +} ____cacheline_aligned_in_smp; #define IRQSTACK_ORDER 2 @@ -42,13 +40,14 @@ extern void __bad_pda_field(void); #define pda_offset(field) offsetof(struct x8664_pda, field) #define pda_to_op(op,field,val) do { \ + typedef typeof_field(struct x8664_pda, field) T__; \ switch (sizeof_field(struct x8664_pda, field)) { \ case 2: \ -asm volatile(op "w %0,%%gs:%P1"::"r" (val),"i"(pda_offset(field)):"memory"); break; \ +asm volatile(op "w %0,%%gs:%P1"::"ri" ((T__)val),"i"(pda_offset(field)):"memory"); break; \ case 4: \ -asm volatile(op "l %0,%%gs:%P1"::"r" (val),"i"(pda_offset(field)):"memory"); break; \ +asm volatile(op "l %0,%%gs:%P1"::"ri" ((T__)val),"i"(pda_offset(field)):"memory"); break; \ case 8: \ -asm volatile(op "q %0,%%gs:%P1"::"r" (val),"i"(pda_offset(field)):"memory"); break; \ +asm volatile(op "q %0,%%gs:%P1"::"ri" ((T__)val),"i"(pda_offset(field)):"memory"); break; \ default: __bad_pda_field(); \ } \ } while (0) @@ -58,7 +57,7 @@ asm volatile(op "q %0,%%gs:%P1"::"r" (val),"i"(pda_offset(field)):"memory"); bre * Unfortunately removing them causes all hell to break lose currently. */ #define pda_from_op(op,field) ({ \ - typedef typeof_field(struct x8664_pda, field) T__; T__ ret__; \ + typeof_field(struct x8664_pda, field) ret__; \ switch (sizeof_field(struct x8664_pda, field)) { \ case 2: \ asm volatile(op "w %%gs:%P1,%0":"=r" (ret__):"i"(pda_offset(field)):"memory"); break;\ @@ -75,6 +74,7 @@ asm volatile(op "q %%gs:%P1,%0":"=r" (ret__):"i"(pda_offset(field)):"memory"); b #define write_pda(field,val) pda_to_op("mov",field,val) #define add_pda(field,val) pda_to_op("add",field,val) #define sub_pda(field,val) pda_to_op("sub",field,val) +#define or_pda(field,val) pda_to_op("or",field,val) #endif diff --git a/trunk/include/asm-x86_64/pgalloc.h b/trunk/include/asm-x86_64/pgalloc.h index deadd146978b..08cad2482bcb 100644 --- a/trunk/include/asm-x86_64/pgalloc.h +++ b/trunk/include/asm-x86_64/pgalloc.h @@ -18,12 +18,12 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, struct page *p set_pmd(pmd, __pmd(_PAGE_TABLE | (page_to_pfn(pte) << PAGE_SHIFT))); } -extern __inline__ pmd_t *get_pmd(void) +static inline pmd_t *get_pmd(void) { return (pmd_t *)get_zeroed_page(GFP_KERNEL); } -extern __inline__ void pmd_free(pmd_t *pmd) +static inline void pmd_free(pmd_t *pmd) { BUG_ON((unsigned long)pmd & (PAGE_SIZE-1)); free_page((unsigned long)pmd); @@ -86,13 +86,13 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm, unsigned long add /* Should really implement gc for free page table pages. This could be done with a reference count in struct page. */ -extern __inline__ void pte_free_kernel(pte_t *pte) +static inline void pte_free_kernel(pte_t *pte) { BUG_ON((unsigned long)pte & (PAGE_SIZE-1)); free_page((unsigned long)pte); } -extern inline void pte_free(struct page *pte) +static inline void pte_free(struct page *pte) { __free_page(pte); } diff --git a/trunk/include/asm-x86_64/pgtable.h b/trunk/include/asm-x86_64/pgtable.h index 5e0f2fdab0d3..1dc110ba82d6 100644 --- a/trunk/include/asm-x86_64/pgtable.h +++ b/trunk/include/asm-x86_64/pgtable.h @@ -85,7 +85,7 @@ static inline void set_pud(pud_t *dst, pud_t val) pud_val(*dst) = pud_val(val); } -extern inline void pud_clear (pud_t *pud) +static inline void pud_clear (pud_t *pud) { set_pud(pud, __pud(0)); } @@ -95,7 +95,7 @@ static inline void set_pgd(pgd_t *dst, pgd_t val) pgd_val(*dst) = pgd_val(val); } -extern inline void pgd_clear (pgd_t * pgd) +static inline void pgd_clear (pgd_t * pgd) { set_pgd(pgd, __pgd(0)); } @@ -375,7 +375,7 @@ static inline pte_t mk_pte_phys(unsigned long physpage, pgprot_t pgprot) } /* Change flags of a PTE */ -extern inline pte_t pte_modify(pte_t pte, pgprot_t newprot) +static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { pte_val(pte) &= _PAGE_CHG_MASK; pte_val(pte) |= pgprot_val(newprot); diff --git a/trunk/include/asm-x86_64/processor.h b/trunk/include/asm-x86_64/processor.h index a8321999448f..03837d34fba0 100644 --- a/trunk/include/asm-x86_64/processor.h +++ b/trunk/include/asm-x86_64/processor.h @@ -254,7 +254,13 @@ struct thread_struct { u64 tls_array[GDT_ENTRY_TLS_ENTRIES]; } __attribute__((aligned(16))); -#define INIT_THREAD {} +#define INIT_THREAD { \ + .rsp0 = (unsigned long)&init_stack + sizeof(init_stack) \ +} + +#define INIT_TSS { \ + .rsp0 = (unsigned long)&init_stack + sizeof(init_stack) \ +} #define INIT_MMAP \ { &init_mm, 0, 0, NULL, PAGE_SHARED, VM_READ | VM_WRITE | VM_EXEC, 1, NULL, NULL } @@ -375,13 +381,13 @@ struct extended_sigtable { #define ASM_NOP_MAX 8 /* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */ -extern inline void rep_nop(void) +static inline void rep_nop(void) { __asm__ __volatile__("rep;nop": : :"memory"); } /* Stop speculative execution */ -extern inline void sync_core(void) +static inline void sync_core(void) { int tmp; asm volatile("cpuid" : "=a" (tmp) : "0" (1) : "ebx","ecx","edx","memory"); diff --git a/trunk/include/asm-x86_64/proto.h b/trunk/include/asm-x86_64/proto.h index f7574196424e..dbb37b0adb43 100644 --- a/trunk/include/asm-x86_64/proto.h +++ b/trunk/include/asm-x86_64/proto.h @@ -74,9 +74,6 @@ extern void acpi_reserve_bootmem(void); extern void swap_low_mappings(void); -extern void oops_begin(void); -extern void die(const char *,struct pt_regs *,long); -extern void __die(const char * str, struct pt_regs * regs, long err); extern void __show_regs(struct pt_regs * regs); extern void show_regs(struct pt_regs * regs); @@ -93,8 +90,6 @@ extern int unhandled_signal(struct task_struct *tsk, int sig); extern void select_idle_routine(const struct cpuinfo_x86 *c); extern void swiotlb_init(void); -extern unsigned long max_mapnr; -extern unsigned long end_pfn; extern unsigned long table_start, table_end; extern int exception_trace; diff --git a/trunk/include/asm-x86_64/signal.h b/trunk/include/asm-x86_64/signal.h index fe9b96d94815..f8d55798535a 100644 --- a/trunk/include/asm-x86_64/signal.h +++ b/trunk/include/asm-x86_64/signal.h @@ -143,23 +143,23 @@ typedef struct sigaltstack { #undef __HAVE_ARCH_SIG_BITOPS #if 0 -extern __inline__ void sigaddset(sigset_t *set, int _sig) +static inline void sigaddset(sigset_t *set, int _sig) { __asm__("btsq %1,%0" : "=m"(*set) : "Ir"(_sig - 1) : "cc"); } -extern __inline__ void sigdelset(sigset_t *set, int _sig) +static inline void sigdelset(sigset_t *set, int _sig) { __asm__("btrq %1,%0" : "=m"(*set) : "Ir"(_sig - 1) : "cc"); } -extern __inline__ int __const_sigismember(sigset_t *set, int _sig) +static inline int __const_sigismember(sigset_t *set, int _sig) { unsigned long sig = _sig - 1; return 1 & (set->sig[sig / _NSIG_BPW] >> (sig & ~(_NSIG_BPW-1))); } -extern __inline__ int __gen_sigismember(sigset_t *set, int _sig) +static inline int __gen_sigismember(sigset_t *set, int _sig) { int ret; __asm__("btq %2,%1\n\tsbbq %0,%0" @@ -172,7 +172,7 @@ extern __inline__ int __gen_sigismember(sigset_t *set, int _sig) __const_sigismember((set),(sig)) : \ __gen_sigismember((set),(sig))) -extern __inline__ int sigfindinword(unsigned long word) +static inline int sigfindinword(unsigned long word) { __asm__("bsfq %1,%0" : "=r"(word) : "rm"(word) : "cc"); return word; diff --git a/trunk/include/asm-x86_64/smp.h b/trunk/include/asm-x86_64/smp.h index de8b57b2b62b..24e32611f0bf 100644 --- a/trunk/include/asm-x86_64/smp.h +++ b/trunk/include/asm-x86_64/smp.h @@ -72,7 +72,7 @@ static inline int num_booting_cpus(void) #define raw_smp_processor_id() read_pda(cpunumber) -extern __inline int hard_smp_processor_id(void) +static inline int hard_smp_processor_id(void) { /* we don't want to mark this access volatile - bad code generation */ return GET_APIC_ID(*(unsigned int *)(APIC_BASE+APIC_ID)); diff --git a/trunk/include/asm-x86_64/system.h b/trunk/include/asm-x86_64/system.h index 8606e170a7dc..85348e02ad2e 100644 --- a/trunk/include/asm-x86_64/system.h +++ b/trunk/include/asm-x86_64/system.h @@ -188,7 +188,7 @@ static inline void write_cr4(unsigned long val) #define __xg(x) ((volatile long *)(x)) -extern inline void set_64bit(volatile unsigned long *ptr, unsigned long val) +static inline void set_64bit(volatile unsigned long *ptr, unsigned long val) { *ptr = val; } @@ -253,19 +253,19 @@ static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, case 2: __asm__ __volatile__(LOCK_PREFIX "cmpxchgw %w1,%2" : "=a"(prev) - : "q"(new), "m"(*__xg(ptr)), "0"(old) + : "r"(new), "m"(*__xg(ptr)), "0"(old) : "memory"); return prev; case 4: __asm__ __volatile__(LOCK_PREFIX "cmpxchgl %k1,%2" : "=a"(prev) - : "q"(new), "m"(*__xg(ptr)), "0"(old) + : "r"(new), "m"(*__xg(ptr)), "0"(old) : "memory"); return prev; case 8: __asm__ __volatile__(LOCK_PREFIX "cmpxchgq %1,%2" : "=a"(prev) - : "q"(new), "m"(*__xg(ptr)), "0"(old) + : "r"(new), "m"(*__xg(ptr)), "0"(old) : "memory"); return prev; } diff --git a/trunk/include/asm-x86_64/timex.h b/trunk/include/asm-x86_64/timex.h index 24ecf6a637cb..f971f45d6d78 100644 --- a/trunk/include/asm-x86_64/timex.h +++ b/trunk/include/asm-x86_64/timex.h @@ -6,7 +6,6 @@ #ifndef _ASMx8664_TIMEX_H #define _ASMx8664_TIMEX_H -#include #include #include #include diff --git a/trunk/include/asm-x86_64/tlbflush.h b/trunk/include/asm-x86_64/tlbflush.h index 505b0cf906de..4a9c20ea9b10 100644 --- a/trunk/include/asm-x86_64/tlbflush.h +++ b/trunk/include/asm-x86_64/tlbflush.h @@ -109,6 +109,10 @@ static inline void flush_tlb_range(struct vm_area_struct * vma, unsigned long st #define TLBSTATE_OK 1 #define TLBSTATE_LAZY 2 +/* Roughly an IPI every 20MB with 4k pages for freeing page table + ranges. Cost is about 42k of memory for each CPU. */ +#define ARCH_FREE_PTE_NR 5350 + #endif #define flush_tlb_kernel_range(start, end) flush_tlb_all() diff --git a/trunk/include/asm-x86_64/topology.h b/trunk/include/asm-x86_64/topology.h index c1bc3fad482e..1c603cd7e4d0 100644 --- a/trunk/include/asm-x86_64/topology.h +++ b/trunk/include/asm-x86_64/topology.h @@ -13,7 +13,6 @@ extern cpumask_t cpu_online_map; extern unsigned char cpu_to_node[]; -extern unsigned char pci_bus_to_node[]; extern cpumask_t node_to_cpumask[]; #ifdef CONFIG_ACPI_NUMA @@ -26,7 +25,7 @@ extern int __node_distance(int, int); #define parent_node(node) (node) #define node_to_first_cpu(node) (__ffs(node_to_cpumask[node])) #define node_to_cpumask(node) (node_to_cpumask[node]) -#define pcibus_to_node(bus) pci_bus_to_node[(bus)->number] +#define pcibus_to_node(bus) ((long)(bus->sysdata)) #define pcibus_to_cpumask(bus) node_to_cpumask(pcibus_to_node(bus)); /* sched_domains SD_NODE_INIT for x86_64 machines */ diff --git a/trunk/include/asm-x86_64/vsyscall.h b/trunk/include/asm-x86_64/vsyscall.h index 2872da23fc7e..438a3f52f839 100644 --- a/trunk/include/asm-x86_64/vsyscall.h +++ b/trunk/include/asm-x86_64/vsyscall.h @@ -29,7 +29,6 @@ enum vsyscall_num { struct vxtime_data { long hpet_address; /* HPET base address */ - unsigned long hz; /* HPET clocks / sec */ int last; unsigned long last_tsc; long quot; diff --git a/trunk/include/linux/dmi.h b/trunk/include/linux/dmi.h index c30175e8dec6..a415f1d93e9a 100644 --- a/trunk/include/linux/dmi.h +++ b/trunk/include/linux/dmi.h @@ -70,7 +70,7 @@ extern struct dmi_device * dmi_find_device(int type, const char *name, static inline int dmi_check_system(struct dmi_system_id *list) { return 0; } static inline char * dmi_get_system_info(int field) { return NULL; } -static struct dmi_device * dmi_find_device(int type, const char *name, +static inline struct dmi_device * dmi_find_device(int type, const char *name, struct dmi_device *from) { return NULL; } #endif diff --git a/trunk/include/linux/interrupt.h b/trunk/include/linux/interrupt.h index d99e7aeb7d33..0a90205184b0 100644 --- a/trunk/include/linux/interrupt.h +++ b/trunk/include/linux/interrupt.h @@ -57,6 +57,11 @@ extern void disable_irq(unsigned int irq); extern void enable_irq(unsigned int irq); #endif +#ifndef __ARCH_SET_SOFTIRQ_PENDING +#define set_softirq_pending(x) (local_softirq_pending() = (x)) +#define or_softirq_pending(x) (local_softirq_pending() |= (x)) +#endif + /* * Temporary defines for UP kernels, until all code gets fixed. */ @@ -123,7 +128,7 @@ struct softirq_action asmlinkage void do_softirq(void); extern void open_softirq(int nr, void (*action)(struct softirq_action*), void *data); extern void softirq_init(void); -#define __raise_softirq_irqoff(nr) do { local_softirq_pending() |= 1UL << (nr); } while (0) +#define __raise_softirq_irqoff(nr) do { or_softirq_pending(1UL << (nr)); } while (0) extern void FASTCALL(raise_softirq_irqoff(unsigned int nr)); extern void FASTCALL(raise_softirq(unsigned int nr)); diff --git a/trunk/include/linux/sched.h b/trunk/include/linux/sched.h index ed3bb19d1337..38c8654aaa96 100644 --- a/trunk/include/linux/sched.h +++ b/trunk/include/linux/sched.h @@ -785,7 +785,6 @@ struct task_struct { short il_next; #endif #ifdef CONFIG_CPUSETS - short cpuset_sem_nest_depth; struct cpuset *cpuset; nodemask_t mems_allowed; int cpuset_mems_generation; diff --git a/trunk/kernel/cpuset.c b/trunk/kernel/cpuset.c index 407b5f0a8c8e..79866bc6b3a1 100644 --- a/trunk/kernel/cpuset.c +++ b/trunk/kernel/cpuset.c @@ -180,6 +180,8 @@ static struct super_block *cpuset_sb = NULL; */ static DECLARE_MUTEX(cpuset_sem); +static struct task_struct *cpuset_sem_owner; +static int cpuset_sem_depth; /* * The global cpuset semaphore cpuset_sem can be needed by the @@ -200,16 +202,19 @@ static DECLARE_MUTEX(cpuset_sem); static inline void cpuset_down(struct semaphore *psem) { - if (current->cpuset_sem_nest_depth == 0) + if (cpuset_sem_owner != current) { down(psem); - current->cpuset_sem_nest_depth++; + cpuset_sem_owner = current; + } + cpuset_sem_depth++; } static inline void cpuset_up(struct semaphore *psem) { - current->cpuset_sem_nest_depth--; - if (current->cpuset_sem_nest_depth == 0) + if (--cpuset_sem_depth == 0) { + cpuset_sem_owner = NULL; up(psem); + } } /* diff --git a/trunk/kernel/softirq.c b/trunk/kernel/softirq.c index b4ab6af1dea8..f766b2fc48be 100644 --- a/trunk/kernel/softirq.c +++ b/trunk/kernel/softirq.c @@ -84,7 +84,7 @@ asmlinkage void __do_softirq(void) cpu = smp_processor_id(); restart: /* Reset the pending bitmask before enabling irqs */ - local_softirq_pending() = 0; + set_softirq_pending(0); local_irq_enable(); diff --git a/trunk/lib/Kconfig.debug b/trunk/lib/Kconfig.debug index 3754c9a8f5c8..016e89a44ac8 100644 --- a/trunk/lib/Kconfig.debug +++ b/trunk/lib/Kconfig.debug @@ -170,11 +170,11 @@ config DEBUG_FS config FRAME_POINTER bool "Compile the kernel with frame pointers" - depends on DEBUG_KERNEL && ((X86 && !X86_64) || CRIS || M68K || M68KNOMMU || FRV || UML) + depends on DEBUG_KERNEL && (X86 || CRIS || M68K || M68KNOMMU || FRV || UML) default y if DEBUG_INFO && UML help If you say Y here the resulting kernel image will be slightly larger - and slower, but it will give very useful debugging information. - If you don't debug the kernel, you can say N, but we may not be able - to solve problems without frame pointers. + and slower, but it might give very useful debugging information + on some architectures or you use external debuggers. + If you don't debug the kernel, you can say N. diff --git a/trunk/mm/bootmem.c b/trunk/mm/bootmem.c index c1330cc19783..8ec4e4c2a179 100644 --- a/trunk/mm/bootmem.c +++ b/trunk/mm/bootmem.c @@ -61,9 +61,17 @@ static unsigned long __init init_bootmem_core (pg_data_t *pgdat, { bootmem_data_t *bdata = pgdat->bdata; unsigned long mapsize = ((end - start)+7)/8; - - pgdat->pgdat_next = pgdat_list; - pgdat_list = pgdat; + static struct pglist_data *pgdat_last; + + pgdat->pgdat_next = NULL; + /* Add new nodes last so that bootmem always starts + searching in the first nodes, not the last ones */ + if (pgdat_last) + pgdat_last->pgdat_next = pgdat; + else { + pgdat_list = pgdat; + pgdat_last = pgdat; + } mapsize = ALIGN(mapsize, sizeof(long)); bdata->node_bootmem_map = phys_to_virt(mapstart << PAGE_SHIFT);