diff --git a/[refs] b/[refs] index 63c10a9ff9f1..09eb1c8e883e 100644 --- a/[refs] +++ b/[refs] @@ -1,2 +1,2 @@ --- -refs/heads/master: 0fbba4871c42b297b0d6a6a6ff73e934cb91423e +refs/heads/master: acaabe795a62bba089c185917af86b44654313dc diff --git a/trunk/Documentation/x86/earlyprintk.txt b/trunk/Documentation/x86/earlyprintk.txt deleted file mode 100644 index 607b1a016064..000000000000 --- a/trunk/Documentation/x86/earlyprintk.txt +++ /dev/null @@ -1,101 +0,0 @@ - -Mini-HOWTO for using the earlyprintk=dbgp boot option with a -USB2 Debug port key and a debug cable, on x86 systems. - -You need two computers, the 'USB debug key' special gadget and -and two USB cables, connected like this: - - [host/target] <-------> [USB debug key] <-------> [client/console] - -1. There are three specific hardware requirements: - - a.) Host/target system needs to have USB debug port capability. - - You can check this capability by looking at a 'Debug port' bit in - the lspci -vvv output: - - # lspci -vvv - ... - 00:1d.7 USB Controller: Intel Corporation 82801H (ICH8 Family) USB2 EHCI Controller #1 (rev 03) (prog-if 20 [EHCI]) - Subsystem: Lenovo ThinkPad T61 - Control: I/O- Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR+ FastB2B- DisINTx- - Status: Cap+ 66MHz- UDF- FastB2B+ ParErr- DEVSEL=medium >TAbort- SERR- /proc/sysrq-trigger - - On the host/target system you should see this help line in "dmesg" output: - - SysRq : HELP : loglevel(0-9) reBoot Crashdump terminate-all-tasks(E) memory-full-oom-kill(F) kill-all-tasks(I) saK show-backtrace-all-active-cpus(L) show-memory-usage(M) nice-all-RT-tasks(N) powerOff show-registers(P) show-all-timers(Q) unRaw Sync show-task-states(T) Unmount show-blocked-tasks(W) dump-ftrace-buffer(Z) - - On the client/console system do: - - cat /dev/ttyUSB0 - - And you should see the help line above displayed shortly after you've - provoked it on the host system. - -If it does not work then please ask about it on the linux-kernel@vger.kernel.org -mailing list or contact the x86 maintainers. 
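As a rough, illustrative aside to the capability check described in the HOWTO above (and not part of the patch itself): the small user-space C sketch below walks the PCI capability list of an EHCI controller through its sysfs config space and reports whether the "Debug port" capability (PCI capability ID 0x0a) is present, i.e. whether that controller can back earlyprintk=dbgp at all. The device address 0000:00:1d.7 merely mirrors the lspci example above and is an assumption; adjust it for the host under test, and run the program as root so the full 256-byte config space is readable.

/*
 * Illustrative sketch only -- not part of this patch.
 */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	const char *path = "/sys/bus/pci/devices/0000:00:1d.7/config";
	uint8_t cfg[256];
	FILE *f = fopen(path, "rb");
	uint8_t pos;
	int guard = 48;			/* bail out of malformed, looping lists */

	if (!f || fread(cfg, 1, sizeof(cfg), f) != sizeof(cfg)) {
		fprintf(stderr, "cannot read 256 bytes from %s (not root?)\n", path);
		return 1;
	}
	fclose(f);

	if (!(cfg[0x06] & 0x10)) {	/* PCI status bit 4: capability list present? */
		printf("no capability list\n");
		return 1;
	}
	for (pos = cfg[0x34] & 0xfc; pos && guard--; pos = cfg[pos + 1] & 0xfc) {
		if (cfg[pos] == 0x0a) {	/* capability ID 0x0a: Debug port */
			printf("Debug port capability at offset 0x%02x\n", pos);
			return 0;
		}
	}
	printf("no Debug port capability\n");
	return 1;
}

If the capability is missing, earlyprintk=dbgp cannot work on that controller regardless of cabling, which is the point of hardware requirement 1.a above.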
diff --git a/trunk/Makefile b/trunk/Makefile index c40d83aedebe..27fb890a2bff 100644 --- a/trunk/Makefile +++ b/trunk/Makefile @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 6 SUBLEVEL = 29 -EXTRAVERSION = -rc7 +EXTRAVERSION = -rc6 NAME = Erotic Pickled Herring # *DOCUMENTATION* diff --git a/trunk/arch/arm/kernel/setup.c b/trunk/arch/arm/kernel/setup.c index 68d6494c0389..7049815d66d5 100644 --- a/trunk/arch/arm/kernel/setup.c +++ b/trunk/arch/arm/kernel/setup.c @@ -233,13 +233,12 @@ static void __init cacheid_init(void) unsigned int cachetype = read_cpuid_cachetype(); unsigned int arch = cpu_architecture(); - if (arch >= CPU_ARCH_ARMv6) { - if ((cachetype & (7 << 29)) == 4 << 29) { - /* ARMv7 register format */ - cacheid = CACHEID_VIPT_NONALIASING; - if ((cachetype & (3 << 14)) == 1 << 14) - cacheid |= CACHEID_ASID_TAGGED; - } else if (cachetype & (1 << 23)) + if (arch >= CPU_ARCH_ARMv7) { + cacheid = CACHEID_VIPT_NONALIASING; + if ((cachetype & (3 << 14)) == 1 << 14) + cacheid |= CACHEID_ASID_TAGGED; + } else if (arch >= CPU_ARCH_ARMv6) { + if (cachetype & (1 << 23)) cacheid = CACHEID_VIPT_ALIASING; else cacheid = CACHEID_VIPT_NONALIASING; diff --git a/trunk/arch/arm/mach-at91/pm.c b/trunk/arch/arm/mach-at91/pm.c index 7ac812dc055a..9bb4f043aa22 100644 --- a/trunk/arch/arm/mach-at91/pm.c +++ b/trunk/arch/arm/mach-at91/pm.c @@ -332,6 +332,7 @@ static int at91_pm_enter(suspend_state_t state) at91_sys_read(AT91_AIC_IPR) & at91_sys_read(AT91_AIC_IMR)); error: + sdram_selfrefresh_disable(); target_state = PM_SUSPEND_ON; at91_irq_resume(); at91_gpio_resume(); diff --git a/trunk/arch/arm/mm/abort-ev6.S b/trunk/arch/arm/mm/abort-ev6.S index 94077fbd96b7..8a7f65ba14b7 100644 --- a/trunk/arch/arm/mm/abort-ev6.S +++ b/trunk/arch/arm/mm/abort-ev6.S @@ -23,8 +23,7 @@ ENTRY(v6_early_abort) #ifdef CONFIG_CPU_32v6K clrex #else - sub r1, sp, #4 @ Get unused stack location - strex r0, r1, [r1] @ Clear the exclusive monitor + strex r0, r1, [sp] @ Clear the exclusive monitor #endif mrc p15, 0, r1, c5, c0, 0 @ get FSR mrc p15, 0, r0, c6, c0, 0 @ get FAR diff --git a/trunk/arch/arm/plat-s3c64xx/irq-eint.c b/trunk/arch/arm/plat-s3c64xx/irq-eint.c index ebb305ce7689..1f7cc0067f5c 100644 --- a/trunk/arch/arm/plat-s3c64xx/irq-eint.c +++ b/trunk/arch/arm/plat-s3c64xx/irq-eint.c @@ -55,7 +55,7 @@ static void s3c_irq_eint_unmask(unsigned int irq) u32 mask; mask = __raw_readl(S3C64XX_EINT0MASK); - mask &= ~eint_irq_to_bit(irq); + mask |= eint_irq_to_bit(irq); __raw_writel(mask, S3C64XX_EINT0MASK); } diff --git a/trunk/arch/powerpc/platforms/86xx/gef_sbc610.c b/trunk/arch/powerpc/platforms/86xx/gef_sbc610.c index d6b772ba3b8f..fb371f5ce132 100644 --- a/trunk/arch/powerpc/platforms/86xx/gef_sbc610.c +++ b/trunk/arch/powerpc/platforms/86xx/gef_sbc610.c @@ -142,10 +142,6 @@ static void __init gef_sbc610_nec_fixup(struct pci_dev *pdev) { unsigned int val; - /* Do not do the fixup on other platforms! 
*/ - if (!machine_is(gef_sbc610)) - return; - printk(KERN_INFO "Running NEC uPD720101 Fixup\n"); /* Ensure ports 1, 2, 3, 4 & 5 are enabled */ diff --git a/trunk/arch/s390/crypto/aes_s390.c b/trunk/arch/s390/crypto/aes_s390.c index 6118890c946d..c42cd898f68b 100644 --- a/trunk/arch/s390/crypto/aes_s390.c +++ b/trunk/arch/s390/crypto/aes_s390.c @@ -556,7 +556,7 @@ static void __exit aes_s390_fini(void) module_init(aes_s390_init); module_exit(aes_s390_fini); -MODULE_ALIAS("aes-all"); +MODULE_ALIAS("aes"); MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm"); MODULE_LICENSE("GPL"); diff --git a/trunk/arch/x86/Kconfig b/trunk/arch/x86/Kconfig index 31758378bcd2..f5cef3fbf9a5 100644 --- a/trunk/arch/x86/Kconfig +++ b/trunk/arch/x86/Kconfig @@ -783,11 +783,6 @@ config X86_MCE_AMD Additional support for AMD specific MCE features such as the DRAM Error Threshold. -config X86_MCE_THRESHOLD - depends on X86_MCE_AMD || X86_MCE_INTEL - bool - default y - config X86_MCE_NONFATAL tristate "Check for non-fatal errors on AMD Athlon/Duron / Intel Pentium 4" depends on X86_32 && X86_MCE diff --git a/trunk/arch/x86/include/asm/apicdef.h b/trunk/arch/x86/include/asm/apicdef.h index bc9514fb3b13..63134e31e8b9 100644 --- a/trunk/arch/x86/include/asm/apicdef.h +++ b/trunk/arch/x86/include/asm/apicdef.h @@ -53,7 +53,6 @@ #define APIC_ESR_SENDILL 0x00020 #define APIC_ESR_RECVILL 0x00040 #define APIC_ESR_ILLREGA 0x00080 -#define APIC_LVTCMCI 0x2f0 #define APIC_ICR 0x300 #define APIC_DEST_SELF 0x40000 #define APIC_DEST_ALLINC 0x80000 diff --git a/trunk/arch/x86/include/asm/efi.h b/trunk/arch/x86/include/asm/efi.h index edc90f23e708..ca5ffb2856b6 100644 --- a/trunk/arch/x86/include/asm/efi.h +++ b/trunk/arch/x86/include/asm/efi.h @@ -37,6 +37,8 @@ extern unsigned long asmlinkage efi_call_phys(void *, ...); #else /* !CONFIG_X86_32 */ +#define MAX_EFI_IO_PAGES 100 + extern u64 efi_call0(void *fp); extern u64 efi_call1(void *fp, u64 arg1); extern u64 efi_call2(void *fp, u64 arg1, u64 arg2); diff --git a/trunk/arch/x86/include/asm/entry_arch.h b/trunk/arch/x86/include/asm/entry_arch.h index 854d538ae857..c2e6bedaf258 100644 --- a/trunk/arch/x86/include/asm/entry_arch.h +++ b/trunk/arch/x86/include/asm/entry_arch.h @@ -33,6 +33,8 @@ BUILD_INTERRUPT3(invalidate_interrupt7,INVALIDATE_TLB_VECTOR_START+7, smp_invalidate_interrupt) #endif +BUILD_INTERRUPT(generic_interrupt, GENERIC_INTERRUPT_VECTOR) + /* * every pentium local APIC has two 'local interrupts', with a * soft-definable vector attached to both interrupts, one of diff --git a/trunk/arch/x86/include/asm/fixmap.h b/trunk/arch/x86/include/asm/fixmap.h index 63a79c77d220..dca8f03da5b2 100644 --- a/trunk/arch/x86/include/asm/fixmap.h +++ b/trunk/arch/x86/include/asm/fixmap.h @@ -24,6 +24,9 @@ #include #else #include +#ifdef CONFIG_EFI +#include +#endif #endif /* @@ -89,6 +92,13 @@ enum fixed_addresses { FIX_IO_APIC_BASE_0, FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS - 1, #endif +#ifdef CONFIG_X86_64 +#ifdef CONFIG_EFI + FIX_EFI_IO_MAP_LAST_PAGE, + FIX_EFI_IO_MAP_FIRST_PAGE = FIX_EFI_IO_MAP_LAST_PAGE + + MAX_EFI_IO_PAGES - 1, +#endif +#endif #ifdef CONFIG_X86_VISWS_APIC FIX_CO_CPU, /* Cobalt timer */ FIX_CO_APIC, /* Cobalt APIC Redirection Table */ diff --git a/trunk/arch/x86/include/asm/hardirq.h b/trunk/arch/x86/include/asm/hardirq.h index 176f058e7159..039db6aa8e02 100644 --- a/trunk/arch/x86/include/asm/hardirq.h +++ b/trunk/arch/x86/include/asm/hardirq.h @@ -12,6 +12,7 @@ typedef struct { unsigned int apic_timer_irqs; /* arch dependent */ unsigned int 
irq_spurious_count; #endif + unsigned int generic_irqs; /* arch dependent */ #ifdef CONFIG_SMP unsigned int irq_resched_count; unsigned int irq_call_count; diff --git a/trunk/arch/x86/include/asm/hw_irq.h b/trunk/arch/x86/include/asm/hw_irq.h index 370e1c83bb49..b762ea49bd70 100644 --- a/trunk/arch/x86/include/asm/hw_irq.h +++ b/trunk/arch/x86/include/asm/hw_irq.h @@ -27,6 +27,7 @@ /* Interrupt handlers registered during init_IRQ */ extern void apic_timer_interrupt(void); +extern void generic_interrupt(void); extern void error_interrupt(void); extern void spurious_interrupt(void); extern void thermal_interrupt(void); diff --git a/trunk/arch/x86/include/asm/i387.h b/trunk/arch/x86/include/asm/i387.h index 71c9e5183982..48f0004db8c9 100644 --- a/trunk/arch/x86/include/asm/i387.h +++ b/trunk/arch/x86/include/asm/i387.h @@ -172,13 +172,7 @@ static inline void __save_init_fpu(struct task_struct *tsk) #else /* CONFIG_X86_32 */ -#ifdef CONFIG_MATH_EMULATION -extern void finit_task(struct task_struct *tsk); -#else -static inline void finit_task(struct task_struct *tsk) -{ -} -#endif +extern void finit(void); static inline void tolerant_fwait(void) { diff --git a/trunk/arch/x86/include/asm/init.h b/trunk/arch/x86/include/asm/init.h deleted file mode 100644 index 36fb1a6a5109..000000000000 --- a/trunk/arch/x86/include/asm/init.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef _ASM_X86_INIT_32_H -#define _ASM_X86_INIT_32_H - -#ifdef CONFIG_X86_32 -extern void __init early_ioremap_page_table_range_init(void); -#endif - -extern unsigned long __init -kernel_physical_mapping_init(unsigned long start, - unsigned long end, - unsigned long page_size_mask); - - -extern unsigned long __initdata e820_table_start; -extern unsigned long __meminitdata e820_table_end; -extern unsigned long __meminitdata e820_table_top; - -#endif /* _ASM_X86_INIT_32_H */ diff --git a/trunk/arch/x86/include/asm/irq.h b/trunk/arch/x86/include/asm/irq.h index 107eb2196691..f38481bcd455 100644 --- a/trunk/arch/x86/include/asm/irq.h +++ b/trunk/arch/x86/include/asm/irq.h @@ -36,6 +36,7 @@ static inline int irq_canonicalize(int irq) extern void fixup_irqs(void); #endif +extern void (*generic_interrupt_extension)(void); extern void init_IRQ(void); extern void native_init_IRQ(void); extern bool handle_irq(unsigned irq, struct pt_regs *regs); diff --git a/trunk/arch/x86/include/asm/irq_vectors.h b/trunk/arch/x86/include/asm/irq_vectors.h index 8a285f356f8a..3cbd79bbb47c 100644 --- a/trunk/arch/x86/include/asm/irq_vectors.h +++ b/trunk/arch/x86/include/asm/irq_vectors.h @@ -111,6 +111,11 @@ */ #define LOCAL_PERF_VECTOR 0xee +/* + * Generic system vector for platform specific use + */ +#define GENERIC_INTERRUPT_VECTOR 0xed + /* * First APIC vector available to drivers: (vectors 0x30-0xee) we * start at 0x31(0x41) to spread out vectors evenly between priority diff --git a/trunk/arch/x86/include/asm/mce.h b/trunk/arch/x86/include/asm/mce.h index 563933e06a35..32c6e17b960b 100644 --- a/trunk/arch/x86/include/asm/mce.h +++ b/trunk/arch/x86/include/asm/mce.h @@ -11,8 +11,6 @@ */ #define MCG_CTL_P (1UL<<8) /* MCG_CAP register available */ -#define MCG_EXT_P (1ULL<<9) /* Extended registers available */ -#define MCG_CMCI_P (1ULL<<10) /* CMCI supported */ #define MCG_STATUS_RIPV (1UL<<0) /* restart ip valid */ #define MCG_STATUS_EIPV (1UL<<1) /* ip points to correct instruction */ @@ -92,29 +90,14 @@ extern int mce_disabled; #include -void mce_setup(struct mce *m); void mce_log(struct mce *m); DECLARE_PER_CPU(struct sys_device, device_mce); extern void 
(*threshold_cpu_callback)(unsigned long action, unsigned int cpu); -/* - * To support more than 128 would need to escape the predefined - * Linux defined extended banks first. - */ -#define MAX_NR_BANKS (MCE_EXTENDED_BANK - 1) - #ifdef CONFIG_X86_MCE_INTEL void mce_intel_feature_init(struct cpuinfo_x86 *c); -void cmci_clear(void); -void cmci_reenable(void); -void cmci_rediscover(int dying); -void cmci_recheck(void); #else static inline void mce_intel_feature_init(struct cpuinfo_x86 *c) { } -static inline void cmci_clear(void) {} -static inline void cmci_reenable(void) {} -static inline void cmci_rediscover(int dying) {} -static inline void cmci_recheck(void) {} #endif #ifdef CONFIG_X86_MCE_AMD @@ -123,23 +106,11 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c); static inline void mce_amd_feature_init(struct cpuinfo_x86 *c) { } #endif -extern int mce_available(struct cpuinfo_x86 *c); - -void mce_log_therm_throt_event(__u64 status); +void mce_log_therm_throt_event(unsigned int cpu, __u64 status); extern atomic_t mce_entry; extern void do_machine_check(struct pt_regs *, long); - -typedef DECLARE_BITMAP(mce_banks_t, MAX_NR_BANKS); -DECLARE_PER_CPU(mce_banks_t, mce_poll_banks); - -enum mcp_flags { - MCP_TIMESTAMP = (1 << 0), /* log time stamp */ - MCP_UC = (1 << 1), /* log uncorrected errors */ -}; -extern void machine_check_poll(enum mcp_flags flags, mce_banks_t *b); - extern int mce_notify_user(void); #endif /* !CONFIG_X86_32 */ @@ -149,8 +120,8 @@ extern void mcheck_init(struct cpuinfo_x86 *c); #else #define mcheck_init(c) do { } while (0) #endif - -extern void (*mce_threshold_vector)(void); +extern void stop_mce(void); +extern void restart_mce(void); #endif /* __KERNEL__ */ #endif /* _ASM_X86_MCE_H */ diff --git a/trunk/arch/x86/include/asm/msr-index.h b/trunk/arch/x86/include/asm/msr-index.h index 2dbd2314139e..358acc59ae04 100644 --- a/trunk/arch/x86/include/asm/msr-index.h +++ b/trunk/arch/x86/include/asm/msr-index.h @@ -77,11 +77,6 @@ #define MSR_IA32_MC0_ADDR 0x00000402 #define MSR_IA32_MC0_MISC 0x00000403 -/* These are consecutive and not in the normal 4er MCE bank block */ -#define MSR_IA32_MC0_CTL2 0x00000280 -#define CMCI_EN (1ULL << 30) -#define CMCI_THRESHOLD_MASK 0xffffULL - #define MSR_P6_PERFCTR0 0x000000c1 #define MSR_P6_PERFCTR1 0x000000c2 #define MSR_P6_EVNTSEL0 0x00000186 diff --git a/trunk/arch/x86/include/asm/page_types.h b/trunk/arch/x86/include/asm/page_types.h index 826ad37006ab..2d625da6603c 100644 --- a/trunk/arch/x86/include/asm/page_types.h +++ b/trunk/arch/x86/include/asm/page_types.h @@ -40,8 +40,14 @@ #ifndef __ASSEMBLY__ +struct pgprot; + extern int page_is_ram(unsigned long pagenr); extern int devmem_is_allowed(unsigned long pagenr); +extern void map_devmem(unsigned long pfn, unsigned long size, + struct pgprot vma_prot); +extern void unmap_devmem(unsigned long pfn, unsigned long size, + struct pgprot vma_prot); extern unsigned long max_low_pfn_mapped; extern unsigned long max_pfn_mapped; diff --git a/trunk/arch/x86/include/asm/pat.h b/trunk/arch/x86/include/asm/pat.h index 2cd07b9422f4..b0e70056838e 100644 --- a/trunk/arch/x86/include/asm/pat.h +++ b/trunk/arch/x86/include/asm/pat.h @@ -2,7 +2,6 @@ #define _ASM_X86_PAT_H #include -#include #ifdef CONFIG_X86_PAT extern int pat_enabled; @@ -18,9 +17,5 @@ extern int free_memtype(u64 start, u64 end); extern int kernel_map_sync_memtype(u64 base, unsigned long size, unsigned long flag); -extern void map_devmem(unsigned long pfn, unsigned long size, - struct pgprot vma_prot); -extern void unmap_devmem(unsigned 
long pfn, unsigned long size, - struct pgprot vma_prot); #endif /* _ASM_X86_PAT_H */ diff --git a/trunk/arch/x86/include/asm/pgtable_32_types.h b/trunk/arch/x86/include/asm/pgtable_32_types.h index 2733fad45f98..bd8df3b2fe04 100644 --- a/trunk/arch/x86/include/asm/pgtable_32_types.h +++ b/trunk/arch/x86/include/asm/pgtable_32_types.h @@ -25,11 +25,6 @@ * area for the same reason. ;) */ #define VMALLOC_OFFSET (8 * 1024 * 1024) - -#ifndef __ASSEMBLER__ -extern bool __vmalloc_start_set; /* set once high_memory is set */ -#endif - #define VMALLOC_START ((unsigned long)high_memory + VMALLOC_OFFSET) #ifdef CONFIG_X86_PAE #define LAST_PKMAP 512 diff --git a/trunk/arch/x86/include/asm/pgtable_types.h b/trunk/arch/x86/include/asm/pgtable_types.h index b8238dc8786d..4d258ad76a0f 100644 --- a/trunk/arch/x86/include/asm/pgtable_types.h +++ b/trunk/arch/x86/include/asm/pgtable_types.h @@ -273,7 +273,6 @@ typedef struct page *pgtable_t; extern pteval_t __supported_pte_mask; extern int nx_enabled; -extern void set_nx(void); #define pgprot_writecombine pgprot_writecombine extern pgprot_t pgprot_writecombine(pgprot_t prot); diff --git a/trunk/arch/x86/kernel/alternative.c b/trunk/arch/x86/kernel/alternative.c index 4c80f1557433..6907b8e85d52 100644 --- a/trunk/arch/x86/kernel/alternative.c +++ b/trunk/arch/x86/kernel/alternative.c @@ -414,17 +414,9 @@ void __init alternative_instructions(void) that might execute the to be patched code. Other CPUs are not running. */ stop_nmi(); - - /* - * Don't stop machine check exceptions while patching. - * MCEs only happen when something got corrupted and in this - * case we must do something about the corruption. - * Ignoring it is worse than a unlikely patching race. - * Also machine checks tend to be broadcast and if one CPU - * goes into machine check the others follow quickly, so we don't - * expect a machine check to cause undue problems during to code - * patching. 
- */ +#ifdef CONFIG_X86_MCE + stop_mce(); +#endif apply_alternatives(__alt_instructions, __alt_instructions_end); @@ -464,6 +456,9 @@ void __init alternative_instructions(void) (unsigned long)__smp_locks_end); restart_nmi(); +#ifdef CONFIG_X86_MCE + restart_mce(); +#endif } /** diff --git a/trunk/arch/x86/kernel/apic/apic.c b/trunk/arch/x86/kernel/apic/apic.c index 30909a258d0f..f9cecdfd05c5 100644 --- a/trunk/arch/x86/kernel/apic/apic.c +++ b/trunk/arch/x86/kernel/apic/apic.c @@ -46,7 +46,6 @@ #include #include #include -#include unsigned int num_processors; @@ -843,14 +842,6 @@ void clear_local_APIC(void) apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED); } #endif -#ifdef CONFIG_X86_MCE_INTEL - if (maxlvt >= 6) { - v = apic_read(APIC_LVTCMCI); - if (!(v & APIC_LVT_MASKED)) - apic_write(APIC_LVTCMCI, v | APIC_LVT_MASKED); - } -#endif - /* * Clean APIC state for other OSs: */ @@ -1250,12 +1241,6 @@ void __cpuinit setup_local_APIC(void) apic_write(APIC_LVT1, value); preempt_enable(); - -#ifdef CONFIG_X86_MCE_INTEL - /* Recheck CMCI information after local APIC is up on CPU #0 */ - if (smp_processor_id() == 0) - cmci_recheck(); -#endif } void __cpuinit end_local_APIC_setup(void) diff --git a/trunk/arch/x86/kernel/cpu/mcheck/Makefile b/trunk/arch/x86/kernel/cpu/mcheck/Makefile index b2f89829bbe8..d7d2323bbb69 100644 --- a/trunk/arch/x86/kernel/cpu/mcheck/Makefile +++ b/trunk/arch/x86/kernel/cpu/mcheck/Makefile @@ -4,4 +4,3 @@ obj-$(CONFIG_X86_32) += k7.o p4.o p5.o p6.o winchip.o obj-$(CONFIG_X86_MCE_INTEL) += mce_intel_64.o obj-$(CONFIG_X86_MCE_AMD) += mce_amd_64.o obj-$(CONFIG_X86_MCE_NONFATAL) += non-fatal.o -obj-$(CONFIG_X86_MCE_THRESHOLD) += threshold.o diff --git a/trunk/arch/x86/kernel/cpu/mcheck/mce_32.c b/trunk/arch/x86/kernel/cpu/mcheck/mce_32.c index 3552119b091d..dfaebce3633e 100644 --- a/trunk/arch/x86/kernel/cpu/mcheck/mce_32.c +++ b/trunk/arch/x86/kernel/cpu/mcheck/mce_32.c @@ -60,6 +60,20 @@ void mcheck_init(struct cpuinfo_x86 *c) } } +static unsigned long old_cr4 __initdata; + +void __init stop_mce(void) +{ + old_cr4 = read_cr4(); + clear_in_cr4(X86_CR4_MCE); +} + +void __init restart_mce(void) +{ + if (old_cr4 & X86_CR4_MCE) + set_in_cr4(X86_CR4_MCE); +} + static int __init mcheck_disable(char *str) { mce_disabled = 1; diff --git a/trunk/arch/x86/kernel/cpu/mcheck/mce_64.c b/trunk/arch/x86/kernel/cpu/mcheck/mce_64.c index bfbd5323a635..fe79985ce0f2 100644 --- a/trunk/arch/x86/kernel/cpu/mcheck/mce_64.c +++ b/trunk/arch/x86/kernel/cpu/mcheck/mce_64.c @@ -3,8 +3,6 @@ * K8 parts Copyright 2002,2003 Andi Kleen, SuSE Labs. * Rest from unknown author(s). * 2004 Andi Kleen. Rewrote most of it. - * Copyright 2008 Intel Corporation - * Author: Andi Kleen */ #include @@ -26,9 +24,6 @@ #include #include #include -#include -#include -#include #include #include #include @@ -37,6 +32,7 @@ #include #define MISC_MCELOG_MINOR 227 +#define NR_SYSFS_BANKS 6 atomic_t mce_entry; @@ -51,7 +47,7 @@ static int mce_dont_init; */ static int tolerant = 1; static int banks; -static u64 *bank; +static unsigned long bank[NR_SYSFS_BANKS] = { [0 ... NR_SYSFS_BANKS-1] = ~0UL }; static unsigned long notify_user; static int rip_msr; static int mce_bootlog = -1; @@ -62,19 +58,6 @@ static char *trigger_argv[2] = { trigger, NULL }; static DECLARE_WAIT_QUEUE_HEAD(mce_wait); -/* MCA banks polled by the period polling timer for corrected events */ -DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = { - [0 ... 
BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL -}; - -/* Do initial initialization of a struct mce */ -void mce_setup(struct mce *m) -{ - memset(m, 0, sizeof(struct mce)); - m->cpu = smp_processor_id(); - rdtscll(m->tsc); -} - /* * Lockless MCE logging infrastructure. * This avoids deadlocks on printk locks without having to break locks. Also @@ -136,11 +119,11 @@ static void print_mce(struct mce *m) print_symbol("{%s}", m->ip); printk("\n"); } - printk(KERN_EMERG "TSC %llx ", m->tsc); + printk(KERN_EMERG "TSC %Lx ", m->tsc); if (m->addr) - printk("ADDR %llx ", m->addr); + printk("ADDR %Lx ", m->addr); if (m->misc) - printk("MISC %llx ", m->misc); + printk("MISC %Lx ", m->misc); printk("\n"); printk(KERN_EMERG "This is not a software problem!\n"); printk(KERN_EMERG "Run through mcelog --ascii to decode " @@ -166,10 +149,8 @@ static void mce_panic(char *msg, struct mce *backup, unsigned long start) panic(msg); } -int mce_available(struct cpuinfo_x86 *c) +static int mce_available(struct cpuinfo_x86 *c) { - if (mce_dont_init) - return 0; return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA); } @@ -191,77 +172,7 @@ static inline void mce_get_rip(struct mce *m, struct pt_regs *regs) } /* - * Poll for corrected events or events that happened before reset. - * Those are just logged through /dev/mcelog. - * - * This is executed in standard interrupt context. - */ -void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) -{ - struct mce m; - int i; - - mce_setup(&m); - - rdmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus); - for (i = 0; i < banks; i++) { - if (!bank[i] || !test_bit(i, *b)) - continue; - - m.misc = 0; - m.addr = 0; - m.bank = i; - m.tsc = 0; - - barrier(); - rdmsrl(MSR_IA32_MC0_STATUS + i*4, m.status); - if (!(m.status & MCI_STATUS_VAL)) - continue; - - /* - * Uncorrected events are handled by the exception handler - * when it is enabled. But when the exception is disabled log - * everything. - * - * TBD do the same check for MCI_STATUS_EN here? - */ - if ((m.status & MCI_STATUS_UC) && !(flags & MCP_UC)) - continue; - - if (m.status & MCI_STATUS_MISCV) - rdmsrl(MSR_IA32_MC0_MISC + i*4, m.misc); - if (m.status & MCI_STATUS_ADDRV) - rdmsrl(MSR_IA32_MC0_ADDR + i*4, m.addr); - - if (!(flags & MCP_TIMESTAMP)) - m.tsc = 0; - /* - * Don't get the IP here because it's unlikely to - * have anything to do with the actual error location. - */ - - mce_log(&m); - add_taint(TAINT_MACHINE_CHECK); - - /* - * Clear state for this bank. - */ - wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); - } - - /* - * Don't clear MCG_STATUS here because it's only defined for - * exceptions. - */ -} - -/* - * The actual machine check handler. This only handles real - * exceptions when something got corrupted coming in through int 18. - * - * This is executed in NMI context not subject to normal locking rules. This - * implies that most kernel services cannot be safely used. Don't even - * think about putting a printk in there! + * The actual machine check handler */ void do_machine_check(struct pt_regs * regs, long error_code) { @@ -279,18 +190,17 @@ void do_machine_check(struct pt_regs * regs, long error_code) * error. 
*/ int kill_it = 0; - DECLARE_BITMAP(toclear, MAX_NR_BANKS); atomic_inc(&mce_entry); - if (notify_die(DIE_NMI, "machine check", regs, error_code, + if ((regs + && notify_die(DIE_NMI, "machine check", regs, error_code, 18, SIGKILL) == NOTIFY_STOP) - goto out2; - if (!banks) + || !banks) goto out2; - mce_setup(&m); - + memset(&m, 0, sizeof(struct mce)); + m.cpu = smp_processor_id(); rdmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus); /* if the restart IP is not valid, we're done for */ if (!(m.mcgstatus & MCG_STATUS_RIPV)) @@ -300,32 +210,18 @@ void do_machine_check(struct pt_regs * regs, long error_code) barrier(); for (i = 0; i < banks; i++) { - __clear_bit(i, toclear); - if (!bank[i]) + if (i < NR_SYSFS_BANKS && !bank[i]) continue; m.misc = 0; m.addr = 0; m.bank = i; + m.tsc = 0; rdmsrl(MSR_IA32_MC0_STATUS + i*4, m.status); if ((m.status & MCI_STATUS_VAL) == 0) continue; - /* - * Non uncorrected errors are handled by machine_check_poll - * Leave them alone. - */ - if ((m.status & MCI_STATUS_UC) == 0) - continue; - - /* - * Set taint even when machine check was not enabled. - */ - add_taint(TAINT_MACHINE_CHECK); - - __set_bit(i, toclear); - if (m.status & MCI_STATUS_EN) { /* if PCC was set, there's no way out */ no_way_out |= !!(m.status & MCI_STATUS_PCC); @@ -339,12 +235,6 @@ void do_machine_check(struct pt_regs * regs, long error_code) no_way_out = 1; kill_it = 1; } - } else { - /* - * Machine check event was not enabled. Clear, but - * ignore. - */ - continue; } if (m.status & MCI_STATUS_MISCV) @@ -353,7 +243,10 @@ void do_machine_check(struct pt_regs * regs, long error_code) rdmsrl(MSR_IA32_MC0_ADDR + i*4, m.addr); mce_get_rip(&m, regs); - mce_log(&m); + if (error_code >= 0) + rdtscll(m.tsc); + if (error_code != -2) + mce_log(&m); /* Did this bank cause the exception? */ /* Assume that the bank with uncorrectable errors did it, @@ -362,8 +255,14 @@ void do_machine_check(struct pt_regs * regs, long error_code) panicm = m; panicm_found = 1; } + + add_taint(TAINT_MACHINE_CHECK); } + /* Never do anything final in the polling timer */ + if (!regs) + goto out; + /* If we didn't find an uncorrectable error, pick the last one (shouldn't happen, just being safe). */ if (!panicm_found) @@ -410,11 +309,10 @@ void do_machine_check(struct pt_regs * regs, long error_code) /* notify userspace ASAP */ set_thread_flag(TIF_MCE_NOTIFY); + out: /* the last thing we do is clear state */ - for (i = 0; i < banks; i++) { - if (test_bit(i, toclear)) - wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); - } + for (i = 0; i < banks; i++) + wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); wrmsrl(MSR_IA32_MCG_STATUS, 0); out2: atomic_dec(&mce_entry); @@ -434,13 +332,15 @@ void do_machine_check(struct pt_regs * regs, long error_code) * and historically has been the register value of the * MSR_IA32_THERMAL_STATUS (Intel) msr. 
*/ -void mce_log_therm_throt_event(__u64 status) +void mce_log_therm_throt_event(unsigned int cpu, __u64 status) { struct mce m; - mce_setup(&m); + memset(&m, 0, sizeof(m)); + m.cpu = cpu; m.bank = MCE_THERMAL_BANK; m.status = status; + rdtscll(m.tsc); mce_log(&m); } #endif /* CONFIG_X86_MCE_INTEL */ @@ -453,18 +353,18 @@ static int check_interval = 5 * 60; /* 5 minutes */ static int next_interval; /* in jiffies */ -static void mcheck_timer(unsigned long); -static DEFINE_PER_CPU(struct timer_list, mce_timer); +static void mcheck_timer(struct work_struct *work); +static DECLARE_DELAYED_WORK(mcheck_work, mcheck_timer); -static void mcheck_timer(unsigned long data) +static void mcheck_check_cpu(void *info) { - struct timer_list *t = &per_cpu(mce_timer, data); - - WARN_ON(smp_processor_id() != data); - if (mce_available(&current_cpu_data)) - machine_check_poll(MCP_TIMESTAMP, - &__get_cpu_var(mce_poll_banks)); + do_machine_check(NULL, 0); +} + +static void mcheck_timer(struct work_struct *work) +{ + on_each_cpu(mcheck_check_cpu, NULL, 1); /* * Alert userspace if needed. If we logged an MCE, reduce the * polling interval, otherwise increase the polling interval. @@ -477,41 +377,31 @@ static void mcheck_timer(unsigned long data) (int)round_jiffies_relative(check_interval*HZ)); } - t->expires = jiffies + next_interval; - add_timer(t); -} - -static void mce_do_trigger(struct work_struct *work) -{ - call_usermodehelper(trigger, trigger_argv, NULL, UMH_NO_WAIT); + schedule_delayed_work(&mcheck_work, next_interval); } -static DECLARE_WORK(mce_trigger_work, mce_do_trigger); - /* - * Notify the user(s) about new machine check events. - * Can be called from interrupt context, but not from machine check/NMI - * context. + * This is only called from process context. This is where we do + * anything we need to alert userspace about new MCEs. This is called + * directly from the poller and also from entry.S and idle, thanks to + * TIF_MCE_NOTIFY. */ int mce_notify_user(void) { - /* Not more than two messages every minute */ - static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2); - clear_thread_flag(TIF_MCE_NOTIFY); if (test_and_clear_bit(0, &notify_user)) { - wake_up_interruptible(&mce_wait); + static unsigned long last_print; + unsigned long now = jiffies; - /* - * There is no risk of missing notifications because - * work_pending is always cleared before the function is - * executed. - */ - if (trigger[0] && !work_pending(&mce_trigger_work)) - schedule_work(&mce_trigger_work); + wake_up_interruptible(&mce_wait); + if (trigger[0]) + call_usermodehelper(trigger, trigger_argv, NULL, + UMH_NO_WAIT); - if (__ratelimit(&ratelimit)) + if (time_after_eq(now, last_print + (check_interval*HZ))) { + last_print = now; printk(KERN_INFO "Machine check events logged\n"); + } return 1; } @@ -535,78 +425,63 @@ static struct notifier_block mce_idle_notifier = { static __init int periodic_mcheck_init(void) { - idle_notifier_register(&mce_idle_notifier); - return 0; + next_interval = check_interval * HZ; + if (next_interval) + schedule_delayed_work(&mcheck_work, + round_jiffies_relative(next_interval)); + idle_notifier_register(&mce_idle_notifier); + return 0; } __initcall(periodic_mcheck_init); + /* * Initialize Machine Checks for a CPU. 
*/ -static int mce_cap_init(void) +static void mce_init(void *dummy) { u64 cap; - unsigned b; + int i; rdmsrl(MSR_IA32_MCG_CAP, cap); - b = cap & 0xff; - if (b > MAX_NR_BANKS) { - printk(KERN_WARNING - "MCE: Using only %u machine check banks out of %u\n", - MAX_NR_BANKS, b); - b = MAX_NR_BANKS; + banks = cap & 0xff; + if (banks > MCE_EXTENDED_BANK) { + banks = MCE_EXTENDED_BANK; + printk(KERN_INFO "MCE: warning: using only %d banks\n", + MCE_EXTENDED_BANK); } - - /* Don't support asymmetric configurations today */ - WARN_ON(banks != 0 && b != banks); - banks = b; - if (!bank) { - bank = kmalloc(banks * sizeof(u64), GFP_KERNEL); - if (!bank) - return -ENOMEM; - memset(bank, 0xff, banks * sizeof(u64)); - } - /* Use accurate RIP reporting if available. */ if ((cap & (1<<9)) && ((cap >> 16) & 0xff) >= 9) rip_msr = MSR_IA32_MCG_EIP; - return 0; -} - -static void mce_init(void *dummy) -{ - u64 cap; - int i; - mce_banks_t all_banks; - - /* - * Log the machine checks left over from the previous reset. - */ - bitmap_fill(all_banks, MAX_NR_BANKS); - machine_check_poll(MCP_UC, &all_banks); + /* Log the machine checks left over from the previous reset. + This also clears all registers */ + do_machine_check(NULL, mce_bootlog ? -1 : -2); set_in_cr4(X86_CR4_MCE); - rdmsrl(MSR_IA32_MCG_CAP, cap); if (cap & MCG_CTL_P) wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); for (i = 0; i < banks; i++) { - wrmsrl(MSR_IA32_MC0_CTL+4*i, bank[i]); + if (i < NR_SYSFS_BANKS) + wrmsrl(MSR_IA32_MC0_CTL+4*i, bank[i]); + else + wrmsrl(MSR_IA32_MC0_CTL+4*i, ~0UL); + wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); } } /* Add per CPU specific workarounds here */ -static void mce_cpu_quirks(struct cpuinfo_x86 *c) +static void __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c) { /* This should be disabled by the BIOS, but isn't always */ if (c->x86_vendor == X86_VENDOR_AMD) { - if (c->x86 == 15 && banks > 4) + if(c->x86 == 15) /* disable GART TBL walk error reporting, which trips off incorrectly with the IOMMU & 3ware & Cerberus. */ - clear_bit(10, (unsigned long *)&bank[4]); + clear_bit(10, &bank[4]); if(c->x86 <= 17 && mce_bootlog < 0) /* Lots of broken BIOS around that don't clear them by default and leave crap in there. Don't log. */ @@ -629,38 +504,20 @@ static void mce_cpu_features(struct cpuinfo_x86 *c) } } -static void mce_init_timer(void) -{ - struct timer_list *t = &__get_cpu_var(mce_timer); - - /* data race harmless because everyone sets to the same value */ - if (!next_interval) - next_interval = check_interval * HZ; - if (!next_interval) - return; - setup_timer(t, mcheck_timer, smp_processor_id()); - t->expires = round_jiffies_relative(jiffies + next_interval); - add_timer(t); -} - /* * Called for each booted CPU to set up machine checks. * Must be called with preempt off. 
*/ void __cpuinit mcheck_init(struct cpuinfo_x86 *c) { - if (!mce_available(c)) - return; + mce_cpu_quirks(c); - if (mce_cap_init() < 0) { - mce_dont_init = 1; + if (mce_dont_init || + !mce_available(c)) return; - } - mce_cpu_quirks(c); mce_init(NULL); mce_cpu_features(c); - mce_init_timer(); } /* @@ -716,7 +573,7 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, { unsigned long *cpu_tsc; static DEFINE_MUTEX(mce_read_mutex); - unsigned prev, next; + unsigned next; char __user *buf = ubuf; int i, err; @@ -735,32 +592,25 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, } err = 0; - prev = 0; - do { - for (i = prev; i < next; i++) { - unsigned long start = jiffies; - - while (!mcelog.entry[i].finished) { - if (time_after_eq(jiffies, start + 2)) { - memset(mcelog.entry + i, 0, - sizeof(struct mce)); - goto timeout; - } - cpu_relax(); + for (i = 0; i < next; i++) { + unsigned long start = jiffies; + + while (!mcelog.entry[i].finished) { + if (time_after_eq(jiffies, start + 2)) { + memset(mcelog.entry + i,0, sizeof(struct mce)); + goto timeout; } - smp_rmb(); - err |= copy_to_user(buf, mcelog.entry + i, - sizeof(struct mce)); - buf += sizeof(struct mce); -timeout: - ; + cpu_relax(); } + smp_rmb(); + err |= copy_to_user(buf, mcelog.entry + i, sizeof(struct mce)); + buf += sizeof(struct mce); + timeout: + ; + } - memset(mcelog.entry + prev, 0, - (next - prev) * sizeof(struct mce)); - prev = next; - next = cmpxchg(&mcelog.next, prev, 0); - } while (next != prev); + memset(mcelog.entry, 0, next * sizeof(struct mce)); + mcelog.next = 0; synchronize_sched(); @@ -830,6 +680,20 @@ static struct miscdevice mce_log_device = { &mce_chrdev_ops, }; +static unsigned long old_cr4 __initdata; + +void __init stop_mce(void) +{ + old_cr4 = read_cr4(); + clear_in_cr4(X86_CR4_MCE); +} + +void __init restart_mce(void) +{ + if (old_cr4 & X86_CR4_MCE) + set_in_cr4(X86_CR4_MCE); +} + /* * Old style boot options parsing. Only for compatibility. */ @@ -839,7 +703,8 @@ static int __init mcheck_disable(char *str) return 1; } -/* mce=off disables machine check. +/* mce=off disables machine check. Note you can re-enable it later + using sysfs. mce=TOLERANCELEVEL (number, see above) mce=bootlog Log MCEs from before booting. Disabled by default on AMD. mce=nobootlog Don't log MCEs from before booting. */ @@ -863,29 +728,6 @@ __setup("mce=", mcheck_enable); * Sysfs support */ -/* - * Disable machine checks on suspend and shutdown. We can't really handle - * them later. - */ -static int mce_disable(void) -{ - int i; - - for (i = 0; i < banks; i++) - wrmsrl(MSR_IA32_MC0_CTL + i*4, 0); - return 0; -} - -static int mce_suspend(struct sys_device *dev, pm_message_t state) -{ - return mce_disable(); -} - -static int mce_shutdown(struct sys_device *dev) -{ - return mce_disable(); -} - /* On resume clear all MCE state. Don't want to see leftovers from the BIOS. Only one CPU is active at this time, the others get readded later using CPU hotplug. 
*/ @@ -896,24 +738,20 @@ static int mce_resume(struct sys_device *dev) return 0; } -static void mce_cpu_restart(void *data) -{ - del_timer_sync(&__get_cpu_var(mce_timer)); - if (mce_available(&current_cpu_data)) - mce_init(NULL); - mce_init_timer(); -} - /* Reinit MCEs after user configuration changes */ static void mce_restart(void) { + if (next_interval) + cancel_delayed_work(&mcheck_work); + /* Timer race is harmless here */ + on_each_cpu(mce_init, NULL, 1); next_interval = check_interval * HZ; - on_each_cpu(mce_cpu_restart, NULL, 1); + if (next_interval) + schedule_delayed_work(&mcheck_work, + round_jiffies_relative(next_interval)); } static struct sysdev_class mce_sysclass = { - .suspend = mce_suspend, - .shutdown = mce_shutdown, .resume = mce_resume, .name = "machinecheck", }; @@ -940,26 +778,16 @@ void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu) __cpuinit } \ static SYSDEV_ATTR(name, 0644, show_ ## name, set_ ## name); -static struct sysdev_attribute *bank_attrs; - -static ssize_t show_bank(struct sys_device *s, struct sysdev_attribute *attr, - char *buf) -{ - u64 b = bank[attr - bank_attrs]; - return sprintf(buf, "%llx\n", b); -} - -static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr, - const char *buf, size_t siz) -{ - char *end; - u64 new = simple_strtoull(buf, &end, 0); - if (end == buf) - return -EINVAL; - bank[attr - bank_attrs] = new; - mce_restart(); - return end-buf; -} +/* + * TBD should generate these dynamically based on number of available banks. + * Have only 6 contol banks in /sysfs until then. + */ +ACCESSOR(bank0ctl,bank[0],mce_restart()) +ACCESSOR(bank1ctl,bank[1],mce_restart()) +ACCESSOR(bank2ctl,bank[2],mce_restart()) +ACCESSOR(bank3ctl,bank[3],mce_restart()) +ACCESSOR(bank4ctl,bank[4],mce_restart()) +ACCESSOR(bank5ctl,bank[5],mce_restart()) static ssize_t show_trigger(struct sys_device *s, struct sysdev_attribute *attr, char *buf) @@ -986,6 +814,8 @@ static SYSDEV_ATTR(trigger, 0644, show_trigger, set_trigger); static SYSDEV_INT_ATTR(tolerant, 0644, tolerant); ACCESSOR(check_interval,check_interval,mce_restart()) static struct sysdev_attribute *mce_attributes[] = { + &attr_bank0ctl, &attr_bank1ctl, &attr_bank2ctl, + &attr_bank3ctl, &attr_bank4ctl, &attr_bank5ctl, &attr_tolerant.attr, &attr_check_interval, &attr_trigger, NULL }; @@ -1015,22 +845,11 @@ static __cpuinit int mce_create_device(unsigned int cpu) if (err) goto error; } - for (i = 0; i < banks; i++) { - err = sysdev_create_file(&per_cpu(device_mce, cpu), - &bank_attrs[i]); - if (err) - goto error2; - } cpu_set(cpu, mce_device_initialized); return 0; -error2: - while (--i >= 0) { - sysdev_remove_file(&per_cpu(device_mce, cpu), - &bank_attrs[i]); - } error: - while (--i >= 0) { + while (i--) { sysdev_remove_file(&per_cpu(device_mce,cpu), mce_attributes[i]); } @@ -1049,46 +868,15 @@ static __cpuinit void mce_remove_device(unsigned int cpu) for (i = 0; mce_attributes[i]; i++) sysdev_remove_file(&per_cpu(device_mce,cpu), mce_attributes[i]); - for (i = 0; i < banks; i++) - sysdev_remove_file(&per_cpu(device_mce, cpu), - &bank_attrs[i]); sysdev_unregister(&per_cpu(device_mce,cpu)); cpu_clear(cpu, mce_device_initialized); } -/* Make sure there are no machine checks on offlined CPUs. 
*/ -static void mce_disable_cpu(void *h) -{ - int i; - unsigned long action = *(unsigned long *)h; - - if (!mce_available(&current_cpu_data)) - return; - if (!(action & CPU_TASKS_FROZEN)) - cmci_clear(); - for (i = 0; i < banks; i++) - wrmsrl(MSR_IA32_MC0_CTL + i*4, 0); -} - -static void mce_reenable_cpu(void *h) -{ - int i; - unsigned long action = *(unsigned long *)h; - - if (!mce_available(&current_cpu_data)) - return; - if (!(action & CPU_TASKS_FROZEN)) - cmci_reenable(); - for (i = 0; i < banks; i++) - wrmsrl(MSR_IA32_MC0_CTL + i*4, bank[i]); -} - /* Get notified when a cpu comes on/off. Be hotplug friendly. */ static int __cpuinit mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { unsigned int cpu = (unsigned long)hcpu; - struct timer_list *t = &per_cpu(mce_timer, cpu); switch (action) { case CPU_ONLINE: @@ -1103,21 +891,6 @@ static int __cpuinit mce_cpu_callback(struct notifier_block *nfb, threshold_cpu_callback(action, cpu); mce_remove_device(cpu); break; - case CPU_DOWN_PREPARE: - case CPU_DOWN_PREPARE_FROZEN: - del_timer_sync(t); - smp_call_function_single(cpu, mce_disable_cpu, &action, 1); - break; - case CPU_DOWN_FAILED: - case CPU_DOWN_FAILED_FROZEN: - t->expires = round_jiffies_relative(jiffies + next_interval); - add_timer_on(t, cpu); - smp_call_function_single(cpu, mce_reenable_cpu, &action, 1); - break; - case CPU_POST_DEAD: - /* intentionally ignoring frozen here */ - cmci_rediscover(cpu); - break; } return NOTIFY_OK; } @@ -1126,34 +899,6 @@ static struct notifier_block mce_cpu_notifier __cpuinitdata = { .notifier_call = mce_cpu_callback, }; -static __init int mce_init_banks(void) -{ - int i; - - bank_attrs = kzalloc(sizeof(struct sysdev_attribute) * banks, - GFP_KERNEL); - if (!bank_attrs) - return -ENOMEM; - - for (i = 0; i < banks; i++) { - struct sysdev_attribute *a = &bank_attrs[i]; - a->attr.name = kasprintf(GFP_KERNEL, "bank%d", i); - if (!a->attr.name) - goto nomem; - a->attr.mode = 0644; - a->show = show_bank; - a->store = set_bank; - } - return 0; - -nomem: - while (--i >= 0) - kfree(bank_attrs[i].attr.name); - kfree(bank_attrs); - bank_attrs = NULL; - return -ENOMEM; -} - static __init int mce_init_device(void) { int err; @@ -1161,11 +906,6 @@ static __init int mce_init_device(void) if (!mce_available(&boot_cpu_data)) return -EIO; - - err = mce_init_banks(); - if (err) - return err; - err = sysdev_class_register(&mce_sysclass); if (err) return err; diff --git a/trunk/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/trunk/arch/x86/kernel/cpu/mcheck/mce_amd_64.c index c5a32f92d07e..9817506dd469 100644 --- a/trunk/arch/x86/kernel/cpu/mcheck/mce_amd_64.c +++ b/trunk/arch/x86/kernel/cpu/mcheck/mce_amd_64.c @@ -79,8 +79,6 @@ static unsigned char shared_bank[NR_BANKS] = { static DEFINE_PER_CPU(unsigned char, bank_map); /* see which banks are on */ -static void amd_threshold_interrupt(void); - /* * CPU Initialization */ @@ -176,8 +174,6 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c) tr.reset = 0; tr.old_limit = 0; threshold_restart_bank(&tr); - - mce_threshold_vector = amd_threshold_interrupt; } } } @@ -191,13 +187,19 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c) * the interrupt goes off when error_count reaches threshold_limit. * the handler will simply log mcelog w/ software defined bank number. 
*/ -static void amd_threshold_interrupt(void) +asmlinkage void mce_threshold_interrupt(void) { unsigned int bank, block; struct mce m; u32 low = 0, high = 0, address = 0; - mce_setup(&m); + ack_APIC_irq(); + exit_idle(); + irq_enter(); + + memset(&m, 0, sizeof(m)); + rdtscll(m.tsc); + m.cpu = smp_processor_id(); /* assume first bank caused it */ for (bank = 0; bank < NR_BANKS; ++bank) { @@ -231,8 +233,7 @@ static void amd_threshold_interrupt(void) /* Log the machine check that caused the threshold event. */ - machine_check_poll(MCP_TIMESTAMP, - &__get_cpu_var(mce_poll_banks)); + do_machine_check(NULL, 0); if (high & MASK_OVERFLOW_HI) { rdmsrl(address, m.misc); @@ -242,10 +243,13 @@ static void amd_threshold_interrupt(void) + bank * NR_BLOCKS + block; mce_log(&m); - return; + goto out; } } } +out: + inc_irq_stat(irq_threshold_count); + irq_exit(); } /* diff --git a/trunk/arch/x86/kernel/cpu/mcheck/mce_intel_64.c b/trunk/arch/x86/kernel/cpu/mcheck/mce_intel_64.c index aaa7d9730938..aa5e287c98e0 100644 --- a/trunk/arch/x86/kernel/cpu/mcheck/mce_intel_64.c +++ b/trunk/arch/x86/kernel/cpu/mcheck/mce_intel_64.c @@ -1,8 +1,6 @@ /* * Intel specific MCE features. * Copyright 2004 Zwane Mwaikambo - * Copyright (C) 2008, 2009 Intel Corporation - * Author: Andi Kleen */ #include @@ -15,7 +13,6 @@ #include #include #include -#include asmlinkage void smp_thermal_interrupt(void) { @@ -28,7 +25,7 @@ asmlinkage void smp_thermal_interrupt(void) rdmsrl(MSR_IA32_THERM_STATUS, msr_val); if (therm_throt_process(msr_val & 1)) - mce_log_therm_throt_event(msr_val); + mce_log_therm_throt_event(smp_processor_id(), msr_val); inc_irq_stat(irq_thermal_count); irq_exit(); @@ -88,209 +85,7 @@ static void intel_init_thermal(struct cpuinfo_x86 *c) return; } -/* - * Support for Intel Correct Machine Check Interrupts. This allows - * the CPU to raise an interrupt when a corrected machine check happened. - * Normally we pick those up using a regular polling timer. - * Also supports reliable discovery of shared banks. - */ - -static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned); - -/* - * cmci_discover_lock protects against parallel discovery attempts - * which could race against each other. - */ -static DEFINE_SPINLOCK(cmci_discover_lock); - -#define CMCI_THRESHOLD 1 - -static int cmci_supported(int *banks) -{ - u64 cap; - - /* - * Vendor check is not strictly needed, but the initial - * initialization is vendor keyed and this - * makes sure none of the backdoors are entered otherwise. - */ - if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) - return 0; - if (!cpu_has_apic || lapic_get_maxlvt() < 6) - return 0; - rdmsrl(MSR_IA32_MCG_CAP, cap); - *banks = min_t(unsigned, MAX_NR_BANKS, cap & 0xff); - return !!(cap & MCG_CMCI_P); -} - -/* - * The interrupt handler. This is called on every event. - * Just call the poller directly to log any events. - * This could in theory increase the threshold under high load, - * but doesn't for now. - */ -static void intel_threshold_interrupt(void) -{ - machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned)); - mce_notify_user(); -} - -static void print_update(char *type, int *hdr, int num) -{ - if (*hdr == 0) - printk(KERN_INFO "CPU %d MCA banks", smp_processor_id()); - *hdr = 1; - printk(KERN_CONT " %s:%d", type, num); -} - -/* - * Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks - * on this CPU. Use the algorithm recommended in the SDM to discover shared - * banks. 
- */ -static void cmci_discover(int banks, int boot) -{ - unsigned long *owned = (void *)&__get_cpu_var(mce_banks_owned); - int hdr = 0; - int i; - - spin_lock(&cmci_discover_lock); - for (i = 0; i < banks; i++) { - u64 val; - - if (test_bit(i, owned)) - continue; - - rdmsrl(MSR_IA32_MC0_CTL2 + i, val); - - /* Already owned by someone else? */ - if (val & CMCI_EN) { - if (test_and_clear_bit(i, owned) || boot) - print_update("SHD", &hdr, i); - __clear_bit(i, __get_cpu_var(mce_poll_banks)); - continue; - } - - val |= CMCI_EN | CMCI_THRESHOLD; - wrmsrl(MSR_IA32_MC0_CTL2 + i, val); - rdmsrl(MSR_IA32_MC0_CTL2 + i, val); - - /* Did the enable bit stick? -- the bank supports CMCI */ - if (val & CMCI_EN) { - if (!test_and_set_bit(i, owned) || boot) - print_update("CMCI", &hdr, i); - __clear_bit(i, __get_cpu_var(mce_poll_banks)); - } else { - WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks))); - } - } - spin_unlock(&cmci_discover_lock); - if (hdr) - printk(KERN_CONT "\n"); -} - -/* - * Just in case we missed an event during initialization check - * all the CMCI owned banks. - */ -void cmci_recheck(void) -{ - unsigned long flags; - int banks; - - if (!mce_available(&current_cpu_data) || !cmci_supported(&banks)) - return; - local_irq_save(flags); - machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned)); - local_irq_restore(flags); -} - -/* - * Disable CMCI on this CPU for all banks it owns when it goes down. - * This allows other CPUs to claim the banks on rediscovery. - */ -void cmci_clear(void) -{ - int i; - int banks; - u64 val; - - if (!cmci_supported(&banks)) - return; - spin_lock(&cmci_discover_lock); - for (i = 0; i < banks; i++) { - if (!test_bit(i, __get_cpu_var(mce_banks_owned))) - continue; - /* Disable CMCI */ - rdmsrl(MSR_IA32_MC0_CTL2 + i, val); - val &= ~(CMCI_EN|CMCI_THRESHOLD_MASK); - wrmsrl(MSR_IA32_MC0_CTL2 + i, val); - __clear_bit(i, __get_cpu_var(mce_banks_owned)); - } - spin_unlock(&cmci_discover_lock); -} - -/* - * After a CPU went down cycle through all the others and rediscover - * Must run in process context. - */ -void cmci_rediscover(int dying) -{ - int banks; - int cpu; - cpumask_var_t old; - - if (!cmci_supported(&banks)) - return; - if (!alloc_cpumask_var(&old, GFP_KERNEL)) - return; - cpumask_copy(old, &current->cpus_allowed); - - for_each_online_cpu (cpu) { - if (cpu == dying) - continue; - if (set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu))) - continue; - /* Recheck banks in case CPUs don't all have the same */ - if (cmci_supported(&banks)) - cmci_discover(banks, 0); - } - - set_cpus_allowed_ptr(current, old); - free_cpumask_var(old); -} - -/* - * Reenable CMCI on this CPU in case a CPU down failed. - */ -void cmci_reenable(void) -{ - int banks; - if (cmci_supported(&banks)) - cmci_discover(banks, 0); -} - -static __cpuinit void intel_init_cmci(void) -{ - int banks; - - if (!cmci_supported(&banks)) - return; - - mce_threshold_vector = intel_threshold_interrupt; - cmci_discover(banks, 1); - /* - * For CPU #0 this runs with still disabled APIC, but that's - * ok because only the vector is set up. We still do another - * check for the banks later for CPU #0 just to make sure - * to not miss any events. 
- */ - apic_write(APIC_LVTCMCI, THRESHOLD_APIC_VECTOR|APIC_DM_FIXED); - cmci_recheck(); -} - void mce_intel_feature_init(struct cpuinfo_x86 *c) { intel_init_thermal(c); - intel_init_cmci(); } diff --git a/trunk/arch/x86/kernel/cpu/mcheck/threshold.c b/trunk/arch/x86/kernel/cpu/mcheck/threshold.c deleted file mode 100644 index 23ee9e730f78..000000000000 --- a/trunk/arch/x86/kernel/cpu/mcheck/threshold.c +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Common corrected MCE threshold handler code: - */ -#include -#include - -#include -#include -#include -#include - -static void default_threshold_interrupt(void) -{ - printk(KERN_ERR "Unexpected threshold interrupt at vector %x\n", - THRESHOLD_APIC_VECTOR); -} - -void (*mce_threshold_vector)(void) = default_threshold_interrupt; - -asmlinkage void mce_threshold_interrupt(void) -{ - exit_idle(); - irq_enter(); - inc_irq_stat(irq_threshold_count); - mce_threshold_vector(); - irq_exit(); - /* Ack only at the end to avoid potential reentry */ - ack_APIC_irq(); -} diff --git a/trunk/arch/x86/kernel/efi.c b/trunk/arch/x86/kernel/efi.c index 1736acc4d7aa..b205272ad394 100644 --- a/trunk/arch/x86/kernel/efi.c +++ b/trunk/arch/x86/kernel/efi.c @@ -469,7 +469,7 @@ void __init efi_enter_virtual_mode(void) efi_memory_desc_t *md; efi_status_t status; unsigned long size; - u64 end, systab, addr, npages, end_pfn; + u64 end, systab, addr, npages; void *p, *va; efi.systab = NULL; @@ -481,10 +481,7 @@ void __init efi_enter_virtual_mode(void) size = md->num_pages << EFI_PAGE_SHIFT; end = md->phys_addr + size; - end_pfn = PFN_UP(end); - if (end_pfn <= max_low_pfn_mapped - || (end_pfn > (1UL << (32 - PAGE_SHIFT)) - && end_pfn <= max_pfn_mapped)) + if (PFN_UP(end) <= max_low_pfn_mapped) va = __va(md->phys_addr); else va = efi_ioremap(md->phys_addr, size); diff --git a/trunk/arch/x86/kernel/efi_64.c b/trunk/arch/x86/kernel/efi_64.c index 22c3b7828c50..a4ee29127fdf 100644 --- a/trunk/arch/x86/kernel/efi_64.c +++ b/trunk/arch/x86/kernel/efi_64.c @@ -100,11 +100,24 @@ void __init efi_call_phys_epilog(void) void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size) { - unsigned long last_map_pfn; + static unsigned pages_mapped __initdata; + unsigned i, pages; + unsigned long offset; - last_map_pfn = init_memory_mapping(phys_addr, phys_addr + size); - if ((last_map_pfn << PAGE_SHIFT) < phys_addr + size) + pages = PFN_UP(phys_addr + size) - PFN_DOWN(phys_addr); + offset = phys_addr & ~PAGE_MASK; + phys_addr &= PAGE_MASK; + + if (pages_mapped + pages > MAX_EFI_IO_PAGES) return NULL; - return (void __iomem *)__va(phys_addr); + for (i = 0; i < pages; i++) { + __set_fixmap(FIX_EFI_IO_MAP_FIRST_PAGE - pages_mapped, + phys_addr, PAGE_KERNEL); + phys_addr += PAGE_SIZE; + pages_mapped++; + } + + return (void __iomem *)__fix_to_virt(FIX_EFI_IO_MAP_FIRST_PAGE - \ + (pages_mapped - pages)) + offset; } diff --git a/trunk/arch/x86/kernel/entry_64.S b/trunk/arch/x86/kernel/entry_64.S index 83d1836b9467..7ba4621c0dfa 100644 --- a/trunk/arch/x86/kernel/entry_64.S +++ b/trunk/arch/x86/kernel/entry_64.S @@ -984,6 +984,8 @@ apicinterrupt UV_BAU_MESSAGE \ #endif apicinterrupt LOCAL_TIMER_VECTOR \ apic_timer_interrupt smp_apic_timer_interrupt +apicinterrupt GENERIC_INTERRUPT_VECTOR \ + generic_interrupt smp_generic_interrupt #ifdef CONFIG_SMP apicinterrupt INVALIDATE_TLB_VECTOR_START+0 \ diff --git a/trunk/arch/x86/kernel/i387.c b/trunk/arch/x86/kernel/i387.c index f2f8540a7f3d..b0f61f0dcd0a 100644 --- a/trunk/arch/x86/kernel/i387.c +++ b/trunk/arch/x86/kernel/i387.c @@ -136,7 +136,7 
@@ int init_fpu(struct task_struct *tsk) #ifdef CONFIG_X86_32 if (!HAVE_HWFP) { memset(tsk->thread.xstate, 0, xstate_size); - finit_task(tsk); + finit(); set_stopped_child_used_math(tsk); return 0; } diff --git a/trunk/arch/x86/kernel/irq.c b/trunk/arch/x86/kernel/irq.c index f13ca1650aaf..b864341dcc45 100644 --- a/trunk/arch/x86/kernel/irq.c +++ b/trunk/arch/x86/kernel/irq.c @@ -15,6 +15,9 @@ atomic_t irq_err_count; +/* Function pointer for generic interrupt vector handling */ +void (*generic_interrupt_extension)(void) = NULL; + /* * 'what should we do if we get a hw irq event on an illegal vector'. * each architecture has to answer this themselves. @@ -56,6 +59,12 @@ static int show_other_interrupts(struct seq_file *p) seq_printf(p, "%10u ", irq_stats(j)->apic_timer_irqs); seq_printf(p, " Local timer interrupts\n"); #endif + if (generic_interrupt_extension) { + seq_printf(p, "PLT: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", irq_stats(j)->generic_irqs); + seq_printf(p, " Platform interrupts\n"); + } #ifdef CONFIG_SMP seq_printf(p, "RES: "); for_each_online_cpu(j) @@ -163,6 +172,8 @@ u64 arch_irq_stat_cpu(unsigned int cpu) #ifdef CONFIG_X86_LOCAL_APIC sum += irq_stats(cpu)->apic_timer_irqs; #endif + if (generic_interrupt_extension) + sum += irq_stats(cpu)->generic_irqs; #ifdef CONFIG_SMP sum += irq_stats(cpu)->irq_resched_count; sum += irq_stats(cpu)->irq_call_count; @@ -226,4 +237,27 @@ unsigned int __irq_entry do_IRQ(struct pt_regs *regs) return 1; } +/* + * Handler for GENERIC_INTERRUPT_VECTOR. + */ +void smp_generic_interrupt(struct pt_regs *regs) +{ + struct pt_regs *old_regs = set_irq_regs(regs); + + ack_APIC_irq(); + + exit_idle(); + + irq_enter(); + + inc_irq_stat(generic_irqs); + + if (generic_interrupt_extension) + generic_interrupt_extension(); + + irq_exit(); + + set_irq_regs(old_regs); +} + EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq); diff --git a/trunk/arch/x86/kernel/irqinit_32.c b/trunk/arch/x86/kernel/irqinit_32.c index 50b8c3a3006c..bc1326105448 100644 --- a/trunk/arch/x86/kernel/irqinit_32.c +++ b/trunk/arch/x86/kernel/irqinit_32.c @@ -175,6 +175,9 @@ void __init native_init_IRQ(void) /* self generated IPI for local APIC timer */ alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt); + /* generic IPI for platform specific use */ + alloc_intr_gate(GENERIC_INTERRUPT_VECTOR, generic_interrupt); + /* IPI vectors for APIC spurious and error interrupts */ alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt); diff --git a/trunk/arch/x86/kernel/irqinit_64.c b/trunk/arch/x86/kernel/irqinit_64.c index da481a1e3f30..c7a49e0ffbfb 100644 --- a/trunk/arch/x86/kernel/irqinit_64.c +++ b/trunk/arch/x86/kernel/irqinit_64.c @@ -147,6 +147,9 @@ static void __init apic_intr_init(void) /* self generated IPI for local APIC timer */ alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt); + /* generic IPI for platform specific use */ + alloc_intr_gate(GENERIC_INTERRUPT_VECTOR, generic_interrupt); + /* IPI vectors for APIC spurious and error interrupts */ alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt); diff --git a/trunk/arch/x86/kernel/mpparse.c b/trunk/arch/x86/kernel/mpparse.c index e8192401da47..37cb1bda1baf 100644 --- a/trunk/arch/x86/kernel/mpparse.c +++ b/trunk/arch/x86/kernel/mpparse.c @@ -558,19 +558,6 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type) static struct mpf_intel *mpf_found; -static unsigned long __init 
get_mpc_size(unsigned long physptr) -{ - struct mpc_table *mpc; - unsigned long size; - - mpc = early_ioremap(physptr, PAGE_SIZE); - size = mpc->length; - early_iounmap(mpc, PAGE_SIZE); - apic_printk(APIC_VERBOSE, " mpc: %lx-%lx\n", physptr, physptr + size); - - return size; -} - /* * Scan the memory blocks for an SMP configuration block. */ @@ -624,16 +611,12 @@ static void __init __get_smp_config(unsigned int early) construct_default_ISA_mptable(mpf->feature1); } else if (mpf->physptr) { - struct mpc_table *mpc; - unsigned long size; - size = get_mpc_size(mpf->physptr); - mpc = early_ioremap(mpf->physptr, size); /* * Read the physical hardware table. Anything here will * override the defaults. */ - if (!smp_read_mpc(mpc, early)) { + if (!smp_read_mpc(phys_to_virt(mpf->physptr), early)) { #ifdef CONFIG_X86_LOCAL_APIC smp_found_config = 0; #endif @@ -641,10 +624,8 @@ static void __init __get_smp_config(unsigned int early) "BIOS bug, MP table errors detected!...\n"); printk(KERN_ERR "... disabling SMP support. " "(tell your hw vendor)\n"); - early_iounmap(mpc, size); return; } - early_iounmap(mpc, size); if (early) return; @@ -716,10 +697,10 @@ static int __init smp_scan_config(unsigned long base, unsigned long length, if (!reserve) return 1; - reserve_bootmem_generic(virt_to_phys(mpf), sizeof(*mpf), + reserve_bootmem_generic(virt_to_phys(mpf), PAGE_SIZE, BOOTMEM_DEFAULT); if (mpf->physptr) { - unsigned long size = get_mpc_size(mpf->physptr); + unsigned long size = PAGE_SIZE; #ifdef CONFIG_X86_32 /* * We cannot access to MPC table to compute diff --git a/trunk/arch/x86/kernel/reboot.c b/trunk/arch/x86/kernel/reboot.c index 2aef36d8aca2..1cc18d439bbb 100644 --- a/trunk/arch/x86/kernel/reboot.c +++ b/trunk/arch/x86/kernel/reboot.c @@ -216,14 +216,6 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq"), }, }, - { /* Handle problems with rebooting on Dell XPS710 */ - .callback = set_bios_reboot, - .ident = "Dell XPS710", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), - DMI_MATCH(DMI_PRODUCT_NAME, "Dell XPS710"), - }, - }, { } }; diff --git a/trunk/arch/x86/kernel/setup.c b/trunk/arch/x86/kernel/setup.c index f28c56e6bf94..4c54bc0d8ff3 100644 --- a/trunk/arch/x86/kernel/setup.c +++ b/trunk/arch/x86/kernel/setup.c @@ -202,9 +202,7 @@ struct ist_info ist_info; #endif #else -struct cpuinfo_x86 boot_cpu_data __read_mostly = { - .x86_phys_bits = MAX_PHYSMEM_BITS, -}; +struct cpuinfo_x86 boot_cpu_data __read_mostly; EXPORT_SYMBOL(boot_cpu_data); #endif @@ -772,9 +770,6 @@ void __init setup_arch(char **cmdline_p) finish_e820_parsing(); - if (efi_enabled) - efi_init(); - dmi_scan_machine(); dmi_check_system(bad_bios_dmi_table); @@ -794,6 +789,8 @@ void __init setup_arch(char **cmdline_p) insert_resource(&iomem_resource, &data_resource); insert_resource(&iomem_resource, &bss_resource); + if (efi_enabled) + efi_init(); #ifdef CONFIG_X86_32 if (ppro_with_ram_bug()) { diff --git a/trunk/arch/x86/math-emu/fpu_aux.c b/trunk/arch/x86/math-emu/fpu_aux.c index aa0987088774..491e737ce547 100644 --- a/trunk/arch/x86/math-emu/fpu_aux.c +++ b/trunk/arch/x86/math-emu/fpu_aux.c @@ -30,29 +30,20 @@ static void fclex(void) } /* Needs to be externally visible */ -void finit_task(struct task_struct *tsk) +void finit(void) { - struct i387_soft_struct *soft = &tsk->thread.xstate->soft; - struct address *oaddr, *iaddr; - soft->cwd = 0x037f; - soft->swd = 0; - soft->ftop = 0; /* We don't keep top in the status word internally. 
*/ - soft->twd = 0xffff; + control_word = 0x037f; + partial_status = 0; + top = 0; /* We don't keep top in the status word internally. */ + fpu_tag_word = 0xffff; /* The behaviour is different from that detailed in Section 15.1.6 of the Intel manual */ - oaddr = (struct address *)&soft->foo; - oaddr->offset = 0; - oaddr->selector = 0; - iaddr = (struct address *)&soft->fip; - iaddr->offset = 0; - iaddr->selector = 0; - iaddr->opcode = 0; - soft->no_update = 1; -} - -void finit(void) -{ - finit_task(current); + operand_address.offset = 0; + operand_address.selector = 0; + instruction_address.offset = 0; + instruction_address.selector = 0; + instruction_address.opcode = 0; + no_ip_update = 1; } /* diff --git a/trunk/arch/x86/mm/highmem_32.c b/trunk/arch/x86/mm/highmem_32.c index d11745334a67..00f127c80b0e 100644 --- a/trunk/arch/x86/mm/highmem_32.c +++ b/trunk/arch/x86/mm/highmem_32.c @@ -158,6 +158,7 @@ EXPORT_SYMBOL(kunmap); EXPORT_SYMBOL(kmap_atomic); EXPORT_SYMBOL(kunmap_atomic); +#ifdef CONFIG_NUMA void __init set_highmem_pages_init(void) { struct zone *zone; @@ -181,3 +182,11 @@ void __init set_highmem_pages_init(void) } totalram_pages += totalhigh_pages; } +#else +void __init set_highmem_pages_init(void) +{ + add_highpages_with_active_regions(0, highstart_pfn, highend_pfn); + + totalram_pages += totalhigh_pages; +} +#endif /* CONFIG_NUMA */ diff --git a/trunk/arch/x86/mm/init.c b/trunk/arch/x86/mm/init.c index 6d63e3d1253d..ce6a722587d8 100644 --- a/trunk/arch/x86/mm/init.c +++ b/trunk/arch/x86/mm/init.c @@ -1,345 +1,8 @@ -#include #include - #include -#include -#include #include -#include #include #include -#include - -unsigned long __initdata e820_table_start; -unsigned long __meminitdata e820_table_end; -unsigned long __meminitdata e820_table_top; - -int after_bootmem; - -int direct_gbpages -#ifdef CONFIG_DIRECT_GBPAGES - = 1 -#endif -; - -static void __init find_early_table_space(unsigned long end, int use_pse, - int use_gbpages) -{ - unsigned long puds, pmds, ptes, tables, start; - - puds = (end + PUD_SIZE - 1) >> PUD_SHIFT; - tables = roundup(puds * sizeof(pud_t), PAGE_SIZE); - - if (use_gbpages) { - unsigned long extra; - - extra = end - ((end>>PUD_SHIFT) << PUD_SHIFT); - pmds = (extra + PMD_SIZE - 1) >> PMD_SHIFT; - } else - pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT; - - tables += roundup(pmds * sizeof(pmd_t), PAGE_SIZE); - - if (use_pse) { - unsigned long extra; - - extra = end - ((end>>PMD_SHIFT) << PMD_SHIFT); -#ifdef CONFIG_X86_32 - extra += PMD_SIZE; -#endif - ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT; - } else - ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT; - - tables += roundup(ptes * sizeof(pte_t), PAGE_SIZE); - -#ifdef CONFIG_X86_32 - /* for fixmap */ - tables += roundup(__end_of_fixed_addresses * sizeof(pte_t), PAGE_SIZE); -#endif - - /* - * RED-PEN putting page tables only on node 0 could - * cause a hotspot and fill up ZONE_DMA. The page tables - * need roughly 0.5KB per GB. 
- */ -#ifdef CONFIG_X86_32 - start = 0x7000; - e820_table_start = find_e820_area(start, max_pfn_mapped<>= PAGE_SHIFT; - e820_table_end = e820_table_start; - e820_table_top = e820_table_start + (tables >> PAGE_SHIFT); - - printk(KERN_DEBUG "kernel direct mapping tables up to %lx @ %lx-%lx\n", - end, e820_table_start << PAGE_SHIFT, e820_table_top << PAGE_SHIFT); -} - -struct map_range { - unsigned long start; - unsigned long end; - unsigned page_size_mask; -}; - -#ifdef CONFIG_X86_32 -#define NR_RANGE_MR 3 -#else /* CONFIG_X86_64 */ -#define NR_RANGE_MR 5 -#endif - -static int save_mr(struct map_range *mr, int nr_range, - unsigned long start_pfn, unsigned long end_pfn, - unsigned long page_size_mask) -{ - if (start_pfn < end_pfn) { - if (nr_range >= NR_RANGE_MR) - panic("run out of range for init_memory_mapping\n"); - mr[nr_range].start = start_pfn<> PAGE_SHIFT; - pos = start_pfn << PAGE_SHIFT; -#ifdef CONFIG_X86_32 - /* - * Don't use a large page for the first 2/4MB of memory - * because there are often fixed size MTRRs in there - * and overlapping MTRRs into large pages can cause - * slowdowns. - */ - if (pos == 0) - end_pfn = 1<<(PMD_SHIFT - PAGE_SHIFT); - else - end_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT) - << (PMD_SHIFT - PAGE_SHIFT); -#else /* CONFIG_X86_64 */ - end_pfn = ((pos + (PMD_SIZE - 1)) >> PMD_SHIFT) - << (PMD_SHIFT - PAGE_SHIFT); -#endif - if (end_pfn > (end >> PAGE_SHIFT)) - end_pfn = end >> PAGE_SHIFT; - if (start_pfn < end_pfn) { - nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0); - pos = end_pfn << PAGE_SHIFT; - } - - /* big page (2M) range */ - start_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT) - << (PMD_SHIFT - PAGE_SHIFT); -#ifdef CONFIG_X86_32 - end_pfn = (end>>PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT); -#else /* CONFIG_X86_64 */ - end_pfn = ((pos + (PUD_SIZE - 1))>>PUD_SHIFT) - << (PUD_SHIFT - PAGE_SHIFT); - if (end_pfn > ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT))) - end_pfn = ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT)); -#endif - - if (start_pfn < end_pfn) { - nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, - page_size_mask & (1<>PUD_SHIFT) - << (PUD_SHIFT - PAGE_SHIFT); - end_pfn = (end >> PUD_SHIFT) << (PUD_SHIFT - PAGE_SHIFT); - if (start_pfn < end_pfn) { - nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, - page_size_mask & - ((1<>PMD_SHIFT) - << (PMD_SHIFT - PAGE_SHIFT); - end_pfn = (end >> PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT); - if (start_pfn < end_pfn) { - nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, - page_size_mask & (1<>PAGE_SHIFT; - end_pfn = end>>PAGE_SHIFT; - nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0); - - /* try to merge same page size and continuous */ - for (i = 0; nr_range > 1 && i < nr_range - 1; i++) { - unsigned long old_start; - if (mr[i].end != mr[i+1].start || - mr[i].page_size_mask != mr[i+1].page_size_mask) - continue; - /* move it */ - old_start = mr[i].start; - memmove(&mr[i], &mr[i+1], - (nr_range - 1 - i) * sizeof(struct map_range)); - mr[i--].start = old_start; - nr_range--; - } - - for (i = 0; i < nr_range; i++) - printk(KERN_DEBUG " %010lx - %010lx page %s\n", - mr[i].start, mr[i].end, - (mr[i].page_size_mask & (1< e820_table_start) - reserve_early(e820_table_start << PAGE_SHIFT, - e820_table_end << PAGE_SHIFT, "PGTABLE"); - - if (!after_bootmem) - early_memtest(start, end); - - return ret >> PAGE_SHIFT; -} - - -/* - * devmem_is_allowed() checks to see if /dev/mem access to a certain address - * is valid. The argument is a physical page number. 
- * - * - * On x86, access has to be given to the first megabyte of ram because that area - * contains bios code and data regions used by X and dosemu and similar apps. - * Access has to be given to non-kernel-ram areas as well, these contain the PCI - * mmio resources as well as potential bios/acpi data regions. - */ -int devmem_is_allowed(unsigned long pagenr) -{ - if (pagenr <= 256) - return 1; - if (iomem_is_exclusive(pagenr << PAGE_SHIFT)) - return 0; - if (!page_is_ram(pagenr)) - return 1; - return 0; -} void free_init_pages(char *what, unsigned long begin, unsigned long end) { @@ -384,10 +47,3 @@ void free_initmem(void) (unsigned long)(&__init_begin), (unsigned long)(&__init_end)); } - -#ifdef CONFIG_BLK_DEV_INITRD -void free_initrd_mem(unsigned long start, unsigned long end) -{ - free_init_pages("initrd memory", start, end); -} -#endif diff --git a/trunk/arch/x86/mm/init_32.c b/trunk/arch/x86/mm/init_32.c index 2966c6b8d304..47df0e1bbeb9 100644 --- a/trunk/arch/x86/mm/init_32.c +++ b/trunk/arch/x86/mm/init_32.c @@ -49,7 +49,6 @@ #include #include #include -#include unsigned long max_low_pfn_mapped; unsigned long max_pfn_mapped; @@ -59,14 +58,19 @@ unsigned long highstart_pfn, highend_pfn; static noinline int do_test_wp_bit(void); -bool __read_mostly __vmalloc_start_set = false; + +static unsigned long __initdata table_start; +static unsigned long __meminitdata table_end; +static unsigned long __meminitdata table_top; + +static int __initdata after_init_bootmem; static __init void *alloc_low_page(void) { - unsigned long pfn = e820_table_end++; + unsigned long pfn = table_end++; void *adr; - if (pfn >= e820_table_top) + if (pfn >= table_top) panic("alloc_low_page: ran out of memory"); adr = __va(pfn * PAGE_SIZE); @@ -86,7 +90,7 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd) #ifdef CONFIG_X86_PAE if (!(pgd_val(*pgd) & _PAGE_PRESENT)) { - if (after_bootmem) + if (after_init_bootmem) pmd_table = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE); else pmd_table = (pmd_t *)alloc_low_page(); @@ -113,7 +117,7 @@ static pte_t * __init one_page_table_init(pmd_t *pmd) if (!(pmd_val(*pmd) & _PAGE_PRESENT)) { pte_t *page_table = NULL; - if (after_bootmem) { + if (after_init_bootmem) { #ifdef CONFIG_DEBUG_PAGEALLOC page_table = (pte_t *) alloc_bootmem_pages(PAGE_SIZE); #endif @@ -164,12 +168,12 @@ static pte_t *__init page_table_kmap_check(pte_t *pte, pmd_t *pmd, if (pmd_idx_kmap_begin != pmd_idx_kmap_end && (vaddr >> PMD_SHIFT) >= pmd_idx_kmap_begin && (vaddr >> PMD_SHIFT) <= pmd_idx_kmap_end - && ((__pa(pte) >> PAGE_SHIFT) < e820_table_start - || (__pa(pte) >> PAGE_SHIFT) >= e820_table_end)) { + && ((__pa(pte) >> PAGE_SHIFT) < table_start + || (__pa(pte) >> PAGE_SHIFT) >= table_end)) { pte_t *newpte; int i; - BUG_ON(after_bootmem); + BUG_ON(after_init_bootmem); newpte = alloc_low_page(); for (i = 0; i < PTRS_PER_PTE; i++) set_pte(newpte + i, pte[i]); @@ -238,14 +242,11 @@ static inline int is_kernel_text(unsigned long addr) * of max_low_pfn pages, by creating page tables starting from address * PAGE_OFFSET: */ -unsigned long __init -kernel_physical_mapping_init(unsigned long start, - unsigned long end, - unsigned long page_size_mask) +static void __init kernel_physical_mapping_init(pgd_t *pgd_base, + unsigned long start_pfn, + unsigned long end_pfn, + int use_pse) { - int use_pse = page_size_mask == (1<> PAGE_SHIFT; - end_pfn = end >> PAGE_SHIFT; - /* * First iteration will setup identity mapping using large/small pages * based on use_pse, with other attributes same as set by @@ -371,6 
+369,26 @@ kernel_physical_mapping_init(unsigned long start, mapping_iter = 2; goto repeat; } +} + +/* + * devmem_is_allowed() checks to see if /dev/mem access to a certain address + * is valid. The argument is a physical page number. + * + * + * On x86, access has to be given to the first megabyte of ram because that area + * contains bios code and data regions used by X and dosemu and similar apps. + * Access has to be given to non-kernel-ram areas as well, these contain the PCI + * mmio resources as well as potential bios/acpi data regions. + */ +int devmem_is_allowed(unsigned long pagenr) +{ + if (pagenr <= 256) + return 1; + if (iomem_is_exclusive(pagenr << PAGE_SHIFT)) + return 0; + if (!page_is_ram(pagenr)) + return 1; return 0; } @@ -527,9 +545,8 @@ void __init native_pagetable_setup_done(pgd_t *base) * be partially populated, and so it avoids stomping on any existing * mappings. */ -void __init early_ioremap_page_table_range_init(void) +static void __init early_ioremap_page_table_range_init(pgd_t *pgd_base) { - pgd_t *pgd_base = swapper_pg_dir; unsigned long vaddr, end; /* @@ -624,7 +641,7 @@ static int __init noexec_setup(char *str) } early_param("noexec", noexec_setup); -void __init set_nx(void) +static void __init set_nx(void) { unsigned int v[4], l, h; @@ -776,8 +793,6 @@ void __init initmem_init(unsigned long start_pfn, #ifdef CONFIG_FLATMEM max_mapnr = num_physpages; #endif - __vmalloc_start_set = true; - printk(KERN_NOTICE "%ldMB LOWMEM available.\n", pages_to_mb(max_low_pfn)); @@ -799,61 +814,176 @@ static void __init zone_sizes_init(void) free_area_init_nodes(max_zone_pfns); } -static unsigned long __init setup_node_bootmem(int nodeid, - unsigned long start_pfn, - unsigned long end_pfn, - unsigned long bootmap) -{ - unsigned long bootmap_size; - - if (start_pfn > max_low_pfn) - return bootmap; - if (end_pfn > max_low_pfn) - end_pfn = max_low_pfn; - - /* don't touch min_low_pfn */ - bootmap_size = init_bootmem_node(NODE_DATA(nodeid), - bootmap >> PAGE_SHIFT, - start_pfn, end_pfn); - printk(KERN_INFO " node %d low ram: %08lx - %08lx\n", - nodeid, start_pfn<> PAGE_SHIFT, + min_low_pfn, max_low_pfn); printk(KERN_INFO " mapped low ram: 0 - %08lx\n", max_pfn_mapped<> PUD_SHIFT; + tables = roundup(puds * sizeof(pud_t), PAGE_SIZE); + + pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT; + tables += roundup(pmds * sizeof(pmd_t), PAGE_SIZE); + + if (use_pse) { + unsigned long extra; + + extra = end - ((end>>PMD_SHIFT) << PMD_SHIFT); + extra += PMD_SIZE; + ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT; + } else + ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT; -#ifdef CONFIG_NEED_MULTIPLE_NODES - for_each_online_node(nodeid) - bootmap = setup_node_bootmem(nodeid, node_start_pfn[nodeid], - node_end_pfn[nodeid], bootmap); + tables += roundup(ptes * sizeof(pte_t), PAGE_SIZE); + + /* for fixmap */ + tables += roundup(__end_of_fixed_addresses * sizeof(pte_t), PAGE_SIZE); + + /* + * RED-PEN putting page tables only on node 0 could + * cause a hotspot and fill up ZONE_DMA. The page tables + * need roughly 0.5KB per GB. 
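find_early_table_space() in the hunks around here sizes the early page-table pool by counting PUD, PMD and PTE entries for the range to be mapped and rounding each level up to whole pages. The stand-alone sketch below reproduces that arithmetic for illustration only; 4 KB pages and 8-byte entries are assumptions for the example, not values taken from the patch.

#include <stdio.h>

#define PAGE_SIZE   4096UL
#define PAGE_SHIFT  12
#define PMD_SHIFT   21          /* 2 MB */
#define PUD_SHIFT   30          /* 1 GB */
#define ENTRY_SIZE  8UL         /* assumed size of one pte/pmd/pud entry */

static unsigned long roundup_page(unsigned long bytes)
{
	return (bytes + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1);
}

/* Estimate bytes of page tables needed to map [0, end). */
static unsigned long table_space(unsigned long end, int use_pse)
{
	unsigned long puds = (end + (1UL << PUD_SHIFT) - 1) >> PUD_SHIFT;
	unsigned long pmds = (end + (1UL << PMD_SHIFT) - 1) >> PMD_SHIFT;
	unsigned long tables = roundup_page(puds * ENTRY_SIZE)
			     + roundup_page(pmds * ENTRY_SIZE);
	unsigned long ptes;

	if (use_pse) {
		/* Only the tail that cannot use 2 MB pages needs PTEs. */
		unsigned long extra = end - ((end >> PMD_SHIFT) << PMD_SHIFT);
		ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT;
	} else {
		ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT;
	}
	return tables + roundup_page(ptes * ENTRY_SIZE);
}

int main(void)
{
	unsigned long gb = 1UL << 30;

	printf("1 GB, small pages: %lu KB of tables\n", table_space(gb, 0) >> 10);
	printf("1 GB, 2 MB pages:  %lu KB of tables\n", table_space(gb, 1) >> 10);
	return 0;
}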
+ */ + start = 0x7000; + table_start = find_e820_area(start, max_pfn_mapped<>= PAGE_SHIFT; + table_end = table_start; + table_top = table_start + (tables>>PAGE_SHIFT); + + printk(KERN_DEBUG "kernel direct mapping tables up to %lx @ %lx-%lx\n", + end, table_start << PAGE_SHIFT, + (table_start << PAGE_SHIFT) + tables); +} + +unsigned long __init_refok init_memory_mapping(unsigned long start, + unsigned long end) +{ + pgd_t *pgd_base = swapper_pg_dir; + unsigned long start_pfn, end_pfn; + unsigned long big_page_start; +#ifdef CONFIG_DEBUG_PAGEALLOC + /* + * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages. + * This will simplify cpa(), which otherwise needs to support splitting + * large pages into small in interrupt context, etc. + */ + int use_pse = 0; #else - bootmap = setup_node_bootmem(0, 0, max_low_pfn, bootmap); + int use_pse = cpu_has_pse; +#endif + + /* + * Find space for the kernel direct mapping tables. + */ + if (!after_init_bootmem) + find_early_table_space(end, use_pse); + +#ifdef CONFIG_X86_PAE + set_nx(); + if (nx_enabled) + printk(KERN_INFO "NX (Execute Disable) protection: active\n"); #endif - after_bootmem = 1; + /* Enable PSE if available */ + if (cpu_has_pse) + set_in_cr4(X86_CR4_PSE); + + /* Enable PGE if available */ + if (cpu_has_pge) { + set_in_cr4(X86_CR4_PGE); + __supported_pte_mask |= _PAGE_GLOBAL; + } + + /* + * Don't use a large page for the first 2/4MB of memory + * because there are often fixed size MTRRs in there + * and overlapping MTRRs into large pages can cause + * slowdowns. + */ + big_page_start = PMD_SIZE; + + if (start < big_page_start) { + start_pfn = start >> PAGE_SHIFT; + end_pfn = min(big_page_start>>PAGE_SHIFT, end>>PAGE_SHIFT); + } else { + /* head is not big page alignment ? */ + start_pfn = start >> PAGE_SHIFT; + end_pfn = ((start + (PMD_SIZE - 1))>>PMD_SHIFT) + << (PMD_SHIFT - PAGE_SHIFT); + } + if (start_pfn < end_pfn) + kernel_physical_mapping_init(pgd_base, start_pfn, end_pfn, 0); + + /* big page range */ + start_pfn = ((start + (PMD_SIZE - 1))>>PMD_SHIFT) + << (PMD_SHIFT - PAGE_SHIFT); + if (start_pfn < (big_page_start >> PAGE_SHIFT)) + start_pfn = big_page_start >> PAGE_SHIFT; + end_pfn = (end>>PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT); + if (start_pfn < end_pfn) + kernel_physical_mapping_init(pgd_base, start_pfn, end_pfn, + use_pse); + + /* tail is not big page alignment ? */ + start_pfn = end_pfn; + if (start_pfn > (big_page_start>>PAGE_SHIFT)) { + end_pfn = end >> PAGE_SHIFT; + if (start_pfn < end_pfn) + kernel_physical_mapping_init(pgd_base, start_pfn, + end_pfn, 0); + } + + early_ioremap_page_table_range_init(pgd_base); + + load_cr3(swapper_pg_dir); + + __flush_tlb_all(); + + if (!after_init_bootmem) + reserve_early(table_start << PAGE_SHIFT, + table_end << PAGE_SHIFT, "PGTABLE"); + + if (!after_init_bootmem) + early_memtest(start, end); + + return end >> PAGE_SHIFT; } + /* * paging_init() sets up the page tables - note that the first 8MB are * already mapped by head.S. 
@@ -1087,6 +1217,13 @@ void mark_rodata_ro(void) } #endif +#ifdef CONFIG_BLK_DEV_INITRD +void free_initrd_mem(unsigned long start, unsigned long end) +{ + free_init_pages("initrd memory", start, end); +} +#endif + int __init reserve_bootmem_generic(unsigned long phys, unsigned long len, int flags) { diff --git a/trunk/arch/x86/mm/init_64.c b/trunk/arch/x86/mm/init_64.c index 8a853bc3b287..07f44d491df1 100644 --- a/trunk/arch/x86/mm/init_64.c +++ b/trunk/arch/x86/mm/init_64.c @@ -48,7 +48,6 @@ #include #include #include -#include /* * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries. @@ -62,6 +61,12 @@ static unsigned long dma_reserve __initdata; DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); +int direct_gbpages +#ifdef CONFIG_DIRECT_GBPAGES + = 1 +#endif +; + static int __init parse_direct_gbpages_off(char *arg) { direct_gbpages = 0; @@ -82,6 +87,8 @@ early_param("gbpages", parse_direct_gbpages_on); * around without checking the pgd every time. */ +int after_bootmem; + pteval_t __supported_pte_mask __read_mostly = ~_PAGE_IOMAP; EXPORT_SYMBOL_GPL(__supported_pte_mask); @@ -318,9 +325,13 @@ void __init cleanup_highmap(void) } } +static unsigned long __initdata table_start; +static unsigned long __meminitdata table_end; +static unsigned long __meminitdata table_top; + static __ref void *alloc_low_page(unsigned long *phys) { - unsigned long pfn = e820_table_end++; + unsigned long pfn = table_end++; void *adr; if (after_bootmem) { @@ -330,7 +341,7 @@ static __ref void *alloc_low_page(unsigned long *phys) return adr; } - if (pfn >= e820_table_top) + if (pfn >= table_top) panic("alloc_low_page: ran out of memory"); adr = early_memremap(pfn * PAGE_SIZE, PAGE_SIZE); @@ -570,10 +581,58 @@ phys_pud_update(pgd_t *pgd, unsigned long addr, unsigned long end, return phys_pud_init(pud, addr, end, page_size_mask); } -unsigned long __init -kernel_physical_mapping_init(unsigned long start, - unsigned long end, - unsigned long page_size_mask) +static void __init find_early_table_space(unsigned long end, int use_pse, + int use_gbpages) +{ + unsigned long puds, pmds, ptes, tables, start; + + puds = (end + PUD_SIZE - 1) >> PUD_SHIFT; + tables = roundup(puds * sizeof(pud_t), PAGE_SIZE); + if (use_gbpages) { + unsigned long extra; + extra = end - ((end>>PUD_SHIFT) << PUD_SHIFT); + pmds = (extra + PMD_SIZE - 1) >> PMD_SHIFT; + } else + pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT; + tables += roundup(pmds * sizeof(pmd_t), PAGE_SIZE); + + if (use_pse) { + unsigned long extra; + extra = end - ((end>>PMD_SHIFT) << PMD_SHIFT); + ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT; + } else + ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT; + tables += roundup(ptes * sizeof(pte_t), PAGE_SIZE); + + /* + * RED-PEN putting page tables only on node 0 could + * cause a hotspot and fill up ZONE_DMA. The page tables + * need roughly 0.5KB per GB. 
+ */ + start = 0x8000; + table_start = find_e820_area(start, end, tables, PAGE_SIZE); + if (table_start == -1UL) + panic("Cannot find space for the kernel page tables"); + + table_start >>= PAGE_SHIFT; + table_end = table_start; + table_top = table_start + (tables >> PAGE_SHIFT); + + printk(KERN_DEBUG "kernel direct mapping tables up to %lx @ %lx-%lx\n", + end, table_start << PAGE_SHIFT, table_top << PAGE_SHIFT); +} + +static void __init init_gbpages(void) +{ + if (direct_gbpages && cpu_has_gbpages) + printk(KERN_INFO "Using GB pages for direct mapping\n"); + else + direct_gbpages = 0; +} + +static unsigned long __meminit kernel_physical_mapping_init(unsigned long start, + unsigned long end, + unsigned long page_size_mask) { unsigned long next, last_map_addr = end; @@ -610,6 +669,176 @@ kernel_physical_mapping_init(unsigned long start, return last_map_addr; } +struct map_range { + unsigned long start; + unsigned long end; + unsigned page_size_mask; +}; + +#define NR_RANGE_MR 5 + +static int save_mr(struct map_range *mr, int nr_range, + unsigned long start_pfn, unsigned long end_pfn, + unsigned long page_size_mask) +{ + + if (start_pfn < end_pfn) { + if (nr_range >= NR_RANGE_MR) + panic("run out of range for init_memory_mapping\n"); + mr[nr_range].start = start_pfn<> PAGE_SHIFT; + pos = start_pfn << PAGE_SHIFT; + end_pfn = ((pos + (PMD_SIZE - 1)) >> PMD_SHIFT) + << (PMD_SHIFT - PAGE_SHIFT); + if (end_pfn > (end >> PAGE_SHIFT)) + end_pfn = end >> PAGE_SHIFT; + if (start_pfn < end_pfn) { + nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0); + pos = end_pfn << PAGE_SHIFT; + } + + /* big page (2M) range*/ + start_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT) + << (PMD_SHIFT - PAGE_SHIFT); + end_pfn = ((pos + (PUD_SIZE - 1))>>PUD_SHIFT) + << (PUD_SHIFT - PAGE_SHIFT); + if (end_pfn > ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT))) + end_pfn = ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT)); + if (start_pfn < end_pfn) { + nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, + page_size_mask & (1<>PUD_SHIFT) + << (PUD_SHIFT - PAGE_SHIFT); + end_pfn = (end >> PUD_SHIFT) << (PUD_SHIFT - PAGE_SHIFT); + if (start_pfn < end_pfn) { + nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, + page_size_mask & + ((1<>PMD_SHIFT) + << (PMD_SHIFT - PAGE_SHIFT); + end_pfn = (end >> PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT); + if (start_pfn < end_pfn) { + nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, + page_size_mask & (1<>PAGE_SHIFT; + end_pfn = end>>PAGE_SHIFT; + nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0); + + /* try to merge same page size and continuous */ + for (i = 0; nr_range > 1 && i < nr_range - 1; i++) { + unsigned long old_start; + if (mr[i].end != mr[i+1].start || + mr[i].page_size_mask != mr[i+1].page_size_mask) + continue; + /* move it */ + old_start = mr[i].start; + memmove(&mr[i], &mr[i+1], + (nr_range - 1 - i) * sizeof (struct map_range)); + mr[i--].start = old_start; + nr_range--; + } + + for (i = 0; i < nr_range; i++) + printk(KERN_DEBUG " %010lx - %010lx page %s\n", + mr[i].start, mr[i].end, + (mr[i].page_size_mask & (1< table_start) + reserve_early(table_start << PAGE_SHIFT, + table_end << PAGE_SHIFT, "PGTABLE"); + + printk(KERN_INFO "last_map_addr: %lx end: %lx\n", + last_map_addr, end); + + if (!after_bootmem) + early_memtest(start, end); + + return last_map_addr >> PAGE_SHIFT; +} + #ifndef CONFIG_NUMA void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn) { @@ -681,6 +910,28 @@ EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); #endif /* CONFIG_MEMORY_HOTPLUG */ 
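init_memory_mapping() in the hunk above records head/2M/1G/tail sub-ranges with save_mr() and then coalesces neighbouring entries that touch and share a page_size_mask before mapping them. A self-contained sketch of that merge step follows; the pfn values and masks in main() are made up for the example.

#include <stdio.h>
#include <string.h>

struct map_range {
	unsigned long start, end;   /* page frame numbers */
	unsigned page_size_mask;
};

/* Merge adjacent ranges that are contiguous and use the same page size. */
static int merge_ranges(struct map_range *mr, int nr_range)
{
	int i;

	for (i = 0; nr_range > 1 && i < nr_range - 1; i++) {
		unsigned long old_start;

		if (mr[i].end != mr[i + 1].start ||
		    mr[i].page_size_mask != mr[i + 1].page_size_mask)
			continue;

		old_start = mr[i].start;
		memmove(&mr[i], &mr[i + 1],
			(nr_range - 1 - i) * sizeof(struct map_range));
		mr[i--].start = old_start;
		nr_range--;
	}
	return nr_range;
}

int main(void)
{
	struct map_range mr[] = {
		{ 0,    512,  0 },   /* head mapped with small pages */
		{ 512,  1024, 0 },   /* same mask and contiguous: merged */
		{ 1024, 4096, 1 },   /* different mask: kept separate */
	};
	int i, n = merge_ranges(mr, 3);

	for (i = 0; i < n; i++)
		printf("%lu-%lu mask %u\n", mr[i].start, mr[i].end,
		       mr[i].page_size_mask);
	return 0;
}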
+/* + * devmem_is_allowed() checks to see if /dev/mem access to a certain address + * is valid. The argument is a physical page number. + * + * + * On x86, access has to be given to the first megabyte of ram because that area + * contains bios code and data regions used by X and dosemu and similar apps. + * Access has to be given to non-kernel-ram areas as well, these contain the PCI + * mmio resources as well as potential bios/acpi data regions. + */ +int devmem_is_allowed(unsigned long pagenr) +{ + if (pagenr <= 256) + return 1; + if (iomem_is_exclusive(pagenr << PAGE_SHIFT)) + return 0; + if (!page_is_ram(pagenr)) + return 1; + return 0; +} + + static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, kcore_modules, kcore_vsyscall; @@ -768,6 +1019,13 @@ void mark_rodata_ro(void) #endif +#ifdef CONFIG_BLK_DEV_INITRD +void free_initrd_mem(unsigned long start, unsigned long end) +{ + free_init_pages("initrd memory", start, end); +} +#endif + int __init reserve_bootmem_generic(unsigned long phys, unsigned long len, int flags) { diff --git a/trunk/arch/x86/mm/ioremap.c b/trunk/arch/x86/mm/ioremap.c index 62773abdf088..433f7bd4648a 100644 --- a/trunk/arch/x86/mm/ioremap.c +++ b/trunk/arch/x86/mm/ioremap.c @@ -38,7 +38,8 @@ unsigned long __phys_addr(unsigned long x) } else { VIRTUAL_BUG_ON(x < PAGE_OFFSET); x -= PAGE_OFFSET; - VIRTUAL_BUG_ON(!phys_addr_valid(x)); + VIRTUAL_BUG_ON(system_state == SYSTEM_BOOTING ? x > MAXMEM : + !phys_addr_valid(x)); } return x; } @@ -55,8 +56,10 @@ bool __virt_addr_valid(unsigned long x) if (x < PAGE_OFFSET) return false; x -= PAGE_OFFSET; - if (!phys_addr_valid(x)) + if (system_state == SYSTEM_BOOTING ? + x > MAXMEM : !phys_addr_valid(x)) { return false; + } } return pfn_valid(x >> PAGE_SHIFT); @@ -73,9 +76,10 @@ static inline int phys_addr_valid(unsigned long addr) #ifdef CONFIG_DEBUG_VIRTUAL unsigned long __phys_addr(unsigned long x) { - /* VMALLOC_* aren't constants */ + /* VMALLOC_* aren't constants; not available at the boot time */ VIRTUAL_BUG_ON(x < PAGE_OFFSET); - VIRTUAL_BUG_ON(__vmalloc_start_set && is_vmalloc_addr((void *) x)); + VIRTUAL_BUG_ON(system_state != SYSTEM_BOOTING && + is_vmalloc_addr((void *) x)); return x - PAGE_OFFSET; } EXPORT_SYMBOL(__phys_addr); @@ -85,7 +89,7 @@ bool __virt_addr_valid(unsigned long x) { if (x < PAGE_OFFSET) return false; - if (__vmalloc_start_set && is_vmalloc_addr((void *) x)) + if (system_state != SYSTEM_BOOTING && is_vmalloc_addr((void *) x)) return false; return pfn_valid((x - PAGE_OFFSET) >> PAGE_SHIFT); } diff --git a/trunk/arch/x86/mm/kmmio.c b/trunk/arch/x86/mm/kmmio.c index 9f205030d9aa..93d82038af4b 100644 --- a/trunk/arch/x86/mm/kmmio.c +++ b/trunk/arch/x86/mm/kmmio.c @@ -32,14 +32,11 @@ struct kmmio_fault_page { struct list_head list; struct kmmio_fault_page *release_next; unsigned long page; /* location of the fault page */ - bool old_presence; /* page presence prior to arming */ - bool armed; /* * Number of times this page has been registered as a part * of a probe. If zero, page is disarmed and this may be freed. - * Used only by writers (RCU) and post_kmmio_handler(). - * Protected by kmmio_lock, when linked into kmmio_page_table. + * Used only by writers (RCU). 
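The devmem_is_allowed() addition above encodes the /dev/mem policy described in its comment: the first megabyte stays readable for BIOS/legacy users, exclusive regions are refused, and non-RAM (PCI mmio, ACPI) is allowed. A user-space sketch of the same decision tree is below; region_is_exclusive() and pfn_is_ram() are stubs standing in for the kernel's iomem_is_exclusive() and page_is_ram().

#include <stdio.h>

#define PAGE_SHIFT 12

/* Stubs for the example: pretend RAM ends at pfn 1<<18 (1 GB). */
static int region_is_exclusive(unsigned long paddr) { return 0; }
static int pfn_is_ram(unsigned long pfn) { return pfn < (1UL << 18); }

static int devmem_is_allowed(unsigned long pagenr)
{
	if (pagenr <= 256)                              /* first megabyte */
		return 1;
	if (region_is_exclusive(pagenr << PAGE_SHIFT))  /* claimed exclusively */
		return 0;
	if (!pfn_is_ram(pagenr))                        /* mmio / ACPI etc. */
		return 1;
	return 0;                                       /* ordinary kernel RAM */
}

int main(void)
{
	printf("pfn 16 (low 1MB):  %d\n", devmem_is_allowed(16));
	printf("pfn 4096 (RAM):    %d\n", devmem_is_allowed(4096));
	printf("pfn 1<<20 (mmio):  %d\n", devmem_is_allowed(1UL << 20));
	return 0;
}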
*/ int count; }; @@ -108,85 +105,57 @@ static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long page) return NULL; } -static void set_pmd_presence(pmd_t *pmd, bool present, bool *old) -{ - pmdval_t v = pmd_val(*pmd); - *old = !!(v & _PAGE_PRESENT); - v &= ~_PAGE_PRESENT; - if (present) - v |= _PAGE_PRESENT; - set_pmd(pmd, __pmd(v)); -} - -static void set_pte_presence(pte_t *pte, bool present, bool *old) -{ - pteval_t v = pte_val(*pte); - *old = !!(v & _PAGE_PRESENT); - v &= ~_PAGE_PRESENT; - if (present) - v |= _PAGE_PRESENT; - set_pte_atomic(pte, __pte(v)); -} - -static int set_page_presence(unsigned long addr, bool present, bool *old) +static void set_page_present(unsigned long addr, bool present, + unsigned int *pglevel) { + pteval_t pteval; + pmdval_t pmdval; unsigned int level; + pmd_t *pmd; pte_t *pte = lookup_address(addr, &level); if (!pte) { pr_err("kmmio: no pte for page 0x%08lx\n", addr); - return -1; + return; } + if (pglevel) + *pglevel = level; + switch (level) { case PG_LEVEL_2M: - set_pmd_presence((pmd_t *)pte, present, old); + pmd = (pmd_t *)pte; + pmdval = pmd_val(*pmd) & ~_PAGE_PRESENT; + if (present) + pmdval |= _PAGE_PRESENT; + set_pmd(pmd, __pmd(pmdval)); break; + case PG_LEVEL_4K: - set_pte_presence(pte, present, old); + pteval = pte_val(*pte) & ~_PAGE_PRESENT; + if (present) + pteval |= _PAGE_PRESENT; + set_pte_atomic(pte, __pte(pteval)); break; + default: pr_err("kmmio: unexpected page level 0x%x.\n", level); - return -1; + return; } __flush_tlb_one(addr); - return 0; } -/* - * Mark the given page as not present. Access to it will trigger a fault. - * - * Struct kmmio_fault_page is protected by RCU and kmmio_lock, but the - * protection is ignored here. RCU read lock is assumed held, so the struct - * will not disappear unexpectedly. Furthermore, the caller must guarantee, - * that double arming the same virtual address (page) cannot occur. - * - * Double disarming on the other hand is allowed, and may occur when a fault - * and mmiotrace shutdown happen simultaneously. - */ -static int arm_kmmio_fault_page(struct kmmio_fault_page *f) +/** Mark the given page as not present. Access to it will trigger a fault. */ +static void arm_kmmio_fault_page(unsigned long page, unsigned int *pglevel) { - int ret; - WARN_ONCE(f->armed, KERN_ERR "kmmio page already armed.\n"); - if (f->armed) { - pr_warning("kmmio double-arm: page 0x%08lx, ref %d, old %d\n", - f->page, f->count, f->old_presence); - } - ret = set_page_presence(f->page, false, &f->old_presence); - WARN_ONCE(ret < 0, KERN_ERR "kmmio arming 0x%08lx failed.\n", f->page); - f->armed = true; - return ret; + set_page_present(page & PAGE_MASK, false, pglevel); } -/** Restore the given page to saved presence state. */ -static void disarm_kmmio_fault_page(struct kmmio_fault_page *f) +/** Mark the given page as present. */ +static void disarm_kmmio_fault_page(unsigned long page, unsigned int *pglevel) { - bool tmp; - int ret = set_page_presence(f->page, f->old_presence, &tmp); - WARN_ONCE(ret < 0, - KERN_ERR "kmmio disarming 0x%08lx failed.\n", f->page); - f->armed = false; + set_page_present(page & PAGE_MASK, true, pglevel); } /* @@ -233,32 +202,28 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr) ctx = &get_cpu_var(kmmio_ctx); if (ctx->active) { + disarm_kmmio_fault_page(faultpage->page, NULL); if (addr == ctx->addr) { /* - * A second fault on the same page means some other - * condition needs handling by do_page_fault(), the - * page really not being present is the most common. 
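The kmmio hunks here switch between blindly setting or clearing _PAGE_PRESENT and helpers that remember the bit's previous value, so that disarming restores whatever state the page had before it was armed. The sketch below shows that save-and-restore pattern on plain flag words instead of real PTEs; struct and function names are illustrative.

#include <stdbool.h>
#include <stdio.h>

#define FLAG_PRESENT 0x1u

struct fault_page {
	unsigned int flags;     /* stand-in for the PTE */
	bool old_presence;      /* presence recorded at arm time */
	bool armed;
};

static void set_presence(unsigned int *flags, bool present, bool *old)
{
	*old = *flags & FLAG_PRESENT;
	*flags &= ~FLAG_PRESENT;
	if (present)
		*flags |= FLAG_PRESENT;
}

static void arm(struct fault_page *f)
{
	set_presence(&f->flags, false, &f->old_presence);
	f->armed = true;
}

static void disarm(struct fault_page *f)
{
	bool ignored;

	/* Restore the saved state instead of forcing the bit on. */
	set_presence(&f->flags, f->old_presence, &ignored);
	f->armed = false;
}

int main(void)
{
	struct fault_page f = { .flags = 0 };   /* page was not present to begin with */

	arm(&f);
	disarm(&f);
	printf("present after disarm: %u (was 0 before arming)\n",
	       f.flags & FLAG_PRESENT);
	return 0;
}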
+ * On SMP we sometimes get recursive probe hits on the + * same address. Context is already saved, fall out. */ - pr_debug("kmmio: secondary hit for 0x%08lx CPU %d.\n", - addr, smp_processor_id()); - - if (!faultpage->old_presence) - pr_info("kmmio: unexpected secondary hit for " - "address 0x%08lx on CPU %d.\n", addr, - smp_processor_id()); - } else { - /* - * Prevent overwriting already in-flight context. - * This should not happen, let's hope disarming at - * least prevents a panic. - */ - pr_emerg("kmmio: recursive probe hit on CPU %d, " + pr_debug("kmmio: duplicate probe hit on CPU %d, for " + "address 0x%08lx.\n", + smp_processor_id(), addr); + ret = 1; + goto no_kmmio_ctx; + } + /* + * Prevent overwriting already in-flight context. + * This should not happen, let's hope disarming at least + * prevents a panic. + */ + pr_emerg("kmmio: recursive probe hit on CPU %d, " "for address 0x%08lx. Ignoring.\n", smp_processor_id(), addr); - pr_emerg("kmmio: previous hit was at 0x%08lx.\n", - ctx->addr); - disarm_kmmio_fault_page(faultpage); - } + pr_emerg("kmmio: previous hit was at 0x%08lx.\n", + ctx->addr); goto no_kmmio_ctx; } ctx->active++; @@ -279,7 +244,7 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr) regs->flags &= ~X86_EFLAGS_IF; /* Now we set present bit in PTE and single step. */ - disarm_kmmio_fault_page(ctx->fpage); + disarm_kmmio_fault_page(ctx->fpage->page, NULL); /* * If another cpu accesses the same page while we are stepping, @@ -310,7 +275,7 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs) struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx); if (!ctx->active) { - pr_warning("kmmio: spurious debug trap on CPU %d.\n", + pr_debug("kmmio: spurious debug trap on CPU %d.\n", smp_processor_id()); goto out; } @@ -318,11 +283,7 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs) if (ctx->probe && ctx->probe->post_handler) ctx->probe->post_handler(ctx->probe, condition, regs); - /* Prevent racing against release_kmmio_fault_page(). 
*/ - spin_lock(&kmmio_lock); - if (ctx->fpage->count) - arm_kmmio_fault_page(ctx->fpage); - spin_unlock(&kmmio_lock); + arm_kmmio_fault_page(ctx->fpage->page, NULL); regs->flags &= ~X86_EFLAGS_TF; regs->flags |= ctx->saved_flags; @@ -354,25 +315,21 @@ static int add_kmmio_fault_page(unsigned long page) f = get_kmmio_fault_page(page); if (f) { if (!f->count) - arm_kmmio_fault_page(f); + arm_kmmio_fault_page(f->page, NULL); f->count++; return 0; } - f = kzalloc(sizeof(*f), GFP_ATOMIC); + f = kmalloc(sizeof(*f), GFP_ATOMIC); if (!f) return -1; f->count = 1; f->page = page; - - if (arm_kmmio_fault_page(f)) { - kfree(f); - return -1; - } - list_add_rcu(&f->list, kmmio_page_list(f->page)); + arm_kmmio_fault_page(f->page, NULL); + return 0; } @@ -390,7 +347,7 @@ static void release_kmmio_fault_page(unsigned long page, f->count--; BUG_ON(f->count < 0); if (!f->count) { - disarm_kmmio_fault_page(f); + disarm_kmmio_fault_page(f->page, NULL); f->release_next = *release_list; *release_list = f; } diff --git a/trunk/arch/x86/mm/numa_32.c b/trunk/arch/x86/mm/numa_32.c index 3daefa04ace5..451fe95a0352 100644 --- a/trunk/arch/x86/mm/numa_32.c +++ b/trunk/arch/x86/mm/numa_32.c @@ -416,11 +416,10 @@ void __init initmem_init(unsigned long start_pfn, for_each_online_node(nid) propagate_e820_map_node(nid); - for_each_online_node(nid) { + for_each_online_node(nid) memset(NODE_DATA(nid), 0, sizeof(struct pglist_data)); - NODE_DATA(nid)->bdata = &bootmem_node_data[nid]; - } + NODE_DATA(0)->bdata = &bootmem_node_data[0]; setup_bootmem_allocator(); } diff --git a/trunk/arch/x86/mm/testmmiotrace.c b/trunk/arch/x86/mm/testmmiotrace.c index 427fd1b56df5..ab50a8d7402c 100644 --- a/trunk/arch/x86/mm/testmmiotrace.c +++ b/trunk/arch/x86/mm/testmmiotrace.c @@ -1,5 +1,5 @@ /* - * Written by Pekka Paalanen, 2008-2009 + * Written by Pekka Paalanen, 2008 */ #include #include @@ -9,74 +9,35 @@ static unsigned long mmio_address; module_param(mmio_address, ulong, 0); -MODULE_PARM_DESC(mmio_address, " Start address of the mapping of 16 kB " - "(or 8 MB if read_far is non-zero)."); - -static unsigned long read_far = 0x400100; -module_param(read_far, ulong, 0); -MODULE_PARM_DESC(read_far, " Offset of a 32-bit read within 8 MB " - "(default: 0x400100)."); - -static unsigned v16(unsigned i) -{ - return i * 12 + 7; -} - -static unsigned v32(unsigned i) -{ - return i * 212371 + 13; -} +MODULE_PARM_DESC(mmio_address, "Start address of the mapping of 16 kB."); static void do_write_test(void __iomem *p) { unsigned int i; - pr_info(MODULE_NAME ": write test.\n"); mmiotrace_printk("Write test.\n"); - for (i = 0; i < 256; i++) iowrite8(i, p + i); - for (i = 1024; i < (5 * 1024); i += 2) - iowrite16(v16(i), p + i); - + iowrite16(i * 12 + 7, p + i); for (i = (5 * 1024); i < (16 * 1024); i += 4) - iowrite32(v32(i), p + i); + iowrite32(i * 212371 + 13, p + i); } static void do_read_test(void __iomem *p) { unsigned int i; - unsigned errs[3] = { 0 }; - pr_info(MODULE_NAME ": read test.\n"); mmiotrace_printk("Read test.\n"); - for (i = 0; i < 256; i++) - if (ioread8(p + i) != i) - ++errs[0]; - + ioread8(p + i); for (i = 1024; i < (5 * 1024); i += 2) - if (ioread16(p + i) != v16(i)) - ++errs[1]; - + ioread16(p + i); for (i = (5 * 1024); i < (16 * 1024); i += 4) - if (ioread32(p + i) != v32(i)) - ++errs[2]; - - mmiotrace_printk("Read errors: 8-bit %d, 16-bit %d, 32-bit %d.\n", - errs[0], errs[1], errs[2]); + ioread32(p + i); } -static void do_read_far_test(void __iomem *p) +static void do_test(void) { - pr_info(MODULE_NAME ": read far test.\n"); - 
mmiotrace_printk("Read far test.\n"); - - ioread32(p + read_far); -} - -static void do_test(unsigned long size) -{ - void __iomem *p = ioremap_nocache(mmio_address, size); + void __iomem *p = ioremap_nocache(mmio_address, 0x4000); if (!p) { pr_err(MODULE_NAME ": could not ioremap, aborting.\n"); return; @@ -84,15 +45,11 @@ static void do_test(unsigned long size) mmiotrace_printk("ioremap returned %p.\n", p); do_write_test(p); do_read_test(p); - if (read_far && read_far < size - 4) - do_read_far_test(p); iounmap(p); } static int __init init(void) { - unsigned long size = (read_far) ? (8 << 20) : (16 << 10); - if (mmio_address == 0) { pr_err(MODULE_NAME ": you have to use the module argument " "mmio_address.\n"); @@ -101,11 +58,10 @@ static int __init init(void) return -ENXIO; } - pr_warning(MODULE_NAME ": WARNING: mapping %lu kB @ 0x%08lx in PCI " - "address space, and writing 16 kB of rubbish in there.\n", - size >> 10, mmio_address); - do_test(size); - pr_info(MODULE_NAME ": All done.\n"); + pr_warning(MODULE_NAME ": WARNING: mapping 16 kB @ 0x%08lx " + "in PCI address space, and writing " + "rubbish in there.\n", mmio_address); + do_test(); return 0; } diff --git a/trunk/crypto/api.c b/trunk/crypto/api.c index 38a2bc02a98c..efe77df6863f 100644 --- a/trunk/crypto/api.c +++ b/trunk/crypto/api.c @@ -215,19 +215,8 @@ struct crypto_alg *crypto_larval_lookup(const char *name, u32 type, u32 mask) mask &= ~(CRYPTO_ALG_LARVAL | CRYPTO_ALG_DEAD); type &= mask; - alg = crypto_alg_lookup(name, type, mask); - if (!alg) { - char tmp[CRYPTO_MAX_ALG_NAME]; - - request_module(name); - - if (!((type ^ CRYPTO_ALG_NEED_FALLBACK) & mask) && - snprintf(tmp, sizeof(tmp), "%s-all", name) < sizeof(tmp)) - request_module(tmp); - - alg = crypto_alg_lookup(name, type, mask); - } - + alg = try_then_request_module(crypto_alg_lookup(name, type, mask), + name); if (alg) return crypto_is_larval(alg) ? 
crypto_larval_wait(alg) : alg; diff --git a/trunk/drivers/crypto/ixp4xx_crypto.c b/trunk/drivers/crypto/ixp4xx_crypto.c index d9e751be8c5f..2d637e0fbc03 100644 --- a/trunk/drivers/crypto/ixp4xx_crypto.c +++ b/trunk/drivers/crypto/ixp4xx_crypto.c @@ -457,12 +457,10 @@ static int init_ixp_crypto(void) if (!ctx_pool) { goto err; } - ret = qmgr_request_queue(SEND_QID, NPE_QLEN_TOTAL, 0, 0, - "ixp_crypto:out", NULL); + ret = qmgr_request_queue(SEND_QID, NPE_QLEN_TOTAL, 0, 0); if (ret) goto err; - ret = qmgr_request_queue(RECV_QID, NPE_QLEN, 0, 0, - "ixp_crypto:in", NULL); + ret = qmgr_request_queue(RECV_QID, NPE_QLEN, 0, 0); if (ret) { qmgr_release_queue(SEND_QID); goto err; diff --git a/trunk/drivers/crypto/padlock-aes.c b/trunk/drivers/crypto/padlock-aes.c index 3f0fdd18255d..856b3cc25583 100644 --- a/trunk/drivers/crypto/padlock-aes.c +++ b/trunk/drivers/crypto/padlock-aes.c @@ -489,4 +489,4 @@ MODULE_DESCRIPTION("VIA PadLock AES algorithm support"); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Michal Ludvig"); -MODULE_ALIAS("aes-all"); +MODULE_ALIAS("aes"); diff --git a/trunk/drivers/crypto/padlock-sha.c b/trunk/drivers/crypto/padlock-sha.c index a2c8e8514b63..a7fbadebf623 100644 --- a/trunk/drivers/crypto/padlock-sha.c +++ b/trunk/drivers/crypto/padlock-sha.c @@ -304,7 +304,7 @@ MODULE_DESCRIPTION("VIA PadLock SHA1/SHA256 algorithms support."); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Michal Ludvig"); -MODULE_ALIAS("sha1-all"); -MODULE_ALIAS("sha256-all"); +MODULE_ALIAS("sha1"); +MODULE_ALIAS("sha256"); MODULE_ALIAS("sha1-padlock"); MODULE_ALIAS("sha256-padlock"); diff --git a/trunk/drivers/dma/iop-adma.c b/trunk/drivers/dma/iop-adma.c index 647374acba94..ea5440dd10dc 100644 --- a/trunk/drivers/dma/iop-adma.c +++ b/trunk/drivers/dma/iop-adma.c @@ -1401,7 +1401,7 @@ MODULE_ALIAS("platform:iop-adma"); static struct platform_driver iop_adma_driver = { .probe = iop_adma_probe, - .remove = __devexit_p(iop_adma_remove), + .remove = iop_adma_remove, .driver = { .owner = THIS_MODULE, .name = "iop-adma", diff --git a/trunk/drivers/dma/mv_xor.c b/trunk/drivers/dma/mv_xor.c index 5d5d5b31867f..d35cbd1ff0b3 100644 --- a/trunk/drivers/dma/mv_xor.c +++ b/trunk/drivers/dma/mv_xor.c @@ -1287,7 +1287,7 @@ mv_xor_conf_mbus_windows(struct mv_xor_shared_private *msp, static struct platform_driver mv_xor_driver = { .probe = mv_xor_probe, - .remove = __devexit_p(mv_xor_remove), + .remove = mv_xor_remove, .driver = { .owner = THIS_MODULE, .name = MV_XOR_NAME, diff --git a/trunk/drivers/gpu/drm/drm_stub.c b/trunk/drivers/gpu/drm/drm_stub.c index 7c8b15b22bf2..096e2a37446d 100644 --- a/trunk/drivers/gpu/drm/drm_stub.c +++ b/trunk/drivers/gpu/drm/drm_stub.c @@ -168,7 +168,7 @@ int drm_setmaster_ioctl(struct drm_device *dev, void *data, file_priv->minor->master != file_priv->master) { mutex_lock(&dev->struct_mutex); file_priv->minor->master = drm_master_get(file_priv->master); - mutex_unlock(&dev->struct_mutex); + mutex_lock(&dev->struct_mutex); } return 0; diff --git a/trunk/drivers/i2c/busses/i2c-mv64xxx.c b/trunk/drivers/i2c/busses/i2c-mv64xxx.c index 7f186bbcb99d..eeda276f8f16 100644 --- a/trunk/drivers/i2c/busses/i2c-mv64xxx.c +++ b/trunk/drivers/i2c/busses/i2c-mv64xxx.c @@ -482,7 +482,7 @@ mv64xxx_i2c_map_regs(struct platform_device *pd, return 0; } -static void +static void __devexit mv64xxx_i2c_unmap_regs(struct mv64xxx_i2c_data *drv_data) { if (drv_data->reg_base) { @@ -577,7 +577,7 @@ mv64xxx_i2c_remove(struct platform_device *dev) static struct platform_driver mv64xxx_i2c_driver = { .probe = mv64xxx_i2c_probe, - 
.remove = __devexit_p(mv64xxx_i2c_remove), + .remove = mv64xxx_i2c_remove, .driver = { .owner = THIS_MODULE, .name = MV64XXX_I2C_CTLR_NAME, diff --git a/trunk/drivers/mtd/nand/orion_nand.c b/trunk/drivers/mtd/nand/orion_nand.c index c2dfd3ea353d..917cf8d3ae95 100644 --- a/trunk/drivers/mtd/nand/orion_nand.c +++ b/trunk/drivers/mtd/nand/orion_nand.c @@ -149,7 +149,7 @@ static int __devexit orion_nand_remove(struct platform_device *pdev) static struct platform_driver orion_nand_driver = { .probe = orion_nand_probe, - .remove = __devexit_p(orion_nand_remove), + .remove = orion_nand_remove, .driver = { .name = "orion_nand", .owner = THIS_MODULE, diff --git a/trunk/drivers/net/arm/Makefile b/trunk/drivers/net/arm/Makefile index 811a3ccd14c1..c69c0cdba4a2 100644 --- a/trunk/drivers/net/arm/Makefile +++ b/trunk/drivers/net/arm/Makefile @@ -4,7 +4,7 @@ # obj-$(CONFIG_ARM_AM79C961A) += am79c961a.o -obj-$(CONFIG_ARM_ETHERH) += etherh.o +obj-$(CONFIG_ARM_ETHERH) += etherh.o ../8390.o obj-$(CONFIG_ARM_ETHER3) += ether3.o obj-$(CONFIG_ARM_ETHER1) += ether1.o obj-$(CONFIG_ARM_AT91_ETHER) += at91_ether.o diff --git a/trunk/drivers/net/arm/etherh.c b/trunk/drivers/net/arm/etherh.c index f52f668c49bf..54b52e5b1821 100644 --- a/trunk/drivers/net/arm/etherh.c +++ b/trunk/drivers/net/arm/etherh.c @@ -641,15 +641,15 @@ static const struct net_device_ops etherh_netdev_ops = { .ndo_open = etherh_open, .ndo_stop = etherh_close, .ndo_set_config = etherh_set_config, - .ndo_start_xmit = __ei_start_xmit, - .ndo_tx_timeout = __ei_tx_timeout, - .ndo_get_stats = __ei_get_stats, - .ndo_set_multicast_list = __ei_set_multicast_list, + .ndo_start_xmit = ei_start_xmit, + .ndo_tx_timeout = ei_tx_timeout, + .ndo_get_stats = ei_get_stats, + .ndo_set_multicast_list = ei_set_multicast_list, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, .ndo_change_mtu = eth_change_mtu, #ifdef CONFIG_NET_POLL_CONTROLLER - .ndo_poll_controller = __ei_poll, + .ndo_poll_controller = ei_poll, #endif }; diff --git a/trunk/drivers/video/pxafb.c b/trunk/drivers/video/pxafb.c index 2552b9f325ee..48ff701d3a72 100644 --- a/trunk/drivers/video/pxafb.c +++ b/trunk/drivers/video/pxafb.c @@ -2230,7 +2230,7 @@ static int __devexit pxafb_remove(struct platform_device *dev) static struct platform_driver pxafb_driver = { .probe = pxafb_probe, - .remove = __devexit_p(pxafb_remove), + .remove = pxafb_remove, .suspend = pxafb_suspend, .resume = pxafb_resume, .driver = { diff --git a/trunk/include/linux/rcuclassic.h b/trunk/include/linux/rcuclassic.h index 80044a4f3ab9..f3f697df1d71 100644 --- a/trunk/include/linux/rcuclassic.h +++ b/trunk/include/linux/rcuclassic.h @@ -181,10 +181,4 @@ extern long rcu_batches_completed_bh(void); #define rcu_enter_nohz() do { } while (0) #define rcu_exit_nohz() do { } while (0) -/* A context switch is a grace period for rcuclassic. */ -static inline int rcu_blocking_is_gp(void) -{ - return num_online_cpus() == 1; -} - #endif /* __LINUX_RCUCLASSIC_H */ diff --git a/trunk/include/linux/rcupdate.h b/trunk/include/linux/rcupdate.h index 528343e6da51..921340a7b71c 100644 --- a/trunk/include/linux/rcupdate.h +++ b/trunk/include/linux/rcupdate.h @@ -52,9 +52,6 @@ struct rcu_head { void (*func)(struct rcu_head *head); }; -/* Internal to kernel, but needed by rcupreempt.h. 
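The rcuclassic.h/rcupreempt.h hunks in this area add and remove rcu_blocking_is_gp(), which lets synchronize_rcu() return immediately when a single online CPU (and, for preemptible RCU, a not-yet-active scheduler) guarantees no reader can still be inside a critical section. A toy sketch of that early-return shape follows; num_online_cpus, scheduler_active and wait_for_grace_period() are stand-ins for the real kernel state and machinery.

#include <stdio.h>

static int num_online_cpus = 1;
static int scheduler_active = 0;

static int blocking_is_gp(void)
{
	/* With one CPU and no scheduler yet, blocking is itself a grace period. */
	return num_online_cpus == 1 && !scheduler_active;
}

static void wait_for_grace_period(void)
{
	printf("  ...expensive wait for all readers...\n");
}

static void synchronize(void)
{
	if (blocking_is_gp())
		return;                 /* early-boot / UP fast path */
	wait_for_grace_period();
}

int main(void)
{
	printf("early boot:\n");
	synchronize();                  /* returns immediately */

	scheduler_active = 1;
	num_online_cpus = 4;
	printf("after boot:\n");
	synchronize();                  /* takes the slow path */
	return 0;
}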
*/ -extern int rcu_scheduler_active; - #if defined(CONFIG_CLASSIC_RCU) #include #elif defined(CONFIG_TREE_RCU) @@ -268,7 +265,6 @@ extern void rcu_barrier_sched(void); /* Internal to kernel */ extern void rcu_init(void); -extern void rcu_scheduler_starting(void); extern int rcu_needs_cpu(int cpu); #endif /* __LINUX_RCUPDATE_H */ diff --git a/trunk/include/linux/rcupreempt.h b/trunk/include/linux/rcupreempt.h index 74304b4538d8..3e05c09b54a2 100644 --- a/trunk/include/linux/rcupreempt.h +++ b/trunk/include/linux/rcupreempt.h @@ -142,19 +142,4 @@ static inline void rcu_exit_nohz(void) #define rcu_exit_nohz() do { } while (0) #endif /* CONFIG_NO_HZ */ -/* - * A context switch is a grace period for rcupreempt synchronize_rcu() - * only during early boot, before the scheduler has been initialized. - * So, how the heck do we get a context switch? Well, if the caller - * invokes synchronize_rcu(), they are willing to accept a context - * switch, so we simply pretend that one happened. - * - * After boot, there might be a blocked or preempted task in an RCU - * read-side critical section, so we cannot then take the fastpath. - */ -static inline int rcu_blocking_is_gp(void) -{ - return num_online_cpus() == 1 && !rcu_scheduler_active; -} - #endif /* __LINUX_RCUPREEMPT_H */ diff --git a/trunk/include/linux/rcutree.h b/trunk/include/linux/rcutree.h index a722fb67bb2d..d4368b7975c3 100644 --- a/trunk/include/linux/rcutree.h +++ b/trunk/include/linux/rcutree.h @@ -326,10 +326,4 @@ static inline void rcu_exit_nohz(void) } #endif /* CONFIG_NO_HZ */ -/* A context switch is a grace period for rcutree. */ -static inline int rcu_blocking_is_gp(void) -{ - return num_online_cpus() == 1; -} - #endif /* __LINUX_RCUTREE_H */ diff --git a/trunk/include/linux/sched.h b/trunk/include/linux/sched.h index a7c7698583bb..f0a50b20e8a0 100644 --- a/trunk/include/linux/sched.h +++ b/trunk/include/linux/sched.h @@ -2303,13 +2303,9 @@ extern long sched_group_rt_runtime(struct task_group *tg); extern int sched_group_set_rt_period(struct task_group *tg, long rt_period_us); extern long sched_group_rt_period(struct task_group *tg); -extern int sched_rt_can_attach(struct task_group *tg, struct task_struct *tsk); #endif #endif -extern int task_can_switch_user(struct user_struct *up, - struct task_struct *tsk); - #ifdef CONFIG_TASK_XACCT static inline void add_rchar(struct task_struct *tsk, ssize_t amt) { diff --git a/trunk/init/main.c b/trunk/init/main.c index 6bf83afd654d..6441083f8273 100644 --- a/trunk/init/main.c +++ b/trunk/init/main.c @@ -98,7 +98,7 @@ static inline void mark_rodata_ro(void) { } extern void tc_init(void); #endif -enum system_states system_state __read_mostly; +enum system_states system_state; EXPORT_SYMBOL(system_state); /* @@ -464,7 +464,6 @@ static noinline void __init_refok rest_init(void) * at least once to get things moving: */ init_idle_bootup_task(current); - rcu_scheduler_starting(); preempt_enable_no_resched(); schedule(); preempt_disable(); diff --git a/trunk/kernel/rcuclassic.c b/trunk/kernel/rcuclassic.c index 654c640a6b9c..bd5a9003497c 100644 --- a/trunk/kernel/rcuclassic.c +++ b/trunk/kernel/rcuclassic.c @@ -679,8 +679,8 @@ int rcu_needs_cpu(int cpu) void rcu_check_callbacks(int cpu, int user) { if (user || - (idle_cpu(cpu) && rcu_scheduler_active && - !in_softirq() && hardirq_count() <= (1 << HARDIRQ_SHIFT))) { + (idle_cpu(cpu) && !in_softirq() && + hardirq_count() <= (1 << HARDIRQ_SHIFT))) { /* * Get here if this CPU took its interrupt from user diff --git a/trunk/kernel/rcupdate.c 
b/trunk/kernel/rcupdate.c index cae8a059cf47..d92a76a881aa 100644 --- a/trunk/kernel/rcupdate.c +++ b/trunk/kernel/rcupdate.c @@ -44,7 +44,6 @@ #include #include #include -#include enum rcu_barrier { RCU_BARRIER_STD, @@ -56,7 +55,6 @@ static DEFINE_PER_CPU(struct rcu_head, rcu_barrier_head) = {NULL}; static atomic_t rcu_barrier_cpu_count; static DEFINE_MUTEX(rcu_barrier_mutex); static struct completion rcu_barrier_completion; -int rcu_scheduler_active __read_mostly; /* * Awaken the corresponding synchronize_rcu() instance now that a @@ -82,10 +80,6 @@ void wakeme_after_rcu(struct rcu_head *head) void synchronize_rcu(void) { struct rcu_synchronize rcu; - - if (rcu_blocking_is_gp()) - return; - init_completion(&rcu.completion); /* Will wake me after RCU finished. */ call_rcu(&rcu.head, wakeme_after_rcu); @@ -181,9 +175,3 @@ void __init rcu_init(void) __rcu_init(); } -void rcu_scheduler_starting(void) -{ - WARN_ON(num_online_cpus() != 1); - WARN_ON(nr_context_switches() > 0); - rcu_scheduler_active = 1; -} diff --git a/trunk/kernel/rcupreempt.c b/trunk/kernel/rcupreempt.c index 5d59e850fb71..33cfc50781f9 100644 --- a/trunk/kernel/rcupreempt.c +++ b/trunk/kernel/rcupreempt.c @@ -1181,9 +1181,6 @@ void __synchronize_sched(void) { struct rcu_synchronize rcu; - if (num_online_cpus() == 1) - return; /* blocking is gp if only one CPU! */ - init_completion(&rcu.completion); /* Will wake me after RCU finished. */ call_rcu_sched(&rcu.head, wakeme_after_rcu); diff --git a/trunk/kernel/rcutree.c b/trunk/kernel/rcutree.c index 97ce31579ec0..b2fd602a6f6f 100644 --- a/trunk/kernel/rcutree.c +++ b/trunk/kernel/rcutree.c @@ -948,8 +948,8 @@ static void rcu_do_batch(struct rcu_data *rdp) void rcu_check_callbacks(int cpu, int user) { if (user || - (idle_cpu(cpu) && rcu_scheduler_active && - !in_softirq() && hardirq_count() <= (1 << HARDIRQ_SHIFT))) { + (idle_cpu(cpu) && !in_softirq() && + hardirq_count() <= (1 << HARDIRQ_SHIFT))) { /* * Get here if this CPU took its interrupt from user diff --git a/trunk/kernel/sched.c b/trunk/kernel/sched.c index 0a76d0b6f215..0e5c38e1c8b5 100644 --- a/trunk/kernel/sched.c +++ b/trunk/kernel/sched.c @@ -223,7 +223,7 @@ static void start_rt_bandwidth(struct rt_bandwidth *rt_b) { ktime_t now; - if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF) + if (rt_bandwidth_enabled() && rt_b->rt_runtime == RUNTIME_INF) return; if (hrtimer_active(&rt_b->rt_period_timer)) @@ -9219,16 +9219,6 @@ static int sched_rt_global_constraints(void) return ret; } - -int sched_rt_can_attach(struct task_group *tg, struct task_struct *tsk) -{ - /* Don't accept realtime tasks when there is no way for them to run */ - if (rt_task(tsk) && tg->rt_bandwidth.rt_runtime == 0) - return 0; - - return 1; -} - #else /* !CONFIG_RT_GROUP_SCHED */ static int sched_rt_global_constraints(void) { @@ -9322,7 +9312,8 @@ cpu_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, struct task_struct *tsk) { #ifdef CONFIG_RT_GROUP_SCHED - if (!sched_rt_can_attach(cgroup_tg(cgrp), tsk)) + /* Don't accept realtime tasks when there is no way for them to run */ + if (rt_task(tsk) && cgroup_tg(cgrp)->rt_bandwidth.rt_runtime == 0) return -EINVAL; #else /* We don't support RT-tasks being in separate groups */ diff --git a/trunk/kernel/sys.c b/trunk/kernel/sys.c index 37f458e6882a..f145c415bc16 100644 --- a/trunk/kernel/sys.c +++ b/trunk/kernel/sys.c @@ -559,7 +559,7 @@ SYSCALL_DEFINE1(setgid, gid_t, gid) abort_creds(new); return retval; } - + /* * change the user struct in a credentials set to match the new UID 
*/ @@ -571,11 +571,6 @@ static int set_user(struct cred *new) if (!new_user) return -EAGAIN; - if (!task_can_switch_user(new_user, current)) { - free_uid(new_user); - return -EINVAL; - } - if (atomic_read(&new_user->processes) >= current->signal->rlim[RLIMIT_NPROC].rlim_cur && new_user != INIT_USER) { @@ -636,11 +631,10 @@ SYSCALL_DEFINE2(setreuid, uid_t, ruid, uid_t, euid) goto error; } - if (new->uid != old->uid) { - retval = set_user(new); - if (retval < 0) - goto error; - } + retval = -EAGAIN; + if (new->uid != old->uid && set_user(new) < 0) + goto error; + if (ruid != (uid_t) -1 || (euid != (uid_t) -1 && euid != old->uid)) new->suid = new->euid; @@ -686,10 +680,9 @@ SYSCALL_DEFINE1(setuid, uid_t, uid) retval = -EPERM; if (capable(CAP_SETUID)) { new->suid = new->uid = uid; - if (uid != old->uid) { - retval = set_user(new); - if (retval < 0) - goto error; + if (uid != old->uid && set_user(new) < 0) { + retval = -EAGAIN; + goto error; } } else if (uid != old->uid && uid != new->suid) { goto error; @@ -741,13 +734,11 @@ SYSCALL_DEFINE3(setresuid, uid_t, ruid, uid_t, euid, uid_t, suid) goto error; } + retval = -EAGAIN; if (ruid != (uid_t) -1) { new->uid = ruid; - if (ruid != old->uid) { - retval = set_user(new); - if (retval < 0) - goto error; - } + if (ruid != old->uid && set_user(new) < 0) + goto error; } if (euid != (uid_t) -1) new->euid = euid; diff --git a/trunk/kernel/user.c b/trunk/kernel/user.c index 6a9b696128c8..3551ac742395 100644 --- a/trunk/kernel/user.c +++ b/trunk/kernel/user.c @@ -362,24 +362,6 @@ static void free_user(struct user_struct *up, unsigned long flags) #endif -#if defined(CONFIG_RT_GROUP_SCHED) && defined(CONFIG_USER_SCHED) -/* - * We need to check if a setuid can take place. This function should be called - * before successfully completing the setuid. - */ -int task_can_switch_user(struct user_struct *up, struct task_struct *tsk) -{ - - return sched_rt_can_attach(up->tg, tsk); - -} -#else -int task_can_switch_user(struct user_struct *up, struct task_struct *tsk) -{ - return 1; -} -#endif - /* * Locate the user_struct for the passed UID. If found, take a ref on it. The * caller must undo that ref with free_uid().
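The sys.c and user.c hunks above toggle between two error-handling styles in the setuid paths: one propagates whatever set_user() returns (-EAGAIN for the RLIMIT_NPROC case, -EINVAL when task_can_switch_user() refuses the move), the other hard-codes -EAGAIN before the call. The sketch below shows the propagating style in isolation; the error values mirror the hunk, while the helpers and globals are invented for the example.

#include <stdio.h>

#define EAGAIN 11
#define EINVAL 22

static int over_process_limit;  /* pretend RLIMIT_NPROC check tripped */
static int rt_cannot_switch;    /* pretend task_can_switch_user() refused */

static int set_user(void)
{
	if (rt_cannot_switch)
		return -EINVAL;
	if (over_process_limit)
		return -EAGAIN;
	return 0;
}

static int do_setuid(int uid_changes)
{
	int retval = 0;

	if (uid_changes) {
		/* Propagate the callee's error instead of forcing -EAGAIN. */
		retval = set_user();
		if (retval < 0)
			return retval;
	}
	return 0;
}

int main(void)
{
	rt_cannot_switch = 1;
	printf("setuid -> %d (expected -EINVAL = -22)\n", do_setuid(1));

	rt_cannot_switch = 0;
	over_process_limit = 1;
	printf("setuid -> %d (expected -EAGAIN = -11)\n", do_setuid(1));
	return 0;
}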