From 8dfe25f235b0765878f96baed10ee8263eddb210 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Mon, 27 Feb 2012 20:03:51 +0000 Subject: [PATCH] --- yaml --- r: 292512 b: refs/heads/master c: cb3bc9d0de1e247268622acd40e0d2f8120f299c h: refs/heads/master v: v3 --- [refs] | 2 +- trunk/arch/powerpc/include/asm/eeh.h | 26 +- .../arch/powerpc/include/asm/exception-64s.h | 113 ++--- trunk/arch/powerpc/include/asm/hw_irq.h | 63 +-- trunk/arch/powerpc/include/asm/irqflags.h | 37 +- trunk/arch/powerpc/include/asm/paca.h | 2 +- trunk/arch/powerpc/include/asm/ppc-pci.h | 71 +--- trunk/arch/powerpc/include/asm/ppc_asm.h | 2 - trunk/arch/powerpc/include/asm/reg.h | 22 +- trunk/arch/powerpc/include/asm/system.h | 38 -- trunk/arch/powerpc/include/asm/thread_info.h | 9 +- trunk/arch/powerpc/kernel/asm-offsets.c | 2 +- trunk/arch/powerpc/kernel/dbell.c | 2 - trunk/arch/powerpc/kernel/entry_64.S | 250 +++++------ trunk/arch/powerpc/kernel/exceptions-64e.S | 236 ++++------- trunk/arch/powerpc/kernel/exceptions-64s.S | 314 ++++++++++---- trunk/arch/powerpc/kernel/head_32.S | 4 +- trunk/arch/powerpc/kernel/head_40x.S | 4 +- trunk/arch/powerpc/kernel/head_64.S | 62 ++- trunk/arch/powerpc/kernel/head_8xx.S | 4 +- trunk/arch/powerpc/kernel/head_booke.h | 4 +- trunk/arch/powerpc/kernel/head_fsl_booke.S | 2 +- trunk/arch/powerpc/kernel/idle.c | 6 +- trunk/arch/powerpc/kernel/idle_book3e.S | 25 +- trunk/arch/powerpc/kernel/idle_power4.S | 24 +- trunk/arch/powerpc/kernel/idle_power7.S | 23 +- trunk/arch/powerpc/kernel/irq.c | 204 +++------ trunk/arch/powerpc/kernel/misc.S | 1 + trunk/arch/powerpc/kernel/process.c | 27 +- trunk/arch/powerpc/kernel/time.c | 8 +- trunk/arch/powerpc/kernel/traps.c | 3 - trunk/arch/powerpc/kernel/vmlinux.lds.S | 5 + trunk/arch/powerpc/mm/fault.c | 181 +++----- trunk/arch/powerpc/mm/slb_low.S | 16 + trunk/arch/powerpc/platforms/pseries/eeh.c | 400 ++++++++++++------ .../platforms/pseries/processor_idle.c | 18 +- trunk/arch/powerpc/xmon/xmon.c | 7 +- 37 files changed, 1023 insertions(+), 1194 deletions(-) diff --git a/[refs] b/[refs] index 704e6e16b392..2a0b4c5e39df 100644 --- a/[refs] +++ b/[refs] @@ -1,2 +1,2 @@ --- -refs/heads/master: 7230c5644188cd9e3fb380cc97dde00c464a3ba7 +refs/heads/master: cb3bc9d0de1e247268622acd40e0d2f8120f299c diff --git a/trunk/arch/powerpc/include/asm/eeh.h b/trunk/arch/powerpc/include/asm/eeh.h index 66ea9b8b95c5..232887721ff4 100644 --- a/trunk/arch/powerpc/include/asm/eeh.h +++ b/trunk/arch/powerpc/include/asm/eeh.h @@ -1,6 +1,6 @@ /* - * eeh.h * Copyright (C) 2001 Dave Engebretsen & Todd Inglett IBM Corporation. + * Copyright 2001-2012 IBM Corporation. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -40,8 +40,10 @@ extern int eeh_subsystem_enabled; #define EEH_MODE_RECOVERING (1<<3) #define EEH_MODE_IRQ_DISABLED (1<<4) -/* Max number of EEH freezes allowed before we consider the device - * to be permanently disabled. */ +/* + * Max number of EEH freezes allowed before we consider the device + * to be permanently disabled. + */ #define EEH_MAX_ALLOWED_FREEZES 5 void __init eeh_init(void); @@ -49,26 +51,8 @@ unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned long val); int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev); void __init pci_addr_cache_build(void); - -/** - * eeh_add_device_early - * eeh_add_device_late - * - * Perform eeh initialization for devices added after boot. 
- * Call eeh_add_device_early before doing any i/o to the - * device (including config space i/o). Call eeh_add_device_late - * to finish the eeh setup for this device. - */ void eeh_add_device_tree_early(struct device_node *); void eeh_add_device_tree_late(struct pci_bus *); - -/** - * eeh_remove_device_recursive - undo EEH for device & children. - * @dev: pci device to be removed - * - * As above, this removes the device; it also removes child - * pci devices as well. - */ void eeh_remove_bus_device(struct pci_dev *); /** diff --git a/trunk/arch/powerpc/include/asm/exception-64s.h b/trunk/arch/powerpc/include/asm/exception-64s.h index 548da3aa0a30..8057f4f6980f 100644 --- a/trunk/arch/powerpc/include/asm/exception-64s.h +++ b/trunk/arch/powerpc/include/asm/exception-64s.h @@ -232,30 +232,23 @@ label##_hv: \ EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, label##_common, \ EXC_HV, KVMTEST, vec) -/* This associate vector numbers with bits in paca->irq_happened */ -#define SOFTEN_VALUE_0x500 PACA_IRQ_EE -#define SOFTEN_VALUE_0x502 PACA_IRQ_EE -#define SOFTEN_VALUE_0x900 PACA_IRQ_DEC -#define SOFTEN_VALUE_0x982 PACA_IRQ_DEC - -#define __SOFTEN_TEST(h, vec) \ +#define __SOFTEN_TEST(h) \ lbz r10,PACASOFTIRQEN(r13); \ cmpwi r10,0; \ - li r10,SOFTEN_VALUE_##vec; \ beq masked_##h##interrupt -#define _SOFTEN_TEST(h, vec) __SOFTEN_TEST(h, vec) +#define _SOFTEN_TEST(h) __SOFTEN_TEST(h) #define SOFTEN_TEST_PR(vec) \ KVMTEST_PR(vec); \ - _SOFTEN_TEST(EXC_STD, vec) + _SOFTEN_TEST(EXC_STD) #define SOFTEN_TEST_HV(vec) \ KVMTEST(vec); \ - _SOFTEN_TEST(EXC_HV, vec) + _SOFTEN_TEST(EXC_HV) #define SOFTEN_TEST_HV_201(vec) \ KVMTEST(vec); \ - _SOFTEN_TEST(EXC_STD, vec) + _SOFTEN_TEST(EXC_STD) #define __MASKABLE_EXCEPTION_PSERIES(vec, label, h, extra) \ HMT_MEDIUM; \ @@ -279,55 +272,73 @@ label##_hv: \ _MASKABLE_EXCEPTION_PSERIES(vec, label, \ EXC_HV, SOFTEN_TEST_HV) -/* - * Our exception common code can be passed various "additions" - * to specify the behaviour of interrupts, whether to kick the - * runlatch, etc... - */ - -/* Exception addition: Hard disable interrupts */ -#define DISABLE_INTS SOFT_DISABLE_INTS(r10,r11) +#ifdef CONFIG_PPC_ISERIES +#define DISABLE_INTS \ + li r11,0; \ + stb r11,PACASOFTIRQEN(r13); \ +BEGIN_FW_FTR_SECTION; \ + stb r11,PACAHARDIRQEN(r13); \ +END_FW_FTR_SECTION_IFCLR(FW_FEATURE_ISERIES); \ + TRACE_DISABLE_INTS; \ +BEGIN_FW_FTR_SECTION; \ + mfmsr r10; \ + ori r10,r10,MSR_EE; \ + mtmsrd r10,1; \ +END_FW_FTR_SECTION_IFSET(FW_FEATURE_ISERIES) +#else +#define DISABLE_INTS \ + li r11,0; \ + stb r11,PACASOFTIRQEN(r13); \ + stb r11,PACAHARDIRQEN(r13); \ + TRACE_DISABLE_INTS +#endif /* CONFIG_PPC_ISERIES */ -/* Exception addition: Keep interrupt state */ #define ENABLE_INTS \ - ld r11,PACAKMSR(r13); \ ld r12,_MSR(r1); \ + mfmsr r11; \ rlwimi r11,r12,0,MSR_EE; \ mtmsrd r11,1 -#define ADD_NVGPRS \ - bl .save_nvgprs - -#define RUNLATCH_ON \ -BEGIN_FTR_SECTION \ - clrrdi r3,r1,THREAD_SHIFT; \ - ld r4,TI_LOCAL_FLAGS(r3); \ - andi. 
r0,r4,_TLF_RUNLATCH; \ - beql ppc64_runlatch_on_trampoline; \ -END_FTR_SECTION_IFSET(CPU_FTR_CTRL) - -#define EXCEPTION_COMMON(trap, label, hdlr, ret, additions) \ - .align 7; \ - .globl label##_common; \ -label##_common: \ - EXCEPTION_PROLOG_COMMON(trap, PACA_EXGEN); \ - additions; \ - addi r3,r1,STACK_FRAME_OVERHEAD; \ - bl hdlr; \ - b ret - -#define STD_EXCEPTION_COMMON(trap, label, hdlr) \ - EXCEPTION_COMMON(trap, label, hdlr, ret_from_except, \ - ADD_NVGPRS;DISABLE_INTS) +#define STD_EXCEPTION_COMMON(trap, label, hdlr) \ + .align 7; \ + .globl label##_common; \ +label##_common: \ + EXCEPTION_PROLOG_COMMON(trap, PACA_EXGEN); \ + DISABLE_INTS; \ + bl .save_nvgprs; \ + addi r3,r1,STACK_FRAME_OVERHEAD; \ + bl hdlr; \ + b .ret_from_except /* * Like STD_EXCEPTION_COMMON, but for exceptions that can occur - * in the idle task and therefore need the special idle handling - * (finish nap and runlatch) + * in the idle task and therefore need the special idle handling. */ -#define STD_EXCEPTION_COMMON_ASYNC(trap, label, hdlr) \ - EXCEPTION_COMMON(trap, label, hdlr, ret_from_except_lite, \ - FINISH_NAP;RUNLATCH_ON;DISABLE_INTS) +#define STD_EXCEPTION_COMMON_IDLE(trap, label, hdlr) \ + .align 7; \ + .globl label##_common; \ +label##_common: \ + EXCEPTION_PROLOG_COMMON(trap, PACA_EXGEN); \ + FINISH_NAP; \ + DISABLE_INTS; \ + bl .save_nvgprs; \ + addi r3,r1,STACK_FRAME_OVERHEAD; \ + bl hdlr; \ + b .ret_from_except + +#define STD_EXCEPTION_COMMON_LITE(trap, label, hdlr) \ + .align 7; \ + .globl label##_common; \ +label##_common: \ + EXCEPTION_PROLOG_COMMON(trap, PACA_EXGEN); \ + FINISH_NAP; \ + DISABLE_INTS; \ +BEGIN_FTR_SECTION \ + bl .ppc64_runlatch_on; \ +END_FTR_SECTION_IFSET(CPU_FTR_CTRL) \ + addi r3,r1,STACK_FRAME_OVERHEAD; \ + bl hdlr; \ + b .ret_from_except_lite /* * When the idle code in power4_idle puts the CPU into NAP mode, diff --git a/trunk/arch/powerpc/include/asm/hw_irq.h b/trunk/arch/powerpc/include/asm/hw_irq.h index 51010bfc792e..bb712c9488b3 100644 --- a/trunk/arch/powerpc/include/asm/hw_irq.h +++ b/trunk/arch/powerpc/include/asm/hw_irq.h @@ -11,27 +11,6 @@ #include #include -#ifdef CONFIG_PPC64 - -/* - * PACA flags in paca->irq_happened. - * - * This bits are set when interrupts occur while soft-disabled - * and allow a proper replay. Additionally, PACA_IRQ_HARD_DIS - * is set whenever we manually hard disable. 
- */ -#define PACA_IRQ_HARD_DIS 0x01 -#define PACA_IRQ_DBELL 0x02 -#define PACA_IRQ_EE 0x04 -#define PACA_IRQ_DEC 0x08 /* Or FIT */ -#define PACA_IRQ_EE_EDGE 0x10 /* BookE only */ - -#endif /* CONFIG_PPC64 */ - -#ifndef __ASSEMBLY__ - -extern void __replay_interrupt(unsigned int vector); - extern void timer_interrupt(struct pt_regs *); #ifdef CONFIG_PPC64 @@ -63,6 +42,7 @@ static inline unsigned long arch_local_irq_disable(void) } extern void arch_local_irq_restore(unsigned long); +extern void iseries_handle_interrupts(void); static inline void arch_local_irq_enable(void) { @@ -88,33 +68,16 @@ static inline bool arch_irqs_disabled(void) #define __hard_irq_enable() asm volatile("wrteei 1" : : : "memory"); #define __hard_irq_disable() asm volatile("wrteei 0" : : : "memory"); #else -#define __hard_irq_enable() __mtmsrd(local_paca->kernel_msr | MSR_EE, 1) -#define __hard_irq_disable() __mtmsrd(local_paca->kernel_msr, 1) +#define __hard_irq_enable() __mtmsrd(mfmsr() | MSR_EE, 1) +#define __hard_irq_disable() __mtmsrd(mfmsr() & ~MSR_EE, 1) #endif -static inline void hard_irq_disable(void) -{ - __hard_irq_disable(); - get_paca()->soft_enabled = 0; - get_paca()->irq_happened |= PACA_IRQ_HARD_DIS; -} - -/* - * This is called by asynchronous interrupts to conditionally - * re-enable hard interrupts when soft-disabled after having - * cleared the source of the interrupt - */ -static inline void may_hard_irq_enable(void) -{ - get_paca()->irq_happened &= ~PACA_IRQ_HARD_DIS; - if (!(get_paca()->irq_happened & PACA_IRQ_EE)) - __hard_irq_enable(); -} - -static inline bool arch_irq_disabled_regs(struct pt_regs *regs) -{ - return !regs->softe; -} +#define hard_irq_disable() \ + do { \ + __hard_irq_disable(); \ + get_paca()->soft_enabled = 0; \ + get_paca()->hard_enabled = 0; \ + } while(0) #else /* CONFIG_PPC64 */ @@ -176,13 +139,6 @@ static inline bool arch_irqs_disabled(void) #define hard_irq_disable() arch_local_irq_disable() -static inline bool arch_irq_disabled_regs(struct pt_regs *regs) -{ - return !(regs->msr & MSR_EE); -} - -static inline void may_hard_irq_enable(void) { } - #endif /* CONFIG_PPC64 */ #define ARCH_IRQ_INIT_FLAGS IRQ_NOREQUEST @@ -193,6 +149,5 @@ static inline void may_hard_irq_enable(void) { } */ struct irq_chip; -#endif /* __ASSEMBLY__ */ #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_HW_IRQ_H */ diff --git a/trunk/arch/powerpc/include/asm/irqflags.h b/trunk/arch/powerpc/include/asm/irqflags.h index 6f9b6e23dc5a..b0b06d85788d 100644 --- a/trunk/arch/powerpc/include/asm/irqflags.h +++ b/trunk/arch/powerpc/include/asm/irqflags.h @@ -39,31 +39,24 @@ #define TRACE_ENABLE_INTS TRACE_WITH_FRAME_BUFFER(.trace_hardirqs_on) #define TRACE_DISABLE_INTS TRACE_WITH_FRAME_BUFFER(.trace_hardirqs_off) -/* - * This is used by assembly code to soft-disable interrupts - */ -#define SOFT_DISABLE_INTS(__rA, __rB) \ - lbz __rA,PACASOFTIRQEN(r13); \ - lbz __rB,PACAIRQHAPPENED(r13); \ - cmpwi cr0,__rA,0; \ - li __rA,0; \ - ori __rB,__rB,PACA_IRQ_HARD_DIS; \ - stb __rB,PACAIRQHAPPENED(r13); \ - beq 44f; \ - stb __rA,PACASOFTIRQEN(r13); \ - TRACE_DISABLE_INTS; \ -44: - +#define TRACE_AND_RESTORE_IRQ_PARTIAL(en,skip) \ + cmpdi en,0; \ + bne 95f; \ + stb en,PACASOFTIRQEN(r13); \ + TRACE_WITH_FRAME_BUFFER(.trace_hardirqs_off) \ + b skip; \ +95: TRACE_WITH_FRAME_BUFFER(.trace_hardirqs_on) \ + li en,1; +#define TRACE_AND_RESTORE_IRQ(en) \ + TRACE_AND_RESTORE_IRQ_PARTIAL(en,96f); \ + stb en,PACASOFTIRQEN(r13); \ +96: #else #define TRACE_ENABLE_INTS #define TRACE_DISABLE_INTS - -#define SOFT_DISABLE_INTS(__rA, 
__rB) \ - lbz __rA,PACAIRQHAPPENED(r13); \ - li __rB,0; \ - ori __rA,__rA,PACA_IRQ_HARD_DIS; \ - stb __rB,PACASOFTIRQEN(r13); \ - stb __rA,PACAIRQHAPPENED(r13) +#define TRACE_AND_RESTORE_IRQ_PARTIAL(en,skip) +#define TRACE_AND_RESTORE_IRQ(en) \ + stb en,PACASOFTIRQEN(r13) #endif #endif diff --git a/trunk/arch/powerpc/include/asm/paca.h b/trunk/arch/powerpc/include/asm/paca.h index daf813fea91f..269c05a36d91 100644 --- a/trunk/arch/powerpc/include/asm/paca.h +++ b/trunk/arch/powerpc/include/asm/paca.h @@ -132,7 +132,7 @@ struct paca_struct { u64 saved_msr; /* MSR saved here by enter_rtas */ u16 trap_save; /* Used when bad stack is encountered */ u8 soft_enabled; /* irq soft-enable flag */ - u8 irq_happened; /* irq happened while soft-disabled */ + u8 hard_enabled; /* set if irqs are enabled in MSR */ u8 io_sync; /* writel() needs spin_unlock sync */ u8 irq_work_pending; /* IRQ_WORK interrupt while soft-disable */ u8 nap_state_lost; /* NV GPR values lost in power7_idle */ diff --git a/trunk/arch/powerpc/include/asm/ppc-pci.h b/trunk/arch/powerpc/include/asm/ppc-pci.h index 6d422979ebaf..221d82fd8231 100644 --- a/trunk/arch/powerpc/include/asm/ppc-pci.h +++ b/trunk/arch/powerpc/include/asm/ppc-pci.h @@ -47,92 +47,27 @@ extern int rtas_setup_phb(struct pci_controller *phb); extern unsigned long pci_probe_only; -/* ---- EEH internal-use-only related routines ---- */ #ifdef CONFIG_EEH void pci_addr_cache_insert_device(struct pci_dev *dev); void pci_addr_cache_remove_device(struct pci_dev *dev); void pci_addr_cache_build(void); struct pci_dev *pci_get_device_by_addr(unsigned long addr); - -/** - * eeh_slot_error_detail -- record and EEH error condition to the log - * @pdn: pci device node - * @severity: EEH_LOG_TEMP_FAILURE or EEH_LOG_PERM_FAILURE - * - * Obtains the EEH error details from the RTAS subsystem, - * and then logs these details with the RTAS error log system. - */ #define EEH_LOG_TEMP_FAILURE 1 #define EEH_LOG_PERM_FAILURE 2 void eeh_slot_error_detail (struct pci_dn *pdn, int severity); - -/** - * rtas_pci_enable - enable IO transfers for this slot - * @pdn: pci device node - * @function: either EEH_THAW_MMIO or EEH_THAW_DMA - * - * Enable I/O transfers to this slot - */ #define EEH_THAW_MMIO 2 #define EEH_THAW_DMA 3 int rtas_pci_enable(struct pci_dn *pdn, int function); - -/** - * rtas_set_slot_reset -- unfreeze a frozen slot - * @pdn: pci device node - * - * Clear the EEH-frozen condition on a slot. This routine - * does this by asserting the PCI #RST line for 1/8th of - * a second; this routine will sleep while the adapter is - * being reset. - * - * Returns a non-zero value if the reset failed. - */ int rtas_set_slot_reset (struct pci_dn *); int eeh_wait_for_slot_status(struct pci_dn *pdn, int max_wait_msecs); - -/** - * eeh_restore_bars - Restore device configuration info. - * @pdn: pci device node - * - * A reset of a PCI device will clear out its config space. - * This routines will restore the config space for this - * device, and is children, to values previously obtained - * from the firmware. - */ void eeh_restore_bars(struct pci_dn *); - -/** - * rtas_configure_bridge -- firmware initialization of pci bridge - * @pdn: pci device node - * - * Ask the firmware to configure all PCI bridges devices - * located behind the indicated node. Required after a - * pci device reset. Does essentially the same hing as - * eeh_restore_bars, but for brdges, and lets firmware - * do the work. 
- */ void rtas_configure_bridge(struct pci_dn *); - int rtas_write_config(struct pci_dn *, int where, int size, u32 val); int rtas_read_config(struct pci_dn *, int where, int size, u32 *val); - -/** - * eeh_mark_slot -- set mode flags for pertition endpoint - * @pdn: pci device node - * - * mark and clear slots: find "partition endpoint" PE and set or - * clear the flags for each subnode of the PE. - */ -void eeh_mark_slot (struct device_node *dn, int mode_flag); -void eeh_clear_slot (struct device_node *dn, int mode_flag); - -/** - * find_device_pe -- Find the associated "Partiationable Endpoint" PE - * @pdn: pci device node - */ -struct device_node * find_device_pe(struct device_node *dn); +void eeh_mark_slot(struct device_node *dn, int mode_flag); +void eeh_clear_slot(struct device_node *dn, int mode_flag); +struct device_node *find_device_pe(struct device_node *dn); void eeh_sysfs_add_device(struct pci_dev *pdev); void eeh_sysfs_remove_device(struct pci_dev *pdev); diff --git a/trunk/arch/powerpc/include/asm/ppc_asm.h b/trunk/arch/powerpc/include/asm/ppc_asm.h index 50f73aa2ba21..368f72f79808 100644 --- a/trunk/arch/powerpc/include/asm/ppc_asm.h +++ b/trunk/arch/powerpc/include/asm/ppc_asm.h @@ -60,8 +60,6 @@ BEGIN_FW_FTR_SECTION; \ cmpd cr1,r11,r10; \ beq+ cr1,33f; \ bl .accumulate_stolen_time; \ - ld r12,_MSR(r1); \ - andi. r10,r12,MSR_PR; /* Restore cr0 (coming from user) */ \ 33: \ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR) diff --git a/trunk/arch/powerpc/include/asm/reg.h b/trunk/arch/powerpc/include/asm/reg.h index b1a215eabef6..7fdc2c0b7fa0 100644 --- a/trunk/arch/powerpc/include/asm/reg.h +++ b/trunk/arch/powerpc/include/asm/reg.h @@ -1079,12 +1079,30 @@ #define proc_trap() asm volatile("trap") -#define __get_SP() ({unsigned long sp; \ - asm volatile("mr %0,1": "=r" (sp)); sp;}) +#ifdef CONFIG_PPC64 + +extern void ppc64_runlatch_on(void); +extern void __ppc64_runlatch_off(void); + +#define ppc64_runlatch_off() \ + do { \ + if (cpu_has_feature(CPU_FTR_CTRL) && \ + test_thread_flag(TIF_RUNLATCH)) \ + __ppc64_runlatch_off(); \ + } while (0) extern unsigned long scom970_read(unsigned int address); extern void scom970_write(unsigned int address, unsigned long value); +#else +#define ppc64_runlatch_on() +#define ppc64_runlatch_off() + +#endif /* CONFIG_PPC64 */ + +#define __get_SP() ({unsigned long sp; \ + asm volatile("mr %0,1": "=r" (sp)); sp;}) + struct pt_regs; extern void ppc_save_regs(struct pt_regs *regs); diff --git a/trunk/arch/powerpc/include/asm/system.h b/trunk/arch/powerpc/include/asm/system.h index a02883d5af43..c377457d1b89 100644 --- a/trunk/arch/powerpc/include/asm/system.h +++ b/trunk/arch/powerpc/include/asm/system.h @@ -550,43 +550,5 @@ extern void reloc_got2(unsigned long); extern struct dentry *powerpc_debugfs_root; -#ifdef CONFIG_PPC64 - -extern void __ppc64_runlatch_on(void); -extern void __ppc64_runlatch_off(void); - -/* - * We manually hard enable-disable, this is called - * in the idle loop and we don't want to mess up - * with soft-disable/enable & interrupt replay. 
- */ -#define ppc64_runlatch_off() \ - do { \ - if (cpu_has_feature(CPU_FTR_CTRL) && \ - test_thread_local_flags(_TLF_RUNLATCH)) { \ - unsigned long msr = mfmsr(); \ - __hard_irq_disable(); \ - __ppc64_runlatch_off(); \ - if (msr & MSR_EE) \ - __hard_irq_enable(); \ - } \ - } while (0) - -#define ppc64_runlatch_on() \ - do { \ - if (cpu_has_feature(CPU_FTR_CTRL) && \ - !test_thread_local_flags(_TLF_RUNLATCH)) { \ - unsigned long msr = mfmsr(); \ - __hard_irq_disable(); \ - __ppc64_runlatch_on(); \ - if (msr & MSR_EE) \ - __hard_irq_enable(); \ - } \ - } while (0) -#else -#define ppc64_runlatch_on() -#define ppc64_runlatch_off() -#endif /* CONFIG_PPC64 */ - #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_SYSTEM_H */ diff --git a/trunk/arch/powerpc/include/asm/thread_info.h b/trunk/arch/powerpc/include/asm/thread_info.h index 4a741c7efd02..964714940961 100644 --- a/trunk/arch/powerpc/include/asm/thread_info.h +++ b/trunk/arch/powerpc/include/asm/thread_info.h @@ -110,6 +110,7 @@ static inline struct thread_info *current_thread_info(void) #define TIF_NOERROR 12 /* Force successful syscall return */ #define TIF_NOTIFY_RESUME 13 /* callback before returning to user */ #define TIF_SYSCALL_TRACEPOINT 15 /* syscall tracepoint instrumentation */ +#define TIF_RUNLATCH 16 /* Is the runlatch enabled? */ /* as above, but as bit values */ #define _TIF_SYSCALL_TRACE (1<flags); } -static inline bool test_thread_local_flags(unsigned int flags) -{ - struct thread_info *ti = current_thread_info(); - return (ti->local_flags & flags) != 0; -} - #ifdef CONFIG_PPC64 #define is_32bit_task() (test_thread_flag(TIF_32BIT)) #else diff --git a/trunk/arch/powerpc/kernel/asm-offsets.c b/trunk/arch/powerpc/kernel/asm-offsets.c index cdd0d264415f..04caee7d9bc1 100644 --- a/trunk/arch/powerpc/kernel/asm-offsets.c +++ b/trunk/arch/powerpc/kernel/asm-offsets.c @@ -147,7 +147,7 @@ int main(void) DEFINE(PACAKBASE, offsetof(struct paca_struct, kernelbase)); DEFINE(PACAKMSR, offsetof(struct paca_struct, kernel_msr)); DEFINE(PACASOFTIRQEN, offsetof(struct paca_struct, soft_enabled)); - DEFINE(PACAIRQHAPPENED, offsetof(struct paca_struct, irq_happened)); + DEFINE(PACAHARDIRQEN, offsetof(struct paca_struct, hard_enabled)); DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id)); #ifdef CONFIG_PPC_MM_SLICES DEFINE(PACALOWSLICESPSIZE, offsetof(struct paca_struct, diff --git a/trunk/arch/powerpc/kernel/dbell.c b/trunk/arch/powerpc/kernel/dbell.c index 5b25c8060fd6..2cc451aaaca7 100644 --- a/trunk/arch/powerpc/kernel/dbell.c +++ b/trunk/arch/powerpc/kernel/dbell.c @@ -37,8 +37,6 @@ void doorbell_exception(struct pt_regs *regs) irq_enter(); - may_hard_irq_enable(); - smp_ipi_demux(); irq_exit(); diff --git a/trunk/arch/powerpc/kernel/entry_64.S b/trunk/arch/powerpc/kernel/entry_64.S index f8a7a1a1a9f4..866462cbe2d8 100644 --- a/trunk/arch/powerpc/kernel/entry_64.S +++ b/trunk/arch/powerpc/kernel/entry_64.S @@ -32,7 +32,6 @@ #include #include #include -#include /* * System calls. @@ -116,33 +115,39 @@ BEGIN_FW_FTR_SECTION END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR) #endif /* CONFIG_VIRT_CPU_ACCOUNTING && CONFIG_PPC_SPLPAR */ - /* - * A syscall should always be called with interrupts enabled - * so we just unconditionally hard-enable here. 
When some kind - * of irq tracing is used, we additionally check that condition - * is correct - */ -#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_BUG) - lbz r10,PACASOFTIRQEN(r13) - xori r10,r10,1 -1: tdnei r10,0 - EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,BUGFLAG_WARNING -#endif +#ifdef CONFIG_TRACE_IRQFLAGS + bl .trace_hardirqs_on + REST_GPR(0,r1) + REST_4GPRS(3,r1) + REST_2GPRS(7,r1) + addi r9,r1,STACK_FRAME_OVERHEAD + ld r12,_MSR(r1) +#endif /* CONFIG_TRACE_IRQFLAGS */ + li r10,1 + stb r10,PACASOFTIRQEN(r13) + stb r10,PACAHARDIRQEN(r13) + std r10,SOFTE(r1) +#ifdef CONFIG_PPC_ISERIES +BEGIN_FW_FTR_SECTION + /* Hack for handling interrupts when soft-enabling on iSeries */ + cmpdi cr1,r0,0x5555 /* syscall 0x5555 */ + andi. r10,r12,MSR_PR /* from kernel */ + crand 4*cr0+eq,4*cr1+eq,4*cr0+eq + bne 2f + b hardware_interrupt_entry +2: +END_FW_FTR_SECTION_IFSET(FW_FEATURE_ISERIES) +#endif /* CONFIG_PPC_ISERIES */ + /* Hard enable interrupts */ #ifdef CONFIG_PPC_BOOK3E wrteei 1 #else - ld r11,PACAKMSR(r13) + mfmsr r11 ori r11,r11,MSR_EE mtmsrd r11,1 #endif /* CONFIG_PPC_BOOK3E */ - /* We do need to set SOFTE in the stack frame or the return - * from interrupt will be painful - */ - li r10,1 - std r10,SOFTE(r1) - #ifdef SHOW_SYSCALLS bl .do_show_syscall REST_GPR(0,r1) @@ -193,14 +198,16 @@ syscall_exit: andi. r10,r8,MSR_RI beq- unrecov_restore #endif - /* - * Disable interrupts so current_thread_info()->flags can't change, + + /* Disable interrupts so current_thread_info()->flags can't change, * and so that we don't get interrupted after loading SRR0/1. */ #ifdef CONFIG_PPC_BOOK3E wrteei 0 #else - ld r10,PACAKMSR(r13) + mfmsr r10 + rldicl r10,r10,48,1 + rotldi r10,r10,16 mtmsrd r10,1 #endif /* CONFIG_PPC_BOOK3E */ @@ -312,7 +319,7 @@ syscall_exit_work: #ifdef CONFIG_PPC_BOOK3E wrteei 1 #else - ld r10,PACAKMSR(r13) + mfmsr r10 ori r10,r10,MSR_EE mtmsrd r10,1 #endif /* CONFIG_PPC_BOOK3E */ @@ -558,8 +565,10 @@ _GLOBAL(ret_from_except_lite) #ifdef CONFIG_PPC_BOOK3E wrteei 0 #else - ld r10,PACAKMSR(r13) /* Get kernel MSR without EE */ - mtmsrd r10,1 /* Update machine state */ + mfmsr r10 /* Get current interrupt state */ + rldicl r9,r10,48,1 /* clear MSR_EE */ + rotldi r9,r9,16 + mtmsrd r9,1 /* Update machine state */ #endif /* CONFIG_PPC_BOOK3E */ #ifdef CONFIG_PREEMPT @@ -582,74 +591,25 @@ _GLOBAL(ret_from_except_lite) ld r4,TI_FLAGS(r9) andi. r0,r4,_TIF_USER_WORK_MASK bne do_work -#endif /* !CONFIG_PREEMPT */ +#endif - .globl fast_exc_return_irq -fast_exc_return_irq: restore: - /* - * This is the main kernel exit path, we first check if we - * have to change our interrupt state. - */ +BEGIN_FW_FTR_SECTION ld r5,SOFTE(r1) - lbz r6,PACASOFTIRQEN(r13) - cmpwi cr1,r5,0 - cmpw cr0,r5,r6 - beq cr0,4f - - /* We do, handle disable first, which is easy */ - bne cr1,3f; - li r0,0 - stb r0,PACASOFTIRQEN(r13); - TRACE_DISABLE_INTS - b 4f - -3: /* - * We are about to soft-enable interrupts (we are hard disabled - * at this point). We check if there's anything that needs to - * be replayed first. - */ - lbz r0,PACAIRQHAPPENED(r13) - cmpwi cr0,r0,0 - bne- restore_check_irq_replay +FW_FTR_SECTION_ELSE + b .Liseries_check_pending_irqs +ALT_FW_FTR_SECTION_END_IFCLR(FW_FEATURE_ISERIES) +2: + TRACE_AND_RESTORE_IRQ(r5); - /* - * Get here when nothing happened while soft-disabled, just - * soft-enable and move-on. 
We will hard-enable as a side - * effect of rfi - */ -restore_no_replay: - TRACE_ENABLE_INTS - li r0,1 - stb r0,PACASOFTIRQEN(r13); + /* extract EE bit and use it to restore paca->hard_enabled */ + ld r3,_MSR(r1) + rldicl r4,r3,49,63 /* r0 = (r3 >> 15) & 1 */ + stb r4,PACAHARDIRQEN(r13) - /* - * Final return path. BookE is handled in a different file - */ -4: #ifdef CONFIG_PPC_BOOK3E b .exception_return_book3e #else - /* - * Clear the reservation. If we know the CPU tracks the address of - * the reservation then we can potentially save some cycles and use - * a larx. On POWER6 and POWER7 this is significantly faster. - */ -BEGIN_FTR_SECTION - stdcx. r0,0,r1 /* to clear the reservation */ -FTR_SECTION_ELSE - ldarx r4,0,r1 -ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) - - /* - * Some code path such as load_up_fpu or altivec return directly - * here. They run entirely hard disabled and do not alter the - * interrupt state. They also don't use lwarx/stwcx. and thus - * are known not to leave dangling reservations. - */ - .globl fast_exception_return -fast_exception_return: - ld r3,_MSR(r1) ld r4,_CTR(r1) ld r0,_LINK(r1) mtctr r4 @@ -662,19 +622,29 @@ fast_exception_return: andi. r0,r3,MSR_RI beq- unrecov_restore + /* + * Clear the reservation. If we know the CPU tracks the address of + * the reservation then we can potentially save some cycles and use + * a larx. On POWER6 and POWER7 this is significantly faster. + */ +BEGIN_FTR_SECTION + stdcx. r0,0,r1 /* to clear the reservation */ +FTR_SECTION_ELSE + ldarx r4,0,r1 +ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) + /* * Clear RI before restoring r13. If we are returning to * userspace and we take an exception after restoring r13, * we end up corrupting the userspace r13 value. */ - ld r4,PACAKMSR(r13) /* Get kernel MSR without EE */ - andc r4,r4,r0 /* r0 contains MSR_RI here */ + mfmsr r4 + andc r4,r4,r0 /* r0 contains MSR_RI here */ mtmsrd r4,1 /* * r13 is our per cpu area, only restore it if we are returning to - * userspace the value stored in the stack frame may belong to - * another CPU. + * userspace */ andi. r0,r3,MSR_PR beq 1f @@ -699,55 +669,30 @@ fast_exception_return: #endif /* CONFIG_PPC_BOOK3E */ - /* - * Something did happen, check if a re-emit is needed - * (this also clears paca->irq_happened) - */ -restore_check_irq_replay: - /* XXX: We could implement a fast path here where we check - * for irq_happened being just 0x01, in which case we can - * clear it and return. That means that we would potentially - * miss a decrementer having wrapped all the way around. - * - * Still, this might be useful for things like hash_page - */ - bl .__check_irq_replay - cmpwi cr0,r3,0 - beq restore_no_replay - - /* - * We need to re-emit an interrupt. We do so by re-using our - * existing exception frame. 
We first change the trap value, - * but we need to ensure we preserve the low nibble of it - */ - ld r4,_TRAP(r1) - clrldi r4,r4,60 - or r4,r4,r3 - std r4,_TRAP(r1) +.Liseries_check_pending_irqs: +#ifdef CONFIG_PPC_ISERIES + ld r5,SOFTE(r1) + cmpdi 0,r5,0 + beq 2b + /* Check for pending interrupts (iSeries) */ + ld r3,PACALPPACAPTR(r13) + ld r3,LPPACAANYINT(r3) + cmpdi r3,0 + beq+ 2b /* skip do_IRQ if no interrupts */ + + li r3,0 + stb r3,PACASOFTIRQEN(r13) /* ensure we are soft-disabled */ +#ifdef CONFIG_TRACE_IRQFLAGS + bl .trace_hardirqs_off + mfmsr r10 +#endif + ori r10,r10,MSR_EE + mtmsrd r10 /* hard-enable again */ + addi r3,r1,STACK_FRAME_OVERHEAD + bl .do_IRQ + b .ret_from_except_lite /* loop back and handle more */ +#endif - /* - * Then find the right handler and call it. Interrupts are - * still soft-disabled and we keep them that way. - */ - cmpwi cr0,r3,0x500 - bne 1f - addi r3,r1,STACK_FRAME_OVERHEAD; - bl .do_IRQ - b .ret_from_except -1: cmpwi cr0,r3,0x900 - bne 1f - addi r3,r1,STACK_FRAME_OVERHEAD; - bl .timer_interrupt - b .ret_from_except -#ifdef CONFIG_PPC_BOOK3E -1: cmpwi cr0,r3,0x280 - bne 1f - addi r3,r1,STACK_FRAME_OVERHEAD; - bl .doorbell_exception - b .ret_from_except -#endif /* CONFIG_PPC_BOOK3E */ -1: b .ret_from_except /* What else to do here ? */ - do_work: #ifdef CONFIG_PREEMPT andi. r0,r3,MSR_PR /* Returning to user mode? */ @@ -760,22 +705,31 @@ do_work: crandc eq,cr1*4+eq,eq bne restore - /* - * Here we are preempting the current task. We want to make - * sure we are soft-disabled first + /* Here we are preempting the current task. + * + * Ensure interrupts are soft-disabled. We also properly mark + * the PACA to reflect the fact that they are hard-disabled + * and trace the change */ - SOFT_DISABLE_INTS(r3,r4) + li r0,0 + stb r0,PACASOFTIRQEN(r13) + stb r0,PACAHARDIRQEN(r13) + TRACE_DISABLE_INTS + + /* Call the scheduler with soft IRQs off */ 1: bl .preempt_schedule_irq /* Hard-disable interrupts again (and update PACA) */ #ifdef CONFIG_PPC_BOOK3E wrteei 0 #else - ld r10,PACAKMSR(r13) /* Get kernel MSR without EE */ + mfmsr r10 + rldicl r10,r10,48,1 + rotldi r10,r10,16 mtmsrd r10,1 #endif /* CONFIG_PPC_BOOK3E */ - li r0,PACA_IRQ_HARD_DIS - stb r0,PACAIRQHAPPENED(r13) + li r0,0 + stb r0,PACAHARDIRQEN(r13) /* Re-test flags and eventually loop */ clrrdi r9,r1,THREAD_SHIFT @@ -797,12 +751,14 @@ user_work: andi. r0,r4,_TIF_NEED_RESCHED beq 1f - bl .restore_interrupts + li r5,1 + TRACE_AND_RESTORE_IRQ(r5); bl .schedule b .ret_from_except_lite 1: bl .save_nvgprs - bl .restore_interrupts + li r5,1 + TRACE_AND_RESTORE_IRQ(r5); addi r3,r1,STACK_FRAME_OVERHEAD bl .do_notify_resume b .ret_from_except diff --git a/trunk/arch/powerpc/kernel/exceptions-64e.S b/trunk/arch/powerpc/kernel/exceptions-64e.S index 7215cc2495df..429983c06f91 100644 --- a/trunk/arch/powerpc/kernel/exceptions-64e.S +++ b/trunk/arch/powerpc/kernel/exceptions-64e.S @@ -24,7 +24,6 @@ #include #include #include -#include /* XXX This will ultimately add space for a special exception save * structure used to save things like SRR0/SRR1, SPRGs, MAS, etc... 
@@ -78,55 +77,59 @@ #define SPRN_MC_SRR1 SPRN_MCSRR1 #define NORMAL_EXCEPTION_PROLOG(n, addition) \ - EXCEPTION_PROLOG(n, GEN, addition##_GEN(n)) + EXCEPTION_PROLOG(n, GEN, addition##_GEN) #define CRIT_EXCEPTION_PROLOG(n, addition) \ - EXCEPTION_PROLOG(n, CRIT, addition##_CRIT(n)) + EXCEPTION_PROLOG(n, CRIT, addition##_CRIT) #define DBG_EXCEPTION_PROLOG(n, addition) \ - EXCEPTION_PROLOG(n, DBG, addition##_DBG(n)) + EXCEPTION_PROLOG(n, DBG, addition##_DBG) #define MC_EXCEPTION_PROLOG(n, addition) \ - EXCEPTION_PROLOG(n, MC, addition##_MC(n)) + EXCEPTION_PROLOG(n, MC, addition##_MC) /* Variants of the "addition" argument for the prolog */ -#define PROLOG_ADDITION_NONE_GEN(n) -#define PROLOG_ADDITION_NONE_CRIT(n) -#define PROLOG_ADDITION_NONE_DBG(n) -#define PROLOG_ADDITION_NONE_MC(n) +#define PROLOG_ADDITION_NONE_GEN +#define PROLOG_ADDITION_NONE_CRIT +#define PROLOG_ADDITION_NONE_DBG +#define PROLOG_ADDITION_NONE_MC -#define PROLOG_ADDITION_MASKABLE_GEN(n) \ +#define PROLOG_ADDITION_MASKABLE_GEN \ lbz r11,PACASOFTIRQEN(r13); /* are irqs soft-disabled ? */ \ cmpwi cr0,r11,0; /* yes -> go out of line */ \ - beq masked_interrupt_book3e_##n + beq masked_interrupt_book3e; -#define PROLOG_ADDITION_2REGS_GEN(n) \ +#define PROLOG_ADDITION_2REGS_GEN \ std r14,PACA_EXGEN+EX_R14(r13); \ std r15,PACA_EXGEN+EX_R15(r13) -#define PROLOG_ADDITION_1REG_GEN(n) \ +#define PROLOG_ADDITION_1REG_GEN \ std r14,PACA_EXGEN+EX_R14(r13); -#define PROLOG_ADDITION_2REGS_CRIT(n) \ +#define PROLOG_ADDITION_2REGS_CRIT \ std r14,PACA_EXCRIT+EX_R14(r13); \ std r15,PACA_EXCRIT+EX_R15(r13) -#define PROLOG_ADDITION_2REGS_DBG(n) \ +#define PROLOG_ADDITION_2REGS_DBG \ std r14,PACA_EXDBG+EX_R14(r13); \ std r15,PACA_EXDBG+EX_R15(r13) -#define PROLOG_ADDITION_2REGS_MC(n) \ +#define PROLOG_ADDITION_2REGS_MC \ std r14,PACA_EXMC+EX_R14(r13); \ std r15,PACA_EXMC+EX_R15(r13) +#define PROLOG_ADDITION_DOORBELL_GEN \ + lbz r11,PACASOFTIRQEN(r13); /* are irqs soft-disabled ? */ \ + cmpwi cr0,r11,0; /* yes -> go out of line */ \ + beq masked_doorbell_book3e + /* Core exception code for all exceptions except TLB misses. * XXX: Needs to make SPRN_SPRG_GEN depend on exception type */ #define EXCEPTION_COMMON(n, excf, ints) \ -exc_##n##_common: \ std r0,GPR0(r1); /* save r0 in stackframe */ \ std r2,GPR2(r1); /* save r2 in stackframe */ \ SAVE_4GPRS(3, r1); /* save r3 - r6 in stackframe */ \ @@ -164,21 +167,20 @@ exc_##n##_common: \ std r0,RESULT(r1); /* clear regs->result */ \ ints; -/* Variants for the "ints" argument. This one does nothing when we want - * to keep interrupts in their original state - */ +/* Variants for the "ints" argument */ #define INTS_KEEP - -/* This second version is meant for exceptions that don't immediately - * hard-enable. We set a bit in paca->irq_happened to ensure that - * a subsequent call to arch_local_irq_restore() will properly - * hard-enable and avoid the fast-path - */ -#define INTS_DISABLE SOFT_DISABLE_INTS(r3,r4) - -/* This is called by exceptions that used INTS_KEEP (that did not touch - * irq indicators in the PACA). This will restore MSR:EE to it's previous - * value +#define INTS_DISABLE_SOFT \ + stb r0,PACASOFTIRQEN(r13); /* mark interrupts soft-disabled */ \ + TRACE_DISABLE_INTS; +#define INTS_DISABLE_HARD \ + stb r0,PACAHARDIRQEN(r13); /* and hard disabled */ +#define INTS_DISABLE_ALL \ + INTS_DISABLE_SOFT \ + INTS_DISABLE_HARD + +/* This is called by exceptions that used INTS_KEEP (that is did not clear + * neither soft nor hard IRQ indicators in the PACA. 
This will restore MSR:EE + * to it's previous value * * XXX In the long run, we may want to open-code it in order to separate the * load from the wrtee, thus limiting the latency caused by the dependency @@ -236,7 +238,7 @@ exc_##n##_bad_stack: \ #define MASKABLE_EXCEPTION(trapnum, label, hdlr, ack) \ START_EXCEPTION(label); \ NORMAL_EXCEPTION_PROLOG(trapnum, PROLOG_ADDITION_MASKABLE) \ - EXCEPTION_COMMON(trapnum, PACA_EXGEN, INTS_DISABLE) \ + EXCEPTION_COMMON(trapnum, PACA_EXGEN, INTS_DISABLE_ALL) \ ack(r8); \ CHECK_NAPPING(); \ addi r3,r1,STACK_FRAME_OVERHEAD; \ @@ -287,7 +289,7 @@ interrupt_end_book3e: /* Critical Input Interrupt */ START_EXCEPTION(critical_input); CRIT_EXCEPTION_PROLOG(0x100, PROLOG_ADDITION_NONE) -// EXCEPTION_COMMON(0x100, PACA_EXCRIT, INTS_DISABLE) +// EXCEPTION_COMMON(0x100, PACA_EXCRIT, INTS_DISABLE_ALL) // bl special_reg_save_crit // CHECK_NAPPING(); // addi r3,r1,STACK_FRAME_OVERHEAD @@ -298,7 +300,7 @@ interrupt_end_book3e: /* Machine Check Interrupt */ START_EXCEPTION(machine_check); CRIT_EXCEPTION_PROLOG(0x200, PROLOG_ADDITION_NONE) -// EXCEPTION_COMMON(0x200, PACA_EXMC, INTS_DISABLE) +// EXCEPTION_COMMON(0x200, PACA_EXMC, INTS_DISABLE_ALL) // bl special_reg_save_mc // addi r3,r1,STACK_FRAME_OVERHEAD // CHECK_NAPPING(); @@ -311,7 +313,7 @@ interrupt_end_book3e: NORMAL_EXCEPTION_PROLOG(0x300, PROLOG_ADDITION_2REGS) mfspr r14,SPRN_DEAR mfspr r15,SPRN_ESR - EXCEPTION_COMMON(0x300, PACA_EXGEN, INTS_DISABLE) + EXCEPTION_COMMON(0x300, PACA_EXGEN, INTS_KEEP) b storage_fault_common /* Instruction Storage Interrupt */ @@ -319,7 +321,7 @@ interrupt_end_book3e: NORMAL_EXCEPTION_PROLOG(0x400, PROLOG_ADDITION_2REGS) li r15,0 mr r14,r10 - EXCEPTION_COMMON(0x400, PACA_EXGEN, INTS_DISABLE) + EXCEPTION_COMMON(0x400, PACA_EXGEN, INTS_KEEP) b storage_fault_common /* External Input Interrupt */ @@ -337,11 +339,12 @@ interrupt_end_book3e: START_EXCEPTION(program); NORMAL_EXCEPTION_PROLOG(0x700, PROLOG_ADDITION_1REG) mfspr r14,SPRN_ESR - EXCEPTION_COMMON(0x700, PACA_EXGEN, INTS_DISABLE) + EXCEPTION_COMMON(0x700, PACA_EXGEN, INTS_DISABLE_SOFT) std r14,_DSISR(r1) addi r3,r1,STACK_FRAME_OVERHEAD ld r14,PACA_EXGEN+EX_R14(r13) bl .save_nvgprs + INTS_RESTORE_HARD bl .program_check_exception b .ret_from_except @@ -350,16 +353,15 @@ interrupt_end_book3e: NORMAL_EXCEPTION_PROLOG(0x800, PROLOG_ADDITION_NONE) /* we can probably do a shorter exception entry for that one... */ EXCEPTION_COMMON(0x800, PACA_EXGEN, INTS_KEEP) - ld r12,_MSR(r1) - andi. 
r0,r12,MSR_PR; - beq- 1f - bl .load_up_fpu - b fast_exception_return -1: INTS_DISABLE + bne 1f /* if from user, just load it up */ bl .save_nvgprs addi r3,r1,STACK_FRAME_OVERHEAD + INTS_RESTORE_HARD bl .kernel_fp_unavailable_exception - b .ret_from_except + BUG_OPCODE +1: ld r12,_MSR(r1) + bl .load_up_fpu + b fast_exception_return /* Decrementer Interrupt */ MASKABLE_EXCEPTION(0x900, decrementer, .timer_interrupt, ACK_DEC) @@ -370,7 +372,7 @@ interrupt_end_book3e: /* Watchdog Timer Interrupt */ START_EXCEPTION(watchdog); CRIT_EXCEPTION_PROLOG(0x9f0, PROLOG_ADDITION_NONE) -// EXCEPTION_COMMON(0x9f0, PACA_EXCRIT, INTS_DISABLE) +// EXCEPTION_COMMON(0x9f0, PACA_EXCRIT, INTS_DISABLE_ALL) // bl special_reg_save_crit // CHECK_NAPPING(); // addi r3,r1,STACK_FRAME_OVERHEAD @@ -389,9 +391,10 @@ interrupt_end_book3e: /* Auxiliary Processor Unavailable Interrupt */ START_EXCEPTION(ap_unavailable); NORMAL_EXCEPTION_PROLOG(0xf20, PROLOG_ADDITION_NONE) - EXCEPTION_COMMON(0xf20, PACA_EXGEN, INTS_DISABLE) - bl .save_nvgprs + EXCEPTION_COMMON(0xf20, PACA_EXGEN, INTS_KEEP) addi r3,r1,STACK_FRAME_OVERHEAD + bl .save_nvgprs + INTS_RESTORE_HARD bl .unknown_exception b .ret_from_except @@ -447,7 +450,7 @@ interrupt_end_book3e: mfspr r15,SPRN_SPRG_CRIT_SCRATCH mtspr SPRN_SPRG_GEN_SCRATCH,r15 mfspr r14,SPRN_DBSR - EXCEPTION_COMMON(0xd00, PACA_EXCRIT, INTS_DISABLE) + EXCEPTION_COMMON(0xd00, PACA_EXCRIT, INTS_DISABLE_ALL) std r14,_DSISR(r1) addi r3,r1,STACK_FRAME_OVERHEAD mr r4,r14 @@ -462,7 +465,7 @@ kernel_dbg_exc: /* Debug exception as a debug interrupt*/ START_EXCEPTION(debug_debug); - DBG_EXCEPTION_PROLOG(0xd08, PROLOG_ADDITION_2REGS) + DBG_EXCEPTION_PROLOG(0xd00, PROLOG_ADDITION_2REGS) /* * If there is a single step or branch-taken exception in an @@ -512,7 +515,7 @@ kernel_dbg_exc: mfspr r15,SPRN_SPRG_DBG_SCRATCH mtspr SPRN_SPRG_GEN_SCRATCH,r15 mfspr r14,SPRN_DBSR - EXCEPTION_COMMON(0xd08, PACA_EXDBG, INTS_DISABLE) + EXCEPTION_COMMON(0xd00, PACA_EXDBG, INTS_DISABLE_ALL) std r14,_DSISR(r1) addi r3,r1,STACK_FRAME_OVERHEAD mr r4,r14 @@ -522,20 +525,21 @@ kernel_dbg_exc: bl .DebugException b .ret_from_except - START_EXCEPTION(perfmon); - NORMAL_EXCEPTION_PROLOG(0x260, PROLOG_ADDITION_NONE) - EXCEPTION_COMMON(0x260, PACA_EXGEN, INTS_DISABLE) - addi r3,r1,STACK_FRAME_OVERHEAD - bl .performance_monitor_exception - b .ret_from_except_lite + MASKABLE_EXCEPTION(0x260, perfmon, .performance_monitor_exception, ACK_NONE) /* Doorbell interrupt */ - MASKABLE_EXCEPTION(0x280, doorbell, .doorbell_exception, ACK_NONE) + START_EXCEPTION(doorbell) + NORMAL_EXCEPTION_PROLOG(0x2070, PROLOG_ADDITION_DOORBELL) + EXCEPTION_COMMON(0x2070, PACA_EXGEN, INTS_DISABLE_ALL) + CHECK_NAPPING() + addi r3,r1,STACK_FRAME_OVERHEAD + bl .doorbell_exception + b .ret_from_except_lite /* Doorbell critical Interrupt */ START_EXCEPTION(doorbell_crit); - CRIT_EXCEPTION_PROLOG(0x2a0, PROLOG_ADDITION_NONE) -// EXCEPTION_COMMON(0x2a0, PACA_EXCRIT, INTS_DISABLE) + CRIT_EXCEPTION_PROLOG(0x2080, PROLOG_ADDITION_NONE) +// EXCEPTION_COMMON(0x2080, PACA_EXCRIT, INTS_DISABLE_ALL) // bl special_reg_save_crit // CHECK_NAPPING(); // addi r3,r1,STACK_FRAME_OVERHEAD @@ -543,114 +547,36 @@ kernel_dbg_exc: // b ret_from_crit_except b . 
-/* Guest Doorbell */ MASKABLE_EXCEPTION(0x2c0, guest_doorbell, .unknown_exception, ACK_NONE) + MASKABLE_EXCEPTION(0x2e0, guest_doorbell_crit, .unknown_exception, ACK_NONE) + MASKABLE_EXCEPTION(0x310, hypercall, .unknown_exception, ACK_NONE) + MASKABLE_EXCEPTION(0x320, ehpriv, .unknown_exception, ACK_NONE) -/* Guest Doorbell critical Interrupt */ - START_EXCEPTION(guest_doorbell_crit); - CRIT_EXCEPTION_PROLOG(0x2e0, PROLOG_ADDITION_NONE) -// EXCEPTION_COMMON(0x2e0, PACA_EXCRIT, INTS_DISABLE) -// bl special_reg_save_crit -// CHECK_NAPPING(); -// addi r3,r1,STACK_FRAME_OVERHEAD -// bl .guest_doorbell_critical_exception -// b ret_from_crit_except - b . - -/* Hypervisor call */ - START_EXCEPTION(hypercall); - NORMAL_EXCEPTION_PROLOG(0x310, PROLOG_ADDITION_NONE) - EXCEPTION_COMMON(0x310, PACA_EXGEN, INTS_KEEP) - addi r3,r1,STACK_FRAME_OVERHEAD - bl .save_nvgprs - INTS_RESTORE_HARD - bl .unknown_exception - b .ret_from_except - -/* Embedded Hypervisor priviledged */ - START_EXCEPTION(ehpriv); - NORMAL_EXCEPTION_PROLOG(0x320, PROLOG_ADDITION_NONE) - EXCEPTION_COMMON(0x320, PACA_EXGEN, INTS_KEEP) - addi r3,r1,STACK_FRAME_OVERHEAD - bl .save_nvgprs - INTS_RESTORE_HARD - bl .unknown_exception - b .ret_from_except /* - * An interrupt came in while soft-disabled; We mark paca->irq_happened - * accordingly and if the interrupt is level sensitive, we hard disable + * An interrupt came in while soft-disabled; clear EE in SRR1, + * clear paca->hard_enabled and return. */ - -masked_interrupt_book3e_0x500: - /* XXX When adding support for EPR, use PACA_IRQ_EE_EDGE */ - li r11,PACA_IRQ_EE - b masked_interrupt_book3e_full_mask - -masked_interrupt_book3e_0x900: - ACK_DEC(r11); - li r11,PACA_IRQ_DEC - b masked_interrupt_book3e_no_mask -masked_interrupt_book3e_0x980: - ACK_FIT(r11); - li r11,PACA_IRQ_DEC - b masked_interrupt_book3e_no_mask -masked_interrupt_book3e_0x280: -masked_interrupt_book3e_0x2c0: - li r11,PACA_IRQ_DBELL - b masked_interrupt_book3e_no_mask - -masked_interrupt_book3e_no_mask: +masked_doorbell_book3e: mtcr r10 - lbz r10,PACAIRQHAPPENED(r13) - or r10,r10,r11 - stb r10,PACAIRQHAPPENED(r13) - b 1f -masked_interrupt_book3e_full_mask: + /* Resend the doorbell to fire again when ints enabled */ + mfspr r10,SPRN_PIR + PPC_MSGSND(r10) + b masked_interrupt_book3e_common + +masked_interrupt_book3e: mtcr r10 - lbz r10,PACAIRQHAPPENED(r13) - or r10,r10,r11 - stb r10,PACAIRQHAPPENED(r13) +masked_interrupt_book3e_common: + stb r11,PACAHARDIRQEN(r13) mfspr r10,SPRN_SRR1 rldicl r11,r10,48,1 /* clear MSR_EE */ rotldi r10,r11,16 mtspr SPRN_SRR1,r10 -1: ld r10,PACA_EXGEN+EX_R10(r13); + ld r10,PACA_EXGEN+EX_R10(r13); /* restore registers */ ld r11,PACA_EXGEN+EX_R11(r13); mfspr r13,SPRN_SPRG_GEN_SCRATCH; rfi b . -/* - * Called from arch_local_irq_enable when an interrupt needs - * to be resent. r3 contains either 0x500,0x900,0x260 or 0x280 - * to indicate the kind of interrupt. MSR:EE is already off. - * We generate a stackframe like if a real interrupt had happened. - * - * Note: While MSR:EE is off, we need to make sure that _MSR - * in the generated frame has EE set to 1 or the exception - * handler will not properly re-enable them. - */ -_GLOBAL(__replay_interrupt) - /* We are going to jump to the exception common code which - * will retrieve various register values from the PACA which - * we don't give a damn about. 
- */ - mflr r10 - mfmsr r11 - mfcr r4 - mtspr SPRN_SPRG_GEN_SCRATCH,r13; - std r1,PACA_EXGEN+EX_R1(r13); - stw r4,PACA_EXGEN+EX_CR(r13); - ori r11,r11,MSR_EE - subi r1,r1,INT_FRAME_SIZE; - cmpwi cr0,r3,0x500 - beq exc_0x500_common - cmpwi cr0,r3,0x900 - beq exc_0x900_common - cmpwi cr0,r3,0x280 - beq exc_0x280_common - blr - /* * This is called from 0x300 and 0x400 handlers after the prologs with @@ -665,6 +591,7 @@ storage_fault_common: mr r5,r15 ld r14,PACA_EXGEN+EX_R14(r13) ld r15,PACA_EXGEN+EX_R15(r13) + INTS_RESTORE_HARD bl .do_page_fault cmpdi r3,0 bne- 1f @@ -753,8 +680,6 @@ BAD_STACK_TRAMPOLINE(0x000) BAD_STACK_TRAMPOLINE(0x100) BAD_STACK_TRAMPOLINE(0x200) BAD_STACK_TRAMPOLINE(0x260) -BAD_STACK_TRAMPOLINE(0x280) -BAD_STACK_TRAMPOLINE(0x2a0) BAD_STACK_TRAMPOLINE(0x2c0) BAD_STACK_TRAMPOLINE(0x2e0) BAD_STACK_TRAMPOLINE(0x300) @@ -772,10 +697,11 @@ BAD_STACK_TRAMPOLINE(0xa00) BAD_STACK_TRAMPOLINE(0xb00) BAD_STACK_TRAMPOLINE(0xc00) BAD_STACK_TRAMPOLINE(0xd00) -BAD_STACK_TRAMPOLINE(0xd08) BAD_STACK_TRAMPOLINE(0xe00) BAD_STACK_TRAMPOLINE(0xf00) BAD_STACK_TRAMPOLINE(0xf20) +BAD_STACK_TRAMPOLINE(0x2070) +BAD_STACK_TRAMPOLINE(0x2080) .globl bad_stack_book3e bad_stack_book3e: diff --git a/trunk/arch/powerpc/kernel/exceptions-64s.S b/trunk/arch/powerpc/kernel/exceptions-64s.S index 2d0868a4e2f0..15c5a4f6de01 100644 --- a/trunk/arch/powerpc/kernel/exceptions-64s.S +++ b/trunk/arch/powerpc/kernel/exceptions-64s.S @@ -12,7 +12,6 @@ * */ -#include #include #include @@ -20,7 +19,7 @@ * We layout physical memory as follows: * 0x0000 - 0x00ff : Secondary processor spin code * 0x0100 - 0x2fff : pSeries Interrupt prologs - * 0x3000 - 0x5fff : interrupt support common interrupt prologs + * 0x3000 - 0x5fff : interrupt support, iSeries and common interrupt prologs * 0x6000 - 0x6fff : Initial (CPU0) segment table * 0x7000 - 0x7fff : FWNMI data area * 0x8000 - : Early init and support code @@ -357,60 +356,34 @@ do_stab_bolted_pSeries: KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xf40) /* - * An interrupt came in while soft-disabled. We set paca->irq_happened, - * then, if it was a decrementer interrupt, we bump the dec to max and - * and return, else we hard disable and return. This is called with - * r10 containing the value to OR to the paca field. + * An interrupt came in while soft-disabled; clear EE in SRR1, + * clear paca->hard_enabled and return. */ -#define MASKED_INTERRUPT(_H) \ -masked_##_H##interrupt: \ - std r11,PACA_EXGEN+EX_R11(r13); \ - lbz r11,PACAIRQHAPPENED(r13); \ - or r11,r11,r10; \ - stb r11,PACAIRQHAPPENED(r13); \ - andi. r10,r10,PACA_IRQ_DEC; \ - beq 1f; \ - lis r10,0x7fff; \ - ori r10,r10,0xffff; \ - mtspr SPRN_DEC,r10; \ - b 2f; \ -1: mfspr r10,SPRN_##_H##SRR1; \ - rldicl r10,r10,48,1; /* clear MSR_EE */ \ - rotldi r10,r10,16; \ - mtspr SPRN_##_H##SRR1,r10; \ -2: mtcrf 0x80,r9; \ - ld r9,PACA_EXGEN+EX_R9(r13); \ - ld r10,PACA_EXGEN+EX_R10(r13); \ - ld r11,PACA_EXGEN+EX_R11(r13); \ - GET_SCRATCH0(r13); \ - ##_H##rfid; \ +masked_interrupt: + stb r10,PACAHARDIRQEN(r13) + mtcrf 0x80,r9 + ld r9,PACA_EXGEN+EX_R9(r13) + mfspr r10,SPRN_SRR1 + rldicl r10,r10,48,1 /* clear MSR_EE */ + rotldi r10,r10,16 + mtspr SPRN_SRR1,r10 + ld r10,PACA_EXGEN+EX_R10(r13) + GET_SCRATCH0(r13) + rfid b . - - MASKED_INTERRUPT() - MASKED_INTERRUPT(H) -/* - * Called from arch_local_irq_enable when an interrupt needs - * to be resent. r3 contains 0x500 or 0x900 to indicate which - * kind of interrupt. MSR:EE is already off. We generate a - * stackframe like if a real interrupt had happened. 
- * - * Note: While MSR:EE is off, we need to make sure that _MSR - * in the generated frame has EE set to 1 or the exception - * handler will not properly re-enable them. - */ -_GLOBAL(__replay_interrupt) - /* We are going to jump to the exception common code which - * will retrieve various register values from the PACA which - * we don't give a damn about, so we don't bother storing them. - */ - mfmsr r12 - mflr r11 - mfcr r9 - ori r12,r12,MSR_EE - andi. r3,r3,0x0800 - bne decrementer_common - b hardware_interrupt_common +masked_Hinterrupt: + stb r10,PACAHARDIRQEN(r13) + mtcrf 0x80,r9 + ld r9,PACA_EXGEN+EX_R9(r13) + mfspr r10,SPRN_HSRR1 + rldicl r10,r10,48,1 /* clear MSR_EE */ + rotldi r10,r10,16 + mtspr SPRN_HSRR1,r10 + ld r10,PACA_EXGEN+EX_R10(r13) + GET_SCRATCH0(r13) + hrfid + b . #ifdef CONFIG_PPC_PSERIES /* @@ -485,15 +458,14 @@ machine_check_common: bl .machine_check_exception b .ret_from_except - STD_EXCEPTION_COMMON_ASYNC(0x500, hardware_interrupt, do_IRQ) - STD_EXCEPTION_COMMON_ASYNC(0x900, decrementer, .timer_interrupt) + STD_EXCEPTION_COMMON_LITE(0x900, decrementer, .timer_interrupt) STD_EXCEPTION_COMMON(0xa00, trap_0a, .unknown_exception) STD_EXCEPTION_COMMON(0xb00, trap_0b, .unknown_exception) STD_EXCEPTION_COMMON(0xd00, single_step, .single_step_exception) STD_EXCEPTION_COMMON(0xe00, trap_0e, .unknown_exception) STD_EXCEPTION_COMMON(0xe40, emulation_assist, .program_check_exception) STD_EXCEPTION_COMMON(0xe60, hmi_exception, .unknown_exception) - STD_EXCEPTION_COMMON_ASYNC(0xf00, performance_monitor, .performance_monitor_exception) + STD_EXCEPTION_COMMON_IDLE(0xf00, performance_monitor, .performance_monitor_exception) STD_EXCEPTION_COMMON(0x1300, instruction_breakpoint, .instruction_breakpoint_exception) #ifdef CONFIG_ALTIVEC STD_EXCEPTION_COMMON(0x1700, altivec_assist, .altivec_assist_exception) @@ -510,9 +482,6 @@ machine_check_common: system_call_entry: b system_call_common -ppc64_runlatch_on_trampoline: - b .__ppc64_runlatch_on - /* * Here we have detected that the kernel stack pointer is bad. * R9 contains the saved CR, r13 points to the paca, @@ -586,8 +555,6 @@ data_access_common: mfspr r10,SPRN_DSISR stw r10,PACA_EXGEN+EX_DSISR(r13) EXCEPTION_PROLOG_COMMON(0x300, PACA_EXGEN) - DISABLE_INTS - ld r12,_MSR(r1) ld r3,PACA_EXGEN+EX_DAR(r13) lwz r4,PACA_EXGEN+EX_DSISR(r13) li r5,0x300 @@ -602,7 +569,6 @@ h_data_storage_common: stw r10,PACA_EXGEN+EX_DSISR(r13) EXCEPTION_PROLOG_COMMON(0xe00, PACA_EXGEN) bl .save_nvgprs - DISABLE_INTS addi r3,r1,STACK_FRAME_OVERHEAD bl .unknown_exception b .ret_from_except @@ -611,8 +577,6 @@ h_data_storage_common: .globl instruction_access_common instruction_access_common: EXCEPTION_PROLOG_COMMON(0x400, PACA_EXGEN) - DISABLE_INTS - ld r12,_MSR(r1) ld r3,_NIP(r1) andis. 
r4,r12,0x5820 li r5,0x400 @@ -708,6 +672,12 @@ _GLOBAL(slb_miss_realmode) ld r10,PACA_EXSLB+EX_LR(r13) ld r3,PACA_EXSLB+EX_R3(r13) lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */ +#ifdef CONFIG_PPC_ISERIES +BEGIN_FW_FTR_SECTION + ld r11,PACALPPACAPTR(r13) + ld r11,LPPACASRR0(r11) /* get SRR0 value */ +END_FW_FTR_SECTION_IFSET(FW_FEATURE_ISERIES) +#endif /* CONFIG_PPC_ISERIES */ mtlr r10 @@ -720,6 +690,12 @@ _GLOBAL(slb_miss_realmode) mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */ .machine pop +#ifdef CONFIG_PPC_ISERIES +BEGIN_FW_FTR_SECTION + mtspr SPRN_SRR0,r11 + mtspr SPRN_SRR1,r12 +END_FW_FTR_SECTION_IFSET(FW_FEATURE_ISERIES) +#endif /* CONFIG_PPC_ISERIES */ ld r9,PACA_EXSLB+EX_R9(r13) ld r10,PACA_EXSLB+EX_R10(r13) ld r11,PACA_EXSLB+EX_R11(r13) @@ -728,7 +704,13 @@ _GLOBAL(slb_miss_realmode) rfid b . /* prevent speculative execution */ -2: mfspr r11,SPRN_SRR0 +2: +#ifdef CONFIG_PPC_ISERIES +BEGIN_FW_FTR_SECTION + b unrecov_slb +END_FW_FTR_SECTION_IFSET(FW_FEATURE_ISERIES) +#endif /* CONFIG_PPC_ISERIES */ + mfspr r11,SPRN_SRR0 ld r10,PACAKBASE(r13) LOAD_HANDLER(r10,unrecov_slb) mtspr SPRN_SRR0,r10 @@ -745,6 +727,20 @@ unrecov_slb: bl .unrecoverable_exception b 1b + .align 7 + .globl hardware_interrupt_common + .globl hardware_interrupt_entry +hardware_interrupt_common: + EXCEPTION_PROLOG_COMMON(0x500, PACA_EXGEN) + FINISH_NAP +hardware_interrupt_entry: + DISABLE_INTS +BEGIN_FTR_SECTION + bl .ppc64_runlatch_on +END_FTR_SECTION_IFSET(CPU_FTR_CTRL) + addi r3,r1,STACK_FRAME_OVERHEAD + bl .do_IRQ + b .ret_from_except_lite #ifdef CONFIG_PPC_970_NAP power4_fixup_nap: @@ -789,8 +785,8 @@ fp_unavailable_common: EXCEPTION_PROLOG_COMMON(0x800, PACA_EXGEN) bne 1f /* if from user, just load it up */ bl .save_nvgprs - DISABLE_INTS addi r3,r1,STACK_FRAME_OVERHEAD + ENABLE_INTS bl .kernel_fp_unavailable_exception BUG_OPCODE 1: bl .load_up_fpu @@ -809,8 +805,8 @@ BEGIN_FTR_SECTION END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) #endif bl .save_nvgprs - DISABLE_INTS addi r3,r1,STACK_FRAME_OVERHEAD + ENABLE_INTS bl .altivec_unavailable_exception b .ret_from_except @@ -820,14 +816,13 @@ vsx_unavailable_common: EXCEPTION_PROLOG_COMMON(0xf40, PACA_EXGEN) #ifdef CONFIG_VSX BEGIN_FTR_SECTION - beq 1f - b .load_up_vsx + bne .load_up_vsx 1: END_FTR_SECTION_IFSET(CPU_FTR_VSX) #endif bl .save_nvgprs - DISABLE_INTS addi r3,r1,STACK_FRAME_OVERHEAD + ENABLE_INTS bl .vsx_unavailable_exception b .ret_from_except @@ -835,6 +830,66 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) .globl __end_handlers __end_handlers: +/* + * Return from an exception with minimal checks. + * The caller is assumed to have done EXCEPTION_PROLOG_COMMON. + * If interrupts have been enabled, or anything has been + * done that might have changed the scheduling status of + * any task or sent any task a signal, you should use + * ret_from_except or ret_from_except_lite instead of this. + */ +fast_exc_return_irq: /* restores irq state too */ + ld r3,SOFTE(r1) + TRACE_AND_RESTORE_IRQ(r3); + ld r12,_MSR(r1) + rldicl r4,r12,49,63 /* get MSR_EE to LSB */ + stb r4,PACAHARDIRQEN(r13) /* restore paca->hard_enabled */ + b 1f + + .globl fast_exception_return +fast_exception_return: + ld r12,_MSR(r1) +1: ld r11,_NIP(r1) + andi. r3,r12,MSR_RI /* check if RI is set */ + beq- unrecov_fer + +#ifdef CONFIG_VIRT_CPU_ACCOUNTING + andi. 
r3,r12,MSR_PR + beq 2f + ACCOUNT_CPU_USER_EXIT(r3, r4) +2: +#endif + + ld r3,_CCR(r1) + ld r4,_LINK(r1) + ld r5,_CTR(r1) + ld r6,_XER(r1) + mtcr r3 + mtlr r4 + mtctr r5 + mtxer r6 + REST_GPR(0, r1) + REST_8GPRS(2, r1) + + mfmsr r10 + rldicl r10,r10,48,1 /* clear EE */ + rldicr r10,r10,16,61 /* clear RI (LE is 0 already) */ + mtmsrd r10,1 + + mtspr SPRN_SRR1,r12 + mtspr SPRN_SRR0,r11 + REST_4GPRS(10, r1) + ld r1,GPR1(r1) + rfid + b . /* prevent speculative execution */ + +unrecov_fer: + bl .save_nvgprs +1: addi r3,r1,STACK_FRAME_OVERHEAD + bl .unrecoverable_exception + b 1b + + /* * Hash table stuff */ @@ -857,6 +912,28 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_SLB) lwz r0,TI_PREEMPT(r11) /* If we're in an "NMI" */ andis. r0,r0,NMI_MASK@h /* (i.e. an irq when soft-disabled) */ bne 77f /* then don't call hash_page now */ + + /* + * On iSeries, we soft-disable interrupts here, then + * hard-enable interrupts so that the hash_page code can spin on + * the hash_table_lock without problems on a shared processor. + */ + DISABLE_INTS + + /* + * Currently, trace_hardirqs_off() will be called by DISABLE_INTS + * and will clobber volatile registers when irq tracing is enabled + * so we need to reload them. It may be possible to be smarter here + * and move the irq tracing elsewhere but let's keep it simple for + * now + */ +#ifdef CONFIG_TRACE_IRQFLAGS + ld r3,_DAR(r1) + ld r4,_DSISR(r1) + ld r5,_TRAP(r1) + ld r12,_MSR(r1) + clrrdi r5,r5,4 +#endif /* CONFIG_TRACE_IRQFLAGS */ /* * We need to set the _PAGE_USER bit if MSR_PR is set or if we are * accessing a userspace segment (even from the kernel). We assume @@ -874,25 +951,62 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_SLB) * r4 contains the required access permissions * r5 contains the trap number * - * at return r3 = 0 for success, 1 for page fault, negative for error + * at return r3 = 0 for success */ bl .hash_page /* build HPTE if possible */ cmpdi r3,0 /* see if hash_page succeeded */ - /* Success */ +BEGIN_FW_FTR_SECTION + /* + * If we had interrupts soft-enabled at the point where the + * DSI/ISI occurred, and an interrupt came in during hash_page, + * handle it now. + * We jump to ret_from_except_lite rather than fast_exception_return + * because ret_from_except_lite will check for and handle pending + * interrupts if necessary. + */ + beq 13f +END_FW_FTR_SECTION_IFSET(FW_FEATURE_ISERIES) + +BEGIN_FW_FTR_SECTION + /* + * Here we have interrupts hard-disabled, so it is sufficient + * to restore paca->{soft,hard}_enable and get out. + */ beq fast_exc_return_irq /* Return from exception on success */ +END_FW_FTR_SECTION_IFCLR(FW_FEATURE_ISERIES) + + /* For a hash failure, we don't bother re-enabling interrupts */ + ble- 12f + + /* + * hash_page couldn't handle it, set soft interrupt enable back + * to what it was before the trap. Note that .arch_local_irq_restore + * handles any interrupts pending at this point. + */ + ld r3,SOFTE(r1) + TRACE_AND_RESTORE_IRQ_PARTIAL(r3, 11f) + bl .arch_local_irq_restore + b 11f - /* Error */ - blt- 13f +/* We have a data breakpoint exception - handle it */ +handle_dabr_fault: + bl .save_nvgprs + ld r4,_DAR(r1) + ld r5,_DSISR(r1) + addi r3,r1,STACK_FRAME_OVERHEAD + bl .do_dabr + b .ret_from_except_lite /* Here we have a page fault that hash_page can't handle. 
*/ handle_page_fault: + ENABLE_INTS 11: ld r4,_DAR(r1) ld r5,_DSISR(r1) addi r3,r1,STACK_FRAME_OVERHEAD bl .do_page_fault cmpdi r3,0 - beq+ 12f + beq+ 13f bl .save_nvgprs mr r5,r3 addi r3,r1,STACK_FRAME_OVERHEAD @@ -900,20 +1014,12 @@ handle_page_fault: bl .bad_page_fault b .ret_from_except -/* We have a data breakpoint exception - handle it */ -handle_dabr_fault: - bl .save_nvgprs - ld r4,_DAR(r1) - ld r5,_DSISR(r1) - addi r3,r1,STACK_FRAME_OVERHEAD - bl .do_dabr -12: b .ret_from_except_lite - +13: b .ret_from_except_lite /* We have a page fault that hash_page could handle but HV refused * the PTE insertion */ -13: bl .save_nvgprs +12: bl .save_nvgprs mr r5,r3 addi r3,r1,STACK_FRAME_OVERHEAD ld r4,_DAR(r1) @@ -1035,19 +1141,51 @@ _GLOBAL(do_stab_bolted) .= 0x7000 .globl fwnmi_data_area fwnmi_data_area: +#endif /* defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) */ + /* iSeries does not use the FWNMI stuff, so it is safe to put + * this here, even if we later allow kernels that will boot on + * both pSeries and iSeries */ +#ifdef CONFIG_PPC_ISERIES + . = LPARMAP_PHYS + .globl xLparMap +xLparMap: + .quad HvEsidsToMap /* xNumberEsids */ + .quad HvRangesToMap /* xNumberRanges */ + .quad STAB0_PAGE /* xSegmentTableOffs */ + .zero 40 /* xRsvd */ + /* xEsids (HvEsidsToMap entries of 2 quads) */ + .quad PAGE_OFFSET_ESID /* xKernelEsid */ + .quad PAGE_OFFSET_VSID /* xKernelVsid */ + .quad VMALLOC_START_ESID /* xKernelEsid */ + .quad VMALLOC_START_VSID /* xKernelVsid */ + /* xRanges (HvRangesToMap entries of 3 quads) */ + .quad HvPagesToMap /* xPages */ + .quad 0 /* xOffset */ + .quad PAGE_OFFSET_VSID << (SID_SHIFT - HW_PAGE_SHIFT) /* xVPN */ + +#endif /* CONFIG_PPC_ISERIES */ + +#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) /* pseries and powernv need to keep the whole page from * 0x7000 to 0x8000 free for use by the firmware */ . = 0x8000 #endif /* defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) */ -/* Space for CPU0's segment table */ - .balign 4096 +/* + * Space for CPU0's segment table. + * + * On iSeries, the hypervisor must fill in at least one entry before + * we get control (with relocate on). The address is given to the hv + * as a page number (see xLparMap above), so this must be at a + * fixed address (the linker can't compute (u64)&initial_stab >> + * PAGE_SHIFT). + */ + . = STAB0_OFFSET /* 0x8000 */ .globl initial_stab initial_stab: .space 4096 - #ifdef CONFIG_PPC_POWERNV _GLOBAL(opal_mc_secondary_handler) HMT_MEDIUM diff --git a/trunk/arch/powerpc/kernel/head_32.S b/trunk/arch/powerpc/kernel/head_32.S index dc0488b6f6e1..0654dba2c1f1 100644 --- a/trunk/arch/powerpc/kernel/head_32.S +++ b/trunk/arch/powerpc/kernel/head_32.S @@ -395,7 +395,7 @@ DataAccess: bl hash_page 1: lwz r5,_DSISR(r11) /* get DSISR value */ mfspr r4,SPRN_DAR - EXC_XFER_LITE(0x300, handle_page_fault) + EXC_XFER_EE_LITE(0x300, handle_page_fault) /* Instruction access exception. 
*/ @@ -410,7 +410,7 @@ InstructionAccess: bl hash_page 1: mr r4,r12 mr r5,r9 - EXC_XFER_LITE(0x400, handle_page_fault) + EXC_XFER_EE_LITE(0x400, handle_page_fault) /* External interrupt */ EXCEPTION(0x500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE) diff --git a/trunk/arch/powerpc/kernel/head_40x.S b/trunk/arch/powerpc/kernel/head_40x.S index 4989661b710b..872a6af83bad 100644 --- a/trunk/arch/powerpc/kernel/head_40x.S +++ b/trunk/arch/powerpc/kernel/head_40x.S @@ -394,7 +394,7 @@ label: NORMAL_EXCEPTION_PROLOG mr r4,r12 /* Pass SRR0 as arg2 */ li r5,0 /* Pass zero as arg3 */ - EXC_XFER_LITE(0x400, handle_page_fault) + EXC_XFER_EE_LITE(0x400, handle_page_fault) /* 0x0500 - External Interrupt Exception */ EXCEPTION(0x0500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE) @@ -747,7 +747,7 @@ DataAccess: mfspr r5,SPRN_ESR /* Grab the ESR, save it, pass arg3 */ stw r5,_ESR(r11) mfspr r4,SPRN_DEAR /* Grab the DEAR, save it, pass arg2 */ - EXC_XFER_LITE(0x300, handle_page_fault) + EXC_XFER_EE_LITE(0x300, handle_page_fault) /* Other PowerPC processors, namely those derived from the 6xx-series * have vectors from 0x2100 through 0x2F00 defined, but marked as reserved. diff --git a/trunk/arch/powerpc/kernel/head_64.S b/trunk/arch/powerpc/kernel/head_64.S index 58bddee8e1e8..06c7251c1bf7 100644 --- a/trunk/arch/powerpc/kernel/head_64.S +++ b/trunk/arch/powerpc/kernel/head_64.S @@ -32,13 +32,13 @@ #include #include #include +#include #include #include #include #include #include #include -#include /* The physical memory is laid out such that the secondary processor * spin code sits at 0x0000...0x00ff. On server, the vectors follow @@ -57,6 +57,10 @@ * entry in r9 for debugging purposes * 2. Secondary processors enter at 0x60 with PIR in gpr3 * + * For iSeries: + * 1. The MMU is on (as it always is for iSeries) + * 2. The kernel is entered at system_reset_iSeries + * * For Book3E processors: * 1. The MMU is on running in AS0 in a state defined in ePAPR * 2. The kernel is entered at __start @@ -89,6 +93,15 @@ __secondary_hold_spinloop: __secondary_hold_acknowledge: .llong 0x0 +#ifdef CONFIG_PPC_ISERIES + /* + * At offset 0x20, there is a pointer to iSeries LPAR data. + * This is required by the hypervisor + */ + . = 0x20 + .llong hvReleaseData-KERNELBASE +#endif /* CONFIG_PPC_ISERIES */ + #ifdef CONFIG_RELOCATABLE /* This flag is set to 1 by a loader if the kernel should run * at the loaded address instead of the linked address. This @@ -551,8 +564,7 @@ _GLOBAL(pmac_secondary_start) */ li r0,0 stb r0,PACASOFTIRQEN(r13) - li r0,PACA_IRQ_HARD_DIS - stb r0,PACAIRQHAPPENED(r13) + stb r0,PACAHARDIRQEN(r13) /* Create a temp kernel stack for use before relocation is on. */ ld r1,PACAEMERGSP(r13) @@ -570,7 +582,7 @@ _GLOBAL(pmac_secondary_start) * 1. Processor number * 2. Segment table pointer (virtual address) * On entry the following are set: - * r1 = stack pointer (real addr of temp stack) + * r1 = stack pointer. vaddr for iSeries, raddr (temp stack) for pSeries * r24 = cpu# (in Linux terms) * r13 = paca virtual address * SPRG_PACA = paca virtual address @@ -583,7 +595,7 @@ __secondary_start: /* Set thread priority to MEDIUM */ HMT_MEDIUM - /* Initialize the kernel stack */ + /* Initialize the kernel stack. Just a repeat for iSeries. 
*/ LOAD_REG_ADDR(r3, current_set) sldi r28,r24,3 /* get current_set[cpu#] */ ldx r14,r3,r28 @@ -603,16 +615,20 @@ __secondary_start: li r7,0 mtlr r7 - /* Mark interrupts soft and hard disabled (they might be enabled - * in the PACA when doing hotplug) - */ - stb r7,PACASOFTIRQEN(r13) - li r0,PACA_IRQ_HARD_DIS - stb r0,PACAIRQHAPPENED(r13) - /* enable MMU and jump to start_secondary */ LOAD_REG_ADDR(r3, .start_secondary_prolog) LOAD_REG_IMMEDIATE(r4, MSR_KERNEL) +#ifdef CONFIG_PPC_ISERIES +BEGIN_FW_FTR_SECTION + ori r4,r4,MSR_EE + li r8,1 + stb r8,PACAHARDIRQEN(r13) +END_FW_FTR_SECTION_IFSET(FW_FEATURE_ISERIES) +#endif +BEGIN_FW_FTR_SECTION + stb r7,PACAHARDIRQEN(r13) +END_FW_FTR_SECTION_IFCLR(FW_FEATURE_ISERIES) + stb r7,PACASOFTIRQEN(r13) mtspr SPRN_SRR0,r3 mtspr SPRN_SRR1,r4 @@ -755,18 +771,22 @@ _INIT_GLOBAL(start_here_common) /* Load the TOC (virtual address) */ ld r2,PACATOC(r13) - /* Do more system initializations in virtual mode */ bl .setup_system - /* Mark interrupts soft and hard disabled (they might be enabled - * in the PACA when doing hotplug) - */ - li r0,0 - stb r0,PACASOFTIRQEN(r13) - li r0,PACA_IRQ_HARD_DIS - stb r0,PACAIRQHAPPENED(r13) + /* Load up the kernel context */ +5: + li r5,0 + stb r5,PACASOFTIRQEN(r13) /* Soft Disabled */ +#ifdef CONFIG_PPC_ISERIES +BEGIN_FW_FTR_SECTION + mfmsr r5 + ori r5,r5,MSR_EE /* Hard Enabled on iSeries*/ + mtmsrd r5 + li r5,1 +END_FW_FTR_SECTION_IFSET(FW_FEATURE_ISERIES) +#endif + stb r5,PACAHARDIRQEN(r13) /* Hard Disabled on others */ - /* Generic kernel entry */ bl .start_kernel /* Not reached */ diff --git a/trunk/arch/powerpc/kernel/head_8xx.S b/trunk/arch/powerpc/kernel/head_8xx.S index b2a5860accfb..b68cb173ba2c 100644 --- a/trunk/arch/powerpc/kernel/head_8xx.S +++ b/trunk/arch/powerpc/kernel/head_8xx.S @@ -220,7 +220,7 @@ DataAccess: mfspr r4,SPRN_DAR li r10,0x00f0 mtspr SPRN_DAR,r10 /* Tag DAR, to be used in DTLB Error */ - EXC_XFER_LITE(0x300, handle_page_fault) + EXC_XFER_EE_LITE(0x300, handle_page_fault) /* Instruction access exception. * This is "never generated" by the MPC8xx. We jump to it for other @@ -231,7 +231,7 @@ InstructionAccess: EXCEPTION_PROLOG mr r4,r12 mr r5,r9 - EXC_XFER_LITE(0x400, handle_page_fault) + EXC_XFER_EE_LITE(0x400, handle_page_fault) /* External interrupt */ EXCEPTION(0x500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE) diff --git a/trunk/arch/powerpc/kernel/head_booke.h b/trunk/arch/powerpc/kernel/head_booke.h index 0e4175388f47..fc921bf62e15 100644 --- a/trunk/arch/powerpc/kernel/head_booke.h +++ b/trunk/arch/powerpc/kernel/head_booke.h @@ -359,7 +359,7 @@ mfspr r5,SPRN_ESR; /* Grab the ESR and save it */ \ stw r5,_ESR(r11); \ mfspr r4,SPRN_DEAR; /* Grab the DEAR */ \ - EXC_XFER_LITE(0x0300, handle_page_fault) + EXC_XFER_EE_LITE(0x0300, handle_page_fault) #define INSTRUCTION_STORAGE_EXCEPTION \ START_EXCEPTION(InstructionStorage) \ @@ -368,7 +368,7 @@ stw r5,_ESR(r11); \ mr r4,r12; /* Pass SRR0 as arg2 */ \ li r5,0; /* Pass zero as arg3 */ \ - EXC_XFER_LITE(0x0400, handle_page_fault) + EXC_XFER_EE_LITE(0x0400, handle_page_fault) #define ALIGNMENT_EXCEPTION \ START_EXCEPTION(Alignment) \ diff --git a/trunk/arch/powerpc/kernel/head_fsl_booke.S b/trunk/arch/powerpc/kernel/head_fsl_booke.S index 28e62598d0e8..d5d78c4ceef6 100644 --- a/trunk/arch/powerpc/kernel/head_fsl_booke.S +++ b/trunk/arch/powerpc/kernel/head_fsl_booke.S @@ -319,7 +319,7 @@ interrupt_base: mfspr r4,SPRN_DEAR /* Grab the DEAR, save it, pass arg2 */ andis. 
r10,r5,(ESR_ILK|ESR_DLK)@h bne 1f - EXC_XFER_LITE(0x0300, handle_page_fault) + EXC_XFER_EE_LITE(0x0300, handle_page_fault) 1: addi r3,r1,STACK_FRAME_OVERHEAD EXC_XFER_EE_LITE(0x0300, CacheLockingException) diff --git a/trunk/arch/powerpc/kernel/idle.c b/trunk/arch/powerpc/kernel/idle.c index 8f7a2b62863d..0a48bf5db6c8 100644 --- a/trunk/arch/powerpc/kernel/idle.c +++ b/trunk/arch/powerpc/kernel/idle.c @@ -84,11 +84,7 @@ void cpu_idle(void) start_critical_timings(); - /* Some power_save functions return with - * interrupts enabled, some don't. - */ - if (irqs_disabled()) - local_irq_enable(); + local_irq_enable(); set_thread_flag(TIF_POLLING_NRFLAG); } else { diff --git a/trunk/arch/powerpc/kernel/idle_book3e.S b/trunk/arch/powerpc/kernel/idle_book3e.S index ff007b59448d..16c002d6bdf1 100644 --- a/trunk/arch/powerpc/kernel/idle_book3e.S +++ b/trunk/arch/powerpc/kernel/idle_book3e.S @@ -29,30 +29,43 @@ _GLOBAL(book3e_idle) wrteei 0 /* Now check if an interrupt came in while we were soft disabled - * since we may otherwise lose it (doorbells etc...). + * since we may otherwise lose it (doorbells etc...). We know + * that since PACAHARDIRQEN will have been cleared in that case. */ - lbz r3,PACAIRQHAPPENED(r13) + lbz r3,PACAHARDIRQEN(r13) cmpwi cr0,r3,0 - bnelr + beqlr - /* Now we are going to mark ourselves as soft and hard enabled in + /* Now we are going to mark ourselves as soft and hard enables in * order to be able to take interrupts while asleep. We inform lockdep * of that. We don't actually turn interrupts on just yet tho. */ #ifdef CONFIG_TRACE_IRQFLAGS stdu r1,-128(r1) bl .trace_hardirqs_on - addi r1,r1,128 #endif li r0,1 stb r0,PACASOFTIRQEN(r13) + stb r0,PACAHARDIRQEN(r13) /* Interrupts will make use return to LR, so get something we want * in there */ bl 1f - /* And return (interrupts are on) */ + /* Hard disable interrupts again */ + wrteei 0 + + /* Mark them off again in the PACA as well */ + li r0,0 + stb r0,PACASOFTIRQEN(r13) + stb r0,PACAHARDIRQEN(r13) + + /* Tell lockdep about it */ +#ifdef CONFIG_TRACE_IRQFLAGS + bl .trace_hardirqs_off + addi r1,r1,128 +#endif ld r0,16(r1) mtlr r0 blr diff --git a/trunk/arch/powerpc/kernel/idle_power4.S b/trunk/arch/powerpc/kernel/idle_power4.S index d8cdba4c28b2..ba3195478600 100644 --- a/trunk/arch/powerpc/kernel/idle_power4.S +++ b/trunk/arch/powerpc/kernel/idle_power4.S @@ -14,7 +14,6 @@ #include #include #include -#include #undef DEBUG @@ -30,31 +29,14 @@ END_FTR_SECTION_IFCLR(CPU_FTR_CAN_NAP) cmpwi 0,r4,0 beqlr - /* Hard disable interrupts */ + /* Go to NAP now */ mfmsr r7 rldicl r0,r7,48,1 rotldi r0,r0,16 - mtmsrd r0,1 - - /* Check if something happened while soft-disabled */ - lbz r0,PACAIRQHAPPENED(r13) - cmpwi cr0,r0,0 - bnelr - - /* Soft-enable interrupts */ -#ifdef CONFIG_TRACE_IRQFLAGS - mflr r0 - std r0,16(r1) - stdu r1,-128(r1) - bl .trace_hardirqs_on - addi r1,r1,128 - ld r0,16(r1) - mtlr r0 -#endif /* CONFIG_TRACE_IRQFLAGS */ - - TRACE_ENABLE_INTS + mtmsrd r0,1 /* hard-disable interrupts */ li r0,1 stb r0,PACASOFTIRQEN(r13) /* we'll hard-enable shortly */ + stb r0,PACAHARDIRQEN(r13) BEGIN_FTR_SECTION DSSALL sync diff --git a/trunk/arch/powerpc/kernel/idle_power7.S b/trunk/arch/powerpc/kernel/idle_power7.S index 0cdc9a392839..fcdff198da4b 100644 --- a/trunk/arch/powerpc/kernel/idle_power7.S +++ b/trunk/arch/powerpc/kernel/idle_power7.S @@ -1,5 +1,5 @@ /* - * This file contains the power_save function for Power7 CPUs. + * This file contains the power_save function for 970-family CPUs. 
* * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -15,7 +15,6 @@ #include #include #include -#include #undef DEBUG @@ -52,25 +51,9 @@ _GLOBAL(power7_idle) rldicl r9,r9,48,1 rotldi r9,r9,16 mtmsrd r9,1 /* hard-disable interrupts */ - - /* Check if something happened while soft-disabled */ - lbz r0,PACAIRQHAPPENED(r13) - cmpwi cr0,r0,0 - beq 1f - addi r1,r1,INT_FRAME_SIZE - ld r0,16(r1) - mtlr r0 - blr - -1: /* We mark irqs hard disabled as this is the state we'll - * be in when returning and we need to tell arch_local_irq_restore() - * about it - */ - li r0,PACA_IRQ_HARD_DIS - stb r0,PACAIRQHAPPENED(r13) - - /* We haven't lost state ... yet */ li r0,0 + stb r0,PACASOFTIRQEN(r13) /* we'll hard-enable shortly */ + stb r0,PACAHARDIRQEN(r13) stb r0,PACA_NAPSTATELOST(r13) /* Continue saving state */ diff --git a/trunk/arch/powerpc/kernel/irq.c b/trunk/arch/powerpc/kernel/irq.c index eb804e15b29b..9b6e80668cfb 100644 --- a/trunk/arch/powerpc/kernel/irq.c +++ b/trunk/arch/powerpc/kernel/irq.c @@ -95,14 +95,14 @@ extern int tau_interrupts(int); int distribute_irqs = 1; -static inline notrace unsigned long get_irq_happened(void) +static inline notrace unsigned long get_hard_enabled(void) { - unsigned long happened; + unsigned long enabled; __asm__ __volatile__("lbz %0,%1(13)" - : "=r" (happened) : "i" (offsetof(struct paca_struct, irq_happened))); + : "=r" (enabled) : "i" (offsetof(struct paca_struct, hard_enabled))); - return happened; + return enabled; } static inline notrace void set_soft_enabled(unsigned long enable) @@ -111,167 +111,88 @@ static inline notrace void set_soft_enabled(unsigned long enable) : : "r" (enable), "i" (offsetof(struct paca_struct, soft_enabled))); } -static inline notrace int decrementer_check_overflow(void) +static inline notrace void decrementer_check_overflow(void) { - u64 now = get_tb_or_rtc(); - u64 *next_tb = &__get_cpu_var(decrementers_next_tb); - + u64 now = get_tb_or_rtc(); + u64 *next_tb; + + preempt_disable(); + next_tb = &__get_cpu_var(decrementers_next_tb); + if (now >= *next_tb) set_dec(1); - return now >= *next_tb; + preempt_enable(); } -/* This is called whenever we are re-enabling interrupts - * and returns either 0 (nothing to do) or 500/900 if there's - * either an EE or a DEC to generate. - * - * This is called in two contexts: From arch_local_irq_restore() - * before soft-enabling interrupts, and from the exception exit - * path when returning from an interrupt from a soft-disabled to - * a soft enabled context. In both case we have interrupts hard - * disabled. - * - * We take care of only clearing the bits we handled in the - * PACA irq_happened field since we can only re-emit one at a - * time and we don't want to "lose" one. - */ -notrace unsigned int __check_irq_replay(void) +notrace void arch_local_irq_restore(unsigned long en) { /* - * We use local_paca rather than get_paca() to avoid all - * the debug_smp_processor_id() business in this low level - * function - */ - unsigned char happened = local_paca->irq_happened; - - /* Clear bit 0 which we wouldn't clear otherwise */ - local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS; - - /* - * Force the delivery of pending soft-disabled interrupts on PS3. - * Any HV call will have this side effect. - */ - if (firmware_has_feature(FW_FEATURE_PS3_LV1)) { - u64 tmp, tmp2; - lv1_get_version_info(&tmp, &tmp2); - } - - /* - * We may have missed a decrementer interrupt. 
We check the - * decrementer itself rather than the paca irq_happened field - * in case we also had a rollover while hard disabled - */ - local_paca->irq_happened &= ~PACA_IRQ_DEC; - if (decrementer_check_overflow()) - return 0x900; - - /* Finally check if an external interrupt happened */ - local_paca->irq_happened &= ~PACA_IRQ_EE; - if (happened & PACA_IRQ_EE) - return 0x500; - -#ifdef CONFIG_PPC_BOOK3E - /* Finally check if an EPR external interrupt happened - * this bit is typically set if we need to handle another - * "edge" interrupt from within the MPIC "EPR" handler + * get_paca()->soft_enabled = en; + * Is it ever valid to use local_irq_restore(0) when soft_enabled is 1? + * That was allowed before, and in such a case we do need to take care + * that gcc will set soft_enabled directly via r13, not choose to use + * an intermediate register, lest we're preempted to a different cpu. */ - local_paca->irq_happened &= ~PACA_IRQ_EE_EDGE; - if (happened & PACA_IRQ_EE_EDGE) - return 0x500; - - local_paca->irq_happened &= ~PACA_IRQ_DBELL; - if (happened & PACA_IRQ_DBELL) - return 0x280; -#endif /* CONFIG_PPC_BOOK3E */ - - /* There should be nothing left ! */ - BUG_ON(local_paca->irq_happened != 0); - - return 0; -} - -notrace void arch_local_irq_restore(unsigned long en) -{ - unsigned char irq_happened; - unsigned int replay; - - /* Write the new soft-enabled value */ set_soft_enabled(en); if (!en) return; + +#ifdef CONFIG_PPC_STD_MMU_64 + if (firmware_has_feature(FW_FEATURE_ISERIES)) { + /* + * Do we need to disable preemption here? Not really: in the + * unlikely event that we're preempted to a different cpu in + * between getting r13, loading its lppaca_ptr, and loading + * its any_int, we might call iseries_handle_interrupts without + * an interrupt pending on the new cpu, but that's no disaster, + * is it? And the business of preempting us off the old cpu + * would itself involve a local_irq_restore which handles the + * interrupt to that cpu. + * + * But use "local_paca->lppaca_ptr" instead of "get_lppaca()" + * to avoid any preemption checking added into get_paca(). + */ + if (local_paca->lppaca_ptr->int_dword.any_int) + iseries_handle_interrupts(); + } +#endif /* CONFIG_PPC_STD_MMU_64 */ + /* - * From this point onward, we can take interrupts, preempt, - * etc... unless we got hard-disabled. We check if an event - * happened. If none happened, we know we can just return. - * - * We may have preempted before the check below, in which case - * we are checking the "new" CPU instead of the old one. This - * is only a problem if an event happened on the "old" CPU. - * - * External interrupt events on non-iseries will have caused - * interrupts to be hard-disabled, so there is no problem, we - * cannot have preempted. - * - * That leaves us with EEs on iSeries or decrementer interrupts, - * which I decided to safely ignore. The preemption would have - * itself been the result of an interrupt, upon which return we - * will have checked for pending events on the old CPU. + * if (get_paca()->hard_enabled) return; + * But again we need to take care that gcc gets hard_enabled directly + * via r13, not choose to use an intermediate register, lest we're + * preempted to a different cpu in between the two instructions. */ - irq_happened = get_irq_happened(); - if (!irq_happened) + if (get_hard_enabled()) return; /* - * We need to hard disable to get a trusted value from - * __check_irq_replay(). 
We also need to soft-disable - * again to avoid warnings in there due to the use of - * per-cpu variables. - * - * We know that if the value in irq_happened is exactly 0x01 - * then we are already hard disabled (there are other less - * common cases that we'll ignore for now), so we skip the - * (expensive) mtmsrd. + * Need to hard-enable interrupts here. Since currently disabled, + * no need to take further asm precautions against preemption; but + * use local_paca instead of get_paca() to avoid preemption checking. */ - if (unlikely(irq_happened != PACA_IRQ_HARD_DIS)) - __hard_irq_disable(); - set_soft_enabled(0); + local_paca->hard_enabled = en; /* - * Check if anything needs to be re-emitted. We haven't - * soft-enabled yet to avoid warnings in decrementer_check_overflow - * accessing per-cpu variables + * Trigger the decrementer if we have a pending event. Some processors + * only trigger on edge transitions of the sign bit. We might also + * have disabled interrupts long enough that the decrementer wrapped + * to positive. */ - replay = __check_irq_replay(); - - /* We can soft-enable now */ - set_soft_enabled(1); + decrementer_check_overflow(); /* - * And replay if we have to. This will return with interrupts - * hard-enabled. + * Force the delivery of pending soft-disabled interrupts on PS3. + * Any HV call will have this side effect. */ - if (replay) { - __replay_interrupt(replay); - return; + if (firmware_has_feature(FW_FEATURE_PS3_LV1)) { + u64 tmp, tmp2; + lv1_get_version_info(&tmp, &tmp2); } - /* Finally, let's ensure we are hard enabled */ __hard_irq_enable(); } EXPORT_SYMBOL(arch_local_irq_restore); - -/* - * This is specifically called by assembly code to re-enable interrupts - * if they are currently disabled. This is typically called before - * schedule() or do_signal() when returning to userspace. We do it - * in C to avoid the burden of dealing with lockdep etc... - */ -void restore_interrupts(void) -{ - if (irqs_disabled()) - local_irq_enable(); -} - #endif /* CONFIG_PPC64 */ int arch_show_interrupts(struct seq_file *p, int prec) @@ -439,17 +360,8 @@ void do_IRQ(struct pt_regs *regs) check_stack_overflow(); - /* - * Query the platform PIC for the interrupt & ack it. - * - * This will typically lower the interrupt line to the CPU - */ irq = ppc_md.get_irq(); - /* We can hard enable interrupts now */ - may_hard_irq_enable(); - - /* And finally process it */ if (irq != NO_IRQ && irq != NO_IRQ_IGNORE) handle_one_irq(irq); else if (irq != NO_IRQ_IGNORE) diff --git a/trunk/arch/powerpc/kernel/misc.S b/trunk/arch/powerpc/kernel/misc.S index ba16874fe294..b69463ec2010 100644 --- a/trunk/arch/powerpc/kernel/misc.S +++ b/trunk/arch/powerpc/kernel/misc.S @@ -5,6 +5,7 @@ * Largely rewritten by Cort Dougan (cort@cs.nmt.edu) * and Paul Mackerras. * + * Adapted for iSeries by Mike Corrigan (mikejc@us.ibm.com) * PPC64 updates by Dave Engebretsen (engebret@us.ibm.com) * * setjmp/longjmp code by Paul Mackerras. 
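The irq.c hunk above restores the older two-flag lazy-disable scheme: paca->soft_enabled records what the kernel asked for, paca->hard_enabled records whether MSR[EE] is actually on, and arch_local_irq_restore() only touches the hardware once the two disagree. The stand-alone C sketch below is illustrative only; the names (toy_paca, toy_irq_disable, toy_irq_restore, line_asserted) are stand-ins for the paca fields and helpers, not the kernel's API, and it models only the ordering, assuming a latched external interrupt simply fires again once MSR[EE] is turned back on.

#include <stdbool.h>
#include <stdio.h>

/* Toy model of the two per-CPU flags used by the restored scheme. */
struct toy_paca {
	bool soft_enabled;	/* what the kernel asked for */
	bool hard_enabled;	/* whether MSR[EE] is really on */
};

static struct toy_paca paca = { .soft_enabled = true, .hard_enabled = true };
static bool line_asserted;	/* an external interrupt left pending */

static void handle_pending_interrupt(void)
{
	line_asserted = false;
	printf("interrupt delivered after re-enable\n");
}

/* Lazy disable: only the soft flag changes, MSR[EE] stays on. */
static void toy_irq_disable(void)
{
	paca.soft_enabled = false;
}

/*
 * Model of the restored arch_local_irq_restore(): if an interrupt arrived
 * while we were soft-disabled, the masked-interrupt path cleared
 * hard_enabled and returned without handling it; on restore we put
 * hard_enabled back and "re-enable MSR[EE]", at which point the
 * still-asserted line fires.
 */
static void toy_irq_restore(bool en)
{
	paca.soft_enabled = en;
	if (!en)
		return;
	if (paca.hard_enabled)
		return;			/* nothing happened while disabled */
	paca.hard_enabled = true;	/* models the stb + __hard_irq_enable() */
	if (line_asserted)
		handle_pending_interrupt();
}

int main(void)
{
	toy_irq_disable();
	/* An interrupt arrives: the low-level masked path would clear
	 * hard_enabled and leave the interrupt line asserted. */
	paca.hard_enabled = false;
	line_asserted = true;
	toy_irq_restore(true);
	return 0;
}

In the kernel itself the "replay" is implicit rather than explicit as in the toy: __hard_irq_enable() turns MSR[EE] back on and the still-pending external interrupt is taken immediately, while the decrementer case is covered separately by decrementer_check_overflow() in the same hunk.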
diff --git a/trunk/arch/powerpc/kernel/process.c b/trunk/arch/powerpc/kernel/process.c index e40707032ac3..d817ab018486 100644 --- a/trunk/arch/powerpc/kernel/process.c +++ b/trunk/arch/powerpc/kernel/process.c @@ -647,9 +647,6 @@ void show_regs(struct pt_regs * regs) printk("MSR: "REG" ", regs->msr); printbits(regs->msr, msr_bits); printk(" CR: %08lx XER: %08lx\n", regs->ccr, regs->xer); -#ifdef CONFIG_PPC64 - printk("SOFTE: %ld\n", regs->softe); -#endif trap = TRAP(regs); if ((regs->trap != 0xc00) && cpu_has_feature(CPU_FTR_CFAR)) printk("CFAR: "REG"\n", regs->orig_gpr3); @@ -1223,32 +1220,34 @@ void dump_stack(void) EXPORT_SYMBOL(dump_stack); #ifdef CONFIG_PPC64 -/* Called with hard IRQs off */ -void __ppc64_runlatch_on(void) +void ppc64_runlatch_on(void) { - struct thread_info *ti = current_thread_info(); unsigned long ctrl; - ctrl = mfspr(SPRN_CTRLF); - ctrl |= CTRL_RUNLATCH; - mtspr(SPRN_CTRLT, ctrl); + if (cpu_has_feature(CPU_FTR_CTRL) && !test_thread_flag(TIF_RUNLATCH)) { + HMT_medium(); + + ctrl = mfspr(SPRN_CTRLF); + ctrl |= CTRL_RUNLATCH; + mtspr(SPRN_CTRLT, ctrl); - ti->local_flags |= TLF_RUNLATCH; + set_thread_flag(TIF_RUNLATCH); + } } -/* Called with hard IRQs off */ void __ppc64_runlatch_off(void) { - struct thread_info *ti = current_thread_info(); unsigned long ctrl; - ti->local_flags &= ~TLF_RUNLATCH; + HMT_medium(); + + clear_thread_flag(TIF_RUNLATCH); ctrl = mfspr(SPRN_CTRLF); ctrl &= ~CTRL_RUNLATCH; mtspr(SPRN_CTRLT, ctrl); } -#endif /* CONFIG_PPC64 */ +#endif #if THREAD_SHIFT < PAGE_SHIFT diff --git a/trunk/arch/powerpc/kernel/time.c b/trunk/arch/powerpc/kernel/time.c index f81c81b92f0e..567dd7c3ac2a 100644 --- a/trunk/arch/powerpc/kernel/time.c +++ b/trunk/arch/powerpc/kernel/time.c @@ -259,6 +259,7 @@ void accumulate_stolen_time(void) u64 sst, ust; u8 save_soft_enabled = local_paca->soft_enabled; + u8 save_hard_enabled = local_paca->hard_enabled; /* We are called early in the exception entry, before * soft/hard_enabled are sync'ed to the expected state @@ -267,6 +268,7 @@ void accumulate_stolen_time(void) * complain */ local_paca->soft_enabled = 0; + local_paca->hard_enabled = 0; sst = scan_dispatch_log(local_paca->starttime_user); ust = scan_dispatch_log(local_paca->starttime); @@ -275,6 +277,7 @@ void accumulate_stolen_time(void) local_paca->stolen_time += ust + sst; local_paca->soft_enabled = save_soft_enabled; + local_paca->hard_enabled = save_hard_enabled; } static inline u64 calculate_stolen_time(u64 stop_tb) @@ -577,11 +580,6 @@ void timer_interrupt(struct pt_regs * regs) if (!cpu_online(smp_processor_id())) return; - /* Conditionally hard-enable interrupts now that the DEC has been - * bumped to its maximum value - */ - may_hard_irq_enable(); - trace_timer_interrupt_entry(regs); __get_cpu_var(irq_stat).timer_irqs++; diff --git a/trunk/arch/powerpc/kernel/traps.c b/trunk/arch/powerpc/kernel/traps.c index a750409ccc4e..5d40e592ffcb 100644 --- a/trunk/arch/powerpc/kernel/traps.c +++ b/trunk/arch/powerpc/kernel/traps.c @@ -247,9 +247,6 @@ void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr) addr, regs->nip, regs->link, code); } - if (!arch_irq_disabled_regs(regs)) - local_irq_enable(); - memset(&info, 0, sizeof(info)); info.si_signo = signr; info.si_code = code; diff --git a/trunk/arch/powerpc/kernel/vmlinux.lds.S b/trunk/arch/powerpc/kernel/vmlinux.lds.S index 65d1c08cf09e..710a54005dfb 100644 --- a/trunk/arch/powerpc/kernel/vmlinux.lds.S +++ b/trunk/arch/powerpc/kernel/vmlinux.lds.S @@ -109,6 +109,11 @@ SECTIONS __ptov_table_begin = .; 
*(.ptov_fixup); __ptov_table_end = .; +#ifdef CONFIG_PPC_ISERIES + __dt_strings_start = .; + *(.dt_strings); + __dt_strings_end = .; +#endif } .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) { diff --git a/trunk/arch/powerpc/mm/fault.c b/trunk/arch/powerpc/mm/fault.c index 19f2f9498b27..2f0d1b032a89 100644 --- a/trunk/arch/powerpc/mm/fault.c +++ b/trunk/arch/powerpc/mm/fault.c @@ -105,82 +105,6 @@ static int store_updates_sp(struct pt_regs *regs) } return 0; } -/* - * do_page_fault error handling helpers - */ - -#define MM_FAULT_RETURN 0 -#define MM_FAULT_CONTINUE -1 -#define MM_FAULT_ERR(sig) (sig) - -static int out_of_memory(struct pt_regs *regs) -{ - /* - * We ran out of memory, or some other thing happened to us that made - * us unable to handle the page fault gracefully. - */ - up_read(¤t->mm->mmap_sem); - if (!user_mode(regs)) - return MM_FAULT_ERR(SIGKILL); - pagefault_out_of_memory(); - return MM_FAULT_RETURN; -} - -static int do_sigbus(struct pt_regs *regs, unsigned long address) -{ - siginfo_t info; - - up_read(¤t->mm->mmap_sem); - - if (user_mode(regs)) { - info.si_signo = SIGBUS; - info.si_errno = 0; - info.si_code = BUS_ADRERR; - info.si_addr = (void __user *)address; - force_sig_info(SIGBUS, &info, current); - return MM_FAULT_RETURN; - } - return MM_FAULT_ERR(SIGBUS); -} - -static int mm_fault_error(struct pt_regs *regs, unsigned long addr, int fault) -{ - /* - * Pagefault was interrupted by SIGKILL. We have no reason to - * continue the pagefault. - */ - if (fatal_signal_pending(current)) { - /* - * If we have retry set, the mmap semaphore will have - * alrady been released in __lock_page_or_retry(). Else - * we release it now. - */ - if (!(fault & VM_FAULT_RETRY)) - up_read(¤t->mm->mmap_sem); - /* Coming from kernel, we need to deal with uaccess fixups */ - if (user_mode(regs)) - return MM_FAULT_RETURN; - return MM_FAULT_ERR(SIGKILL); - } - - /* No fault: be happy */ - if (!(fault & VM_FAULT_ERROR)) - return MM_FAULT_CONTINUE; - - /* Out of memory */ - if (fault & VM_FAULT_OOM) - return out_of_memory(regs); - - /* Bus error. 
x86 handles HWPOISON here, we'll add this if/when - * we support the feature in HW - */ - if (fault & VM_FAULT_SIGBUS) - return do_sigbus(regs, addr); - - /* We don't understand the fault code, this is fatal */ - BUG(); - return MM_FAULT_CONTINUE; -} /* * For 600- and 800-family processors, the error_code parameter is DSISR @@ -200,12 +124,11 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address, { struct vm_area_struct * vma; struct mm_struct *mm = current->mm; - unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; + siginfo_t info; int code = SEGV_MAPERR; - int is_write = 0; + int is_write = 0, ret; int trap = TRAP(regs); int is_exec = trap == 0x400; - int fault; #if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE)) /* @@ -222,9 +145,6 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address, is_write = error_code & ESR_DST; #endif /* CONFIG_4xx || CONFIG_BOOKE */ - if (is_write) - flags |= FAULT_FLAG_WRITE; - #ifdef CONFIG_PPC_ICSWX /* * we need to do this early because this "data storage @@ -232,11 +152,13 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address, * look at it */ if (error_code & ICSWX_DSI_UCT) { - int rc = acop_handle_fault(regs, address, error_code); - if (rc) - return rc; + int ret; + + ret = acop_handle_fault(regs, address, error_code); + if (ret) + return ret; } -#endif /* CONFIG_PPC_ICSWX */ +#endif if (notify_page_fault(regs)) return 0; @@ -257,10 +179,6 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address, } #endif - /* We restore the interrupt state now */ - if (!arch_irq_disabled_regs(regs)) - local_irq_enable(); - if (in_atomic() || mm == NULL) { if (!user_mode(regs)) return SIGSEGV; @@ -294,15 +212,7 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address, if (!user_mode(regs) && !search_exception_tables(regs->nip)) goto bad_area_nosemaphore; -retry: down_read(&mm->mmap_sem); - } else { - /* - * The above down_read_trylock() might have succeeded in - * which case we'll have missed the might_sleep() from - * down_read(): - */ - might_sleep(); } vma = find_vma(mm, address); @@ -417,43 +327,30 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address, * make sure we exit gracefully rather than endlessly redo * the fault. */ - fault = handle_mm_fault(mm, vma, address, flags); - if (unlikely(fault & (VM_FAULT_RETRY|VM_FAULT_ERROR))) { - int rc = mm_fault_error(regs, address, fault); - if (rc >= MM_FAULT_RETURN) - return rc; + ret = handle_mm_fault(mm, vma, address, is_write ? FAULT_FLAG_WRITE : 0); + if (unlikely(ret & VM_FAULT_ERROR)) { + if (ret & VM_FAULT_OOM) + goto out_of_memory; + else if (ret & VM_FAULT_SIGBUS) + goto do_sigbus; + BUG(); } - - /* - * Major/minor page fault accounting is only done on the - * initial attempt. If we go through a retry, it is extremely - * likely that the page will be found in page cache at that point. 
- */ - if (flags & FAULT_FLAG_ALLOW_RETRY) { - if (fault & VM_FAULT_MAJOR) { - current->maj_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, - regs, address); + if (ret & VM_FAULT_MAJOR) { + current->maj_flt++; + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, + regs, address); #ifdef CONFIG_PPC_SMLPAR - if (firmware_has_feature(FW_FEATURE_CMO)) { - preempt_disable(); - get_lppaca()->page_ins += (1 << PAGE_FACTOR); - preempt_enable(); - } -#endif /* CONFIG_PPC_SMLPAR */ - } else { - current->min_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, - regs, address); - } - if (fault & VM_FAULT_RETRY) { - /* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk - * of starvation. */ - flags &= ~FAULT_FLAG_ALLOW_RETRY; - goto retry; + if (firmware_has_feature(FW_FEATURE_CMO)) { + preempt_disable(); + get_lppaca()->page_ins += (1 << PAGE_FACTOR); + preempt_enable(); } +#endif + } else { + current->min_flt++; + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, + regs, address); } - up_read(&mm->mmap_sem); return 0; @@ -474,6 +371,28 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address, return SIGSEGV; +/* + * We ran out of memory, or some other thing happened to us that made + * us unable to handle the page fault gracefully. + */ +out_of_memory: + up_read(&mm->mmap_sem); + if (!user_mode(regs)) + return SIGKILL; + pagefault_out_of_memory(); + return 0; + +do_sigbus: + up_read(&mm->mmap_sem); + if (user_mode(regs)) { + info.si_signo = SIGBUS; + info.si_errno = 0; + info.si_code = BUS_ADRERR; + info.si_addr = (void __user *)address; + force_sig_info(SIGBUS, &info, current); + return 0; + } + return SIGBUS; } /* diff --git a/trunk/arch/powerpc/mm/slb_low.S b/trunk/arch/powerpc/mm/slb_low.S index b9ee79ce2200..ef653dc95b65 100644 --- a/trunk/arch/powerpc/mm/slb_low.S +++ b/trunk/arch/powerpc/mm/slb_low.S @@ -217,6 +217,21 @@ slb_finish_load: * free slot first but that took too long. Unfortunately we * dont have any LRU information to help us choose a slot. */ +#ifdef CONFIG_PPC_ISERIES +BEGIN_FW_FTR_SECTION + /* + * On iSeries, the "bolted" stack segment can be cast out on + * shared processor switch so we need to check for a miss on + * it and restore it to the right slot. + */ + ld r9,PACAKSAVE(r13) + clrrdi r9,r9,28 + clrrdi r3,r3,28 + li r10,SLB_NUM_BOLTED-1 /* Stack goes in last bolted slot */ + cmpld r9,r3 + beq 3f +END_FW_FTR_SECTION_IFSET(FW_FEATURE_ISERIES) +#endif /* CONFIG_PPC_ISERIES */ 7: ld r10,PACASTABRR(r13) addi r10,r10,1 @@ -267,6 +282,7 @@ _GLOBAL(slb_compare_rr_to_size) /* * Finish loading of a 1T SLB entry (for the kernel linear mapping) and return. + * We assume legacy iSeries will never have 1T segments. * * r3 = EA, r10 = proto-VSID, r11 = flags, clobbers r9 */ diff --git a/trunk/arch/powerpc/platforms/pseries/eeh.c b/trunk/arch/powerpc/platforms/pseries/eeh.c index c0b40af4ce4f..5f6d37bdd4f5 100644 --- a/trunk/arch/powerpc/platforms/pseries/eeh.c +++ b/trunk/arch/powerpc/platforms/pseries/eeh.c @@ -1,8 +1,8 @@ /* - * eeh.c * Copyright IBM Corporation 2001, 2005, 2006 * Copyright Dave Engebretsen & Todd Inglett 2001 * Copyright Linas Vepstas 2005, 2006 + * Copyright 2001-2012 IBM Corporation. 
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -22,7 +22,7 @@ */ #include -#include /* for init_mm */ +#include #include #include #include @@ -129,9 +129,16 @@ static unsigned long slot_resets; #define IS_BRIDGE(class_code) (((class_code)<<16) == PCI_BASE_CLASS_BRIDGE) -/* --------------------------------------------------------------- */ -/* Below lies the EEH event infrastructure */ - +/** + * rtas_slot_error_detail - Retrieve error log through RTAS call + * @pdn: device node + * @severity: temporary or permanent error log + * @driver_log: driver log to be combined with the retrieved error log + * @loglen: length of driver log + * + * This routine should be called to retrieve error log through the dedicated + * RTAS call. + */ static void rtas_slot_error_detail(struct pci_dn *pdn, int severity, char *driver_log, size_t loglen) { @@ -163,7 +170,7 @@ static void rtas_slot_error_detail(struct pci_dn *pdn, int severity, } /** - * gather_pci_data - copy assorted PCI config space registers to buff + * gather_pci_data - Copy assorted PCI config space registers to buff * @pdn: device to report data for * @buf: point to buffer in which to log * @len: amount of room in buffer @@ -258,6 +265,16 @@ static size_t gather_pci_data(struct pci_dn *pdn, char * buf, size_t len) return n; } +/** + * eeh_slot_error_detail - Generate combined log including driver log and error log + * @pdn: device node + * @severity: temporary or permanent error log + * + * This routine should be called to generate the combined log, which + * is comprised of driver log and error log. The driver log is figured + * out from the config space of the corresponding PCI device, while + * the error log is fetched through platform dependent function call. + */ void eeh_slot_error_detail(struct pci_dn *pdn, int severity) { size_t loglen = 0; @@ -275,6 +292,9 @@ void eeh_slot_error_detail(struct pci_dn *pdn, int severity) * read_slot_reset_state - Read the reset state of a device node's slot * @dn: device node to read * @rets: array to return results in + * + * Read the reset state of a device node's slot through platform dependent + * function call. */ static int read_slot_reset_state(struct pci_dn *pdn, int rets[]) { @@ -300,9 +320,9 @@ static int read_slot_reset_state(struct pci_dn *pdn, int rets[]) } /** - * eeh_wait_for_slot_status - returns error status of slot - * @pdn pci device node - * @max_wait_msecs maximum number to millisecs to wait + * eeh_wait_for_slot_status - Returns error status of slot + * @pdn: pci device node + * @max_wait_msecs: maximum number to millisecs to wait * * Return negative value if a permanent error, else return * Partition Endpoint (PE) status value. 
@@ -332,16 +352,16 @@ eeh_wait_for_slot_status(struct pci_dn *pdn, int max_wait_msecs) mwait = rets[2]; if (mwait <= 0) { - printk (KERN_WARNING - "EEH: Firmware returned bad wait value=%d\n", mwait); + printk(KERN_WARNING "EEH: Firmware returned bad wait value=%d\n", + mwait); mwait = 1000; } else if (mwait > 300*1000) { - printk (KERN_WARNING - "EEH: Firmware is taking too long, time=%d\n", mwait); + printk(KERN_WARNING "EEH: Firmware is taking too long, time=%d\n", + mwait); mwait = 300*1000; } max_wait_msecs -= mwait; - msleep (mwait); + msleep(mwait); } printk(KERN_WARNING "EEH: Timed out waiting for slot status\n"); @@ -349,8 +369,11 @@ eeh_wait_for_slot_status(struct pci_dn *pdn, int max_wait_msecs) } /** - * eeh_token_to_phys - convert EEH address token to phys address - * @token i/o token, should be address in the form 0xA.... + * eeh_token_to_phys - Convert EEH address token to phys address + * @token: I/O token, should be address in the form 0xA.... + * + * This routine should be called to convert virtual I/O address + * to physical one. */ static inline unsigned long eeh_token_to_phys(unsigned long token) { @@ -365,8 +388,11 @@ static inline unsigned long eeh_token_to_phys(unsigned long token) return pa | (token & (PAGE_SIZE-1)); } -/** - * Return the "partitionable endpoint" (pe) under which this device lies +/** + * find_device_pe - Retrieve the PE for the given device + * @dn: device node + * + * Return the PE under which this device lies */ struct device_node * find_device_pe(struct device_node *dn) { @@ -377,14 +403,18 @@ struct device_node * find_device_pe(struct device_node *dn) return dn; } -/** Mark all devices that are children of this device as failed. - * Mark the device driver too, so that it can see the failure - * immediately; this is critical, since some drivers poll - * status registers in interrupts ... If a driver is polling, - * and the slot is frozen, then the driver can deadlock in - * an interrupt context, which is bad. +/** + * __eeh_mark_slot - Mark all child devices as failed + * @parent: parent device + * @mode_flag: failure flag + * + * Mark all devices that are children of this device as failed. + * Mark the device driver too, so that it can see the failure + * immediately; this is critical, since some drivers poll + * status registers in interrupts ... If a driver is polling, + * and the slot is frozen, then the driver can deadlock in + * an interrupt context, which is bad. */ - static void __eeh_mark_slot(struct device_node *parent, int mode_flag) { struct device_node *dn; @@ -404,10 +434,18 @@ static void __eeh_mark_slot(struct device_node *parent, int mode_flag) } } -void eeh_mark_slot (struct device_node *dn, int mode_flag) +/** + * eeh_mark_slot - Mark the indicated device and its children as failed + * @dn: parent device + * @mode_flag: failure flag + * + * Mark the indicated device and its child devices as failed. + * The device drivers are marked as failed as well. + */ +void eeh_mark_slot(struct device_node *dn, int mode_flag) { struct pci_dev *dev; - dn = find_device_pe (dn); + dn = find_device_pe(dn); /* Back up one, since config addrs might be shared */ if (!pcibios_find_pci_bus(dn) && PCI_DN(dn->parent)) @@ -423,6 +461,13 @@ void eeh_mark_slot (struct device_node *dn, int mode_flag) __eeh_mark_slot(dn, mode_flag); } +/** + * __eeh_clear_slot - Clear failure flag for the child devices + * @parent: parent device + * @mode_flag: flag to be cleared + * + * Clear failure flag for the child devices. 
+ */ static void __eeh_clear_slot(struct device_node *parent, int mode_flag) { struct device_node *dn; @@ -436,12 +481,19 @@ static void __eeh_clear_slot(struct device_node *parent, int mode_flag) } } -void eeh_clear_slot (struct device_node *dn, int mode_flag) +/** + * eeh_clear_slot - Clear failure flag for the indicated device and its children + * @dn: parent device + * @mode_flag: flag to be cleared + * + * Clear failure flag for the indicated device and its children. + */ +void eeh_clear_slot(struct device_node *dn, int mode_flag) { unsigned long flags; raw_spin_lock_irqsave(&confirm_error_lock, flags); - dn = find_device_pe (dn); + dn = find_device_pe(dn); /* Back up one, since config addrs might be shared */ if (!pcibios_find_pci_bus(dn) && PCI_DN(dn->parent)) @@ -453,43 +505,10 @@ void eeh_clear_slot (struct device_node *dn, int mode_flag) raw_spin_unlock_irqrestore(&confirm_error_lock, flags); } -void __eeh_set_pe_freset(struct device_node *parent, unsigned int *freset) -{ - struct device_node *dn; - - for_each_child_of_node(parent, dn) { - if (PCI_DN(dn)) { - - struct pci_dev *dev = PCI_DN(dn)->pcidev; - - if (dev && dev->driver) - *freset |= dev->needs_freset; - - __eeh_set_pe_freset(dn, freset); - } - } -} - -void eeh_set_pe_freset(struct device_node *dn, unsigned int *freset) -{ - struct pci_dev *dev; - dn = find_device_pe(dn); - - /* Back up one, since config addrs might be shared */ - if (!pcibios_find_pci_bus(dn) && PCI_DN(dn->parent)) - dn = dn->parent; - - dev = PCI_DN(dn)->pcidev; - if (dev) - *freset |= dev->needs_freset; - - __eeh_set_pe_freset(dn, freset); -} - /** - * eeh_dn_check_failure - check if all 1's data is due to EEH slot freeze - * @dn device node - * @dev pci device, if known + * eeh_dn_check_failure - Check if all 1's data is due to EEH slot freeze + * @dn: device node + * @dev: pci device, if known * * Check for an EEH failure for the given device node. Call this * routine if the result of a read was all 0xff's and you want to @@ -548,11 +567,11 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev) pdn->eeh_check_count ++; if (pdn->eeh_check_count % EEH_MAX_FAILS == 0) { location = of_get_property(dn, "ibm,loc-code", NULL); - printk (KERN_ERR "EEH: %d reads ignored for recovering device at " + printk(KERN_ERR "EEH: %d reads ignored for recovering device at " "location=%s driver=%s pci addr=%s\n", pdn->eeh_check_count, location, eeh_driver_name(dev), eeh_pci_name(dev)); - printk (KERN_ERR "EEH: Might be infinite loop in %s driver\n", + printk(KERN_ERR "EEH: Might be infinite loop in %s driver\n", eeh_driver_name(dev)); dump_stack(); } @@ -579,7 +598,8 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev) } /* Note that config-io to empty slots may fail; - * they are empty when they don't have children. */ + * they are empty when they don't have children. + */ if ((rets[0] == 5) && (rets[2] == 0) && (dn->child == NULL)) { false_positives++; pdn->eeh_false_positives ++; @@ -609,15 +629,17 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev) /* Avoid repeated reports of this failure, including problems * with other functions on this device, and functions under - * bridges. */ - eeh_mark_slot (dn, EEH_MODE_ISOLATED); + * bridges. + */ + eeh_mark_slot(dn, EEH_MODE_ISOLATED); raw_spin_unlock_irqrestore(&confirm_error_lock, flags); - eeh_send_failure_event (dn, dev); + eeh_send_failure_event(dn, dev); /* Most EEH events are due to device driver bugs. 
Having * a stack trace will help the device-driver authors figure - * out what happened. So print that out. */ + * out what happened. So print that out. + */ dump_stack(); return 1; @@ -629,9 +651,9 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev) EXPORT_SYMBOL_GPL(eeh_dn_check_failure); /** - * eeh_check_failure - check if all 1's data is due to EEH slot freeze - * @token i/o token, should be address in the form 0xA.... - * @val value, should be all 1's (XXX why do we need this arg??) + * eeh_check_failure - Check if all 1's data is due to EEH slot freeze + * @token: I/O token, should be address in the form 0xA.... + * @val: value, should be all 1's (XXX why do we need this arg??) * * Check for an EEH failure at the given token address. Call this * routine if the result of a read was all 0xff's and you want to @@ -655,7 +677,7 @@ unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned lon } dn = pci_device_to_OF_node(dev); - eeh_dn_check_failure (dn, dev); + eeh_dn_check_failure(dn, dev); pci_dev_put(dev); return val; @@ -663,14 +685,15 @@ unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned lon EXPORT_SYMBOL(eeh_check_failure); -/* ------------------------------------------------------------- */ -/* The code below deals with error recovery */ /** - * rtas_pci_enable - enable MMIO or DMA transfers for this slot + * rtas_pci_enable - Enable MMIO or DMA transfers for this slot * @pdn pci device node + * + * This routine should be called to reenable frozen MMIO or DMA + * so that it would work correctly again. It's useful while doing + * recovery or log collection on the indicated device. */ - int rtas_pci_enable(struct pci_dn *pdn, int function) { @@ -692,7 +715,7 @@ rtas_pci_enable(struct pci_dn *pdn, int function) printk(KERN_WARNING "EEH: Unexpected state change %d, err=%d dn=%s\n", function, rc, pdn->node->full_name); - rc = eeh_wait_for_slot_status (pdn, PCI_BUS_RESET_WAIT_MSEC); + rc = eeh_wait_for_slot_status(pdn, PCI_BUS_RESET_WAIT_MSEC); if ((rc == 4) && (function == EEH_THAW_MMIO)) return 0; @@ -700,27 +723,25 @@ rtas_pci_enable(struct pci_dn *pdn, int function) } /** - * rtas_pci_slot_reset - raises/lowers the pci #RST line - * @pdn pci device node + * rtas_pci_slot_reset - Raises/Lowers the pci #RST line + * @pdn: pci device node * @state: 1/0 to raise/lower the #RST * * Clear the EEH-frozen condition on a slot. This routine * asserts the PCI #RST line if the 'state' argument is '1', * and drops the #RST line if 'state is '0'. This routine is * safe to call in an interrupt context. 
- * */ - static void rtas_pci_slot_reset(struct pci_dn *pdn, int state) { int config_addr; int rc; - BUG_ON (pdn==NULL); + BUG_ON(pdn==NULL); if (!pdn->phb) { - printk (KERN_WARNING "EEH: in slot reset, device node %s has no phb\n", + printk(KERN_WARNING "EEH: in slot reset, device node %s has no phb\n", pdn->node->full_name); return; } @@ -752,12 +773,12 @@ rtas_pci_slot_reset(struct pci_dn *pdn, int state) /** * pcibios_set_pcie_slot_reset - Set PCI-E reset state - * @dev: pci device struct - * @state: reset state to enter + * @dev: pci device struct + * @state: reset state to enter * * Return value: * 0 if success - **/ + */ int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state state) { struct device_node *dn = pci_device_to_OF_node(dev); @@ -781,10 +802,62 @@ int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state stat } /** - * rtas_set_slot_reset -- assert the pci #RST line for 1/4 second - * @pdn: pci device node to be reset. + * __eeh_set_pe_freset - Check the required reset for child devices + * @parent: parent device + * @freset: return value + * + * Each device might have its preferred reset type: fundamental or + * hot reset. The routine is used to collect the information from + * the child devices so that they could be reset accordingly. */ +void __eeh_set_pe_freset(struct device_node *parent, unsigned int *freset) +{ + struct device_node *dn; + + for_each_child_of_node(parent, dn) { + if (PCI_DN(dn)) { + struct pci_dev *dev = PCI_DN(dn)->pcidev; + + if (dev && dev->driver) + *freset |= dev->needs_freset; + + __eeh_set_pe_freset(dn, freset); + } + } +} + +/** + * eeh_set_pe_freset - Check the required reset for the indicated device and its children + * @dn: parent device + * @freset: return value + * + * Each device might have its preferred reset type: fundamental or + * hot reset. The routine is used to collected the information for + * the indicated device and its children so that the bunch of the + * devices could be reset properly. + */ +void eeh_set_pe_freset(struct device_node *dn, unsigned int *freset) +{ + struct pci_dev *dev; + dn = find_device_pe(dn); + + /* Back up one, since config addrs might be shared */ + if (!pcibios_find_pci_bus(dn) && PCI_DN(dn->parent)) + dn = dn->parent; + dev = PCI_DN(dn)->pcidev; + if (dev) + *freset |= dev->needs_freset; + + __eeh_set_pe_freset(dn, freset); +} + +/** + * __rtas_set_slot_reset - Assert the pci #RST line for 1/4 second + * @pdn: pci device node to be reset. + * + * Assert the PCI #RST line for 1/4 second. + */ static void __rtas_set_slot_reset(struct pci_dn *pdn) { unsigned int freset = 0; @@ -803,25 +876,35 @@ static void __rtas_set_slot_reset(struct pci_dn *pdn) rtas_pci_slot_reset(pdn, 1); /* The PCI bus requires that the reset be held high for at least - * a 100 milliseconds. We wait a bit longer 'just in case'. */ - + * a 100 milliseconds. We wait a bit longer 'just in case'. + */ #define PCI_BUS_RST_HOLD_TIME_MSEC 250 - msleep (PCI_BUS_RST_HOLD_TIME_MSEC); + msleep(PCI_BUS_RST_HOLD_TIME_MSEC); /* We might get hit with another EEH freeze as soon as the * pci slot reset line is dropped. Make sure we don't miss - * these, and clear the flag now. */ - eeh_clear_slot (pdn->node, EEH_MODE_ISOLATED); + * these, and clear the flag now. 
+ */ + eeh_clear_slot(pdn->node, EEH_MODE_ISOLATED); - rtas_pci_slot_reset (pdn, 0); + rtas_pci_slot_reset(pdn, 0); /* After a PCI slot has been reset, the PCI Express spec requires * a 1.5 second idle time for the bus to stabilize, before starting - * up traffic. */ + * up traffic. + */ #define PCI_BUS_SETTLE_TIME_MSEC 1800 - msleep (PCI_BUS_SETTLE_TIME_MSEC); + msleep(PCI_BUS_SETTLE_TIME_MSEC); } +/** + * rtas_set_slot_reset - Reset the indicated PE + * @pdn: PCI device node + * + * This routine should be called to reset indicated device, including + * PE. A PE might include multiple PCI devices and sometimes PCI bridges + * might be involved as well. + */ int rtas_set_slot_reset(struct pci_dn *pdn) { int i, rc; @@ -846,7 +929,6 @@ int rtas_set_slot_reset(struct pci_dn *pdn) return -1; } -/* ------------------------------------------------------- */ /** Save and restore of PCI BARs * * Although firmware will set up BARs during boot, it doesn't @@ -863,7 +945,7 @@ int rtas_set_slot_reset(struct pci_dn *pdn) * the expansion ROM base address, the latency timer, and etc. * from the saved values in the device node. */ -static inline void __restore_bars (struct pci_dn *pdn) +static inline void __restore_bars(struct pci_dn *pdn) { int i; u32 cmd; @@ -879,17 +961,18 @@ static inline void __restore_bars (struct pci_dn *pdn) #define BYTE_SWAP(OFF) (8*((OFF)/4)+3-(OFF)) #define SAVED_BYTE(OFF) (((u8 *)(pdn->config_space))[BYTE_SWAP(OFF)]) - rtas_write_config (pdn, PCI_CACHE_LINE_SIZE, 1, + rtas_write_config(pdn, PCI_CACHE_LINE_SIZE, 1, SAVED_BYTE(PCI_CACHE_LINE_SIZE)); - rtas_write_config (pdn, PCI_LATENCY_TIMER, 1, + rtas_write_config(pdn, PCI_LATENCY_TIMER, 1, SAVED_BYTE(PCI_LATENCY_TIMER)); /* max latency, min grant, interrupt pin and line */ rtas_write_config(pdn, 15*4, 4, pdn->config_space[15]); /* Restore PERR & SERR bits, some devices require it, - don't touch the other command bits */ + * don't touch the other command bits + */ rtas_read_config(pdn, PCI_COMMAND, 4, &cmd); if (pdn->config_space[1] & PCI_COMMAND_PARITY) cmd |= PCI_COMMAND_PARITY; @@ -903,7 +986,8 @@ static inline void __restore_bars (struct pci_dn *pdn) } /** - * eeh_restore_bars - restore the PCI config space info + * eeh_restore_bars - Restore the PCI config space info + * @pdn: PCI device node * * This routine performs a recursive walk to the children * of this device as well. @@ -915,14 +999,15 @@ void eeh_restore_bars(struct pci_dn *pdn) return; if ((pdn->eeh_mode & EEH_MODE_SUPPORTED) && !IS_BRIDGE(pdn->class_code)) - __restore_bars (pdn); + __restore_bars(pdn); for_each_child_of_node(pdn->node, dn) - eeh_restore_bars (PCI_DN(dn)); + eeh_restore_bars(PCI_DN(dn)); } /** - * eeh_save_bars - save device bars + * eeh_save_bars - Save device bars + * @pdn: PCI device node * * Save the values of the device bars. Unlike the restore * routine, this routine is *not* recursive. This is because @@ -940,6 +1025,14 @@ static void eeh_save_bars(struct pci_dn *pdn) rtas_read_config(pdn, i * 4, 4, &pdn->config_space[i]); } +/** + * rtas_configure_bridge - Configure PCI bridges for the indicated PE + * @pdn: PCI device node + * + * PCI bridges might be included in PE. In order to make the PE work + * again. The included PCI bridges should be recovered after the PE + * encounters frozen state. 
+ */ void rtas_configure_bridge(struct pci_dn *pdn) { @@ -963,17 +1056,11 @@ rtas_configure_bridge(struct pci_dn *pdn) BUID_HI(pdn->phb->buid), BUID_LO(pdn->phb->buid)); if (rc) { - printk (KERN_WARNING "EEH: Unable to configure device bridge (%d) for %s\n", + printk(KERN_WARNING "EEH: Unable to configure device bridge (%d) for %s\n", rc, pdn->node->full_name); } } -/* ------------------------------------------------------------- */ -/* The code below deals with enabling EEH for devices during the - * early boot sequence. EEH must be enabled before any PCI probing - * can be done. - */ - #define EEH_ENABLE 1 struct eeh_early_enable_info { @@ -981,7 +1068,18 @@ struct eeh_early_enable_info { unsigned int buid_lo; }; -static int get_pe_addr (int config_addr, +/** + * get_pe_addr - Retrieve PE address with given BDF address + * @config_addr: BDF address + * @info: BUID of the associated PHB + * + * There're 2 kinds of addresses existing in EEH core components: + * BDF address and PE address. Besides, there has dedicated platform + * dependent function call to retrieve the PE address according to + * the given BDF address. Further more, we prefer PE address on BDF + * address in EEH core components. + */ +static int get_pe_addr(int config_addr, struct eeh_early_enable_info *info) { unsigned int rets[3]; @@ -990,12 +1088,12 @@ static int get_pe_addr (int config_addr, /* Use latest config-addr token on power6 */ if (ibm_get_config_addr_info2 != RTAS_UNKNOWN_SERVICE) { /* Make sure we have a PE in hand */ - ret = rtas_call (ibm_get_config_addr_info2, 4, 2, rets, + ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets, config_addr, info->buid_hi, info->buid_lo, 1); if (ret || (rets[0]==0)) return 0; - ret = rtas_call (ibm_get_config_addr_info2, 4, 2, rets, + ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets, config_addr, info->buid_hi, info->buid_lo, 0); if (ret) return 0; @@ -1004,7 +1102,7 @@ static int get_pe_addr (int config_addr, /* Use older config-addr token on power5 */ if (ibm_get_config_addr_info != RTAS_UNKNOWN_SERVICE) { - ret = rtas_call (ibm_get_config_addr_info, 4, 2, rets, + ret = rtas_call(ibm_get_config_addr_info, 4, 2, rets, config_addr, info->buid_hi, info->buid_lo, 0); if (ret) return 0; @@ -1013,7 +1111,15 @@ static int get_pe_addr (int config_addr, return 0; } -/* Enable eeh for the given device node. */ +/** + * early_enable_eeh - Early enable EEH on the indicated device + * @dn: device node + * @data: BUID + * + * Enable EEH functionality on the specified PCI device. The function + * is expected to be called before real PCI probing is done. However, + * the PHBs have been initialized at this point. + */ static void *early_enable_eeh(struct device_node *dn, void *data) { unsigned int rets[3]; @@ -1047,7 +1153,8 @@ static void *early_enable_eeh(struct device_node *dn, void *data) pdn->class_code = *class_code; /* Ok... see if this device supports EEH. Some do, some don't, - * and the only way to find out is to check each and every one. */ + * and the only way to find out is to check each and every one. + */ regs = of_get_property(dn, "reg", NULL); if (regs) { /* First register entry is addr (00BBSS00) */ @@ -1061,13 +1168,15 @@ static void *early_enable_eeh(struct device_node *dn, void *data) pdn->eeh_config_addr = regs[0]; /* If the newer, better, ibm,get-config-addr-info is supported, - * then use that instead. */ + * then use that instead. 
+ */ pdn->eeh_pe_config_addr = get_pe_addr(pdn->eeh_config_addr, info); /* Some older systems (Power4) allow the * ibm,set-eeh-option call to succeed even on nodes * where EEH is not supported. Verify support - * explicitly. */ + * explicitly. + */ ret = read_slot_reset_state(pdn, rets); if ((ret == 0) && (rets[1] == 1)) enable = 1; @@ -1083,7 +1192,8 @@ static void *early_enable_eeh(struct device_node *dn, void *data) } else { /* This device doesn't support EEH, but it may have an - * EEH parent, in which case we mark it as supported. */ + * EEH parent, in which case we mark it as supported. + */ if (dn->parent && PCI_DN(dn->parent) && (PCI_DN(dn->parent)->eeh_mode & EEH_MODE_SUPPORTED)) { /* Parent supports EEH. */ @@ -1101,7 +1211,9 @@ static void *early_enable_eeh(struct device_node *dn, void *data) return NULL; } -/* +/** + * eeh_init - EEH initialization + * * Initialize EEH by trying to enable it for all of the adapters in the system. * As a side effect we can determine here if eeh is supported at all. * Note that we leave EEH on so failed config cycles won't cause a machine @@ -1133,7 +1245,7 @@ void __init eeh_init(void) ibm_slot_error_detail = rtas_token("ibm,slot-error-detail"); ibm_get_config_addr_info = rtas_token("ibm,get-config-addr-info"); ibm_get_config_addr_info2 = rtas_token("ibm,get-config-addr-info2"); - ibm_configure_bridge = rtas_token ("ibm,configure-bridge"); + ibm_configure_bridge = rtas_token("ibm,configure-bridge"); ibm_configure_pe = rtas_token("ibm,configure-pe"); if (ibm_set_eeh_option == RTAS_UNKNOWN_SERVICE) @@ -1170,7 +1282,7 @@ void __init eeh_init(void) } /** - * eeh_add_device_early - enable EEH for the indicated device_node + * eeh_add_device_early - Enable EEH for the indicated device_node * @dn: device node for which to set up EEH * * This routine must be used to perform EEH initialization for PCI @@ -1199,6 +1311,14 @@ static void eeh_add_device_early(struct device_node *dn) early_enable_eeh(dn, &info); } +/** + * eeh_add_device_tree_early - Enable EEH for the indicated device + * @dn: device node + * + * This routine must be used to perform EEH initialization for the + * indicated PCI device that was added after system boot (e.g. + * hotplug, dlpar). + */ void eeh_add_device_tree_early(struct device_node *dn) { struct device_node *sib; @@ -1210,7 +1330,7 @@ void eeh_add_device_tree_early(struct device_node *dn) EXPORT_SYMBOL_GPL(eeh_add_device_tree_early); /** - * eeh_add_device_late - perform EEH initialization for the indicated pci device + * eeh_add_device_late - Perform EEH initialization for the indicated pci device * @dev: pci device for which to set up EEH * * This routine must be used to complete EEH initialization for PCI @@ -1234,13 +1354,21 @@ static void eeh_add_device_late(struct pci_dev *dev) } WARN_ON(pdn->pcidev); - pci_dev_get (dev); + pci_dev_get(dev); pdn->pcidev = dev; pci_addr_cache_insert_device(dev); eeh_sysfs_add_device(dev); } +/** + * eeh_add_device_tree_late - Perform EEH initialization for the indicated PCI bus + * @bus: PCI bus + * + * This routine must be used to perform EEH initialization for PCI + * devices which are attached to the indicated PCI bus. The PCI bus + * is added after system boot through hotplug or dlpar. 
+ */ void eeh_add_device_tree_late(struct pci_bus *bus) { struct pci_dev *dev; @@ -1257,7 +1385,7 @@ void eeh_add_device_tree_late(struct pci_bus *bus) EXPORT_SYMBOL_GPL(eeh_add_device_tree_late); /** - * eeh_remove_device - undo EEH setup for the indicated pci device + * eeh_remove_device - Undo EEH setup for the indicated pci device * @dev: pci device to be removed * * This routine should be called when a device is removed from @@ -1281,12 +1409,20 @@ static void eeh_remove_device(struct pci_dev *dev) return; } PCI_DN(dn)->pcidev = NULL; - pci_dev_put (dev); + pci_dev_put(dev); pci_addr_cache_remove_device(dev); eeh_sysfs_remove_device(dev); } +/** + * eeh_remove_bus_device - Undo EEH setup for the indicated PCI device + * @dev: PCI device + * + * This routine must be called when a device is removed from the + * running system through hotplug or dlpar. The corresponding + * PCI address cache will be removed. + */ void eeh_remove_bus_device(struct pci_dev *dev) { struct pci_bus *bus = dev->subordinate; diff --git a/trunk/arch/powerpc/platforms/pseries/processor_idle.c b/trunk/arch/powerpc/platforms/pseries/processor_idle.c index a12e95af6933..085fd3f45ad2 100644 --- a/trunk/arch/powerpc/platforms/pseries/processor_idle.c +++ b/trunk/arch/powerpc/platforms/pseries/processor_idle.c @@ -96,20 +96,6 @@ static int snooze_loop(struct cpuidle_device *dev, return index; } -static void check_and_cede_processor(void) -{ - /* - * Interrupts are soft-disabled at this point, - * but not hard disabled. So an interrupt might have - * occurred before entering NAP, and would be potentially - * lost (edge events, decrementer events, etc...) unless - * we first hard disable then check. - */ - hard_irq_disable(); - if (get_paca()->irq_happened == 0) - cede_processor(); -} - static int dedicated_cede_loop(struct cpuidle_device *dev, struct cpuidle_driver *drv, int index) @@ -122,7 +108,7 @@ static int dedicated_cede_loop(struct cpuidle_device *dev, ppc64_runlatch_off(); HMT_medium(); - check_and_cede_processor(); + cede_processor(); get_lppaca()->donate_dedicated_cpu = 0; dev->last_residency = @@ -146,7 +132,7 @@ static int shared_cede_loop(struct cpuidle_device *dev, * processor. When returning here, external interrupts * are enabled. */ - check_and_cede_processor(); + cede_processor(); dev->last_residency = (int)idle_loop_epilog(in_purr, kt_before); diff --git a/trunk/arch/powerpc/xmon/xmon.c b/trunk/arch/powerpc/xmon/xmon.c index 974a47b3c9b8..cb95eea74d3d 100644 --- a/trunk/arch/powerpc/xmon/xmon.c +++ b/trunk/arch/powerpc/xmon/xmon.c @@ -1437,8 +1437,7 @@ static void excprint(struct pt_regs *fp) printf(" current = 0x%lx\n", current); #ifdef CONFIG_PPC64 - printf(" paca = 0x%lx\t softe: %d\t irq_happened: 0x%02x\n", - local_paca, local_paca->soft_enabled, local_paca->irq_happened); + printf(" paca = 0x%lx\n", get_paca()); #endif if (current) { printf(" pid = %ld, comm = %s\n", @@ -1642,7 +1641,7 @@ static void super_regs(void) /* Dump out relevant Paca data areas. */ printf("Paca: \n"); - ptrPaca = local_paca; + ptrPaca = get_paca(); printf(" Local Processor Control Area (LpPaca): \n"); ptrLpPaca = ptrPaca->lppaca_ptr; @@ -2645,7 +2644,7 @@ static void dump_slb(void) static void dump_stab(void) { int i; - unsigned long *tmp = (unsigned long *)local_paca->stab_addr; + unsigned long *tmp = (unsigned long *)get_paca()->stab_addr; printf("Segment table contents of cpu %x\n", smp_processor_id());
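
As an aside on the __restore_bars() hunk earlier in this patch: the BYTE_SWAP()/SAVED_BYTE() macros are easy to misread. eeh_save_bars() stores config space as sixteen u32 words, and BYTE_SWAP() maps a config-space byte offset to the matching byte index in that saved array by reversing the byte order within each 4-byte word, which is how each dword read via rtas_read_config() lands in memory on the big-endian CPU. Below is a minimal stand-alone sketch of just that index arithmetic; only the BYTE_SWAP() definition is copied from the patch, everything else is invented for the example and is not part of the kernel code.

#include <stdio.h>

#define BYTE_SWAP(OFF) (8*((OFF)/4)+3-(OFF))

int main(void)
{
	int off;

	/* Offsets 0..7 map to saved-byte indices 3,2,1,0,7,6,5,4: the byte
	 * order is reversed within each 4-byte word, matching how each
	 * dword read from config space was stored by eeh_save_bars().
	 */
	for (off = 0; off < 8; off++)
		printf("config offset %d -> saved byte index %d\n",
		       off, BYTE_SWAP(off));
	return 0;
}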