From 300043b7ecec9ae88bad5650a5caab085d8b07a8 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Thu, 31 Jan 2013 14:40:49 -0500 Subject: [PATCH] --- yaml --- r: 351100 b: refs/heads/master c: 85a4d2d41dc6d1c0296326204a857a9fab864a31 h: refs/heads/master v: v3 --- [refs] | 2 +- trunk/arch/arm/kernel/process.c | 13 ++- trunk/arch/arm64/kernel/process.c | 13 ++- trunk/arch/blackfin/kernel/process.c | 7 ++ trunk/arch/cris/kernel/process.c | 11 +- trunk/arch/ia64/kernel/process.c | 3 + trunk/arch/ia64/kernel/setup.c | 1 + trunk/arch/m32r/kernel/process.c | 51 ++++++++- trunk/arch/microblaze/kernel/process.c | 3 + trunk/arch/mn10300/kernel/process.c | 7 ++ trunk/arch/openrisc/kernel/idle.c | 5 + trunk/arch/sh/kernel/idle.c | 12 +-- trunk/arch/sparc/include/asm/processor_32.h | 1 - trunk/arch/sparc/kernel/apc.c | 3 +- trunk/arch/sparc/kernel/leon_pmc.c | 5 +- trunk/arch/sparc/kernel/pmc.c | 3 +- trunk/arch/sparc/kernel/process_32.c | 7 +- trunk/arch/unicore32/kernel/process.c | 5 + trunk/arch/x86/Kconfig | 1 - trunk/arch/x86/kernel/apm_32.c | 57 ++++------ trunk/arch/x86/kernel/process.c | 31 +++--- trunk/drivers/idle/intel_idle.c | 114 +++++++++++--------- trunk/include/linux/pm.h | 1 + 23 files changed, 231 insertions(+), 125 deletions(-) diff --git a/[refs] b/[refs] index 1cc42d0eedda..bcec300e6ba6 100644 --- a/[refs] +++ b/[refs] @@ -1,2 +1,2 @@ --- -refs/heads/master: 558bd3e8dc7a798c5c845f90cf038b9bbd2df2b8 +refs/heads/master: 85a4d2d41dc6d1c0296326204a857a9fab864a31 diff --git a/trunk/arch/arm/kernel/process.c b/trunk/arch/arm/kernel/process.c index 047d3e40e470..c6dec5fc20aa 100644 --- a/trunk/arch/arm/kernel/process.c +++ b/trunk/arch/arm/kernel/process.c @@ -172,9 +172,14 @@ static void default_idle(void) local_irq_enable(); } +void (*pm_idle)(void) = default_idle; +EXPORT_SYMBOL(pm_idle); + /* - * The idle thread. - * We always respect 'hlt_counter' to prevent low power idle. 
+ * The idle thread, has rather strange semantics for calling pm_idle, + * but this is what x86 does and we need to do the same, so that + * things like cpuidle get called in the same way. The only difference + * is that we always respect 'hlt_counter' to prevent low power idle. */ void cpu_idle(void) { @@ -205,10 +210,10 @@ void cpu_idle(void) } else if (!need_resched()) { stop_critical_timings(); if (cpuidle_idle_call()) - default_idle(); + pm_idle(); start_critical_timings(); /* - * default_idle functions must always + * pm_idle functions must always * return with IRQs enabled. */ WARN_ON(irqs_disabled()); diff --git a/trunk/arch/arm64/kernel/process.c b/trunk/arch/arm64/kernel/process.c index c7002d40a9b0..cb0956bc96ed 100644 --- a/trunk/arch/arm64/kernel/process.c +++ b/trunk/arch/arm64/kernel/process.c @@ -97,9 +97,14 @@ static void default_idle(void) local_irq_enable(); } +void (*pm_idle)(void) = default_idle; +EXPORT_SYMBOL_GPL(pm_idle); + /* - * The idle thread. - * We always respect 'hlt_counter' to prevent low power idle. + * The idle thread, has rather strange semantics for calling pm_idle, + * but this is what x86 does and we need to do the same, so that + * things like cpuidle get called in the same way. The only difference + * is that we always respect 'hlt_counter' to prevent low power idle. */ void cpu_idle(void) { @@ -117,10 +122,10 @@ void cpu_idle(void) local_irq_disable(); if (!need_resched()) { stop_critical_timings(); - default_idle(); + pm_idle(); start_critical_timings(); /* - * default_idle functions should always return + * pm_idle functions should always return * with IRQs enabled. 
*/ WARN_ON(irqs_disabled()); diff --git a/trunk/arch/blackfin/kernel/process.c b/trunk/arch/blackfin/kernel/process.c index 8061426b7df5..3e16ad9b0a99 100644 --- a/trunk/arch/blackfin/kernel/process.c +++ b/trunk/arch/blackfin/kernel/process.c @@ -39,6 +39,12 @@ int nr_l1stack_tasks; void *l1_stack_base; unsigned long l1_stack_len; +/* + * Powermanagement idle function, if any.. + */ +void (*pm_idle)(void) = NULL; +EXPORT_SYMBOL(pm_idle); + void (*pm_power_off)(void) = NULL; EXPORT_SYMBOL(pm_power_off); @@ -75,6 +81,7 @@ void cpu_idle(void) { /* endless idle loop with no priority at all */ while (1) { + void (*idle)(void) = pm_idle; #ifdef CONFIG_HOTPLUG_CPU if (cpu_is_offline(smp_processor_id())) diff --git a/trunk/arch/cris/kernel/process.c b/trunk/arch/cris/kernel/process.c index 104ff4dd9b98..7f65be6f7f17 100644 --- a/trunk/arch/cris/kernel/process.c +++ b/trunk/arch/cris/kernel/process.c @@ -54,6 +54,11 @@ void enable_hlt(void) EXPORT_SYMBOL(enable_hlt); +/* + * The following aren't currently used. + */ +void (*pm_idle)(void); + extern void default_idle(void); void (*pm_power_off)(void); @@ -72,12 +77,16 @@ void cpu_idle (void) while (1) { rcu_idle_enter(); while (!need_resched()) { + void (*idle)(void); /* * Mark this as an RCU critical section so that * synchronize_kernel() in the unload path waits * for our completion. 
*/ - default_idle(); + idle = pm_idle; + if (!idle) + idle = default_idle; + idle(); } rcu_idle_exit(); schedule_preempt_disabled(); diff --git a/trunk/arch/ia64/kernel/process.c b/trunk/arch/ia64/kernel/process.c index e34f565f595a..31360cbbd5f8 100644 --- a/trunk/arch/ia64/kernel/process.c +++ b/trunk/arch/ia64/kernel/process.c @@ -57,6 +57,8 @@ void (*ia64_mark_idle)(int); unsigned long boot_option_idle_override = IDLE_NO_OVERRIDE; EXPORT_SYMBOL(boot_option_idle_override); +void (*pm_idle) (void); +EXPORT_SYMBOL(pm_idle); void (*pm_power_off) (void); EXPORT_SYMBOL(pm_power_off); @@ -299,6 +301,7 @@ cpu_idle (void) if (mark_idle) (*mark_idle)(1); + idle = pm_idle; if (!idle) idle = default_idle; (*idle)(); diff --git a/trunk/arch/ia64/kernel/setup.c b/trunk/arch/ia64/kernel/setup.c index 2029cc0d2fc6..aaefd9b94f2f 100644 --- a/trunk/arch/ia64/kernel/setup.c +++ b/trunk/arch/ia64/kernel/setup.c @@ -1051,6 +1051,7 @@ cpu_init (void) max_num_phys_stacked = num_phys_stacked; } platform_cpu_init(); + pm_idle = default_idle; } void __init diff --git a/trunk/arch/m32r/kernel/process.c b/trunk/arch/m32r/kernel/process.c index bde899e155d3..765d0f57c787 100644 --- a/trunk/arch/m32r/kernel/process.c +++ b/trunk/arch/m32r/kernel/process.c @@ -44,9 +44,35 @@ unsigned long thread_saved_pc(struct task_struct *tsk) return tsk->thread.lr; } +/* + * Powermanagement idle function, if any.. + */ +static void (*pm_idle)(void) = NULL; + void (*pm_power_off)(void) = NULL; EXPORT_SYMBOL(pm_power_off); +/* + * We use this if we don't have any better + * idle routine.. + */ +static void default_idle(void) +{ + /* M32R_FIXME: Please use "cpu_sleep" mode. */ + cpu_relax(); +} + +/* + * On SMP it's slightly faster (but much more power-consuming!) + * to poll the ->work.need_resched flag instead of waiting for the + * cross-CPU IPI to arrive. Use this option with caution. + */ +static void poll_idle (void) +{ + /* M32R_FIXME */ + cpu_relax(); +} + /* * The idle thread.
There's no useful work to be * done, so just try to conserve power and have a @@ -58,8 +84,14 @@ void cpu_idle (void) /* endless idle loop with no priority at all */ while (1) { rcu_idle_enter(); - while (!need_resched()) - cpu_relax(); + while (!need_resched()) { + void (*idle)(void) = pm_idle; + + if (!idle) + idle = default_idle; + + idle(); + } rcu_idle_exit(); schedule_preempt_disabled(); } @@ -88,6 +120,21 @@ void machine_power_off(void) /* M32R_FIXME */ } +static int __init idle_setup (char *str) +{ + if (!strncmp(str, "poll", 4)) { + printk("using poll in idle threads.\n"); + pm_idle = poll_idle; + } else if (!strncmp(str, "sleep", 4)) { + printk("using sleep in idle threads.\n"); + pm_idle = default_idle; + } + + return 1; +} + +__setup("idle=", idle_setup); + void show_regs(struct pt_regs * regs) { printk("\n"); diff --git a/trunk/arch/microblaze/kernel/process.c b/trunk/arch/microblaze/kernel/process.c index 6ff2dcff3410..a5b74f729e5b 100644 --- a/trunk/arch/microblaze/kernel/process.c +++ b/trunk/arch/microblaze/kernel/process.c @@ -41,6 +41,7 @@ void show_regs(struct pt_regs *regs) regs->msr, regs->ear, regs->esr, regs->fsr); } +void (*pm_idle)(void); void (*pm_power_off)(void) = NULL; EXPORT_SYMBOL(pm_power_off); @@ -97,6 +98,8 @@ void cpu_idle(void) /* endless idle loop with no priority at all */ while (1) { + void (*idle)(void) = pm_idle; + if (!idle) idle = default_idle; diff --git a/trunk/arch/mn10300/kernel/process.c b/trunk/arch/mn10300/kernel/process.c index 84f4e97e3074..eb09f5a552ff 100644 --- a/trunk/arch/mn10300/kernel/process.c +++ b/trunk/arch/mn10300/kernel/process.c @@ -36,6 +36,12 @@ #include #include "internal.h" +/* + * power management idle function, if any.. + */ +void (*pm_idle)(void); +EXPORT_SYMBOL(pm_idle); + /* * return saved PC of a blocked thread. 
*/ @@ -107,6 +113,7 @@ void cpu_idle(void) void (*idle)(void); smp_rmb(); + idle = pm_idle; if (!idle) { #if defined(CONFIG_SMP) && !defined(CONFIG_HOTPLUG_CPU) idle = poll_idle; diff --git a/trunk/arch/openrisc/kernel/idle.c b/trunk/arch/openrisc/kernel/idle.c index 5e8a3b6d6bc6..7d618feb1b72 100644 --- a/trunk/arch/openrisc/kernel/idle.c +++ b/trunk/arch/openrisc/kernel/idle.c @@ -39,6 +39,11 @@ void (*powersave) (void) = NULL; +static inline void pm_idle(void) +{ + barrier(); +} + void cpu_idle(void) { set_thread_flag(TIF_POLLING_NRFLAG); diff --git a/trunk/arch/sh/kernel/idle.c b/trunk/arch/sh/kernel/idle.c index 3d5a1b387cc0..0c910163caa3 100644 --- a/trunk/arch/sh/kernel/idle.c +++ b/trunk/arch/sh/kernel/idle.c @@ -22,7 +22,7 @@ #include #include -static void (*sh_idle)(void); +void (*pm_idle)(void); static int hlt_counter; @@ -103,9 +103,9 @@ void cpu_idle(void) /* Don't trace irqs off for idle */ stop_critical_timings(); if (cpuidle_idle_call()) - sh_idle(); + pm_idle(); /* - * Sanity check to ensure that sh_idle() returns + * Sanity check to ensure that pm_idle() returns * with IRQs enabled */ WARN_ON(irqs_disabled()); @@ -123,13 +123,13 @@ void __init select_idle_routine(void) /* * If a platform has set its own idle routine, leave it alone. 
*/ - if (sh_idle) + if (pm_idle) return; if (hlt_works()) - sh_idle = default_idle; + pm_idle = default_idle; else - sh_idle = poll_idle; + pm_idle = poll_idle; } void stop_this_cpu(void *unused) diff --git a/trunk/arch/sparc/include/asm/processor_32.h b/trunk/arch/sparc/include/asm/processor_32.h index 2c7baa4c4505..c1e01914fd98 100644 --- a/trunk/arch/sparc/include/asm/processor_32.h +++ b/trunk/arch/sparc/include/asm/processor_32.h @@ -118,7 +118,6 @@ extern unsigned long get_wchan(struct task_struct *); extern struct task_struct *last_task_used_math; #define cpu_relax() barrier() -extern void (*sparc_idle)(void); #endif diff --git a/trunk/arch/sparc/kernel/apc.c b/trunk/arch/sparc/kernel/apc.c index eefda32b595e..348fa1aeabce 100644 --- a/trunk/arch/sparc/kernel/apc.c +++ b/trunk/arch/sparc/kernel/apc.c @@ -20,7 +20,6 @@ #include #include #include -#include /* Debugging * @@ -159,7 +158,7 @@ static int apc_probe(struct platform_device *op) /* Assign power management IDLE handler */ if (!apc_no_idle) - sparc_idle = apc_swift_idle; + pm_idle = apc_swift_idle; printk(KERN_INFO "%s: power management initialized%s\n", APC_DEVNAME, apc_no_idle ? 
" (CPU idle disabled)" : ""); diff --git a/trunk/arch/sparc/kernel/leon_pmc.c b/trunk/arch/sparc/kernel/leon_pmc.c index 708bca435219..4e174321097d 100644 --- a/trunk/arch/sparc/kernel/leon_pmc.c +++ b/trunk/arch/sparc/kernel/leon_pmc.c @@ -9,7 +9,6 @@ #include #include #include -#include /* List of Systems that need fixup instructions around power-down instruction */ unsigned int pmc_leon_fixup_ids[] = { @@ -70,9 +69,9 @@ static int __init leon_pmc_install(void) if (sparc_cpu_model == sparc_leon) { /* Assign power management IDLE handler */ if (pmc_leon_need_fixup()) - sparc_idle = pmc_leon_idle_fixup; + pm_idle = pmc_leon_idle_fixup; else - sparc_idle = pmc_leon_idle; + pm_idle = pmc_leon_idle; printk(KERN_INFO "leon: power management initialized\n"); } diff --git a/trunk/arch/sparc/kernel/pmc.c b/trunk/arch/sparc/kernel/pmc.c index 8b7297faca79..dcbb62f63068 100644 --- a/trunk/arch/sparc/kernel/pmc.c +++ b/trunk/arch/sparc/kernel/pmc.c @@ -17,7 +17,6 @@ #include #include #include -#include /* Debug * @@ -64,7 +63,7 @@ static int pmc_probe(struct platform_device *op) #ifndef PMC_NO_IDLE /* Assign power management IDLE handler */ - sparc_idle = pmc_swift_idle; + pm_idle = pmc_swift_idle; #endif printk(KERN_INFO "%s: power management initialized\n", PMC_DEVNAME); diff --git a/trunk/arch/sparc/kernel/process_32.c b/trunk/arch/sparc/kernel/process_32.c index 62eede13831a..be8e862badaf 100644 --- a/trunk/arch/sparc/kernel/process_32.c +++ b/trunk/arch/sparc/kernel/process_32.c @@ -43,7 +43,8 @@ * Power management idle function * Set in pm platform drivers (apc.c and pmc.c) */ -void (*sparc_idle)(void); +void (*pm_idle)(void); +EXPORT_SYMBOL(pm_idle); /* * Power-off handler instantiation for pm.h compliance @@ -74,8 +75,8 @@ void cpu_idle(void) /* endless idle loop with no priority at all */ for (;;) { while (!need_resched()) { - if (sparc_idle) - (*sparc_idle)(); + if (pm_idle) + (*pm_idle)(); else cpu_relax(); } diff --git a/trunk/arch/unicore32/kernel/process.c 
b/trunk/arch/unicore32/kernel/process.c index 872d7e22d847..62bad9fed03e 100644 --- a/trunk/arch/unicore32/kernel/process.c +++ b/trunk/arch/unicore32/kernel/process.c @@ -45,6 +45,11 @@ static const char * const processor_modes[] = { "UK18", "UK19", "UK1A", "EXTN", "UK1C", "UK1D", "UK1E", "SUSR" }; +/* + * The idle thread, has rather strange semantics for calling pm_idle, + * but this is what x86 does and we need to do the same, so that + * things like cpuidle get called in the same way. + */ void cpu_idle(void) { /* endless idle loop with no priority at all */ diff --git a/trunk/arch/x86/Kconfig b/trunk/arch/x86/Kconfig index 1b635861401c..225543bf45a5 100644 --- a/trunk/arch/x86/Kconfig +++ b/trunk/arch/x86/Kconfig @@ -1912,7 +1912,6 @@ config APM_DO_ENABLE this feature. config APM_CPU_IDLE - depends on CPU_IDLE bool "Make CPU Idle calls when idle" ---help--- Enable calls to APM CPU Idle/CPU Busy inside the kernel's idle loop. diff --git a/trunk/arch/x86/kernel/apm_32.c b/trunk/arch/x86/kernel/apm_32.c index 9f4bc6a1164d..d65464e43503 100644 --- a/trunk/arch/x86/kernel/apm_32.c +++ b/trunk/arch/x86/kernel/apm_32.c @@ -232,7 +232,6 @@ #include #include #include -#include #include #include @@ -361,35 +360,13 @@ struct apm_user { * idle percentage above which bios idle calls are done */ #ifdef CONFIG_APM_CPU_IDLE +#warning deprecated CONFIG_APM_CPU_IDLE will be deleted in 2012 #define DEFAULT_IDLE_THRESHOLD 95 #else #define DEFAULT_IDLE_THRESHOLD 100 #endif #define DEFAULT_IDLE_PERIOD (100 / 3) -static int apm_cpu_idle(struct cpuidle_device *dev, - struct cpuidle_driver *drv, int index); - -static struct cpuidle_driver apm_idle_driver = { - .name = "apm_idle", - .owner = THIS_MODULE, - .en_core_tk_irqen = 1, - .states = { - { /* entry 0 is for polling */ }, - { /* entry 1 is for APM idle */ - .name = "APM", - .desc = "APM idle", - .flags = CPUIDLE_FLAG_TIME_VALID, - .exit_latency = 250, /* WAG */ - .target_residency = 500, /* WAG */ - .enter = &apm_cpu_idle - }, - 
}, - .state_count = 2, -}; - -static struct cpuidle_device apm_cpuidle_device; - /* * Local variables */ @@ -400,6 +377,7 @@ static struct { static int clock_slowed; static int idle_threshold __read_mostly = DEFAULT_IDLE_THRESHOLD; static int idle_period __read_mostly = DEFAULT_IDLE_PERIOD; +static int set_pm_idle; static int suspends_pending; static int standbys_pending; static int ignore_sys_suspend; @@ -906,6 +884,8 @@ static void apm_do_busy(void) #define IDLE_CALC_LIMIT (HZ * 100) #define IDLE_LEAKY_MAX 16 +static void (*original_pm_idle)(void) __read_mostly; + /** * apm_cpu_idle - cpu idling for APM capable Linux * @@ -914,8 +894,7 @@ static void apm_do_busy(void) * Furthermore it calls the system default idle routine. */ -static int apm_cpu_idle(struct cpuidle_device *dev, - struct cpuidle_driver *drv, int index) +static void apm_cpu_idle(void) { static int use_apm_idle; /* = 0 */ static unsigned int last_jiffies; /* = 0 */ @@ -925,6 +904,7 @@ static int apm_cpu_idle(struct cpuidle_device *dev, unsigned int jiffies_since_last_check = jiffies - last_jiffies; unsigned int bucket; + WARN_ONCE(1, "deprecated apm_cpu_idle will be deleted in 2012"); recalc: if (jiffies_since_last_check > IDLE_CALC_LIMIT) { use_apm_idle = 0; @@ -970,7 +950,10 @@ static int apm_cpu_idle(struct cpuidle_device *dev, break; } } - default_idle(); + if (original_pm_idle) + original_pm_idle(); + else + default_idle(); local_irq_disable(); jiffies_since_last_check = jiffies - last_jiffies; if (jiffies_since_last_check > idle_period) @@ -980,7 +963,7 @@ static int apm_cpu_idle(struct cpuidle_device *dev, if (apm_idle_done) apm_do_busy(); - return index; + local_irq_enable(); } /** @@ -2398,9 +2381,9 @@ static int __init apm_init(void) if (HZ != 100) idle_period = (idle_period * HZ) / 100; if (idle_threshold < 100) { - if (!cpuidle_register_driver(&apm_idle_driver)) - if (cpuidle_register_device(&apm_cpuidle_device)) - cpuidle_unregister_driver(&apm_idle_driver); + original_pm_idle = 
pm_idle; + pm_idle = apm_cpu_idle; + set_pm_idle = 1; } return 0; @@ -2410,9 +2393,15 @@ static void __exit apm_exit(void) { int error; - cpuidle_unregister_device(&apm_cpuidle_device); - cpuidle_unregister_driver(&apm_idle_driver); - + if (set_pm_idle) { + pm_idle = original_pm_idle; + /* + * We are about to unload the current idle thread pm callback + * (pm_idle). Wait for all processors to update cached/local + * copies of pm_idle before proceeding. + */ + kick_all_cpus_sync(); + } if (((apm_info.bios.flags & APM_BIOS_DISENGAGED) == 0) && (apm_info.connection_version > 0x0100)) { error = apm_engage_power_management(APM_DEVICE_ALL, 0); diff --git a/trunk/arch/x86/kernel/process.c b/trunk/arch/x86/kernel/process.c index ceb05db59be1..2ed787f15bf0 100644 --- a/trunk/arch/x86/kernel/process.c +++ b/trunk/arch/x86/kernel/process.c @@ -268,7 +268,13 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, unsigned long boot_option_idle_override = IDLE_NO_OVERRIDE; EXPORT_SYMBOL(boot_option_idle_override); -static void (*x86_idle)(void); +/* + * Powermanagement idle function, if any..
+ */ +void (*pm_idle)(void); +#ifdef CONFIG_APM_MODULE +EXPORT_SYMBOL(pm_idle); +#endif #ifndef CONFIG_SMP static inline void play_dead(void) @@ -345,7 +351,7 @@ void cpu_idle(void) rcu_idle_enter(); if (cpuidle_idle_call()) - x86_idle(); + pm_idle(); rcu_idle_exit(); start_critical_timings(); @@ -392,9 +398,9 @@ EXPORT_SYMBOL(default_idle); bool set_pm_idle_to_default(void) { - bool ret = !!x86_idle; + bool ret = !!pm_idle; - x86_idle = default_idle; + pm_idle = default_idle; return ret; } @@ -561,10 +567,11 @@ static void amd_e400_idle(void) void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) { #ifdef CONFIG_SMP - if (x86_idle == poll_idle && smp_num_siblings > 1) + if (pm_idle == poll_idle && smp_num_siblings > 1) { pr_warn_once("WARNING: polling idle and HT enabled, performance may degrade\n"); + } #endif - if (x86_idle) + if (pm_idle) return; if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) { @@ -572,19 +579,19 @@ void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) * One CPU supports mwait => All CPUs supports mwait */ pr_info("using mwait in idle threads\n"); - x86_idle = mwait_idle; + pm_idle = mwait_idle; } else if (cpu_has_amd_erratum(amd_erratum_400)) { /* E400: APIC timer interrupt does not wake up CPU from C1e */ pr_info("using AMD E400 aware idle routine\n"); - x86_idle = amd_e400_idle; + pm_idle = amd_e400_idle; } else - x86_idle = default_idle; + pm_idle = default_idle; } void __init init_amd_e400_c1e_mask(void) { /* If we're using amd_e400_idle, we need to allocate amd_e400_c1e_mask. 
*/ - if (x86_idle == amd_e400_idle) + if (pm_idle == amd_e400_idle) zalloc_cpumask_var(&amd_e400_c1e_mask, GFP_KERNEL); } @@ -595,7 +602,7 @@ static int __init idle_setup(char *str) if (!strcmp(str, "poll")) { pr_info("using polling idle threads\n"); - x86_idle = poll_idle; + pm_idle = poll_idle; boot_option_idle_override = IDLE_POLL; } else if (!strcmp(str, "mwait")) { boot_option_idle_override = IDLE_FORCE_MWAIT; @@ -608,7 +615,7 @@ static int __init idle_setup(char *str) * To continue to load the CPU idle driver, don't touch * the boot_option_idle_override. */ - x86_idle = default_idle; + pm_idle = default_idle; boot_option_idle_override = IDLE_HALT; } else if (!strcmp(str, "nomwait")) { /* diff --git a/trunk/drivers/idle/intel_idle.c b/trunk/drivers/idle/intel_idle.c index 2df9414a72f7..fa714774b960 100644 --- a/trunk/drivers/idle/intel_idle.c +++ b/trunk/drivers/idle/intel_idle.c @@ -108,6 +108,16 @@ static struct cpuidle_state *cpuidle_state_table; */ #define CPUIDLE_FLAG_TLB_FLUSHED 0x10000 +/* + * MWAIT takes an 8-bit "hint" in EAX "suggesting" + * the C-state (top nibble) and sub-state (bottom nibble) + * 0x00 means "MWAIT(C1)", 0x10 means "MWAIT(C2)" etc. + * + * We store the hint at the top of our "flags" for each state. + */ +#define flg2MWAIT(flags) (((flags) >> 24) & 0xFF) +#define MWAIT2flg(eax) ((eax & 0xFF) << 24) + /* * States are indexed by the cstate number, * which is also the index into the MWAIT hint array. 
@@ -118,21 +128,21 @@ static struct cpuidle_state nehalem_cstates[MWAIT_MAX_NUM_CSTATES] = { { /* MWAIT C1 */ .name = "C1-NHM", .desc = "MWAIT 0x00", - .flags = CPUIDLE_FLAG_TIME_VALID, + .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_TIME_VALID, .exit_latency = 3, .target_residency = 6, .enter = &intel_idle }, { /* MWAIT C2 */ .name = "C3-NHM", .desc = "MWAIT 0x10", - .flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, + .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 20, .target_residency = 80, .enter = &intel_idle }, { /* MWAIT C3 */ .name = "C6-NHM", .desc = "MWAIT 0x20", - .flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, + .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 200, .target_residency = 800, .enter = &intel_idle }, @@ -143,28 +153,28 @@ static struct cpuidle_state snb_cstates[MWAIT_MAX_NUM_CSTATES] = { { /* MWAIT C1 */ .name = "C1-SNB", .desc = "MWAIT 0x00", - .flags = CPUIDLE_FLAG_TIME_VALID, + .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_TIME_VALID, .exit_latency = 1, .target_residency = 1, .enter = &intel_idle }, { /* MWAIT C2 */ .name = "C3-SNB", .desc = "MWAIT 0x10", - .flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, + .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 80, .target_residency = 211, .enter = &intel_idle }, { /* MWAIT C3 */ .name = "C6-SNB", .desc = "MWAIT 0x20", - .flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, + .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 104, .target_residency = 345, .enter = &intel_idle }, { /* MWAIT C4 */ .name = "C7-SNB", .desc = "MWAIT 0x30", - .flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, + .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 109, .target_residency = 345, .enter = &intel_idle }, @@ -175,46 +185,78 @@ static struct 
cpuidle_state ivb_cstates[MWAIT_MAX_NUM_CSTATES] = { { /* MWAIT C1 */ .name = "C1-IVB", .desc = "MWAIT 0x00", - .flags = CPUIDLE_FLAG_TIME_VALID, + .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_TIME_VALID, .exit_latency = 1, .target_residency = 1, .enter = &intel_idle }, { /* MWAIT C2 */ .name = "C3-IVB", .desc = "MWAIT 0x10", - .flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, + .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 59, .target_residency = 156, .enter = &intel_idle }, { /* MWAIT C3 */ .name = "C6-IVB", .desc = "MWAIT 0x20", - .flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, + .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 80, .target_residency = 300, .enter = &intel_idle }, { /* MWAIT C4 */ .name = "C7-IVB", .desc = "MWAIT 0x30", - .flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, + .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 87, .target_residency = 300, .enter = &intel_idle }, }; +static struct cpuidle_state hsw_cstates[MWAIT_MAX_NUM_CSTATES] = { + { /* MWAIT C0 */ }, + { /* MWAIT C1 */ + .name = "C1-HSW", + .desc = "MWAIT 0x00", + .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_TIME_VALID, + .exit_latency = 2, + .target_residency = 2, + .enter = &intel_idle }, + { /* MWAIT C2 */ + .name = "C3-HSW", + .desc = "MWAIT 0x10", + .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, + .exit_latency = 33, + .target_residency = 100, + .enter = &intel_idle }, + { /* MWAIT C3 */ + .name = "C6-HSW", + .desc = "MWAIT 0x20", + .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, + .exit_latency = 133, + .target_residency = 400, + .enter = &intel_idle }, + { /* MWAIT C4 */ + .name = "C7s-HSW", + .desc = "MWAIT 0x32", + .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, + .exit_latency = 166, + .target_residency = 500, + 
.enter = &intel_idle }, +}; + static struct cpuidle_state atom_cstates[MWAIT_MAX_NUM_CSTATES] = { { /* MWAIT C0 */ }, { /* MWAIT C1 */ .name = "C1-ATM", .desc = "MWAIT 0x00", - .flags = CPUIDLE_FLAG_TIME_VALID, + .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_TIME_VALID, .exit_latency = 1, .target_residency = 4, .enter = &intel_idle }, { /* MWAIT C2 */ .name = "C2-ATM", .desc = "MWAIT 0x10", - .flags = CPUIDLE_FLAG_TIME_VALID, + .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TIME_VALID, .exit_latency = 20, .target_residency = 80, .enter = &intel_idle }, @@ -222,7 +264,7 @@ static struct cpuidle_state atom_cstates[MWAIT_MAX_NUM_CSTATES] = { { /* MWAIT C4 */ .name = "C4-ATM", .desc = "MWAIT 0x30", - .flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, + .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 100, .target_residency = 400, .enter = &intel_idle }, @@ -230,41 +272,12 @@ static struct cpuidle_state atom_cstates[MWAIT_MAX_NUM_CSTATES] = { { /* MWAIT C6 */ .name = "C6-ATM", .desc = "MWAIT 0x52", - .flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, + .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 140, .target_residency = 560, .enter = &intel_idle }, }; -static long get_driver_data(int cstate) -{ - int driver_data; - switch (cstate) { - - case 1: /* MWAIT C1 */ - driver_data = 0x00; - break; - case 2: /* MWAIT C2 */ - driver_data = 0x10; - break; - case 3: /* MWAIT C3 */ - driver_data = 0x20; - break; - case 4: /* MWAIT C4 */ - driver_data = 0x30; - break; - case 5: /* MWAIT C5 */ - driver_data = 0x40; - break; - case 6: /* MWAIT C6 */ - driver_data = 0x52; - break; - default: - driver_data = 0x00; - } - return driver_data; -} - /** * intel_idle * @dev: cpuidle_device @@ -278,8 +291,7 @@ static int intel_idle(struct cpuidle_device *dev, { unsigned long ecx = 1; /* break on interrupt flag */ struct cpuidle_state *state = &drv->states[index]; - struct cpuidle_state_usage 
*state_usage = &dev->states_usage[index]; - unsigned long eax = (unsigned long)cpuidle_get_statedata(state_usage); + unsigned long eax = flg2MWAIT(state->flags); unsigned int cstate; int cpu = smp_processor_id(); @@ -385,6 +397,10 @@ static const struct idle_cpu idle_cpu_ivb = { .state_table = ivb_cstates, }; +static const struct idle_cpu idle_cpu_hsw = { + .state_table = hsw_cstates, +}; + #define ICPU(model, cpu) \ { X86_VENDOR_INTEL, 6, model, X86_FEATURE_MWAIT, (unsigned long)&cpu } @@ -402,6 +418,9 @@ static const struct x86_cpu_id intel_idle_ids[] = { ICPU(0x2d, idle_cpu_snb), ICPU(0x3a, idle_cpu_ivb), ICPU(0x3e, idle_cpu_ivb), + ICPU(0x3c, idle_cpu_hsw), + ICPU(0x3f, idle_cpu_hsw), + ICPU(0x45, idle_cpu_hsw), {} }; MODULE_DEVICE_TABLE(x86cpu, intel_idle_ids); @@ -558,9 +577,6 @@ static int intel_idle_cpu_init(int cpu) if (cpuidle_state_table[cstate].enter == NULL) continue; - dev->states_usage[dev->state_count].driver_data = - (void *)get_driver_data(cstate); - dev->state_count += 1; } diff --git a/trunk/include/linux/pm.h b/trunk/include/linux/pm.h index 97bcf23e045a..03d7bb145311 100644 --- a/trunk/include/linux/pm.h +++ b/trunk/include/linux/pm.h @@ -31,6 +31,7 @@ /* * Callbacks for platform drivers to implement. */ +extern void (*pm_idle)(void); extern void (*pm_power_off)(void); extern void (*pm_power_off_prepare)(void);