Skip to content

Commit

Permalink
x86/asm/delay: Introduce an MWAITX-based delay with a configurable timer
Browse files Browse the repository at this point in the history
MWAITX can enable a timer and a corresponding timer value
specified in SW P0 clocks. The SW P0 frequency is the same as
TSC. The timer provides an upper bound on how long the
instruction waits before exiting.

This way, a delay function in the kernel can leverage that
MWAITX timer of MWAITX.

When a CPU core executes MWAITX, it will be quiesced in a
waiting phase, diminishing its power consumption. This way, we
can save power in comparison to our default TSC-based delays.

A simple test shows that:

	$ cat /sys/bus/pci/devices/0000\:00\:18.4/hwmon/hwmon0/power1_acc
	$ sleep 10000s
	$ cat /sys/bus/pci/devices/0000\:00\:18.4/hwmon/hwmon0/power1_acc

Results:

	* TSC-based default delay:      485115 uWatts average power
	* MWAITX-based delay:           252738 uWatts average power

Thus, that's about 240 milliWatts less power consumption. The
test method relies on the support of AMD CPU accumulated power
algorithm in fam15h_power for which patches are forthcoming.

Suggested-by: Andy Lutomirski <luto@amacapital.net>
Suggested-by: Borislav Petkov <bp@suse.de>
Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Huang Rui <ray.huang@amd.com>
[ Fix delay truncation. ]
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: Aaron Lu <aaron.lu@intel.com>
Cc: Andreas Herrmann <herrmann.der.user@gmail.com>
Cc: Aravind Gopalakrishnan <Aravind.Gopalakrishnan@amd.com>
Cc: Fengguang Wu <fengguang.wu@intel.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Hector Marco-Gisbert <hecmargi@upv.es>
Cc: Jacob Shin <jacob.w.shin@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: John Stultz <john.stultz@linaro.org>
Cc: Len Brown <lenb@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Rafael J. Wysocki <rjw@rjwysocki.net>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tony Li <tony.li@amd.com>
Link: http://lkml.kernel.org/r/1438744732-1459-3-git-send-email-ray.huang@amd.com
Link: http://lkml.kernel.org/r/1439201994-28067-4-git-send-email-bp@alien8.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
  • Loading branch information
Huang Rui authored and Ingo Molnar committed Aug 22, 2015
1 parent f967567 commit b466bdb
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 1 deletion.
1 change: 1 addition & 0 deletions arch/x86/include/asm/delay.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,6 @@
#include <asm-generic/delay.h>

void use_tsc_delay(void);
void use_mwaitx_delay(void);

#endif /* _ASM_X86_DELAY_H */
4 changes: 4 additions & 0 deletions arch/x86/kernel/cpu/amd.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include <asm/cpu.h>
#include <asm/smp.h>
#include <asm/pci-direct.h>
#include <asm/delay.h>

#ifdef CONFIG_X86_64
# include <asm/mmconfig.h>
Expand Down Expand Up @@ -506,6 +507,9 @@ static void bsp_init_amd(struct cpuinfo_x86 *c)
/* A random value per boot for bit slice [12:upper_bit) */
va_align.bits = get_random_int() & va_align.mask;
}

if (cpu_has(c, X86_FEATURE_MWAITX))
use_mwaitx_delay();
}

static void early_init_amd(struct cpuinfo_x86 *c)
Expand Down
47 changes: 46 additions & 1 deletion arch/x86/lib/delay.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include <asm/processor.h>
#include <asm/delay.h>
#include <asm/timer.h>
#include <asm/mwait.h>

#ifdef CONFIG_SMP
# include <asm/smp.h>
Expand Down Expand Up @@ -83,6 +84,44 @@ static void delay_tsc(unsigned long __loops)
preempt_enable();
}

/*
* On some AMD platforms, MWAITX has a configurable 32-bit timer, that
* counts with TSC frequency. The input value is the loop of the
* counter, it will exit when the timer expires.
*/
static void delay_mwaitx(unsigned long __loops)
{
u64 start, end, delay, loops = __loops;

start = rdtsc_ordered();

for (;;) {
delay = min_t(u64, MWAITX_MAX_LOOPS, loops);

/*
* Use cpu_tss as a cacheline-aligned, seldomly
* accessed per-cpu variable as the monitor target.
*/
__monitorx(this_cpu_ptr(&cpu_tss), 0, 0);

/*
* AMD, like Intel, supports the EAX hint and EAX=0xf
* means, do not enter any deep C-state and we use it
* here in delay() to minimize wakeup latency.
*/
__mwaitx(MWAITX_DISABLE_CSTATES, delay, MWAITX_ECX_TIMER_ENABLE);

end = rdtsc_ordered();

if (loops <= end - start)
break;

loops -= end - start;

start = end;
}
}

/*
* Since we calibrate only once at boot, this
* function should be set once at boot and not changed
Expand All @@ -91,7 +130,13 @@ static void (*delay_fn)(unsigned long) = delay_loop;

void use_tsc_delay(void)
{
delay_fn = delay_tsc;
if (delay_fn == delay_loop)
delay_fn = delay_tsc;
}

void use_mwaitx_delay(void)
{
delay_fn = delay_mwaitx;
}

int read_current_timer(unsigned long *timer_val)
Expand Down

0 comments on commit b466bdb

Please sign in to comment.