Merge branch 'x86-vdso-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'x86-vdso-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  x86: vdso: Remove unused variable
  x86-64: Optimize vDSO time()
  x86-64: Add time to vDSO
  x86-64: Turn off -pg and turn on -foptimize-sibling-calls for vDSO
  x86-64: Move vread_tsc into a new file with sensible options
  x86-64: Vclock_gettime(CLOCK_MONOTONIC) can't ever see nsec < 0
  x86-64: Don't generate cmov in vread_tsc
  x86-64: Remove unnecessary barrier in vread_tsc
  x86-64: Clean up vdso/kernel shared variables
Linus Torvalds committed May 26, 2011
2 parents fce637e + e9d3594 commit 14587a2
Showing 18 changed files with 202 additions and 185 deletions.
4 changes: 4 additions & 0 deletions arch/x86/include/asm/tsc.h
@@ -51,6 +51,10 @@ extern int unsynchronized_tsc(void);
extern int check_tsc_unstable(void);
extern unsigned long native_calibrate_tsc(void);

#ifdef CONFIG_X86_64
extern cycles_t vread_tsc(void);
#endif

/*
* Boot-time check whether the TSCs are synchronized across
* all CPUs/cores:
14 changes: 0 additions & 14 deletions arch/x86/include/asm/vdso.h
@@ -1,20 +1,6 @@
#ifndef _ASM_X86_VDSO_H
#define _ASM_X86_VDSO_H

#ifdef CONFIG_X86_64
extern const char VDSO64_PRELINK[];

/*
* Given a pointer to the vDSO image, find the pointer to VDSO64_name
* as that symbol is defined in the vDSO sources or linker script.
*/
#define VDSO64_SYMBOL(base, name) \
({ \
extern const char VDSO64_##name[]; \
(void *)(VDSO64_##name - VDSO64_PRELINK + (unsigned long)(base)); \
})
#endif

#if defined CONFIG_X86_32 || defined CONFIG_COMPAT
extern const char VDSO32_PRELINK[];

2 changes: 0 additions & 2 deletions arch/x86/include/asm/vgtod.h
@@ -23,8 +23,6 @@ struct vsyscall_gtod_data {
struct timespec wall_to_monotonic;
struct timespec wall_time_coarse;
};
extern struct vsyscall_gtod_data __vsyscall_gtod_data
__section_vsyscall_gtod_data;
extern struct vsyscall_gtod_data vsyscall_gtod_data;

#endif /* _ASM_X86_VGTOD_H */
12 changes: 2 additions & 10 deletions arch/x86/include/asm/vsyscall.h
@@ -16,27 +16,19 @@ enum vsyscall_num {
#ifdef __KERNEL__
#include <linux/seqlock.h>

#define __section_vgetcpu_mode __attribute__ ((unused, __section__ (".vgetcpu_mode"), aligned(16)))
#define __section_jiffies __attribute__ ((unused, __section__ (".jiffies"), aligned(16)))

/* Definitions for CONFIG_GENERIC_TIME definitions */
#define __section_vsyscall_gtod_data __attribute__ \
((unused, __section__ (".vsyscall_gtod_data"),aligned(16)))
#define __section_vsyscall_clock __attribute__ \
((unused, __section__ (".vsyscall_clock"),aligned(16)))
#define __vsyscall_fn \
__attribute__ ((unused, __section__(".vsyscall_fn"))) notrace

#define VGETCPU_RDTSCP 1
#define VGETCPU_LSL 2

extern int __vgetcpu_mode;
extern volatile unsigned long __jiffies;

/* kernel space (writeable) */
extern int vgetcpu_mode;
extern struct timezone sys_tz;

#include <asm/vvar.h>

extern void map_vsyscall(void);

#endif /* __KERNEL__ */
52 changes: 52 additions & 0 deletions arch/x86/include/asm/vvar.h
@@ -0,0 +1,52 @@
/*
* vvar.h: Shared vDSO/kernel variable declarations
* Copyright (c) 2011 Andy Lutomirski
* Subject to the GNU General Public License, version 2
*
* A handful of variables are accessible (read-only) from userspace
* code in the vsyscall page and the vdso. They are declared here.
* Some other file must define them with DEFINE_VVAR.
*
* In normal kernel code, they are used like any other variable.
* In user code, they are accessed through the VVAR macro.
*
* Each of these variables lives in the vsyscall page, and each
* one needs a unique offset within the little piece of the page
* reserved for vvars. Specify that offset in DECLARE_VVAR.
* (There are 896 bytes available. If you mess up, the linker will
* catch it.)
*/

/* Offset of vars within vsyscall page */
#define VSYSCALL_VARS_OFFSET (3072 + 128)

#if defined(__VVAR_KERNEL_LDS)

/* The kernel linker script defines its own magic to put vvars in the
* right place.
*/
#define DECLARE_VVAR(offset, type, name) \
EMIT_VVAR(name, VSYSCALL_VARS_OFFSET + offset)

#else

#define DECLARE_VVAR(offset, type, name) \
static type const * const vvaraddr_ ## name = \
(void *)(VSYSCALL_START + VSYSCALL_VARS_OFFSET + (offset));

#define DEFINE_VVAR(type, name) \
type __vvar_ ## name \
__attribute__((section(".vsyscall_var_" #name), aligned(16)))

#define VVAR(name) (*vvaraddr_ ## name)

#endif

/* DECLARE_VVAR(offset, type, name) */

DECLARE_VVAR(0, volatile unsigned long, jiffies)
DECLARE_VVAR(8, int, vgetcpu_mode)
DECLARE_VVAR(128, struct vsyscall_gtod_data, vsyscall_gtod_data)

#undef DECLARE_VVAR
#undef VSYSCALL_VARS_OFFSET
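For orientation, a minimal sketch of how these macros fit together, assuming only the definitions above (it mirrors the time.c and vsyscall_64.c hunks further down this page; the uses_rdtscp helper is made up for illustration):

/* Kernel side: the definition lands in its own .vsyscall_var_* section. */
DEFINE_VVAR(int, vgetcpu_mode);

/* vsyscall/vDSO side: read the variable through its fixed vsyscall-page
 * address. uses_rdtscp() is a hypothetical helper, not part of the commit. */
static inline int uses_rdtscp(void)
{
	return VVAR(vgetcpu_mode) == VGETCPU_RDTSCP;
}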
8 changes: 5 additions & 3 deletions arch/x86/kernel/Makefile
@@ -8,7 +8,6 @@ CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE)

ifdef CONFIG_FUNCTION_TRACER
# Do not profile debug and lowlevel utilities
CFLAGS_REMOVE_tsc.o = -pg
CFLAGS_REMOVE_rtc.o = -pg
CFLAGS_REMOVE_paravirt-spinlocks.o = -pg
CFLAGS_REMOVE_pvclock.o = -pg
@@ -24,13 +23,16 @@ endif
nostackp := $(call cc-option, -fno-stack-protector)
CFLAGS_vsyscall_64.o := $(PROFILING) -g0 $(nostackp)
CFLAGS_hpet.o := $(nostackp)
CFLAGS_tsc.o := $(nostackp)
CFLAGS_vread_tsc_64.o := $(nostackp)
CFLAGS_paravirt.o := $(nostackp)
GCOV_PROFILE_vsyscall_64.o := n
GCOV_PROFILE_hpet.o := n
GCOV_PROFILE_tsc.o := n
GCOV_PROFILE_paravirt.o := n

# vread_tsc_64 is hot and should be fully optimized:
CFLAGS_REMOVE_vread_tsc_64.o = -pg -fno-optimize-sibling-calls

obj-y := process_$(BITS).o signal.o entry_$(BITS).o
obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
obj-y += time.o ioport.o ldt.o dumpstack.o
@@ -39,7 +41,7 @@ obj-$(CONFIG_IRQ_WORK) += irq_work.o
obj-y += probe_roms.o
obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o
obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o
obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o
obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o vread_tsc_64.o
obj-y += bootflag.o e820.o
obj-y += pci-dma.o quirks.o topology.o kdebugfs.o
obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o
2 changes: 1 addition & 1 deletion arch/x86/kernel/time.c
@@ -23,7 +23,7 @@
#include <asm/time.h>

#ifdef CONFIG_X86_64
volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES;
DEFINE_VVAR(volatile unsigned long, jiffies) = INITIAL_JIFFIES;
#endif

unsigned long profile_pc(struct pt_regs *regs)
19 changes: 0 additions & 19 deletions arch/x86/kernel/tsc.c
@@ -763,25 +763,6 @@ static cycle_t read_tsc(struct clocksource *cs)
ret : clocksource_tsc.cycle_last;
}

#ifdef CONFIG_X86_64
static cycle_t __vsyscall_fn vread_tsc(void)
{
cycle_t ret;

/*
* Surround the RDTSC by barriers, to make sure it's not
* speculated to outside the seqlock critical section and
* does not cause time warps:
*/
rdtsc_barrier();
ret = (cycle_t)vget_cycles();
rdtsc_barrier();

return ret >= __vsyscall_gtod_data.clock.cycle_last ?
ret : __vsyscall_gtod_data.clock.cycle_last;
}
#endif

static void resume_tsc(struct clocksource *cs)
{
clocksource_tsc.cycle_last = 0;
34 changes: 11 additions & 23 deletions arch/x86/kernel/vmlinux.lds.S
@@ -161,6 +161,12 @@ SECTIONS

#define VVIRT_OFFSET (VSYSCALL_ADDR - __vsyscall_0)
#define VVIRT(x) (ADDR(x) - VVIRT_OFFSET)
#define EMIT_VVAR(x, offset) .vsyscall_var_ ## x \
ADDR(.vsyscall_0) + offset \
: AT(VLOAD(.vsyscall_var_ ## x)) { \
*(.vsyscall_var_ ## x) \
} \
x = VVIRT(.vsyscall_var_ ## x);

. = ALIGN(4096);
__vsyscall_0 = .;
@@ -175,47 +181,29 @@ SECTIONS
*(.vsyscall_fn)
}

. = ALIGN(L1_CACHE_BYTES);
.vsyscall_gtod_data : AT(VLOAD(.vsyscall_gtod_data)) {
*(.vsyscall_gtod_data)
}

vsyscall_gtod_data = VVIRT(.vsyscall_gtod_data);
.vsyscall_clock : AT(VLOAD(.vsyscall_clock)) {
*(.vsyscall_clock)
}
vsyscall_clock = VVIRT(.vsyscall_clock);


.vsyscall_1 ADDR(.vsyscall_0) + 1024: AT(VLOAD(.vsyscall_1)) {
*(.vsyscall_1)
}
.vsyscall_2 ADDR(.vsyscall_0) + 2048: AT(VLOAD(.vsyscall_2)) {
*(.vsyscall_2)
}

.vgetcpu_mode : AT(VLOAD(.vgetcpu_mode)) {
*(.vgetcpu_mode)
}
vgetcpu_mode = VVIRT(.vgetcpu_mode);

. = ALIGN(L1_CACHE_BYTES);
.jiffies : AT(VLOAD(.jiffies)) {
*(.jiffies)
}
jiffies = VVIRT(.jiffies);

.vsyscall_3 ADDR(.vsyscall_0) + 3072: AT(VLOAD(.vsyscall_3)) {
*(.vsyscall_3)
}

#define __VVAR_KERNEL_LDS
#include <asm/vvar.h>
#undef __VVAR_KERNEL_LDS

. = __vsyscall_0 + PAGE_SIZE;

#undef VSYSCALL_ADDR
#undef VLOAD_OFFSET
#undef VLOAD
#undef VVIRT_OFFSET
#undef VVIRT
#undef EMIT_VVAR

#endif /* CONFIG_X86_64 */

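As a rough worked example of the EMIT_VVAR machinery above: with VSYSCALL_VARS_OFFSET = 3072 + 128, the DECLARE_VVAR(0, volatile unsigned long, jiffies) entry from vvar.h should expand in the linker script to approximately:

.vsyscall_var_jiffies ADDR(.vsyscall_0) + 3200 : AT(VLOAD(.vsyscall_var_jiffies)) {
	*(.vsyscall_var_jiffies)
}
jiffies = VVIRT(.vsyscall_var_jiffies);

so each DEFINE_VVAR section is placed at a fixed offset within the vsyscall page and the plain symbol (here jiffies) resolves to its fixed virtual address there.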
36 changes: 36 additions & 0 deletions arch/x86/kernel/vread_tsc_64.c
@@ -0,0 +1,36 @@
/* This code runs in userspace. */

#define DISABLE_BRANCH_PROFILING
#include <asm/vgtod.h>

notrace cycle_t __vsyscall_fn vread_tsc(void)
{
cycle_t ret;
u64 last;

/*
* Empirically, a fence (of type that depends on the CPU)
* before rdtsc is enough to ensure that rdtsc is ordered
* with respect to loads. The various CPU manuals are unclear
* as to whether rdtsc can be reordered with later loads,
* but no one has ever seen it happen.
*/
rdtsc_barrier();
ret = (cycle_t)vget_cycles();

last = VVAR(vsyscall_gtod_data).clock.cycle_last;

if (likely(ret >= last))
return ret;

/*
* GCC likes to generate cmov here, but this branch is extremely
* predictable (it's just a function of time and the likely is
* very likely) and there's a data dependence, so force GCC
* to generate a branch instead. I don't barrier() because
* we don't actually need a barrier, and if this function
* ever gets inlined it will generate worse code.
*/
asm volatile ("");
return last;
}
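The empty asm statement above is a common way to keep GCC from if-converting that branch into a cmov; a standalone illustration of the same trick (not part of this commit; the helper name is made up):

/*
 * Illustration only: without the empty asm, GCC may if-convert this into a
 * cmov, making the result data-dependent on 'last' even in the common case.
 * The empty asm acts as an optimization barrier that preserves the (highly
 * predictable) branch.
 */
static inline u64 clamp_to_last(u64 now, u64 last)
{
	if (likely(now >= last))
		return now;
	asm volatile ("");
	return last;
}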
46 changes: 20 additions & 26 deletions arch/x86/kernel/vsyscall_64.c
@@ -49,15 +49,8 @@
__attribute__ ((unused, __section__(".vsyscall_" #nr))) notrace
#define __syscall_clobber "r11","cx","memory"

/*
* vsyscall_gtod_data contains data that is :
* - readonly from vsyscalls
* - written by timer interrupt or sysctl (/proc/sys/kernel/vsyscall64)
* Try to keep this structure as small as possible to avoid cache line ping pongs
*/
int __vgetcpu_mode __section_vgetcpu_mode;

struct vsyscall_gtod_data __vsyscall_gtod_data __section_vsyscall_gtod_data =
DEFINE_VVAR(int, vgetcpu_mode);
DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data) =
{
.lock = __SEQLOCK_UNLOCKED(__vsyscall_gtod_data.lock),
.sysctl_enabled = 1,
@@ -97,7 +90,7 @@ void update_vsyscall(struct timespec *wall_time, struct timespec *wtm,
*/
static __always_inline void do_get_tz(struct timezone * tz)
{
*tz = __vsyscall_gtod_data.sys_tz;
*tz = VVAR(vsyscall_gtod_data).sys_tz;
}

static __always_inline int gettimeofday(struct timeval *tv, struct timezone *tz)
@@ -126,23 +119,24 @@ static __always_inline void do_vgettimeofday(struct timeval * tv)
unsigned long mult, shift, nsec;
cycle_t (*vread)(void);
do {
seq = read_seqbegin(&__vsyscall_gtod_data.lock);
seq = read_seqbegin(&VVAR(vsyscall_gtod_data).lock);

vread = __vsyscall_gtod_data.clock.vread;
if (unlikely(!__vsyscall_gtod_data.sysctl_enabled || !vread)) {
vread = VVAR(vsyscall_gtod_data).clock.vread;
if (unlikely(!VVAR(vsyscall_gtod_data).sysctl_enabled ||
!vread)) {
gettimeofday(tv,NULL);
return;
}

now = vread();
base = __vsyscall_gtod_data.clock.cycle_last;
mask = __vsyscall_gtod_data.clock.mask;
mult = __vsyscall_gtod_data.clock.mult;
shift = __vsyscall_gtod_data.clock.shift;
base = VVAR(vsyscall_gtod_data).clock.cycle_last;
mask = VVAR(vsyscall_gtod_data).clock.mask;
mult = VVAR(vsyscall_gtod_data).clock.mult;
shift = VVAR(vsyscall_gtod_data).clock.shift;

tv->tv_sec = __vsyscall_gtod_data.wall_time_sec;
nsec = __vsyscall_gtod_data.wall_time_nsec;
} while (read_seqretry(&__vsyscall_gtod_data.lock, seq));
tv->tv_sec = VVAR(vsyscall_gtod_data).wall_time_sec;
nsec = VVAR(vsyscall_gtod_data).wall_time_nsec;
} while (read_seqretry(&VVAR(vsyscall_gtod_data).lock, seq));

/* calculate interval: */
cycle_delta = (now - base) & mask;
@@ -171,15 +165,15 @@ time_t __vsyscall(1) vtime(time_t *t)
{
unsigned seq;
time_t result;
if (unlikely(!__vsyscall_gtod_data.sysctl_enabled))
if (unlikely(!VVAR(vsyscall_gtod_data).sysctl_enabled))
return time_syscall(t);

do {
seq = read_seqbegin(&__vsyscall_gtod_data.lock);
seq = read_seqbegin(&VVAR(vsyscall_gtod_data).lock);

result = __vsyscall_gtod_data.wall_time_sec;
result = VVAR(vsyscall_gtod_data).wall_time_sec;

} while (read_seqretry(&__vsyscall_gtod_data.lock, seq));
} while (read_seqretry(&VVAR(vsyscall_gtod_data).lock, seq));

if (t)
*t = result;
@@ -208,9 +202,9 @@ vgetcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache)
We do this here because otherwise user space would do it on
its own in a likely inferior way (no access to jiffies).
If you don't like it pass NULL. */
if (tcache && tcache->blob[0] == (j = __jiffies)) {
if (tcache && tcache->blob[0] == (j = VVAR(jiffies))) {
p = tcache->blob[1];
} else if (__vgetcpu_mode == VGETCPU_RDTSCP) {
} else if (VVAR(vgetcpu_mode) == VGETCPU_RDTSCP) {
/* Load per CPU data from RDTSCP */
native_read_tscp(&p);
} else {