Skip to content

Commit

Permalink
x86-64: Clean up vdso/kernel shared variables
Browse files Browse the repository at this point in the history
Variables that are shared between the vdso and the kernel are
currently a bit of a mess.  They are each defined with their own
magic, they are accessed differently in the kernel, the vsyscall page,
and the vdso, and one of them (vsyscall_clock) doesn't even really
exist.

This changes them all to use a common mechanism.  All of them are
declared in vvar.h with a fixed address (validated by the linker
script).  In the kernel (as before), they look like ordinary
read-write variables.  In the vsyscall page and the vdso, they are
accessed through a new macro VVAR, which gives read-only access.

The vdso is now loaded verbatim into memory without any fixups.  As a
side bonus, access from the vdso is faster because a level of
indirection is removed.

While we're at it, pack jiffies and vgetcpu_mode into the same
cacheline.

Signed-off-by: Andy Lutomirski <luto@mit.edu>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Borislav Petkov <bp@amd64.org>
Link: http://lkml.kernel.org/r/%3C7357882fbb51fa30491636a7b6528747301b7ee9.1306156808.git.luto%40mit.edu%3E
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
  • Loading branch information
Andy Lutomirski authored and Thomas Gleixner committed May 24, 2011
1 parent d762f43 commit 8c49d9a
Show file tree
Hide file tree
Showing 15 changed files with 91 additions and 145 deletions.
14 changes: 0 additions & 14 deletions arch/x86/include/asm/vdso.h
Original file line number Diff line number Diff line change
@@ -1,20 +1,6 @@
#ifndef _ASM_X86_VDSO_H
#define _ASM_X86_VDSO_H

#ifdef CONFIG_X86_64
extern const char VDSO64_PRELINK[];

/*
* Given a pointer to the vDSO image, find the pointer to VDSO64_name
* as that symbol is defined in the vDSO sources or linker script.
*/
#define VDSO64_SYMBOL(base, name) \
({ \
extern const char VDSO64_##name[]; \
(void *)(VDSO64_##name - VDSO64_PRELINK + (unsigned long)(base)); \
})
#endif

#if defined CONFIG_X86_32 || defined CONFIG_COMPAT
extern const char VDSO32_PRELINK[];

Expand Down
2 changes: 0 additions & 2 deletions arch/x86/include/asm/vgtod.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,6 @@ struct vsyscall_gtod_data {
struct timespec wall_to_monotonic;
struct timespec wall_time_coarse;
};
extern struct vsyscall_gtod_data __vsyscall_gtod_data
__section_vsyscall_gtod_data;
extern struct vsyscall_gtod_data vsyscall_gtod_data;

#endif /* _ASM_X86_VGTOD_H */
12 changes: 2 additions & 10 deletions arch/x86/include/asm/vsyscall.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,27 +16,19 @@ enum vsyscall_num {
#ifdef __KERNEL__
#include <linux/seqlock.h>

#define __section_vgetcpu_mode __attribute__ ((unused, __section__ (".vgetcpu_mode"), aligned(16)))
#define __section_jiffies __attribute__ ((unused, __section__ (".jiffies"), aligned(16)))

/* Definitions for CONFIG_GENERIC_TIME definitions */
#define __section_vsyscall_gtod_data __attribute__ \
((unused, __section__ (".vsyscall_gtod_data"),aligned(16)))
#define __section_vsyscall_clock __attribute__ \
((unused, __section__ (".vsyscall_clock"),aligned(16)))
#define __vsyscall_fn \
__attribute__ ((unused, __section__(".vsyscall_fn"))) notrace

#define VGETCPU_RDTSCP 1
#define VGETCPU_LSL 2

extern int __vgetcpu_mode;
extern volatile unsigned long __jiffies;

/* kernel space (writeable) */
extern int vgetcpu_mode;
extern struct timezone sys_tz;

#include <asm/vvar.h>

extern void map_vsyscall(void);

#endif /* __KERNEL__ */
Expand Down
52 changes: 52 additions & 0 deletions arch/x86/include/asm/vvar.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
/*
* vvar.h: Shared vDSO/kernel variable declarations
* Copyright (c) 2011 Andy Lutomirski
* Subject to the GNU General Public License, version 2
*
* A handful of variables are accessible (read-only) from userspace
* code in the vsyscall page and the vdso. They are declared here.
* Some other file must define them with DEFINE_VVAR.
*
* In normal kernel code, they are used like any other variable.
* In user code, they are accessed through the VVAR macro.
*
* Each of these variables lives in the vsyscall page, and each
* one needs a unique offset within the little piece of the page
* reserved for vvars. Specify that offset in DECLARE_VVAR.
* (There are 896 bytes available. If you mess up, the linker will
* catch it.)
*/

/* Offset of vars within vsyscall page */
#define VSYSCALL_VARS_OFFSET (3072 + 128)

/*
 * This header is expanded in two different contexts, selected by
 * __VVAR_KERNEL_LDS:
 *  - with it defined, each DECLARE_VVAR becomes an EMIT_VVAR() invocation;
 *  - otherwise, each DECLARE_VVAR becomes a C pointer constant and the
 *    DEFINE_VVAR / VVAR helpers are made available.
 */
#if defined(__VVAR_KERNEL_LDS)

/* The kernel linker script defines its own magic to put vvars in the
 * right place.  EMIT_VVAR is not defined here; the includer must
 * provide it before including this header.
 */
#define DECLARE_VVAR(offset, type, name) \
EMIT_VVAR(name, VSYSCALL_VARS_OFFSET + offset)

#else

/*
 * DECLARE_VVAR(offset, type, name): define vvaraddr_<name>, a constant
 * pointer to a const-qualified <type> located at
 * VSYSCALL_START + VSYSCALL_VARS_OFFSET + offset.  The pointee is const,
 * so accesses through it are read-only.
 */
#define DECLARE_VVAR(offset, type, name) \
static type const * const vvaraddr_ ## name = \
(void *)(VSYSCALL_START + VSYSCALL_VARS_OFFSET + (offset));

/*
 * DEFINE_VVAR(type, name): define the backing object __vvar_<name>,
 * placed in its own section ".vsyscall_var_<name>" with 16-byte
 * alignment.  An initializer may follow the macro invocation.
 */
#define DEFINE_VVAR(type, name) \
type __vvar_ ## name \
__attribute__((section(".vsyscall_var_" #name), aligned(16)))

/* VVAR(name): read-only lvalue for the variable, via vvaraddr_<name>. */
#define VVAR(name) (*vvaraddr_ ## name)

#endif

/*
 * DECLARE_VVAR(offset, type, name)
 *
 * The table of shared variables.  Each offset is relative to
 * VSYSCALL_VARS_OFFSET and must be unique.  jiffies (offset 0) and
 * vgetcpu_mode (offset 8) are deliberately placed next to each other;
 * vsyscall_gtod_data starts at offset 128.
 */

DECLARE_VVAR(0, volatile unsigned long, jiffies)
DECLARE_VVAR(8, int, vgetcpu_mode)
DECLARE_VVAR(128, struct vsyscall_gtod_data, vsyscall_gtod_data)

/* These two helpers are only needed to expand the table above. */
#undef DECLARE_VVAR
#undef VSYSCALL_VARS_OFFSET
2 changes: 1 addition & 1 deletion arch/x86/kernel/time.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
#include <asm/time.h>

#ifdef CONFIG_X86_64
volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES;
DEFINE_VVAR(volatile unsigned long, jiffies) = INITIAL_JIFFIES;
#endif

unsigned long profile_pc(struct pt_regs *regs)
Expand Down
4 changes: 2 additions & 2 deletions arch/x86/kernel/tsc.c
Original file line number Diff line number Diff line change
Expand Up @@ -777,8 +777,8 @@ static cycle_t __vsyscall_fn vread_tsc(void)
ret = (cycle_t)vget_cycles();
rdtsc_barrier();

return ret >= __vsyscall_gtod_data.clock.cycle_last ?
ret : __vsyscall_gtod_data.clock.cycle_last;
return ret >= VVAR(vsyscall_gtod_data).clock.cycle_last ?
ret : VVAR(vsyscall_gtod_data).clock.cycle_last;
}
#endif

Expand Down
34 changes: 11 additions & 23 deletions arch/x86/kernel/vmlinux.lds.S
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,12 @@ SECTIONS

#define VVIRT_OFFSET (VSYSCALL_ADDR - __vsyscall_0)
#define VVIRT(x) (ADDR(x) - VVIRT_OFFSET)
/*
 * EMIT_VVAR(x, offset): emit an output section .vsyscall_var_<x> at
 * ADDR(.vsyscall_0) + offset, with its load address taken from VLOAD(),
 * collecting the input sections named .vsyscall_var_<x>.  Then define
 * the symbol <x> as VVIRT() of that output section.
 */
#define EMIT_VVAR(x, offset) .vsyscall_var_ ## x \
ADDR(.vsyscall_0) + offset \
: AT(VLOAD(.vsyscall_var_ ## x)) { \
*(.vsyscall_var_ ## x) \
} \
x = VVIRT(.vsyscall_var_ ## x);

. = ALIGN(4096);
__vsyscall_0 = .;
Expand All @@ -175,47 +181,29 @@ SECTIONS
*(.vsyscall_fn)
}

. = ALIGN(L1_CACHE_BYTES);
.vsyscall_gtod_data : AT(VLOAD(.vsyscall_gtod_data)) {
*(.vsyscall_gtod_data)
}

vsyscall_gtod_data = VVIRT(.vsyscall_gtod_data);
.vsyscall_clock : AT(VLOAD(.vsyscall_clock)) {
*(.vsyscall_clock)
}
vsyscall_clock = VVIRT(.vsyscall_clock);


.vsyscall_1 ADDR(.vsyscall_0) + 1024: AT(VLOAD(.vsyscall_1)) {
*(.vsyscall_1)
}
.vsyscall_2 ADDR(.vsyscall_0) + 2048: AT(VLOAD(.vsyscall_2)) {
*(.vsyscall_2)
}

.vgetcpu_mode : AT(VLOAD(.vgetcpu_mode)) {
*(.vgetcpu_mode)
}
vgetcpu_mode = VVIRT(.vgetcpu_mode);

. = ALIGN(L1_CACHE_BYTES);
.jiffies : AT(VLOAD(.jiffies)) {
*(.jiffies)
}
jiffies = VVIRT(.jiffies);

.vsyscall_3 ADDR(.vsyscall_0) + 3072: AT(VLOAD(.vsyscall_3)) {
*(.vsyscall_3)
}

#define __VVAR_KERNEL_LDS
#include <asm/vvar.h>
#undef __VVAR_KERNEL_LDS

. = __vsyscall_0 + PAGE_SIZE;

#undef VSYSCALL_ADDR
#undef VLOAD_OFFSET
#undef VLOAD
#undef VVIRT_OFFSET
#undef VVIRT
#undef EMIT_VVAR

#endif /* CONFIG_X86_64 */

Expand Down
46 changes: 20 additions & 26 deletions arch/x86/kernel/vsyscall_64.c
Original file line number Diff line number Diff line change
Expand Up @@ -49,15 +49,8 @@
__attribute__ ((unused, __section__(".vsyscall_" #nr))) notrace
#define __syscall_clobber "r11","cx","memory"

/*
* vsyscall_gtod_data contains data that is :
* - readonly from vsyscalls
* - written by timer interrupt or sysctl (/proc/sys/kernel/vsyscall64)
* Try to keep this structure as small as possible to avoid cache line ping pongs
*/
int __vgetcpu_mode __section_vgetcpu_mode;

struct vsyscall_gtod_data __vsyscall_gtod_data __section_vsyscall_gtod_data =
DEFINE_VVAR(int, vgetcpu_mode);
DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data) =
{
.lock = SEQLOCK_UNLOCKED,
.sysctl_enabled = 1,
Expand Down Expand Up @@ -97,7 +90,7 @@ void update_vsyscall(struct timespec *wall_time, struct timespec *wtm,
*/
static __always_inline void do_get_tz(struct timezone * tz)
{
*tz = __vsyscall_gtod_data.sys_tz;
*tz = VVAR(vsyscall_gtod_data).sys_tz;
}

static __always_inline int gettimeofday(struct timeval *tv, struct timezone *tz)
Expand Down Expand Up @@ -126,23 +119,24 @@ static __always_inline void do_vgettimeofday(struct timeval * tv)
unsigned long mult, shift, nsec;
cycle_t (*vread)(void);
do {
seq = read_seqbegin(&__vsyscall_gtod_data.lock);
seq = read_seqbegin(&VVAR(vsyscall_gtod_data).lock);

vread = __vsyscall_gtod_data.clock.vread;
if (unlikely(!__vsyscall_gtod_data.sysctl_enabled || !vread)) {
vread = VVAR(vsyscall_gtod_data).clock.vread;
if (unlikely(!VVAR(vsyscall_gtod_data).sysctl_enabled ||
!vread)) {
gettimeofday(tv,NULL);
return;
}

now = vread();
base = __vsyscall_gtod_data.clock.cycle_last;
mask = __vsyscall_gtod_data.clock.mask;
mult = __vsyscall_gtod_data.clock.mult;
shift = __vsyscall_gtod_data.clock.shift;
base = VVAR(vsyscall_gtod_data).clock.cycle_last;
mask = VVAR(vsyscall_gtod_data).clock.mask;
mult = VVAR(vsyscall_gtod_data).clock.mult;
shift = VVAR(vsyscall_gtod_data).clock.shift;

tv->tv_sec = __vsyscall_gtod_data.wall_time_sec;
nsec = __vsyscall_gtod_data.wall_time_nsec;
} while (read_seqretry(&__vsyscall_gtod_data.lock, seq));
tv->tv_sec = VVAR(vsyscall_gtod_data).wall_time_sec;
nsec = VVAR(vsyscall_gtod_data).wall_time_nsec;
} while (read_seqretry(&VVAR(vsyscall_gtod_data).lock, seq));

/* calculate interval: */
cycle_delta = (now - base) & mask;
Expand Down Expand Up @@ -171,15 +165,15 @@ time_t __vsyscall(1) vtime(time_t *t)
{
unsigned seq;
time_t result;
if (unlikely(!__vsyscall_gtod_data.sysctl_enabled))
if (unlikely(!VVAR(vsyscall_gtod_data).sysctl_enabled))
return time_syscall(t);

do {
seq = read_seqbegin(&__vsyscall_gtod_data.lock);
seq = read_seqbegin(&VVAR(vsyscall_gtod_data).lock);

result = __vsyscall_gtod_data.wall_time_sec;
result = VVAR(vsyscall_gtod_data).wall_time_sec;

} while (read_seqretry(&__vsyscall_gtod_data.lock, seq));
} while (read_seqretry(&VVAR(vsyscall_gtod_data).lock, seq));

if (t)
*t = result;
Expand Down Expand Up @@ -208,9 +202,9 @@ vgetcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache)
We do this here because otherwise user space would do it on
its own in a likely inferior way (no access to jiffies).
If you don't like it pass NULL. */
if (tcache && tcache->blob[0] == (j = __jiffies)) {
if (tcache && tcache->blob[0] == (j = VVAR(jiffies))) {
p = tcache->blob[1];
} else if (__vgetcpu_mode == VGETCPU_RDTSCP) {
} else if (VVAR(vgetcpu_mode) == VGETCPU_RDTSCP) {
/* Load per CPU data from RDTSCP */
native_read_tscp(&p);
} else {
Expand Down
2 changes: 1 addition & 1 deletion arch/x86/vdso/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ vdso-install-$(VDSO32-y) += $(vdso32-images)


# files to link into the vdso
vobjs-y := vdso-note.o vclock_gettime.o vgetcpu.o vvar.o
vobjs-y := vdso-note.o vclock_gettime.o vgetcpu.o

# files to link into kernel
obj-$(VDSO64-y) += vma.o vdso.o
Expand Down
3 changes: 1 addition & 2 deletions arch/x86/vdso/vclock_gettime.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,8 @@
#include <asm/hpet.h>
#include <asm/unistd.h>
#include <asm/io.h>
#include "vextern.h"

#define gtod vdso_vsyscall_gtod_data
#define gtod (&VVAR(vsyscall_gtod_data))

notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
{
Expand Down
7 changes: 0 additions & 7 deletions arch/x86/vdso/vdso.lds.S
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,3 @@ VERSION {
}

VDSO64_PRELINK = VDSO_PRELINK;

/*
* Define VDSO64_x for each VEXTERN(x), for use via VDSO64_SYMBOL.
*/
#define VEXTERN(x) VDSO64_ ## x = vdso_ ## x;
#include "vextern.h"
#undef VEXTERN
16 changes: 0 additions & 16 deletions arch/x86/vdso/vextern.h

This file was deleted.

3 changes: 1 addition & 2 deletions arch/x86/vdso/vgetcpu.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,13 @@
#include <linux/time.h>
#include <asm/vsyscall.h>
#include <asm/vgtod.h>
#include "vextern.h"

notrace long
__vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unused)
{
unsigned int p;

if (*vdso_vgetcpu_mode == VGETCPU_RDTSCP) {
if (VVAR(vgetcpu_mode) == VGETCPU_RDTSCP) {
/* Load per CPU data from RDTSCP */
native_read_tscp(&p);
} else {
Expand Down
Loading

0 comments on commit 8c49d9a

Please sign in to comment.