[PATCH] fast vdso implementation for CLOCK_THREAD_CPUTIME_ID
The extract cpu time (ectg) instruction allows the user
process to get the current thread cputime without calling into the
kernel. The code that uses the instruction needs to switch to
access-register mode to get access to the per-cpu info page that
contains the two base values that are needed to calculate the current
cputime from the CPU timer with the ectg instruction.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Martin Schwidefsky committed Dec 31, 2008
1 parent 9cfb9b3 commit c742b31
Showing 10 changed files with 222 additions and 32 deletions.
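
For context, a minimal user-space sketch of the interface this patch speeds up is shown below. It is an illustration, not part of the commit: the fast path is only taken when the C library routes the request through the vdso (translating CLOCK_THREAD_CPUTIME_ID for the calling thread into the per-thread clockid -2 that the clock_getres.S hunk below checks for) and the ectg facility is available; otherwise the call falls back to the system call.

#include <stdio.h>
#include <time.h>

int main(void)
{
	struct timespec ts, res;

	/* With the ectg fast path these reads can be answered in the vdso. */
	if (clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts) == 0)
		printf("thread cputime: %ld.%09ld\n", (long) ts.tv_sec, ts.tv_nsec);

	if (clock_getres(CLOCK_THREAD_CPUTIME_ID, &res) == 0)
		printf("resolution: %ld.%09ld\n", (long) res.tv_sec, res.tv_nsec);

	return 0;
}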
9 changes: 8 additions & 1 deletion arch/s390/include/asm/lowcore.h
@@ -106,8 +106,10 @@
#define __LC_IPLDEV 0xDB8
#define __LC_CURRENT 0xDD8
#define __LC_INT_CLOCK 0xDE8
#define __LC_VDSO_PER_CPU 0xE38
#endif /* __s390x__ */

#define __LC_PASTE 0xE40

#define __LC_PANIC_MAGIC 0xE00
#ifndef __s390x__
@@ -381,7 +383,12 @@ struct _lowcore
/* whether the kernel died with panic() or not */
__u32 panic_magic; /* 0xe00 */

__u8 pad13[0x11b8-0xe04]; /* 0xe04 */
/* Per cpu primary space access list */
__u8 pad_0xe04[0xe38-0xe04]; /* 0xe04 */
__u64 vdso_per_cpu_data; /* 0xe38 */
__u32 paste[16]; /* 0xe40 */

__u8 pad13[0x11b8-0xe80]; /* 0xe80 */

/* 64 bit extparam used for pfault, diag 250 etc */
__u64 ext_params2; /* 0x11B8 */
15 changes: 13 additions & 2 deletions arch/s390/include/asm/vdso.h
@@ -12,9 +12,9 @@
#ifndef __ASSEMBLY__

/*
* Note about this structure:
* Note about the vdso_data and vdso_per_cpu_data structures:
*
* NEVER USE THIS IN USERSPACE CODE DIRECTLY. The layout of this
* NEVER USE THEM IN USERSPACE CODE DIRECTLY. The layout of the
* structure is supposed to be known only to the function in the vdso
* itself and may change without notice.
*/
@@ -28,10 +28,21 @@ struct vdso_data {
__u64 wtom_clock_nsec; /* 0x28 */
__u32 tz_minuteswest; /* Minutes west of Greenwich 0x30 */
__u32 tz_dsttime; /* Type of dst correction 0x34 */
__u32 ectg_available;
};

struct vdso_per_cpu_data {
__u64 ectg_timer_base;
__u64 ectg_user_time;
};

extern struct vdso_data *vdso_data;

#ifdef CONFIG_64BIT
int vdso_alloc_per_cpu(int cpu, struct _lowcore *lowcore);
void vdso_free_per_cpu(int cpu, struct _lowcore *lowcore);
#endif

#endif /* __ASSEMBLY__ */

#endif /* __KERNEL__ */
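
To make the role of the two per-cpu fields concrete, here is a conceptual C sketch of the calculation the vdso performs with them (my reading of the patch, not code from the commit). It assumes that the kernel publishes, at every return to user space, the CPU timer value at that exit in ectg_timer_base and the thread's accumulated user cputime in ectg_user_time, and that the s390 CPU timer counts down, so the difference between the published base and the current timer value is the time spent in user space since that exit.

/*
 * Conceptual model of the ectg fast path -- illustration only.
 * cpu_timer_now stands in for the current value of the decrementing
 * CPU timer; the real vdso code performs the subtraction and the
 * addition with a single ectg instruction while running in
 * access-register mode, then converts the result to a timespec.
 */
static inline unsigned long long
thread_cputime_sketch(const struct vdso_per_cpu_data *v,
		      unsigned long long cpu_timer_now)
{
	/* time spent in user space since the last kernel exit */
	unsigned long long since_exit = v->ectg_timer_base - cpu_timer_now;

	return v->ectg_user_time + since_exit;	/* in CPU-timer units */
}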
5 changes: 5 additions & 0 deletions arch/s390/kernel/asm-offsets.c
@@ -48,6 +48,11 @@ int main(void)
DEFINE(__VDSO_WTOM_SEC, offsetof(struct vdso_data, wtom_clock_sec));
DEFINE(__VDSO_WTOM_NSEC, offsetof(struct vdso_data, wtom_clock_nsec));
DEFINE(__VDSO_TIMEZONE, offsetof(struct vdso_data, tz_minuteswest));
DEFINE(__VDSO_ECTG_OK, offsetof(struct vdso_data, ectg_available));
DEFINE(__VDSO_ECTG_BASE,
offsetof(struct vdso_per_cpu_data, ectg_timer_base));
DEFINE(__VDSO_ECTG_USER,
offsetof(struct vdso_per_cpu_data, ectg_user_time));
/* constants used by the vdso */
DEFINE(CLOCK_REALTIME, CLOCK_REALTIME);
DEFINE(CLOCK_MONOTONIC, CLOCK_MONOTONIC);
45 changes: 24 additions & 21 deletions arch/s390/kernel/entry64.S
@@ -177,8 +177,11 @@ _TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \
.if !\sync
ni \psworg+1,0xfd # clear wait state bit
.endif
lmg %r0,%r15,SP_R0(%r15) # load gprs 0-15 of user
lg %r14,__LC_VDSO_PER_CPU
lmg %r0,%r13,SP_R0(%r15) # load gprs 0-13 of user
stpt __LC_EXIT_TIMER
mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER
lmg %r14,%r15,SP_R14(%r15) # load gprs 14-15 of user
lpswe \psworg # back to caller
.endm

@@ -980,23 +983,23 @@ cleanup_sysc_return:

cleanup_sysc_leave:
clc 8(8,%r12),BASED(cleanup_sysc_leave_insn)
je 2f
mvc __LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER
je 3f
clc 8(8,%r12),BASED(cleanup_sysc_leave_insn+8)
je 2f
mvc __LC_RETURN_PSW(16),SP_PSW(%r15)
jhe 0f
mvc __LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER
0: mvc __LC_RETURN_PSW(16),SP_PSW(%r15)
cghi %r12,__LC_MCK_OLD_PSW
jne 0f
jne 1f
mvc __LC_SAVE_AREA+64(32),SP_R12(%r15)
j 1f
0: mvc __LC_SAVE_AREA+32(32),SP_R12(%r15)
1: lmg %r0,%r11,SP_R0(%r15)
j 2f
1: mvc __LC_SAVE_AREA+32(32),SP_R12(%r15)
2: lmg %r0,%r11,SP_R0(%r15)
lg %r15,SP_R15(%r15)
2: la %r12,__LC_RETURN_PSW
3: la %r12,__LC_RETURN_PSW
br %r14
cleanup_sysc_leave_insn:
.quad sysc_done - 4
.quad sysc_done - 8
.quad sysc_done - 16

cleanup_io_return:
mvc __LC_RETURN_PSW(8),0(%r12)
@@ -1006,23 +1009,23 @@ cleanup_io_return:

cleanup_io_leave:
clc 8(8,%r12),BASED(cleanup_io_leave_insn)
je 2f
mvc __LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER
je 3f
clc 8(8,%r12),BASED(cleanup_io_leave_insn+8)
je 2f
mvc __LC_RETURN_PSW(16),SP_PSW(%r15)
jhe 0f
mvc __LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER
0: mvc __LC_RETURN_PSW(16),SP_PSW(%r15)
cghi %r12,__LC_MCK_OLD_PSW
jne 0f
jne 1f
mvc __LC_SAVE_AREA+64(32),SP_R12(%r15)
j 1f
0: mvc __LC_SAVE_AREA+32(32),SP_R12(%r15)
1: lmg %r0,%r11,SP_R0(%r15)
j 2f
1: mvc __LC_SAVE_AREA+32(32),SP_R12(%r15)
2: lmg %r0,%r11,SP_R0(%r15)
lg %r15,SP_R15(%r15)
2: la %r12,__LC_RETURN_PSW
3: la %r12,__LC_RETURN_PSW
br %r14
cleanup_io_leave_insn:
.quad io_done - 4
.quad io_done - 8
.quad io_done - 16

/*
* Integer constants
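
In C-like pseudocode, the producer side of the scheme -- the modified return-to-user macro above -- now does roughly the following. This is a sketch of my reading of the diff, not kernel code: store_cpu_timer() is a hypothetical stand-in for the stpt instruction, and the exit_timer/user_timer field names come from the full lowcore layout, which this hunk does not show; the 16-byte mvc is what copies both values into the per-cpu vdso page in one go.

/* Sketch of the return-to-user path after this patch -- illustration only. */
extern unsigned long long store_cpu_timer(void);	/* hypothetical helper for stpt */

static void sysc_leave_sketch(struct _lowcore *lc)
{
	/* lg %r14,__LC_VDSO_PER_CPU: per-cpu vdso data page of this CPU */
	struct vdso_per_cpu_data *v =
		(struct vdso_per_cpu_data *)(unsigned long) lc->vdso_per_cpu_data;

	/* stpt __LC_EXIT_TIMER: snapshot the CPU timer (already done before this patch) */
	lc->exit_timer = store_cpu_timer();

	/* mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER: publish both values */
	v->ectg_timer_base = lc->exit_timer;
	v->ectg_user_time = lc->user_timer;	/* assumed to be adjacent to exit_timer */

	/* lmg/lpswe: restore the remaining user registers and return to user space */
}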
2 changes: 2 additions & 0 deletions arch/s390/kernel/head64.S
@@ -87,6 +87,8 @@ startup_continue:
lg %r12,.Lparmaddr-.LPG1(%r13) # pointer to parameter area
# move IPL device to lowcore
mvc __LC_IPLDEV(4),IPL_DEVICE+4-PARMAREA(%r12)
lghi %r0,__LC_PASTE
stg %r0,__LC_VDSO_PER_CPU
#
# Setup stack
#
2 changes: 2 additions & 0 deletions arch/s390/kernel/setup.c
@@ -427,6 +427,8 @@ setup_lowcore(void)
/* enable extended save area */
__ctl_set_bit(14, 29);
}
#else
lc->vdso_per_cpu_data = (unsigned long) &lc->paste[0];
#endif
set_prefix((u32)(unsigned long) lc);
}
9 changes: 9 additions & 0 deletions arch/s390/kernel/smp.c
@@ -47,6 +47,7 @@
#include <asm/lowcore.h>
#include <asm/sclp.h>
#include <asm/cpu.h>
#include <asm/vdso.h>
#include "entry.h"

/*
@@ -506,6 +507,9 @@ static int __cpuinit smp_alloc_lowcore(int cpu)
goto out;
lowcore->extended_save_area_addr = (u32) save_area;
}
#else
if (vdso_alloc_per_cpu(cpu, lowcore))
goto out;
#endif
lowcore_ptr[cpu] = lowcore;
return 0;
@@ -528,6 +532,8 @@ static void smp_free_lowcore(int cpu)
#ifndef CONFIG_64BIT
if (MACHINE_HAS_IEEE)
free_page((unsigned long) lowcore->extended_save_area_addr);
#else
vdso_free_per_cpu(cpu, lowcore);
#endif
free_page(lowcore->panic_stack - PAGE_SIZE);
free_pages(lowcore->async_stack - ASYNC_SIZE, ASYNC_ORDER);
@@ -670,6 +676,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
lowcore = (void *) __get_free_pages(GFP_KERNEL | GFP_DMA, lc_order);
panic_stack = __get_free_page(GFP_KERNEL);
async_stack = __get_free_pages(GFP_KERNEL, ASYNC_ORDER);
BUG_ON(!lowcore || !panic_stack || !async_stack);
#ifndef CONFIG_64BIT
if (MACHINE_HAS_IEEE)
save_area = get_zeroed_page(GFP_KERNEL);
@@ -683,6 +690,8 @@
#ifndef CONFIG_64BIT
if (MACHINE_HAS_IEEE)
lowcore->extended_save_area_addr = (u32) save_area;
#else
BUG_ON(vdso_alloc_per_cpu(smp_processor_id(), lowcore));
#endif
set_prefix((u32)(unsigned long) lowcore);
local_mcck_enable();
123 changes: 120 additions & 3 deletions arch/s390/kernel/vdso.c
@@ -31,9 +31,6 @@
#include <asm/sections.h>
#include <asm/vdso.h>

/* Max supported size for symbol names */
#define MAX_SYMNAME 64

#if defined(CONFIG_32BIT) || defined(CONFIG_COMPAT)
extern char vdso32_start, vdso32_end;
static void *vdso32_kbase = &vdso32_start;
@@ -70,6 +67,119 @@ static union {
} vdso_data_store __attribute__((__section__(".data.page_aligned")));
struct vdso_data *vdso_data = &vdso_data_store.data;

/*
* Setup vdso data page.
*/
static void vdso_init_data(struct vdso_data *vd)
{
unsigned int facility_list;

facility_list = stfl();
vd->ectg_available = switch_amode && (facility_list & 1);
}

#ifdef CONFIG_64BIT
/*
* Setup per cpu vdso data page.
*/
static void vdso_init_per_cpu_data(int cpu, struct vdso_per_cpu_data *vpcd)
{
}

/*
* Allocate/free per cpu vdso data.
*/
#ifdef CONFIG_64BIT
#define SEGMENT_ORDER 2
#else
#define SEGMENT_ORDER 1
#endif

int vdso_alloc_per_cpu(int cpu, struct _lowcore *lowcore)
{
unsigned long segment_table, page_table, page_frame;
u32 *psal, *aste;
int i;

lowcore->vdso_per_cpu_data = __LC_PASTE;

if (!switch_amode || !vdso_enabled)
return 0;

segment_table = __get_free_pages(GFP_KERNEL, SEGMENT_ORDER);
page_table = get_zeroed_page(GFP_KERNEL | GFP_DMA);
page_frame = get_zeroed_page(GFP_KERNEL);
if (!segment_table || !page_table || !page_frame)
goto out;

clear_table((unsigned long *) segment_table, _SEGMENT_ENTRY_EMPTY,
PAGE_SIZE << SEGMENT_ORDER);
clear_table((unsigned long *) page_table, _PAGE_TYPE_EMPTY,
256*sizeof(unsigned long));

*(unsigned long *) segment_table = _SEGMENT_ENTRY + page_table;
*(unsigned long *) page_table = _PAGE_RO + page_frame;

psal = (u32 *) (page_table + 256*sizeof(unsigned long));
aste = psal + 32;

for (i = 4; i < 32; i += 4)
psal[i] = 0x80000000;

lowcore->paste[4] = (u32)(addr_t) psal;
psal[0] = 0x20000000;
psal[2] = (u32)(addr_t) aste;
*(unsigned long *) (aste + 2) = segment_table +
_ASCE_TABLE_LENGTH + _ASCE_USER_BITS + _ASCE_TYPE_SEGMENT;
aste[4] = (u32)(addr_t) psal;
lowcore->vdso_per_cpu_data = page_frame;

vdso_init_per_cpu_data(cpu, (struct vdso_per_cpu_data *) page_frame);
return 0;

out:
free_page(page_frame);
free_page(page_table);
free_pages(segment_table, SEGMENT_ORDER);
return -ENOMEM;
}

#ifdef CONFIG_HOTPLUG_CPU
void vdso_free_per_cpu(int cpu, struct _lowcore *lowcore)
{
unsigned long segment_table, page_table, page_frame;
u32 *psal, *aste;

if (!switch_amode || !vdso_enabled)
return;

psal = (u32 *)(addr_t) lowcore->paste[4];
aste = (u32 *)(addr_t) psal[2];
segment_table = *(unsigned long *)(aste + 2) & PAGE_MASK;
page_table = *(unsigned long *) segment_table;
page_frame = *(unsigned long *) page_table;

free_page(page_frame);
free_page(page_table);
free_pages(segment_table, SEGMENT_ORDER);
}
#endif /* CONFIG_HOTPLUG_CPU */

static void __vdso_init_cr5(void *dummy)
{
unsigned long cr5;

cr5 = offsetof(struct _lowcore, paste);
__ctl_load(cr5, 5, 5);
}

static void vdso_init_cr5(void)
{
if (switch_amode && vdso_enabled)
on_each_cpu(__vdso_init_cr5, NULL, 1);
}
#endif /* CONFIG_64BIT */

/*
* This is called from binfmt_elf, we create the special vma for the
* vDSO and insert it into the mm struct tree
@@ -172,6 +282,9 @@ static int __init vdso_init(void)
{
int i;

if (!vdso_enabled)
return 0;
vdso_init_data(vdso_data);
#if defined(CONFIG_32BIT) || defined(CONFIG_COMPAT)
/* Calculate the size of the 32 bit vDSO */
vdso32_pages = ((&vdso32_end - &vdso32_start
@@ -208,6 +321,10 @@ static int __init vdso_init(void)
}
vdso64_pagelist[vdso64_pages - 1] = virt_to_page(vdso_data);
vdso64_pagelist[vdso64_pages] = NULL;
#ifndef CONFIG_SMP
BUG_ON(vdso_alloc_per_cpu(0, &S390_lowcore));
#endif
vdso_init_cr5();
#endif /* CONFIG_64BIT */

get_page(virt_to_page(vdso_data));
5 changes: 5 additions & 0 deletions arch/s390/kernel/vdso64/clock_getres.S
@@ -22,7 +22,12 @@ __kernel_clock_getres:
cghi %r2,CLOCK_REALTIME
je 0f
cghi %r2,CLOCK_MONOTONIC
je 0f
cghi %r2,-2 /* CLOCK_THREAD_CPUTIME_ID for this thread */
jne 2f
larl %r5,_vdso_data
icm %r0,15,__LC_ECTG_OK(%r5)
jz 2f
0: ltgr %r3,%r3
jz 1f /* res == NULL */
larl %r1,3f
