Skip to content

Commit

Permalink
Merge branch 'timers/core-v9' of git://git.kernel.org/pub/scm/linux/k…
Browse files Browse the repository at this point in the history
…ernel/git/frederic/linux-dynticks into timers/nohz

Pull nohz enhancements from Frederic Weisbecker:

"Currently in nohz full configs, the tick dependency is checked
 asynchronously by nohz code from interrupt and context switch for each
 concerned subsystem with a set of functions provided by those subsystems.
 Such functions are made of many conditions and details that can be
 heavyweight as they are called on the fast path: sched_can_stop_tick(),
 posix_cpu_timers_can_stop_tick(), perf_event_can_stop_tick()...

 Thomas suggested a few months ago to make that tick dependency check
 synchronous. Instead of checking subsystem details from each interrupt
 to guess if the tick can be stopped, every subsystem that may have a tick
 dependency should itself set a flag specifying the state of that
 dependency. This way we can verify if we can stop the tick with a single
 lightweight mask check on the fast path.

 This conversion from a pull to a push model to implement tick dependency
 is the core feature of this patchset that is split into:

  * Nohz wide kick simplification
  * Improve nohz tracing
  * Introduce tick dependency mask
  * Migrate scheduler, posix timers, perf events and sched clock tick
    dependencies to the tick dependency mask."

Signed-off-by: Ingo Molnar <mingo@kernel.org>
  • Loading branch information
Ingo Molnar committed Mar 8, 2016
2 parents e2857b8 + 4f49b90 commit 1f25184
Show file tree
Hide file tree
Showing 14 changed files with 424 additions and 161 deletions.
21 changes: 21 additions & 0 deletions include/linux/atomic.h
Original file line number Diff line number Diff line change
Expand Up @@ -548,6 +548,27 @@ static inline int atomic_dec_if_positive(atomic_t *v)
}
#endif

/**
 * fetch_or - perform *ptr |= mask and return old value of *ptr
 * @ptr: pointer to value
 * @mask: mask to OR on the value
 *
 * cmpxchg based fetch_or, macro so it works for different integer types.
 *
 * @ptr and @mask are each evaluated exactly once, so expressions with side
 * effects (e.g. fetch_or(p++, m)) behave as a function call would.
 *
 * Returns the value *ptr held immediately before the OR was applied.
 */
#ifndef fetch_or
#define fetch_or(ptr, mask)						\
({									\
	typeof(ptr) _ptr = (ptr);					\
	typeof(mask) _mask = (mask);					\
	typeof(*_ptr) _old, _val = *_ptr;				\
									\
	for (;;) {							\
		_old = cmpxchg(_ptr, _val, _val | _mask);		\
		if (_old == _val)					\
			break;						\
		/* Lost the race: retry against the value we just saw. */ \
		_val = _old;						\
	}								\
	_old;								\
})
#endif


#ifdef CONFIG_GENERIC_ATOMIC64
#include <asm-generic/atomic64.h>
#endif
Expand Down
6 changes: 0 additions & 6 deletions include/linux/perf_event.h
Original file line number Diff line number Diff line change
Expand Up @@ -1109,12 +1109,6 @@ static inline void perf_event_task_tick(void) { }
/* No-op stub: ignores @event and always returns 0. */
static inline int perf_event_release_kernel(struct perf_event *event)
{
	return 0;
}
#endif

/*
 * perf_event_can_stop_tick() is provided out of line only when both
 * CONFIG_PERF_EVENTS and CONFIG_NO_HZ_FULL are set; in every other
 * configuration the inline stub below always returns true.
 */
#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_NO_HZ_FULL)
extern bool perf_event_can_stop_tick(void);
#else
static inline bool perf_event_can_stop_tick(void)
{
	return true;
}
#endif

#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL)
extern void perf_restore_debug_store(void);
#else
Expand Down
3 changes: 0 additions & 3 deletions include/linux/posix-timers.h
Original file line number Diff line number Diff line change
Expand Up @@ -128,9 +128,6 @@ void posix_cpu_timer_schedule(struct k_itimer *timer);
void run_posix_cpu_timers(struct task_struct *task);
void posix_cpu_timers_exit(struct task_struct *task);
void posix_cpu_timers_exit_group(struct task_struct *task);

bool posix_cpu_timers_can_stop_tick(struct task_struct *tsk);

void set_process_cpu_timer(struct task_struct *task, unsigned int clock_idx,
cputime_t *newval, cputime_t *oldval);

Expand Down
11 changes: 8 additions & 3 deletions include/linux/sched.h
Original file line number Diff line number Diff line change
Expand Up @@ -719,6 +719,10 @@ struct signal_struct {
/* Earliest-expiration cache. */
struct task_cputime cputime_expires;

#ifdef CONFIG_NO_HZ_FULL
unsigned long tick_dep_mask;
#endif

struct list_head cpu_timers[3];

struct pid *tty_old_pgrp;
Expand Down Expand Up @@ -1542,6 +1546,10 @@ struct task_struct {
VTIME_SYS,
} vtime_snap_whence;
#endif

#ifdef CONFIG_NO_HZ_FULL
unsigned long tick_dep_mask;
#endif
unsigned long nvcsw, nivcsw; /* context switch counts */
u64 start_time; /* monotonic time in nsec */
u64 real_start_time; /* boot based time in nsec */
Expand Down Expand Up @@ -2356,10 +2364,7 @@ static inline void wake_up_nohz_cpu(int cpu) { }
#endif

#ifdef CONFIG_NO_HZ_FULL
extern bool sched_can_stop_tick(void);
extern u64 scheduler_tick_max_deferment(void);
#else
/* Without CONFIG_NO_HZ_FULL this stub always returns false. */
static inline bool sched_can_stop_tick(void)
{
	return false;
}
#endif

#ifdef CONFIG_SCHED_AUTOGROUP
Expand Down
97 changes: 93 additions & 4 deletions include/linux/tick.h
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,19 @@ static inline void tick_broadcast_exit(void)
tick_broadcast_oneshot_control(TICK_BROADCAST_EXIT);
}

/*
 * Bit position of each tick dependency. Every position has a matching
 * TICK_DEP_MASK_* value defined right after the enum.
 */
enum tick_dep_bits {
	TICK_DEP_BIT_POSIX_TIMER	= 0,
	TICK_DEP_BIT_PERF_EVENTS	= 1,
	TICK_DEP_BIT_SCHED		= 2,
	TICK_DEP_BIT_CLOCK_UNSTABLE	= 3,
};

/* Mask form of the bits above; TICK_DEP_MASK_NONE is the empty mask. */
#define TICK_DEP_MASK_NONE		0
#define TICK_DEP_MASK_POSIX_TIMER	(1 << TICK_DEP_BIT_POSIX_TIMER)
#define TICK_DEP_MASK_PERF_EVENTS	(1 << TICK_DEP_BIT_PERF_EVENTS)
#define TICK_DEP_MASK_SCHED		(1 << TICK_DEP_BIT_SCHED)
#define TICK_DEP_MASK_CLOCK_UNSTABLE	(1 << TICK_DEP_BIT_CLOCK_UNSTABLE)

#ifdef CONFIG_NO_HZ_COMMON
extern int tick_nohz_enabled;
extern int tick_nohz_tick_stopped(void);
Expand Down Expand Up @@ -154,9 +167,73 @@ static inline int housekeeping_any_cpu(void)
return cpumask_any_and(housekeeping_mask, cpu_online_mask);
}

/*
 * Out-of-line nohz-full entry points.
 *
 * The tick_nohz_dep_{set,clear}*() functions come in four flavours that
 * differ only in their first argument: no target at all, a CPU number, a
 * task_struct or a signal_struct. The tick_dep_*() inline wrappers in this
 * header call them after checking tick_nohz_full_enabled() or
 * tick_nohz_full_cpu().
 */
extern void tick_nohz_full_kick(void);
extern void tick_nohz_dep_set(enum tick_dep_bits bit);
extern void tick_nohz_dep_clear(enum tick_dep_bits bit);
extern void tick_nohz_dep_set_cpu(int cpu, enum tick_dep_bits bit);
extern void tick_nohz_dep_clear_cpu(int cpu, enum tick_dep_bits bit);
extern void tick_nohz_dep_set_task(struct task_struct *tsk,
enum tick_dep_bits bit);
extern void tick_nohz_dep_clear_task(struct task_struct *tsk,
enum tick_dep_bits bit);
extern void tick_nohz_dep_set_signal(struct signal_struct *signal,
enum tick_dep_bits bit);
extern void tick_nohz_dep_clear_signal(struct signal_struct *signal,
enum tick_dep_bits bit);

/*
 * The tick_dep_*() helpers below are wrappers around their
 * tick_nohz_dep_[set,clear]*() counterparts that optimize off-cases
 * on top of static keys.
 */

/* Set @bit through tick_nohz_dep_set(), only if nohz full is enabled. */
static inline void tick_dep_set(enum tick_dep_bits bit)
{
	if (!tick_nohz_full_enabled())
		return;

	tick_nohz_dep_set(bit);
}

/* Clear @bit through tick_nohz_dep_clear(), only if nohz full is enabled. */
static inline void tick_dep_clear(enum tick_dep_bits bit)
{
	if (!tick_nohz_full_enabled())
		return;

	tick_nohz_dep_clear(bit);
}

/* Set @bit for @cpu, but only when tick_nohz_full_cpu(@cpu) is true. */
static inline void tick_dep_set_cpu(int cpu, enum tick_dep_bits bit)
{
	if (!tick_nohz_full_cpu(cpu))
		return;

	tick_nohz_dep_set_cpu(cpu, bit);
}

/* Clear @bit for @cpu, but only when tick_nohz_full_cpu(@cpu) is true. */
static inline void tick_dep_clear_cpu(int cpu, enum tick_dep_bits bit)
{
	if (!tick_nohz_full_cpu(cpu))
		return;

	tick_nohz_dep_clear_cpu(cpu, bit);
}

/* Set @bit for @tsk through tick_nohz_dep_set_task() if nohz full is enabled. */
static inline void tick_dep_set_task(struct task_struct *tsk,
				     enum tick_dep_bits bit)
{
	if (!tick_nohz_full_enabled())
		return;

	tick_nohz_dep_set_task(tsk, bit);
}
/* Clear @bit for @tsk through tick_nohz_dep_clear_task() if nohz full is enabled. */
static inline void tick_dep_clear_task(struct task_struct *tsk,
				       enum tick_dep_bits bit)
{
	if (!tick_nohz_full_enabled())
		return;

	tick_nohz_dep_clear_task(tsk, bit);
}
/* Set @bit for @signal through tick_nohz_dep_set_signal() if nohz full is enabled. */
static inline void tick_dep_set_signal(struct signal_struct *signal,
				       enum tick_dep_bits bit)
{
	if (!tick_nohz_full_enabled())
		return;

	tick_nohz_dep_set_signal(signal, bit);
}
/* Clear @bit for @signal through tick_nohz_dep_clear_signal() if nohz full is enabled. */
static inline void tick_dep_clear_signal(struct signal_struct *signal,
					 enum tick_dep_bits bit)
{
	if (!tick_nohz_full_enabled())
		return;

	tick_nohz_dep_clear_signal(signal, bit);
}

extern void tick_nohz_full_kick_cpu(int cpu);
extern void tick_nohz_full_kick_all(void);
extern void __tick_nohz_task_switch(void);
#else
static inline int housekeeping_any_cpu(void)
Expand All @@ -166,9 +243,21 @@ static inline int housekeeping_any_cpu(void)
/*
 * Fallback stubs: nohz full is reported as disabled, no CPU is reported as
 * nohz-full, and @mask is left untouched.
 */
static inline bool tick_nohz_full_enabled(void)
{
	return false;
}

static inline bool tick_nohz_full_cpu(int cpu)
{
	return false;
}

static inline void tick_nohz_full_add_cpus_to(struct cpumask *mask)
{
}

/* Fallback tick_dep_*() stubs: all of them have empty bodies. */
static inline void tick_dep_set(enum tick_dep_bits bit)
{
}

static inline void tick_dep_clear(enum tick_dep_bits bit)
{
}

static inline void tick_dep_set_cpu(int cpu, enum tick_dep_bits bit)
{
}

static inline void tick_dep_clear_cpu(int cpu, enum tick_dep_bits bit)
{
}

static inline void tick_dep_set_task(struct task_struct *tsk,
				     enum tick_dep_bits bit)
{
}

static inline void tick_dep_clear_task(struct task_struct *tsk,
				       enum tick_dep_bits bit)
{
}

static inline void tick_dep_set_signal(struct signal_struct *signal,
				       enum tick_dep_bits bit)
{
}

static inline void tick_dep_clear_signal(struct signal_struct *signal,
					 enum tick_dep_bits bit)
{
}

/* Fallback kick and task-switch stubs: all of them have empty bodies. */
static inline void tick_nohz_full_kick_cpu(int cpu)
{
}

static inline void tick_nohz_full_kick(void)
{
}

static inline void tick_nohz_full_kick_all(void)
{
}

static inline void __tick_nohz_task_switch(void)
{
}
#endif

Expand Down
36 changes: 31 additions & 5 deletions include/trace/events/timer.h
Original file line number Diff line number Diff line change
Expand Up @@ -328,23 +328,49 @@ TRACE_EVENT(itimer_expire,
);

#ifdef CONFIG_NO_HZ_COMMON

/*
 * List of tick dependency names. It is written purely in terms of
 * tick_dep_name() / tick_dep_name_end() so that the same list can be
 * expanded several times, each time with different definitions of those
 * two macros. Each name is pasted onto the TICK_DEP_MASK_ prefix.
 */
#define TICK_DEP_NAMES \
tick_dep_name(NONE) \
tick_dep_name(POSIX_TIMER) \
tick_dep_name(PERF_EVENTS) \
tick_dep_name(SCHED) \
tick_dep_name_end(CLOCK_UNSTABLE)

#undef tick_dep_name
#undef tick_dep_name_end

/* First expansion: emit TRACE_DEFINE_ENUM() for every TICK_DEP_MASK_* value. */
#define tick_dep_name(sdep) TRACE_DEFINE_ENUM(TICK_DEP_MASK_##sdep);
#define tick_dep_name_end(sdep) TRACE_DEFINE_ENUM(TICK_DEP_MASK_##sdep);

TICK_DEP_NAMES

#undef tick_dep_name
#undef tick_dep_name_end

/*
 * Second definition: each entry now becomes a { mask value, "name" } pair.
 * The _end variant omits the trailing comma so the expanded list can be
 * passed as the argument list of __print_symbolic().
 */
#define tick_dep_name(sdep) { TICK_DEP_MASK_##sdep, #sdep },
#define tick_dep_name_end(sdep) { TICK_DEP_MASK_##sdep, #sdep }

/* Passes @val and the pair list above to __print_symbolic(). */
#define show_tick_dep_name(val) \
__print_symbolic(val, TICK_DEP_NAMES)

TRACE_EVENT(tick_stop,

TP_PROTO(int success, char *error_msg),
TP_PROTO(int success, int dependency),

TP_ARGS(success, error_msg),
TP_ARGS(success, dependency),

TP_STRUCT__entry(
__field( int , success )
__string( msg, error_msg )
__field( int , dependency )
),

TP_fast_assign(
__entry->success = success;
__assign_str(msg, error_msg);
__entry->dependency = dependency;
),

TP_printk("success=%s msg=%s", __entry->success ? "yes" : "no", __get_str(msg))
TP_printk("success=%d dependency=%s", __entry->success, \
show_tick_dep_name(__entry->dependency))
);
#endif

Expand Down
65 changes: 48 additions & 17 deletions kernel/events/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -3112,17 +3112,6 @@ static int perf_rotate_context(struct perf_cpu_context *cpuctx)
return rotate;
}

#ifdef CONFIG_NO_HZ_FULL
/*
 * Returns true only when nr_freq_events reads as zero and this CPU's
 * perf_throttled_count is zero; false otherwise.
 */
bool perf_event_can_stop_tick(void)
{
	return !(atomic_read(&nr_freq_events) ||
		 __this_cpu_read(perf_throttled_count));
}
#endif

void perf_event_task_tick(void)
{
struct list_head *head = this_cpu_ptr(&active_ctx_list);
Expand All @@ -3133,6 +3122,7 @@ void perf_event_task_tick(void)

__this_cpu_inc(perf_throttled_seq);
throttled = __this_cpu_xchg(perf_throttled_count, 0);
tick_dep_clear_cpu(smp_processor_id(), TICK_DEP_BIT_PERF_EVENTS);

list_for_each_entry_safe(ctx, tmp, head, active_ctx_list)
perf_adjust_freq_unthr_context(ctx, throttled);
Expand Down Expand Up @@ -3564,6 +3554,28 @@ static void unaccount_event_cpu(struct perf_event *event, int cpu)
atomic_dec(&per_cpu(perf_cgroup_events, cpu));
}

#ifdef CONFIG_NO_HZ_FULL
/* Held around nr_freq_events updates and the matching tick dependency change. */
static DEFINE_SPINLOCK(nr_freq_lock);
#endif

/*
 * Drop one freq event under nr_freq_lock. When the counter reaches zero,
 * clear TICK_DEP_BIT_PERF_EVENTS via tick_nohz_dep_clear(). The body is
 * empty without CONFIG_NO_HZ_FULL.
 */
static void unaccount_freq_event_nohz(void)
{
#ifdef CONFIG_NO_HZ_FULL
	bool last_event;

	spin_lock(&nr_freq_lock);
	last_event = atomic_dec_and_test(&nr_freq_events);
	if (last_event)
		tick_nohz_dep_clear(TICK_DEP_BIT_PERF_EVENTS);
	spin_unlock(&nr_freq_lock);
#endif
}

/*
 * Drop one freq event: a plain atomic decrement when nohz full is not
 * enabled, otherwise the nohz-aware path.
 */
static void unaccount_freq_event(void)
{
	if (!tick_nohz_full_enabled()) {
		atomic_dec(&nr_freq_events);
		return;
	}

	unaccount_freq_event_nohz();
}

static void unaccount_event(struct perf_event *event)
{
bool dec = false;
Expand All @@ -3580,7 +3592,7 @@ static void unaccount_event(struct perf_event *event)
if (event->attr.task)
atomic_dec(&nr_task_events);
if (event->attr.freq)
atomic_dec(&nr_freq_events);
unaccount_freq_event();
if (event->attr.context_switch) {
dec = true;
atomic_dec(&nr_switch_events);
Expand Down Expand Up @@ -6424,9 +6436,9 @@ static int __perf_event_overflow(struct perf_event *event,
if (unlikely(throttle
&& hwc->interrupts >= max_samples_per_tick)) {
__this_cpu_inc(perf_throttled_count);
tick_dep_set_cpu(smp_processor_id(), TICK_DEP_BIT_PERF_EVENTS);
hwc->interrupts = MAX_INTERRUPTS;
perf_log_throttle(event, 0);
tick_nohz_full_kick();
ret = 1;
}
}
Expand Down Expand Up @@ -7816,6 +7828,27 @@ static void account_event_cpu(struct perf_event *event, int cpu)
atomic_inc(&per_cpu(perf_cgroup_events, cpu));
}

/*
 * Freq events need the tick to stay alive (see perf_event_task_tick).
 *
 * Account one freq event; the first one sets TICK_DEP_BIT_PERF_EVENTS via
 * tick_nohz_dep_set(). The body is empty without CONFIG_NO_HZ_FULL.
 */
static void account_freq_event_nohz(void)
{
#ifdef CONFIG_NO_HZ_FULL
	bool first_event;

	/* Lock so we don't race with concurrent unaccount */
	spin_lock(&nr_freq_lock);
	first_event = (atomic_inc_return(&nr_freq_events) == 1);
	if (first_event)
		tick_nohz_dep_set(TICK_DEP_BIT_PERF_EVENTS);
	spin_unlock(&nr_freq_lock);
#endif
}

/*
 * Account one freq event: a plain atomic increment when nohz full is not
 * enabled, otherwise the nohz-aware path.
 */
static void account_freq_event(void)
{
	if (!tick_nohz_full_enabled()) {
		atomic_inc(&nr_freq_events);
		return;
	}

	account_freq_event_nohz();
}


static void account_event(struct perf_event *event)
{
bool inc = false;
Expand All @@ -7831,10 +7864,8 @@ static void account_event(struct perf_event *event)
atomic_inc(&nr_comm_events);
if (event->attr.task)
atomic_inc(&nr_task_events);
if (event->attr.freq) {
if (atomic_inc_return(&nr_freq_events) == 1)
tick_nohz_full_kick_all();
}
if (event->attr.freq)
account_freq_event();
if (event->attr.context_switch) {
atomic_inc(&nr_switch_events);
inc = true;
Expand Down
Loading

0 comments on commit 1f25184

Please sign in to comment.