Skip to content

Commit

Permalink
Merge branch 'timers-nohz-for-linus' of git://git.kernel.org/pub/scm/…
Browse files Browse the repository at this point in the history
…linux/kernel/git/tip/tip

Pull NOHZ updates from Ingo Molnar:
 "NOHZ enhancements, by Frederic Weisbecker, which reorganizes/refactors
  the NOHZ 'can the tick be stopped?' infrastructure and related code to
  be data driven, and harmonizes the naming and handling of all the
  various properties"

[ This makes the ugly "fetch_or()" macro that the scheduler used
  internally a new generic helper, and does a bad job at it.

  I'm pulling it, but I've asked Ingo and Frederic to get this
  fixed up ]

* 'timers-nohz-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  sched-clock: Migrate to use new tick dependency mask model
  posix-cpu-timers: Migrate to use new tick dependency mask model
  sched: Migrate sched to use new tick dependency mask model
  sched: Account rr tasks
  perf: Migrate perf to use new tick dependency mask model
  nohz: Use enum code for tick stop failure tracing message
  nohz: New tick dependency mask
  nohz: Implement wide kick on top of irq work
  atomic: Export fetch_or()
  • Loading branch information
Linus Torvalds committed Mar 15, 2016
2 parents d4e7961 + 1f25184 commit e23604e
Show file tree
Hide file tree
Showing 14 changed files with 424 additions and 161 deletions.
21 changes: 21 additions & 0 deletions include/linux/atomic.h
Original file line number Diff line number Diff line change
Expand Up @@ -548,6 +548,27 @@ static inline int atomic_dec_if_positive(atomic_t *v)
}
#endif

/**
 * fetch_or - perform *ptr |= mask and return old value of *ptr
 * @ptr: pointer to value
 * @mask: mask to OR on the value
 *
 * cmpxchg() based fetch_or, a macro so it works for different integer types.
 *
 * @ptr and @mask are each evaluated exactly once, so argument expressions
 * with side effects (e.g. fetch_or(p++, m)) behave as a function call would.
 * The macro's locals carry a _fo_ prefix so they are unlikely to shadow an
 * identifier that appears in the caller's argument expressions.
 *
 * The loop retries until cmpxchg() reports that the value it replaced is the
 * one the new value was computed from; that value is the result.
 */
#ifndef fetch_or
#define fetch_or(ptr, mask)						\
({									\
	typeof(ptr) _fo_ptr = (ptr);					\
	typeof(mask) _fo_mask = (mask);					\
	typeof(*_fo_ptr) _fo_old, _fo_val = *_fo_ptr;			\
									\
	for (;;) {							\
		_fo_old = cmpxchg(_fo_ptr, _fo_val, _fo_val | _fo_mask);\
		if (_fo_old == _fo_val)					\
			break;						\
		_fo_val = _fo_old;					\
	}								\
	_fo_old;							\
})
#endif


#ifdef CONFIG_GENERIC_ATOMIC64
#include <asm-generic/atomic64.h>
#endif
Expand Down
6 changes: 0 additions & 6 deletions include/linux/perf_event.h
Original file line number Diff line number Diff line change
Expand Up @@ -1110,12 +1110,6 @@ static inline void perf_event_task_tick(void) { }
static inline int perf_event_release_kernel(struct perf_event *event) { return 0; }
#endif

#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_NO_HZ_FULL)
extern bool perf_event_can_stop_tick(void);
#else
static inline bool perf_event_can_stop_tick(void) { return true; }
#endif

#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL)
extern void perf_restore_debug_store(void);
#else
Expand Down
3 changes: 0 additions & 3 deletions include/linux/posix-timers.h
Original file line number Diff line number Diff line change
Expand Up @@ -128,9 +128,6 @@ void posix_cpu_timer_schedule(struct k_itimer *timer);
void run_posix_cpu_timers(struct task_struct *task);
void posix_cpu_timers_exit(struct task_struct *task);
void posix_cpu_timers_exit_group(struct task_struct *task);

bool posix_cpu_timers_can_stop_tick(struct task_struct *tsk);

void set_process_cpu_timer(struct task_struct *task, unsigned int clock_idx,
cputime_t *newval, cputime_t *oldval);

Expand Down
11 changes: 8 additions & 3 deletions include/linux/sched.h
Original file line number Diff line number Diff line change
Expand Up @@ -717,6 +717,10 @@ struct signal_struct {
/* Earliest-expiration cache. */
struct task_cputime cputime_expires;

#ifdef CONFIG_NO_HZ_FULL
unsigned long tick_dep_mask;
#endif

struct list_head cpu_timers[3];

struct pid *tty_old_pgrp;
Expand Down Expand Up @@ -1542,6 +1546,10 @@ struct task_struct {
VTIME_SYS,
} vtime_snap_whence;
#endif

#ifdef CONFIG_NO_HZ_FULL
unsigned long tick_dep_mask;
#endif
unsigned long nvcsw, nivcsw; /* context switch counts */
u64 start_time; /* monotonic time in nsec */
u64 real_start_time; /* boot based time in nsec */
Expand Down Expand Up @@ -2356,10 +2364,7 @@ static inline void wake_up_nohz_cpu(int cpu) { }
#endif

#ifdef CONFIG_NO_HZ_FULL
extern bool sched_can_stop_tick(void);
extern u64 scheduler_tick_max_deferment(void);
#else
static inline bool sched_can_stop_tick(void) { return false; }
#endif

#ifdef CONFIG_SCHED_AUTOGROUP
Expand Down
97 changes: 93 additions & 4 deletions include/linux/tick.h
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,19 @@ static inline void tick_broadcast_exit(void)
tick_broadcast_oneshot_control(TICK_BROADCAST_EXIT);
}

/*
 * Bit numbers naming the individual reasons the tick may be depended on.
 * These are the values passed as @bit to the tick_dep_set*() and
 * tick_dep_clear*() helpers; the matching TICK_DEP_MASK_* constants are
 * (1 << bit).
 */
enum tick_dep_bits {
TICK_DEP_BIT_POSIX_TIMER = 0,
TICK_DEP_BIT_PERF_EVENTS = 1,
TICK_DEP_BIT_SCHED = 2,
TICK_DEP_BIT_CLOCK_UNSTABLE = 3
};

/*
 * Mask form of enum tick_dep_bits: one bit per dependency, plus
 * TICK_DEP_MASK_NONE for "no dependency".
 */
#define TICK_DEP_MASK_NONE 0
#define TICK_DEP_MASK_POSIX_TIMER (1 << TICK_DEP_BIT_POSIX_TIMER)
#define TICK_DEP_MASK_PERF_EVENTS (1 << TICK_DEP_BIT_PERF_EVENTS)
#define TICK_DEP_MASK_SCHED (1 << TICK_DEP_BIT_SCHED)
#define TICK_DEP_MASK_CLOCK_UNSTABLE (1 << TICK_DEP_BIT_CLOCK_UNSTABLE)

#ifdef CONFIG_NO_HZ_COMMON
extern int tick_nohz_enabled;
extern int tick_nohz_tick_stopped(void);
Expand Down Expand Up @@ -154,9 +167,73 @@ static inline int housekeeping_any_cpu(void)
return cpumask_any_and(housekeeping_mask, cpu_online_mask);
}

extern void tick_nohz_full_kick(void);
extern void tick_nohz_dep_set(enum tick_dep_bits bit);
extern void tick_nohz_dep_clear(enum tick_dep_bits bit);
extern void tick_nohz_dep_set_cpu(int cpu, enum tick_dep_bits bit);
extern void tick_nohz_dep_clear_cpu(int cpu, enum tick_dep_bits bit);
extern void tick_nohz_dep_set_task(struct task_struct *tsk,
enum tick_dep_bits bit);
extern void tick_nohz_dep_clear_task(struct task_struct *tsk,
enum tick_dep_bits bit);
extern void tick_nohz_dep_set_signal(struct signal_struct *signal,
enum tick_dep_bits bit);
extern void tick_nohz_dep_clear_signal(struct signal_struct *signal,
enum tick_dep_bits bit);

/*
 * The below are tick_nohz_dep_[set,clear]*() wrappers that optimize off-cases
 * on top of static keys.
 */

/* Forward @bit to tick_nohz_dep_set() only when nohz full is enabled. */
static inline void tick_dep_set(enum tick_dep_bits bit)
{
	if (!tick_nohz_full_enabled())
		return;

	tick_nohz_dep_set(bit);
}

/* Forward @bit to tick_nohz_dep_clear() only when nohz full is enabled. */
static inline void tick_dep_clear(enum tick_dep_bits bit)
{
	if (!tick_nohz_full_enabled())
		return;

	tick_nohz_dep_clear(bit);
}

/*
 * Forward @bit for @cpu to tick_nohz_dep_set_cpu(), but only when
 * tick_nohz_full_cpu() reports @cpu as a nohz full CPU.
 */
static inline void tick_dep_set_cpu(int cpu, enum tick_dep_bits bit)
{
	if (!tick_nohz_full_cpu(cpu))
		return;

	tick_nohz_dep_set_cpu(cpu, bit);
}

/*
 * Forward @bit for @cpu to tick_nohz_dep_clear_cpu(), but only when
 * tick_nohz_full_cpu() reports @cpu as a nohz full CPU.
 */
static inline void tick_dep_clear_cpu(int cpu, enum tick_dep_bits bit)
{
	if (!tick_nohz_full_cpu(cpu))
		return;

	tick_nohz_dep_clear_cpu(cpu, bit);
}

/*
 * Forward @bit for task @tsk to tick_nohz_dep_set_task() only when nohz full
 * is enabled.
 */
static inline void tick_dep_set_task(struct task_struct *tsk,
				     enum tick_dep_bits bit)
{
	if (!tick_nohz_full_enabled())
		return;

	tick_nohz_dep_set_task(tsk, bit);
}
/*
 * Forward @bit for task @tsk to tick_nohz_dep_clear_task() only when nohz
 * full is enabled.
 */
static inline void tick_dep_clear_task(struct task_struct *tsk,
				       enum tick_dep_bits bit)
{
	if (!tick_nohz_full_enabled())
		return;

	tick_nohz_dep_clear_task(tsk, bit);
}
/*
 * Forward @bit for @signal to tick_nohz_dep_set_signal() only when nohz full
 * is enabled.
 */
static inline void tick_dep_set_signal(struct signal_struct *signal,
				       enum tick_dep_bits bit)
{
	if (!tick_nohz_full_enabled())
		return;

	tick_nohz_dep_set_signal(signal, bit);
}
/*
 * Forward @bit for @signal to tick_nohz_dep_clear_signal() only when nohz
 * full is enabled.
 */
static inline void tick_dep_clear_signal(struct signal_struct *signal,
					 enum tick_dep_bits bit)
{
	if (!tick_nohz_full_enabled())
		return;

	tick_nohz_dep_clear_signal(signal, bit);
}

extern void tick_nohz_full_kick_cpu(int cpu);
extern void tick_nohz_full_kick_all(void);
extern void __tick_nohz_task_switch(void);
#else
static inline int housekeeping_any_cpu(void)
Expand All @@ -166,9 +243,21 @@ static inline int housekeeping_any_cpu(void)
static inline bool tick_nohz_full_enabled(void) { return false; }
static inline bool tick_nohz_full_cpu(int cpu) { return false; }
static inline void tick_nohz_full_add_cpus_to(struct cpumask *mask) { }

/*
 * Empty variants of the tick_dep_*() helpers for this #else branch, so
 * callers can use them unconditionally and have the calls compile away.
 * NOTE(review): presumably the !CONFIG_NO_HZ_FULL configuration; the
 * matching #ifdef is not visible in this hunk - confirm.
 */
static inline void tick_dep_set(enum tick_dep_bits bit) { }
static inline void tick_dep_clear(enum tick_dep_bits bit) { }
static inline void tick_dep_set_cpu(int cpu, enum tick_dep_bits bit) { }
static inline void tick_dep_clear_cpu(int cpu, enum tick_dep_bits bit) { }
static inline void tick_dep_set_task(struct task_struct *tsk,
enum tick_dep_bits bit) { }
static inline void tick_dep_clear_task(struct task_struct *tsk,
enum tick_dep_bits bit) { }
static inline void tick_dep_set_signal(struct signal_struct *signal,
enum tick_dep_bits bit) { }
static inline void tick_dep_clear_signal(struct signal_struct *signal,
enum tick_dep_bits bit) { }

static inline void tick_nohz_full_kick_cpu(int cpu) { }
static inline void tick_nohz_full_kick(void) { }
static inline void tick_nohz_full_kick_all(void) { }
static inline void __tick_nohz_task_switch(void) { }
#endif

Expand Down
36 changes: 31 additions & 5 deletions include/trace/events/timer.h
Original file line number Diff line number Diff line change
Expand Up @@ -328,23 +328,49 @@ TRACE_EVENT(itimer_expire,
);

#ifdef CONFIG_NO_HZ_COMMON

/*
 * Single list of the tick dependency names. It is expanded twice below with
 * different definitions of tick_dep_name()/tick_dep_name_end(); the _end
 * variant marks the last entry so the second expansion can omit the
 * trailing comma.
 */
#define TICK_DEP_NAMES \
tick_dep_name(NONE) \
tick_dep_name(POSIX_TIMER) \
tick_dep_name(PERF_EVENTS) \
tick_dep_name(SCHED) \
tick_dep_name_end(CLOCK_UNSTABLE)

#undef tick_dep_name
#undef tick_dep_name_end

/* First expansion: pass every TICK_DEP_MASK_* value to TRACE_DEFINE_ENUM(). */
#define tick_dep_name(sdep) TRACE_DEFINE_ENUM(TICK_DEP_MASK_##sdep);
#define tick_dep_name_end(sdep) TRACE_DEFINE_ENUM(TICK_DEP_MASK_##sdep);

TICK_DEP_NAMES

#undef tick_dep_name
#undef tick_dep_name_end

/*
 * Second expansion: turn each entry into a { mask value, "NAME" } pair. The
 * definitions are left in place because TICK_DEP_NAMES is only expanded
 * where show_tick_dep_name() is used.
 */
#define tick_dep_name(sdep) { TICK_DEP_MASK_##sdep, #sdep },
#define tick_dep_name_end(sdep) { TICK_DEP_MASK_##sdep, #sdep }

/* Print @val as the name of the matching TICK_DEP_MASK_* entry. */
#define show_tick_dep_name(val) \
__print_symbolic(val, TICK_DEP_NAMES)

TRACE_EVENT(tick_stop,

TP_PROTO(int success, char *error_msg),
TP_PROTO(int success, int dependency),

TP_ARGS(success, error_msg),
TP_ARGS(success, dependency),

TP_STRUCT__entry(
__field( int , success )
__string( msg, error_msg )
__field( int , dependency )
),

TP_fast_assign(
__entry->success = success;
__assign_str(msg, error_msg);
__entry->dependency = dependency;
),

TP_printk("success=%s msg=%s", __entry->success ? "yes" : "no", __get_str(msg))
TP_printk("success=%d dependency=%s", __entry->success, \
show_tick_dep_name(__entry->dependency))
);
#endif

Expand Down
65 changes: 48 additions & 17 deletions kernel/events/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -3112,17 +3112,6 @@ static int perf_rotate_context(struct perf_cpu_context *cpuctx)
return rotate;
}

#ifdef CONFIG_NO_HZ_FULL
bool perf_event_can_stop_tick(void)
{
if (atomic_read(&nr_freq_events) ||
__this_cpu_read(perf_throttled_count))
return false;
else
return true;
}
#endif

void perf_event_task_tick(void)
{
struct list_head *head = this_cpu_ptr(&active_ctx_list);
Expand All @@ -3133,6 +3122,7 @@ void perf_event_task_tick(void)

__this_cpu_inc(perf_throttled_seq);
throttled = __this_cpu_xchg(perf_throttled_count, 0);
tick_dep_clear_cpu(smp_processor_id(), TICK_DEP_BIT_PERF_EVENTS);

list_for_each_entry_safe(ctx, tmp, head, active_ctx_list)
perf_adjust_freq_unthr_context(ctx, throttled);
Expand Down Expand Up @@ -3564,6 +3554,28 @@ static void unaccount_event_cpu(struct perf_event *event, int cpu)
atomic_dec(&per_cpu(perf_cgroup_events, cpu));
}

/*
 * Held around the nr_freq_events update together with the matching
 * tick_nohz_dep_set()/tick_nohz_dep_clear() of TICK_DEP_BIT_PERF_EVENTS, so
 * a concurrent account and unaccount cannot interleave between the counter
 * change and the dependency change.
 */
#ifdef CONFIG_NO_HZ_FULL
static DEFINE_SPINLOCK(nr_freq_lock);
#endif

/*
 * Drop one freq event from nr_freq_events and, if that was the last one,
 * clear the TICK_DEP_BIT_PERF_EVENTS tick dependency. Both steps happen
 * under nr_freq_lock. Compiles to an empty function without
 * CONFIG_NO_HZ_FULL.
 */
static void unaccount_freq_event_nohz(void)
{
#ifdef CONFIG_NO_HZ_FULL
	bool was_last;

	spin_lock(&nr_freq_lock);
	was_last = atomic_dec_and_test(&nr_freq_events);
	if (was_last)
		tick_nohz_dep_clear(TICK_DEP_BIT_PERF_EVENTS);
	spin_unlock(&nr_freq_lock);
#endif
}

/*
 * Account the removal of one freq event. Without nohz full a plain counter
 * decrement is enough; with it, the locked variant also maintains the tick
 * dependency.
 */
static void unaccount_freq_event(void)
{
	if (!tick_nohz_full_enabled()) {
		atomic_dec(&nr_freq_events);
		return;
	}

	unaccount_freq_event_nohz();
}

static void unaccount_event(struct perf_event *event)
{
bool dec = false;
Expand All @@ -3580,7 +3592,7 @@ static void unaccount_event(struct perf_event *event)
if (event->attr.task)
atomic_dec(&nr_task_events);
if (event->attr.freq)
atomic_dec(&nr_freq_events);
unaccount_freq_event();
if (event->attr.context_switch) {
dec = true;
atomic_dec(&nr_switch_events);
Expand Down Expand Up @@ -6424,9 +6436,9 @@ static int __perf_event_overflow(struct perf_event *event,
if (unlikely(throttle
&& hwc->interrupts >= max_samples_per_tick)) {
__this_cpu_inc(perf_throttled_count);
tick_dep_set_cpu(smp_processor_id(), TICK_DEP_BIT_PERF_EVENTS);
hwc->interrupts = MAX_INTERRUPTS;
perf_log_throttle(event, 0);
tick_nohz_full_kick();
ret = 1;
}
}
Expand Down Expand Up @@ -7815,6 +7827,27 @@ static void account_event_cpu(struct perf_event *event, int cpu)
atomic_inc(&per_cpu(perf_cgroup_events, cpu));
}

/*
 * Freq events need the tick to stay alive (see perf_event_task_tick).
 *
 * Count one more freq event and, when it is the first one, set the
 * TICK_DEP_BIT_PERF_EVENTS tick dependency. Compiles to an empty function
 * without CONFIG_NO_HZ_FULL.
 */
static void account_freq_event_nohz(void)
{
#ifdef CONFIG_NO_HZ_FULL
	bool is_first;

	/* Lock so we don't race with concurrent unaccount */
	spin_lock(&nr_freq_lock);
	is_first = (atomic_inc_return(&nr_freq_events) == 1);
	if (is_first)
		tick_nohz_dep_set(TICK_DEP_BIT_PERF_EVENTS);
	spin_unlock(&nr_freq_lock);
#endif
}

/*
 * Account the creation of one freq event. Without nohz full a plain counter
 * increment is enough; with it, the locked variant also maintains the tick
 * dependency.
 */
static void account_freq_event(void)
{
	if (!tick_nohz_full_enabled()) {
		atomic_inc(&nr_freq_events);
		return;
	}

	account_freq_event_nohz();
}


static void account_event(struct perf_event *event)
{
bool inc = false;
Expand All @@ -7830,10 +7863,8 @@ static void account_event(struct perf_event *event)
atomic_inc(&nr_comm_events);
if (event->attr.task)
atomic_inc(&nr_task_events);
if (event->attr.freq) {
if (atomic_inc_return(&nr_freq_events) == 1)
tick_nohz_full_kick_all();
}
if (event->attr.freq)
account_freq_event();
if (event->attr.context_switch) {
atomic_inc(&nr_switch_events);
inc = true;
Expand Down
Loading

0 comments on commit e23604e

Please sign in to comment.