Skip to content

Commit

Permalink
Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Browse files Browse the repository at this point in the history

Pull scheduler updates from Ingo Molnar:
 "The biggest change affects group scheduling: we now track the runnable
  average on a per-task entity basis, allowing a smoother, exponential
  decay average based load/weight estimation instead of the previous
  binary on-the-runqueue/off-the-runqueue load weight method.

  This will inevitably disturb workloads that were in some sort of
  borderline balancing state or unstable equilibrium, so an eye has to
  be kept on regressions.

  For that reason the new load average is limited to group
  scheduling (shares distribution) at the moment (which was also hurting
  the most from the prior, crude weight calculation and whose scheduling
  quality wins most from this change) - but we plan to extend this to
  regular SMP balancing as well in the future, which will simplify and
  speed up things a bit.

  Other changes involve ongoing preparatory work to extend NOHZ to the
  scheduler as well, eventually allowing completely irq-free user-space
  execution."

* 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (33 commits)
  Revert "sched/autogroup: Fix crash on reboot when autogroup is disabled"
  cputime: Comment cputime's adjusting code
  cputime: Consolidate cputime adjustment code
  cputime: Rename thread_group_times to thread_group_cputime_adjusted
  cputime: Move thread_group_cputime() to sched code
  vtime: Warn if irqs aren't disabled on system time accounting APIs
  vtime: No need to disable irqs on vtime_account()
  vtime: Consolidate a bit the ctx switch code
  vtime: Explicitly account pending user time on process tick
  vtime: Remove the underscore prefix invasion
  sched/autogroup: Fix crash on reboot when autogroup is disabled
  cputime: Separate irqtime accounting from generic vtime
  cputime: Specialize irq vtime hooks
  kvm: Directly account vtime to system on guest switch
  vtime: Make vtime_account_system() irqsafe
  vtime: Gather vtime declarations to their own header file
  sched: Describe CFS load-balancer
  sched: Introduce temporary FAIR_GROUP_SCHED dependency for load-tracking
  sched: Make __update_entity_runnable_avg() fast
  sched: Update_cfs_shares at period edge
  ...
  • Loading branch information
Linus Torvalds committed Dec 12, 2012
2 parents da830e5 + c1ad41f commit f57d54b
Show file tree
Hide file tree
Showing 26 changed files with 1,082 additions and 335 deletions.
2 changes: 2 additions & 0 deletions arch/ia64/include/asm/cputime.h
Original file line number Diff line number Diff line change
Expand Up @@ -103,5 +103,7 @@ static inline void cputime_to_timeval(const cputime_t ct, struct timeval *val)
#define cputime64_to_clock_t(__ct) \
cputime_to_clock_t((__force cputime_t)__ct)

extern void arch_vtime_task_switch(struct task_struct *tsk);

#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
#endif /* __IA64_CPUTIME_H */
22 changes: 4 additions & 18 deletions arch/ia64/kernel/time.c
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ static struct clocksource *itc_clocksource;

extern cputime_t cycle_to_cputime(u64 cyc);

static void vtime_account_user(struct task_struct *tsk)
void vtime_account_user(struct task_struct *tsk)
{
cputime_t delta_utime;
struct thread_info *ti = task_thread_info(tsk);
Expand All @@ -100,18 +100,11 @@ static void vtime_account_user(struct task_struct *tsk)
* accumulated times to the current process, and to prepare accounting on
* the next process.
*/
void vtime_task_switch(struct task_struct *prev)
void arch_vtime_task_switch(struct task_struct *prev)
{
struct thread_info *pi = task_thread_info(prev);
struct thread_info *ni = task_thread_info(current);

if (idle_task(smp_processor_id()) != prev)
vtime_account_system(prev);
else
vtime_account_idle(prev);

vtime_account_user(prev);

pi->ac_stamp = ni->ac_stamp;
ni->ac_stime = ni->ac_utime = 0;
}
Expand All @@ -126,6 +119,8 @@ static cputime_t vtime_delta(struct task_struct *tsk)
cputime_t delta_stime;
__u64 now;

WARN_ON_ONCE(!irqs_disabled());

now = ia64_get_itc();

delta_stime = cycle_to_cputime(ti->ac_stime + (now - ti->ac_stamp));
Expand All @@ -147,15 +142,6 @@ void vtime_account_idle(struct task_struct *tsk)
account_idle_time(vtime_delta(tsk));
}

/*
* Called from the timer interrupt handler to charge accumulated user time
* to the current process. Must be called with interrupts disabled.
*/
void account_process_tick(struct task_struct *p, int user_tick)
{
vtime_account_user(p);
}

#endif /* CONFIG_VIRT_CPU_ACCOUNTING */

static irqreturn_t
Expand Down
2 changes: 2 additions & 0 deletions arch/powerpc/include/asm/cputime.h
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,8 @@ static inline cputime_t clock_t_to_cputime(const unsigned long clk)

#define cputime64_to_clock_t(ct) cputime_to_clock_t((cputime_t)(ct))

/* Empty stub: this powerpc header does no arch-specific work in this hook. */
static inline void arch_vtime_task_switch(struct task_struct *tsk) { }

#endif /* __KERNEL__ */
#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
#endif /* __POWERPC_CPUTIME_H */
20 changes: 8 additions & 12 deletions arch/powerpc/kernel/time.c
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,8 @@ static u64 vtime_delta(struct task_struct *tsk,
u64 now, nowscaled, deltascaled;
u64 udelta, delta, user_scaled;

WARN_ON_ONCE(!irqs_disabled());

now = mftb();
nowscaled = read_spurr(now);
get_paca()->system_time += now - get_paca()->starttime;
Expand Down Expand Up @@ -355,15 +357,15 @@ void vtime_account_idle(struct task_struct *tsk)
}

/*
* Transfer the user and system times accumulated in the paca
* by the exception entry and exit code to the generic process
* user and system time records.
* Transfer the user time accumulated in the paca
* by the exception entry and exit code to the generic
* process user time records.
* Must be called with interrupts disabled.
* Assumes that vtime_account() has been called recently
* (i.e. since the last entry from usermode) so that
* Assumes that vtime_account_system/idle() has been called
* recently (i.e. since the last entry from usermode) so that
* get_paca()->user_time_scaled is up to date.
*/
void account_process_tick(struct task_struct *tsk, int user_tick)
void vtime_account_user(struct task_struct *tsk)
{
cputime_t utime, utimescaled;

Expand All @@ -375,12 +377,6 @@ void account_process_tick(struct task_struct *tsk, int user_tick)
account_user_time(tsk, utime, utimescaled);
}

void vtime_task_switch(struct task_struct *prev)
{
vtime_account(prev);
account_process_tick(prev, 0);
}

#else /* ! CONFIG_VIRT_CPU_ACCOUNTING */
#define calc_cputime_factors()
#endif
Expand Down
1 change: 1 addition & 0 deletions arch/s390/include/asm/cputime.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@


#define __ARCH_HAS_VTIME_ACCOUNT
#define __ARCH_HAS_VTIME_TASK_SWITCH

/* We want to use full resolution of the CPU timer: 2**-12 micro-seconds. */

Expand Down
13 changes: 12 additions & 1 deletion arch/s390/kernel/vtime.c
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,12 @@ void vtime_task_switch(struct task_struct *prev)
S390_lowcore.system_timer = ti->system_timer;
}

void account_process_tick(struct task_struct *tsk, int user_tick)
/*
* In s390, accounting pending user time also implies
* accounting system time in order to correctly compute
* the stolen time accounting.
*/
void vtime_account_user(struct task_struct *tsk)
{
if (do_account_vtime(tsk, HARDIRQ_OFFSET))
virt_timer_expire();
Expand All @@ -127,6 +132,8 @@ void vtime_account(struct task_struct *tsk)
struct thread_info *ti = task_thread_info(tsk);
u64 timer, system;

WARN_ON_ONCE(!irqs_disabled());

timer = S390_lowcore.last_update_timer;
S390_lowcore.last_update_timer = get_vtimer();
S390_lowcore.system_timer += timer - S390_lowcore.last_update_timer;
Expand All @@ -140,6 +147,10 @@ void vtime_account(struct task_struct *tsk)
}
EXPORT_SYMBOL_GPL(vtime_account);

/*
 * vtime_account_system() is declared as an alias of vtime_account(), so
 * both names refer to the same implementation. The alias is exported
 * separately for GPL modules.
 */
void vtime_account_system(struct task_struct *tsk)
__attribute__((alias("vtime_account")));
EXPORT_SYMBOL_GPL(vtime_account_system);

void __kprobes vtime_stop_cpu(void)
{
struct s390_idle_data *idle = &__get_cpu_var(s390_idle);
Expand Down
4 changes: 0 additions & 4 deletions arch/s390/kvm/kvm-s390.c
Original file line number Diff line number Diff line change
Expand Up @@ -608,9 +608,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
kvm_s390_deliver_pending_interrupts(vcpu);

vcpu->arch.sie_block->icptcode = 0;
local_irq_disable();
kvm_guest_enter();
local_irq_enable();
VCPU_EVENT(vcpu, 6, "entering sie flags %x",
atomic_read(&vcpu->arch.sie_block->cpuflags));
trace_kvm_s390_sie_enter(vcpu,
Expand All @@ -629,9 +627,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
vcpu->arch.sie_block->icptcode);
trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
local_irq_disable();
kvm_guest_exit();
local_irq_enable();

memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16);
return rc;
Expand Down
4 changes: 2 additions & 2 deletions fs/proc/array.c
Original file line number Diff line number Diff line change
Expand Up @@ -438,7 +438,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,

min_flt += sig->min_flt;
maj_flt += sig->maj_flt;
thread_group_times(task, &utime, &stime);
thread_group_cputime_adjusted(task, &utime, &stime);
gtime += sig->gtime;
}

Expand All @@ -454,7 +454,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
if (!whole) {
min_flt = task->min_flt;
maj_flt = task->maj_flt;
task_times(task, &utime, &stime);
task_cputime_adjusted(task, &utime, &stime);
gtime = task->gtime;
}

Expand Down
15 changes: 3 additions & 12 deletions include/linux/hardirq.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#include <linux/preempt.h>
#include <linux/lockdep.h>
#include <linux/ftrace_irq.h>
#include <linux/vtime.h>
#include <asm/hardirq.h>

/*
Expand Down Expand Up @@ -129,16 +130,6 @@ extern void synchronize_irq(unsigned int irq);
# define synchronize_irq(irq) barrier()
#endif

struct task_struct;

#if !defined(CONFIG_VIRT_CPU_ACCOUNTING) && !defined(CONFIG_IRQ_TIME_ACCOUNTING)
static inline void vtime_account(struct task_struct *tsk)
{
}
#else
extern void vtime_account(struct task_struct *tsk);
#endif

#if defined(CONFIG_TINY_RCU) || defined(CONFIG_TINY_PREEMPT_RCU)

static inline void rcu_nmi_enter(void)
Expand All @@ -162,7 +153,7 @@ extern void rcu_nmi_exit(void);
*/
#define __irq_enter() \
do { \
vtime_account(current); \
vtime_account_irq_enter(current); \
add_preempt_count(HARDIRQ_OFFSET); \
trace_hardirq_enter(); \
} while (0)
Expand All @@ -178,7 +169,7 @@ extern void irq_enter(void);
#define __irq_exit() \
do { \
trace_hardirq_exit(); \
vtime_account(current); \
vtime_account_irq_exit(current); \
sub_preempt_count(HARDIRQ_OFFSET); \
} while (0)

Expand Down
17 changes: 9 additions & 8 deletions include/linux/kernel_stat.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include <linux/cpumask.h>
#include <linux/interrupt.h>
#include <linux/sched.h>
#include <linux/vtime.h>
#include <asm/irq.h>
#include <asm/cputime.h>

Expand Down Expand Up @@ -126,16 +127,16 @@ extern void account_system_time(struct task_struct *, int, cputime_t, cputime_t)
extern void account_steal_time(cputime_t);
extern void account_idle_time(cputime_t);

extern void account_process_tick(struct task_struct *, int user);
extern void account_steal_ticks(unsigned long ticks);
extern void account_idle_ticks(unsigned long ticks);

#ifdef CONFIG_VIRT_CPU_ACCOUNTING
extern void vtime_task_switch(struct task_struct *prev);
extern void vtime_account_system(struct task_struct *tsk);
extern void vtime_account_idle(struct task_struct *tsk);
/*
 * CONFIG_VIRT_CPU_ACCOUNTING variant: simply forwards to
 * vtime_account_user() for @tsk. The @user argument is not used here.
 */
static inline void account_process_tick(struct task_struct *tsk, int user)
{
vtime_account_user(tsk);
}
#else
static inline void vtime_task_switch(struct task_struct *prev) { }
extern void account_process_tick(struct task_struct *, int user);
#endif

extern void account_steal_ticks(unsigned long ticks);
extern void account_idle_ticks(unsigned long ticks);

#endif /* _LINUX_KERNEL_STAT_H */
12 changes: 10 additions & 2 deletions include/linux/kvm_host.h
Original file line number Diff line number Diff line change
Expand Up @@ -726,7 +726,11 @@ static inline int kvm_deassign_device(struct kvm *kvm,
static inline void kvm_guest_enter(void)
{
BUG_ON(preemptible());
vtime_account(current);
/*
* This is running in ioctl context so we can avoid
* the call to vtime_account() with its unnecessary idle check.
*/
vtime_account_system_irqsafe(current);
current->flags |= PF_VCPU;
/* KVM does not hold any references to rcu protected data when it
* switches CPU into a guest mode. In fact switching to a guest mode
Expand All @@ -740,7 +744,11 @@ static inline void kvm_guest_enter(void)

static inline void kvm_guest_exit(void)
{
vtime_account(current);
/*
* This is running in ioctl context so we can avoid
* the call to vtime_account() with its unnecessary idle check.
*/
vtime_account_system_irqsafe(current);
current->flags &= ~PF_VCPU;
}

Expand Down
Loading

0 comments on commit f57d54b

Please sign in to comment.