Skip to content

Commit

Permalink
powerpc: Replace __get_cpu_var uses
Browse files Browse the repository at this point in the history
This still has not been merged and now powerpc is the only arch that does
not have this change. Sorry about missing linuxppc-dev before.

V1->V2
  - Fix up to work against 3.18-rc1

__get_cpu_var() is used for multiple purposes in the kernel source. One of
them is address calculation via the form &__get_cpu_var(x).  This calculates
the address for the instance of the percpu variable of the current processor
based on an offset.

Other use cases are for storing and retrieving data from the current
processor's percpu area.  __get_cpu_var() can be used as an lvalue when
writing data or on the right side of an assignment.

__get_cpu_var() is defined as:

	#define __get_cpu_var(var) (*this_cpu_ptr(&(var)))

__get_cpu_var() always only does an address determination. However, store
and retrieve operations could use a segment prefix (or global register on
other platforms) to avoid the address calculation.

this_cpu_write() and this_cpu_read() can directly take an offset into a
percpu area and use optimized assembly code to read and write per cpu
variables.

This patch converts __get_cpu_var into either an explicit address
calculation using this_cpu_ptr() or into a use of this_cpu operations that
use the offset.  Thereby address calculations are avoided and fewer registers
are used when code is generated.

At the end of the patch set all uses of __get_cpu_var have been removed so
the macro is removed too.

The patch set includes passes over all arches as well. Once these operations
are used throughout, specialized macros can be defined in non-x86
arches as well in order to optimize per cpu access, e.g. by using a global
register that may be set to the per cpu base.

Transformations done to __get_cpu_var()

1. Determine the address of the percpu instance of the current processor.

	DEFINE_PER_CPU(int, y);
	int *x = &__get_cpu_var(y);

    Converts to

	int *x = this_cpu_ptr(&y);

2. Same as #1 but this time an array structure is involved.

	DEFINE_PER_CPU(int, y[20]);
	int *x = __get_cpu_var(y);

    Converts to

	int *x = this_cpu_ptr(y);

3. Retrieve the content of the current processor's instance of a per cpu
variable.

	DEFINE_PER_CPU(int, y);
	int x = __get_cpu_var(y);

   Converts to

	int x = __this_cpu_read(y);

4. Retrieve the content of a percpu struct

	DEFINE_PER_CPU(struct mystruct, y);
	struct mystruct x = __get_cpu_var(y);

   Converts to

	memcpy(&x, this_cpu_ptr(&y), sizeof(x));

5. Assignment to a per cpu variable

	DEFINE_PER_CPU(int, y);
	__get_cpu_var(y) = x;

   Converts to

	__this_cpu_write(y, x);

6. Increment/Decrement etc of a per cpu variable

	DEFINE_PER_CPU(int, y);
	__get_cpu_var(y)++;

   Converts to

	__this_cpu_inc(y);

Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
CC: Paul Mackerras <paulus@samba.org>
Signed-off-by: Christoph Lameter <cl@linux.com>
[mpe: Fix build errors caused by set/or_softirq_pending(), and rework
      assignment in __set_breakpoint() to use memcpy().]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
  • Loading branch information
Christoph Lameter authored and Michael Ellerman committed Nov 3, 2014
1 parent 0df1f24 commit 69111ba
Show file tree
Hide file tree
Showing 32 changed files with 108 additions and 103 deletions.
7 changes: 6 additions & 1 deletion arch/powerpc/include/asm/hardirq.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,12 @@ DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);

#define __ARCH_IRQ_STAT

#define local_softirq_pending() __get_cpu_var(irq_stat).__softirq_pending
#define local_softirq_pending() __this_cpu_read(irq_stat.__softirq_pending)

#define __ARCH_SET_SOFTIRQ_PENDING

#define set_softirq_pending(x) __this_cpu_write(irq_stat.__softirq_pending, (x))
#define or_softirq_pending(x) __this_cpu_or(irq_stat.__softirq_pending, (x))

static inline void ack_bad_irq(unsigned int irq)
{
Expand Down
4 changes: 2 additions & 2 deletions arch/powerpc/include/asm/tlbflush.h
Original file line number Diff line number Diff line change
Expand Up @@ -107,14 +107,14 @@ extern void __flush_tlb_pending(struct ppc64_tlb_batch *batch);

static inline void arch_enter_lazy_mmu_mode(void)
{
struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch);
struct ppc64_tlb_batch *batch = this_cpu_ptr(&ppc64_tlb_batch);

batch->active = 1;
}

static inline void arch_leave_lazy_mmu_mode(void)
{
struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch);
struct ppc64_tlb_batch *batch = this_cpu_ptr(&ppc64_tlb_batch);

if (batch->index)
__flush_tlb_pending(batch);
Expand Down
8 changes: 4 additions & 4 deletions arch/powerpc/include/asm/xics.h
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ DECLARE_PER_CPU(struct xics_cppr, xics_cppr);

static inline void xics_push_cppr(unsigned int vec)
{
struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr);
struct xics_cppr *os_cppr = this_cpu_ptr(&xics_cppr);

if (WARN_ON(os_cppr->index >= MAX_NUM_PRIORITIES - 1))
return;
Expand All @@ -111,7 +111,7 @@ static inline void xics_push_cppr(unsigned int vec)

static inline unsigned char xics_pop_cppr(void)
{
struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr);
struct xics_cppr *os_cppr = this_cpu_ptr(&xics_cppr);

if (WARN_ON(os_cppr->index < 1))
return LOWEST_PRIORITY;
Expand All @@ -121,7 +121,7 @@ static inline unsigned char xics_pop_cppr(void)

static inline void xics_set_base_cppr(unsigned char cppr)
{
struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr);
struct xics_cppr *os_cppr = this_cpu_ptr(&xics_cppr);

/* we only really want to set the priority when there's
* just one cppr value on the stack
Expand All @@ -133,7 +133,7 @@ static inline void xics_set_base_cppr(unsigned char cppr)

static inline unsigned char xics_cppr_top(void)
{
struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr);
struct xics_cppr *os_cppr = this_cpu_ptr(&xics_cppr);

return os_cppr->stack[os_cppr->index];
}
Expand Down
2 changes: 1 addition & 1 deletion arch/powerpc/kernel/dbell.c
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ void doorbell_exception(struct pt_regs *regs)

may_hard_irq_enable();

__get_cpu_var(irq_stat).doorbell_irqs++;
__this_cpu_inc(irq_stat.doorbell_irqs);

smp_ipi_demux();

Expand Down
6 changes: 3 additions & 3 deletions arch/powerpc/kernel/hw_breakpoint.c
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ int hw_breakpoint_slots(int type)
int arch_install_hw_breakpoint(struct perf_event *bp)
{
struct arch_hw_breakpoint *info = counter_arch_bp(bp);
struct perf_event **slot = &__get_cpu_var(bp_per_reg);
struct perf_event **slot = this_cpu_ptr(&bp_per_reg);

*slot = bp;

Expand All @@ -88,7 +88,7 @@ int arch_install_hw_breakpoint(struct perf_event *bp)
*/
void arch_uninstall_hw_breakpoint(struct perf_event *bp)
{
struct perf_event **slot = &__get_cpu_var(bp_per_reg);
struct perf_event **slot = this_cpu_ptr(&bp_per_reg);

if (*slot != bp) {
WARN_ONCE(1, "Can't find the breakpoint");
Expand Down Expand Up @@ -226,7 +226,7 @@ int __kprobes hw_breakpoint_handler(struct die_args *args)
*/
rcu_read_lock();

bp = __get_cpu_var(bp_per_reg);
bp = __this_cpu_read(bp_per_reg);
if (!bp)
goto out;
info = counter_arch_bp(bp);
Expand Down
2 changes: 1 addition & 1 deletion arch/powerpc/kernel/iommu.c
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,7 @@ static unsigned long iommu_range_alloc(struct device *dev,
* We don't need to disable preemption here because any CPU can
* safely use any IOMMU pool.
*/
pool_nr = __raw_get_cpu_var(iommu_pool_hash) & (tbl->nr_pools - 1);
pool_nr = __this_cpu_read(iommu_pool_hash) & (tbl->nr_pools - 1);

if (largealloc)
pool = &(tbl->large_pool);
Expand Down
4 changes: 2 additions & 2 deletions arch/powerpc/kernel/irq.c
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ static inline notrace void set_soft_enabled(unsigned long enable)
static inline notrace int decrementer_check_overflow(void)
{
u64 now = get_tb_or_rtc();
u64 *next_tb = &__get_cpu_var(decrementers_next_tb);
u64 *next_tb = this_cpu_ptr(&decrementers_next_tb);

return now >= *next_tb;
}
Expand Down Expand Up @@ -499,7 +499,7 @@ void __do_irq(struct pt_regs *regs)

/* And finally process it */
if (unlikely(irq == NO_IRQ))
__get_cpu_var(irq_stat).spurious_irqs++;
__this_cpu_inc(irq_stat.spurious_irqs);
else
generic_handle_irq(irq);

Expand Down
2 changes: 1 addition & 1 deletion arch/powerpc/kernel/kgdb.c
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ static int kgdb_singlestep(struct pt_regs *regs)
{
struct thread_info *thread_info, *exception_thread_info;
struct thread_info *backup_current_thread_info =
&__get_cpu_var(kgdb_thread_info);
this_cpu_ptr(&kgdb_thread_info);

if (user_mode(regs))
return 0;
Expand Down
6 changes: 3 additions & 3 deletions arch/powerpc/kernel/kprobes.c
Original file line number Diff line number Diff line change
Expand Up @@ -119,15 +119,15 @@ static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb)

static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb)
{
__get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp;
__this_cpu_write(current_kprobe, kcb->prev_kprobe.kp);
kcb->kprobe_status = kcb->prev_kprobe.status;
kcb->kprobe_saved_msr = kcb->prev_kprobe.saved_msr;
}

static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
struct kprobe_ctlblk *kcb)
{
__get_cpu_var(current_kprobe) = p;
__this_cpu_write(current_kprobe, p);
kcb->kprobe_saved_msr = regs->msr;
}

Expand Down Expand Up @@ -192,7 +192,7 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
ret = 1;
goto no_kprobe;
}
p = __get_cpu_var(current_kprobe);
p = __this_cpu_read(current_kprobe);
if (p->break_handler && p->break_handler(p, regs)) {
goto ss_probe;
}
Expand Down
24 changes: 12 additions & 12 deletions arch/powerpc/kernel/mce.c
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,8 @@ void save_mce_event(struct pt_regs *regs, long handled,
uint64_t nip, uint64_t addr)
{
uint64_t srr1;
int index = __get_cpu_var(mce_nest_count)++;
struct machine_check_event *mce = &__get_cpu_var(mce_event[index]);
int index = __this_cpu_inc_return(mce_nest_count);
struct machine_check_event *mce = this_cpu_ptr(&mce_event[index]);

/*
* Return if we don't have enough space to log mce event.
Expand Down Expand Up @@ -143,7 +143,7 @@ void save_mce_event(struct pt_regs *regs, long handled,
*/
int get_mce_event(struct machine_check_event *mce, bool release)
{
int index = __get_cpu_var(mce_nest_count) - 1;
int index = __this_cpu_read(mce_nest_count) - 1;
struct machine_check_event *mc_evt;
int ret = 0;

Expand All @@ -153,7 +153,7 @@ int get_mce_event(struct machine_check_event *mce, bool release)

/* Check if we have MCE info to process. */
if (index < MAX_MC_EVT) {
mc_evt = &__get_cpu_var(mce_event[index]);
mc_evt = this_cpu_ptr(&mce_event[index]);
/* Copy the event structure and release the original */
if (mce)
*mce = *mc_evt;
Expand All @@ -163,7 +163,7 @@ int get_mce_event(struct machine_check_event *mce, bool release)
}
/* Decrement the count to free the slot. */
if (release)
__get_cpu_var(mce_nest_count)--;
__this_cpu_dec(mce_nest_count);

return ret;
}
Expand All @@ -184,13 +184,13 @@ void machine_check_queue_event(void)
if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
return;

index = __get_cpu_var(mce_queue_count)++;
index = __this_cpu_inc_return(mce_queue_count);
/* If queue is full, just return for now. */
if (index >= MAX_MC_EVT) {
__get_cpu_var(mce_queue_count)--;
__this_cpu_dec(mce_queue_count);
return;
}
__get_cpu_var(mce_event_queue[index]) = evt;
memcpy(this_cpu_ptr(&mce_event_queue[index]), &evt, sizeof(evt));

/* Queue irq work to process this event later. */
irq_work_queue(&mce_event_process_work);
Expand All @@ -208,11 +208,11 @@ static void machine_check_process_queued_event(struct irq_work *work)
* For now just print it to console.
* TODO: log this error event to FSP or nvram.
*/
while (__get_cpu_var(mce_queue_count) > 0) {
index = __get_cpu_var(mce_queue_count) - 1;
while (__this_cpu_read(mce_queue_count) > 0) {
index = __this_cpu_read(mce_queue_count) - 1;
machine_check_print_event_info(
&__get_cpu_var(mce_event_queue[index]));
__get_cpu_var(mce_queue_count)--;
this_cpu_ptr(&mce_event_queue[index]));
__this_cpu_dec(mce_queue_count);
}
}

Expand Down
10 changes: 5 additions & 5 deletions arch/powerpc/kernel/process.c
Original file line number Diff line number Diff line change
Expand Up @@ -499,7 +499,7 @@ static inline int set_dawr(struct arch_hw_breakpoint *brk)

void __set_breakpoint(struct arch_hw_breakpoint *brk)
{
__get_cpu_var(current_brk) = *brk;
memcpy(this_cpu_ptr(&current_brk), brk, sizeof(*brk));

if (cpu_has_feature(CPU_FTR_DAWR))
set_dawr(brk);
Expand Down Expand Up @@ -842,7 +842,7 @@ struct task_struct *__switch_to(struct task_struct *prev,
* schedule DABR
*/
#ifndef CONFIG_HAVE_HW_BREAKPOINT
if (unlikely(!hw_brk_match(&__get_cpu_var(current_brk), &new->thread.hw_brk)))
if (unlikely(!hw_brk_match(this_cpu_ptr(&current_brk), &new->thread.hw_brk)))
__set_breakpoint(&new->thread.hw_brk);
#endif /* CONFIG_HAVE_HW_BREAKPOINT */
#endif
Expand All @@ -856,7 +856,7 @@ struct task_struct *__switch_to(struct task_struct *prev,
* Collect processor utilization data per process
*/
if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
struct cpu_usage *cu = &__get_cpu_var(cpu_usage_array);
struct cpu_usage *cu = this_cpu_ptr(&cpu_usage_array);
long unsigned start_tb, current_tb;
start_tb = old_thread->start_tb;
cu->current_tb = current_tb = mfspr(SPRN_PURR);
Expand All @@ -866,7 +866,7 @@ struct task_struct *__switch_to(struct task_struct *prev,
#endif /* CONFIG_PPC64 */

#ifdef CONFIG_PPC_BOOK3S_64
batch = &__get_cpu_var(ppc64_tlb_batch);
batch = this_cpu_ptr(&ppc64_tlb_batch);
if (batch->active) {
current_thread_info()->local_flags |= _TLF_LAZY_MMU;
if (batch->index)
Expand All @@ -889,7 +889,7 @@ struct task_struct *__switch_to(struct task_struct *prev,
#ifdef CONFIG_PPC_BOOK3S_64
if (current_thread_info()->local_flags & _TLF_LAZY_MMU) {
current_thread_info()->local_flags &= ~_TLF_LAZY_MMU;
batch = &__get_cpu_var(ppc64_tlb_batch);
batch = this_cpu_ptr(&ppc64_tlb_batch);
batch->active = 1;
}
#endif /* CONFIG_PPC_BOOK3S_64 */
Expand Down
6 changes: 3 additions & 3 deletions arch/powerpc/kernel/smp.c
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,7 @@ void smp_muxed_ipi_message_pass(int cpu, int msg)

irqreturn_t smp_ipi_demux(void)
{
struct cpu_messages *info = &__get_cpu_var(ipi_message);
struct cpu_messages *info = this_cpu_ptr(&ipi_message);
unsigned int all;

mb(); /* order any irq clear */
Expand Down Expand Up @@ -442,9 +442,9 @@ void generic_mach_cpu_die(void)
idle_task_exit();
cpu = smp_processor_id();
printk(KERN_DEBUG "CPU%d offline\n", cpu);
__get_cpu_var(cpu_state) = CPU_DEAD;
__this_cpu_write(cpu_state, CPU_DEAD);
smp_wmb();
while (__get_cpu_var(cpu_state) != CPU_UP_PREPARE)
while (__this_cpu_read(cpu_state) != CPU_UP_PREPARE)
cpu_relax();
}

Expand Down
4 changes: 2 additions & 2 deletions arch/powerpc/kernel/sysfs.c
Original file line number Diff line number Diff line change
Expand Up @@ -394,10 +394,10 @@ void ppc_enable_pmcs(void)
ppc_set_pmu_inuse(1);

/* Only need to enable them once */
if (__get_cpu_var(pmcs_enabled))
if (__this_cpu_read(pmcs_enabled))
return;

__get_cpu_var(pmcs_enabled) = 1;
__this_cpu_write(pmcs_enabled, 1);

if (ppc_md.enable_pmcs)
ppc_md.enable_pmcs();
Expand Down
22 changes: 11 additions & 11 deletions arch/powerpc/kernel/time.c
Original file line number Diff line number Diff line change
Expand Up @@ -458,9 +458,9 @@ static inline void clear_irq_work_pending(void)

DEFINE_PER_CPU(u8, irq_work_pending);

#define set_irq_work_pending_flag() __get_cpu_var(irq_work_pending) = 1
#define test_irq_work_pending() __get_cpu_var(irq_work_pending)
#define clear_irq_work_pending() __get_cpu_var(irq_work_pending) = 0
#define set_irq_work_pending_flag() __this_cpu_write(irq_work_pending, 1)
#define test_irq_work_pending() __this_cpu_read(irq_work_pending)
#define clear_irq_work_pending() __this_cpu_write(irq_work_pending, 0)

#endif /* 32 vs 64 bit */

Expand All @@ -482,8 +482,8 @@ void arch_irq_work_raise(void)
static void __timer_interrupt(void)
{
struct pt_regs *regs = get_irq_regs();
u64 *next_tb = &__get_cpu_var(decrementers_next_tb);
struct clock_event_device *evt = &__get_cpu_var(decrementers);
u64 *next_tb = this_cpu_ptr(&decrementers_next_tb);
struct clock_event_device *evt = this_cpu_ptr(&decrementers);
u64 now;

trace_timer_interrupt_entry(regs);
Expand All @@ -498,21 +498,21 @@ static void __timer_interrupt(void)
*next_tb = ~(u64)0;
if (evt->event_handler)
evt->event_handler(evt);
__get_cpu_var(irq_stat).timer_irqs_event++;
__this_cpu_inc(irq_stat.timer_irqs_event);
} else {
now = *next_tb - now;
if (now <= DECREMENTER_MAX)
set_dec((int)now);
/* We may have raced with new irq work */
if (test_irq_work_pending())
set_dec(1);
__get_cpu_var(irq_stat).timer_irqs_others++;
__this_cpu_inc(irq_stat.timer_irqs_others);
}

#ifdef CONFIG_PPC64
/* collect purr register values often, for accurate calculations */
if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
struct cpu_usage *cu = &__get_cpu_var(cpu_usage_array);
struct cpu_usage *cu = this_cpu_ptr(&cpu_usage_array);
cu->current_tb = mfspr(SPRN_PURR);
}
#endif
Expand All @@ -527,7 +527,7 @@ static void __timer_interrupt(void)
void timer_interrupt(struct pt_regs * regs)
{
struct pt_regs *old_regs;
u64 *next_tb = &__get_cpu_var(decrementers_next_tb);
u64 *next_tb = this_cpu_ptr(&decrementers_next_tb);

/* Ensure a positive value is written to the decrementer, or else
* some CPUs will continue to take decrementer exceptions.
Expand Down Expand Up @@ -813,7 +813,7 @@ static void __init clocksource_init(void)
static int decrementer_set_next_event(unsigned long evt,
struct clock_event_device *dev)
{
__get_cpu_var(decrementers_next_tb) = get_tb_or_rtc() + evt;
__this_cpu_write(decrementers_next_tb, get_tb_or_rtc() + evt);
set_dec(evt);

/* We may have raced with new irq work */
Expand All @@ -833,7 +833,7 @@ static void decrementer_set_mode(enum clock_event_mode mode,
/* Interrupt handler for the timer broadcast IPI */
void tick_broadcast_ipi_handler(void)
{
u64 *next_tb = &__get_cpu_var(decrementers_next_tb);
u64 *next_tb = this_cpu_ptr(&decrementers_next_tb);

*next_tb = get_tb_or_rtc();
__timer_interrupt();
Expand Down
Loading

0 comments on commit 69111ba

Please sign in to comment.