Skip to content

Commit

Permalink
Merge tag 'arc-4.3-rc1' of git://git.kernel.org/pub/scm/linux/kernel/…
Browse files Browse the repository at this point in the history
…git/vgupta/arc

Pull ARC architecture updates from Vineet Gupta:
 "ARC updates for 4.3:

   - perf support for ARCv2 based cores (sampling interrupt, SMP)
   - leftovers for ARCv2 support
   - futex fixes"

* tag 'arc-4.3-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/vgupta/arc:
  ARCv2: entry: Fix reserved handler
  ARCv2: perf: Finally introduce HS perf unit
  ARCv2: perf: SMP support
  ARCv2: perf: implement exclusion of event counting in user or kernel mode
  ARCv2: perf: Support sampling events using overflow interrupts
  ARCv2: perf: implement "event_set_period"
  ARC: perf: cap the number of counters to hardware max of 32
  ARC: Eliminate some ARCv2 specific code for ARCompact build
  ARC: add/fix some comments in code - no functional change
  ARC: change some branchs to jumps to resolve linkage errors
  ARC: ensure futex ops are atomic in !LLSC config
  ARC: Enable HAVE_FUTEX_CMPXCHG
  ARC: make futex_atomic_cmpxchg_inatomic() return bimodal
  ARC: futex cosmetics
  ARC: add barriers to futex code
  ARCv2: IOC: Allow boot time disable
  ARCv2: SLC: Allow boot time disable
  ARCv2: Support IO Coherency and permutations involving L1 and L2 caches
  ARC: Enable optimistic spinning for LLSC config
  MAINTAINERS: add git tree for the arc architecture
  • Loading branch information
Linus Torvalds committed Sep 1, 2015
2 parents 361f7d1 + 3d59265 commit 28dce7c
Show file tree
Hide file tree
Showing 17 changed files with 524 additions and 139 deletions.
17 changes: 17 additions & 0 deletions Documentation/devicetree/bindings/arc/archs-pct.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
* ARC HS Performance Counters

The ARC HS can be configured with a pipeline performance monitor for counting
CPU and cache events like cache misses and hits. Like conventional PCT there
are 100+ hardware conditions dynamically mapped to up to 32 counters.
It also supports overflow interrupts.

Required properties:

- compatible : should contain
"snps,archs-pct"

Example:

pmu {
compatible = "snps,archs-pct";
};
3 changes: 2 additions & 1 deletion MAINTAINERS
Original file line number Diff line number Diff line change
Expand Up @@ -9911,8 +9911,9 @@ SYNOPSYS ARC ARCHITECTURE
M: Vineet Gupta <vgupta@synopsys.com>
S: Supported
F: arch/arc/
F: Documentation/devicetree/bindings/arc/
F: Documentation/devicetree/bindings/arc/*
F: drivers/tty/serial/arc_uart.c
T: git git://git.kernel.org/pub/scm/linux/kernel/git/vgupta/arc.git

SYNOPSYS ARC SDP platform support
M: Alexey Brodkin <abrodkin@synopsys.com>
Expand Down
2 changes: 2 additions & 0 deletions arch/arc/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

config ARC
def_bool y
select ARCH_SUPPORTS_ATOMIC_RMW if ARC_HAS_LLSC
select BUILDTIME_EXTABLE_SORT
select COMMON_CLK
select CLONE_BACKWARDS
Expand All @@ -22,6 +23,7 @@ config ARC
select GENERIC_SMP_IDLE_THREAD
select HAVE_ARCH_KGDB
select HAVE_ARCH_TRACEHOOK
select HAVE_FUTEX_CMPXCHG
select HAVE_IOREMAP_PROT
select HAVE_KPROBES
select HAVE_KRETPROBES
Expand Down
13 changes: 7 additions & 6 deletions arch/arc/boot/dts/axc003.dtsi
Original file line number Diff line number Diff line change
Expand Up @@ -72,12 +72,13 @@
};

/*
* This INTC is actually connected to DW APB GPIO
* which acts as a wire between MB INTC and CPU INTC.
* GPIO INTC is configured in platform init code
* and here we mimic direct connection from MB INTC to
* CPU INTC, thus we set "interrupts = <7>" instead of
* "interrupts = <12>"
* The DW APB ICTL intc on MB is connected to CPU intc via a
* DT "invisible" DW APB GPIO block, configured to simply pass thru
* interrupts - setup accordingly in platform init (plat-axs10x/axs10x.c)
*
* So here we mimic a direct connection between them, ignoring the
* DW APB GPIO. Thus set "interrupts = <24>" (DW APB GPIO to core)
* instead of "interrupts = <12>" (DW APB ICTL to DW APB GPIO)
*
* This intc actually resides on MB, but we move it here to
* avoid duplicating the MB dtsi file given that IRQ from
Expand Down
1 change: 1 addition & 0 deletions arch/arc/include/asm/arcregs.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
#define ARC_REG_RTT_BCR 0xF2
#define ARC_REG_IRQ_BCR 0xF3
#define ARC_REG_SMART_BCR 0xFF
#define ARC_REG_CLUSTER_BCR 0xcf

/* status32 Bits Positions */
#define STATUS_AE_BIT 5 /* Exception active */
Expand Down
8 changes: 8 additions & 0 deletions arch/arc/include/asm/cache.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@ extern void arc_cache_init(void);
extern char *arc_cache_mumbojumbo(int cpu_id, char *buf, int len);
extern void read_decode_cache_bcr(void);

extern int ioc_exists;

#endif /* !__ASSEMBLY__ */

/* Instruction cache related Auxiliary registers */
Expand Down Expand Up @@ -94,4 +96,10 @@ extern void read_decode_cache_bcr(void);
#define SLC_CTRL_BUSY 0x100
#define SLC_CTRL_RGN_OP_INV 0x200

/* IO coherency related Auxiliary registers */
#define ARC_REG_IO_COH_ENABLE 0x500
#define ARC_REG_IO_COH_PARTIAL 0x501
#define ARC_REG_IO_COH_AP0_BASE 0x508
#define ARC_REG_IO_COH_AP0_SIZE 0x509

#endif /* _ASM_CACHE_H */
22 changes: 11 additions & 11 deletions arch/arc/include/asm/cmpxchg.h
Original file line number Diff line number Diff line change
Expand Up @@ -110,18 +110,18 @@ static inline unsigned long __xchg(unsigned long val, volatile void *ptr,
sizeof(*(ptr))))

/*
* On ARC700, EX insn is inherently atomic, so by default "vanilla" xchg() need
* not require any locking. However there's a quirk.
* ARC lacks native CMPXCHG, thus emulated (see above), using external locking -
* incidentally it "reuses" the same atomic_ops_lock used by atomic APIs.
* Now, llist code uses cmpxchg() and xchg() on same data, so xchg() needs to
* abide by same serializing rules, thus ends up using atomic_ops_lock as well.
* xchg() maps directly to ARC EX instruction which guarantees atomicity.
* However in !LLSC config, it also needs to use @atomic_ops_lock spinlock
* due to a subtle reason:
* - For !LLSC, cmpxchg() needs to use that lock (see above) and there is lot
* of kernel code which calls xchg()/cmpxchg() on same data (see llist.h)
* Hence xchg() needs to follow same locking rules.
*
* This however is only relevant if SMP and/or ARC lacks LLSC
* if (UP or LLSC)
* xchg doesn't need serialization
* else <==> !(UP or LLSC) <==> (!UP and !LLSC) <==> (SMP and !LLSC)
* xchg needs serialization
* Technically the lock is also needed for UP (boils down to irq save/restore)
* but we can cheat a bit since cmpxchg()'s atomic_ops_lock() would cause irqs to
* be disabled thus can't possibly be interrupted/preempted/clobbered by xchg()
* Other way around, xchg is one instruction anyways, so can't be interrupted
* as such
*/

#if !defined(CONFIG_ARC_HAS_LLSC) && defined(CONFIG_SMP)
Expand Down
72 changes: 43 additions & 29 deletions arch/arc/include/asm/futex.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

#define __futex_atomic_op(insn, ret, oldval, uaddr, oparg)\
\
smp_mb(); \
__asm__ __volatile__( \
"1: llock %1, [%2] \n" \
insn "\n" \
Expand All @@ -30,7 +31,7 @@
" .section .fixup,\"ax\" \n" \
" .align 4 \n" \
"4: mov %0, %4 \n" \
" b 3b \n" \
" j 3b \n" \
" .previous \n" \
" .section __ex_table,\"a\" \n" \
" .align 4 \n" \
Expand All @@ -40,12 +41,14 @@
\
: "=&r" (ret), "=&r" (oldval) \
: "r" (uaddr), "r" (oparg), "ir" (-EFAULT) \
: "cc", "memory")
: "cc", "memory"); \
smp_mb() \

#else /* !CONFIG_ARC_HAS_LLSC */

#define __futex_atomic_op(insn, ret, oldval, uaddr, oparg)\
\
smp_mb(); \
__asm__ __volatile__( \
"1: ld %1, [%2] \n" \
insn "\n" \
Expand All @@ -55,7 +58,7 @@
" .section .fixup,\"ax\" \n" \
" .align 4 \n" \
"4: mov %0, %4 \n" \
" b 3b \n" \
" j 3b \n" \
" .previous \n" \
" .section __ex_table,\"a\" \n" \
" .align 4 \n" \
Expand All @@ -65,7 +68,8 @@
\
: "=&r" (ret), "=&r" (oldval) \
: "r" (uaddr), "r" (oparg), "ir" (-EFAULT) \
: "cc", "memory")
: "cc", "memory"); \
smp_mb() \

#endif

Expand All @@ -83,13 +87,17 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
return -EFAULT;

#ifndef CONFIG_ARC_HAS_LLSC
preempt_disable(); /* to guarantee atomic r-m-w of futex op */
#endif
pagefault_disable();

switch (op) {
case FUTEX_OP_SET:
__futex_atomic_op("mov %0, %3", ret, oldval, uaddr, oparg);
break;
case FUTEX_OP_ADD:
/* oldval = *uaddr; *uaddr += oparg ; ret = *uaddr */
__futex_atomic_op("add %0, %1, %3", ret, oldval, uaddr, oparg);
break;
case FUTEX_OP_OR:
Expand All @@ -106,6 +114,9 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
}

pagefault_enable();
#ifndef CONFIG_ARC_HAS_LLSC
preempt_enable();
#endif

if (!ret) {
switch (cmp) {
Expand Down Expand Up @@ -134,54 +145,57 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
return ret;
}

/* Compare-xchg with pagefaults disabled.
* Notes:
* -Best-Effort: Exchg happens only if compare succeeds.
* If compare fails, returns; leaving retry/looping to upper layers
* -successful cmp-xchg: return orig value in @addr (same as cmp val)
* -Compare fails: return orig value in @addr
* -user access r/w fails: return -EFAULT
/*
* cmpxchg of futex (pagefaults disabled by caller)
* Return 0 for success, -EFAULT otherwise
*/
static inline int
futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval,
u32 newval)
futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 expval,
u32 newval)
{
u32 val;
int ret = 0;
u32 existval;

if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
return -EFAULT;

pagefault_disable();
#ifndef CONFIG_ARC_HAS_LLSC
preempt_disable(); /* to guarantee atomic r-m-w of futex op */
#endif
smp_mb();

__asm__ __volatile__(
#ifdef CONFIG_ARC_HAS_LLSC
"1: llock %0, [%3] \n"
" brne %0, %1, 3f \n"
"2: scond %2, [%3] \n"
"1: llock %1, [%4] \n"
" brne %1, %2, 3f \n"
"2: scond %3, [%4] \n"
" bnz 1b \n"
#else
"1: ld %0, [%3] \n"
" brne %0, %1, 3f \n"
"2: st %2, [%3] \n"
"1: ld %1, [%4] \n"
" brne %1, %2, 3f \n"
"2: st %3, [%4] \n"
#endif
"3: \n"
" .section .fixup,\"ax\" \n"
"4: mov %0, %4 \n"
" b 3b \n"
"4: mov %0, %5 \n"
" j 3b \n"
" .previous \n"
" .section __ex_table,\"a\" \n"
" .align 4 \n"
" .word 1b, 4b \n"
" .word 2b, 4b \n"
" .previous\n"
: "=&r"(val)
: "r"(oldval), "r"(newval), "r"(uaddr), "ir"(-EFAULT)
: "+&r"(ret), "=&r"(existval)
: "r"(expval), "r"(newval), "r"(uaddr), "ir"(-EFAULT)
: "cc", "memory");

pagefault_enable();
smp_mb();

*uval = val;
return val;
#ifndef CONFIG_ARC_HAS_LLSC
preempt_enable();
#endif
*uval = existval;
return ret;
}

#endif
23 changes: 17 additions & 6 deletions arch/arc/include/asm/perf_event.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
/*
* Linux performance counter support for ARC
*
* Copyright (C) 2014-2015 Synopsys, Inc. (www.synopsys.com)
* Copyright (C) 2011-2013 Synopsys, Inc. (www.synopsys.com)
*
* This program is free software; you can redistribute it and/or modify
Expand All @@ -12,8 +13,8 @@
#ifndef __ASM_PERF_EVENT_H
#define __ASM_PERF_EVENT_H

/* real maximum varies per CPU, this is the maximum supported by the driver */
#define ARC_PMU_MAX_HWEVENTS 64
/* Max number of counters that PCT block may ever have */
#define ARC_PERF_MAX_COUNTERS 32

#define ARC_REG_CC_BUILD 0xF6
#define ARC_REG_CC_INDEX 0x240
Expand All @@ -28,15 +29,22 @@
#define ARC_REG_PCT_CONFIG 0x254
#define ARC_REG_PCT_CONTROL 0x255
#define ARC_REG_PCT_INDEX 0x256
#define ARC_REG_PCT_INT_CNTL 0x25C
#define ARC_REG_PCT_INT_CNTH 0x25D
#define ARC_REG_PCT_INT_CTRL 0x25E
#define ARC_REG_PCT_INT_ACT 0x25F

#define ARC_REG_PCT_CONFIG_USER (1 << 18) /* count in user mode */
#define ARC_REG_PCT_CONFIG_KERN (1 << 19) /* count in kernel mode */

#define ARC_REG_PCT_CONTROL_CC (1 << 16) /* clear counts */
#define ARC_REG_PCT_CONTROL_SN (1 << 17) /* snapshot */

struct arc_reg_pct_build {
#ifdef CONFIG_CPU_BIG_ENDIAN
unsigned int m:8, c:8, r:6, s:2, v:8;
unsigned int m:8, c:8, r:5, i:1, s:2, v:8;
#else
unsigned int v:8, s:2, r:6, c:8, m:8;
unsigned int v:8, s:2, i:1, r:5, c:8, m:8;
#endif
};

Expand Down Expand Up @@ -95,10 +103,13 @@ static const char * const arc_pmu_ev_hw_map[] = {

/* counts condition */
[PERF_COUNT_HW_INSTRUCTIONS] = "iall",
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = "ijmp",
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = "ijmp", /* Excludes ZOL jumps */
[PERF_COUNT_ARC_BPOK] = "bpok", /* NP-NT, PT-T, PNT-NT */
#ifdef CONFIG_ISA_ARCV2
[PERF_COUNT_HW_BRANCH_MISSES] = "bpmp",
#else
[PERF_COUNT_HW_BRANCH_MISSES] = "bpfail", /* NP-T, PT-NT, PNT-T */

#endif
[PERF_COUNT_ARC_LDC] = "imemrdc", /* Instr: mem read cached */
[PERF_COUNT_ARC_STC] = "imemwrc", /* Instr: mem write cached */

Expand Down
9 changes: 2 additions & 7 deletions arch/arc/kernel/entry-arcv2.S
Original file line number Diff line number Diff line change
Expand Up @@ -57,13 +57,8 @@ VECTOR handle_interrupt ; (23) End of fixed IRQs

.section .text, "ax",@progbits

res_service: ; processor restart
flag 0x1 ; not implemented
nop
nop

reserved: ; processor restart
rtie ; jump to processor initializations
reserved:
flag 1 ; Unexpected event, halt

;##################### Interrupt Handling ##############################

Expand Down
6 changes: 3 additions & 3 deletions arch/arc/kernel/entry.S
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ ENTRY(ret_from_fork)
; when the forked child comes here from the __switch_to function
; r0 has the last task pointer.
; put last task in scheduler queue
bl @schedule_tail
jl @schedule_tail

ld r9, [sp, PT_status32]
brne r9, 0, 1f
Expand Down Expand Up @@ -320,7 +320,7 @@ resume_user_mode_begin:
; --- (Slow Path #1) task preemption ---
bbit0 r9, TIF_NEED_RESCHED, .Lchk_pend_signals
mov blink, resume_user_mode_begin ; tail-call to U mode ret chks
b @schedule ; BTST+Bnz causes relo error in link
j @schedule ; BTST+Bnz causes relo error in link

.Lchk_pend_signals:
IRQ_ENABLE r10
Expand Down Expand Up @@ -381,7 +381,7 @@ resume_kernel_mode:
bbit0 r9, TIF_NEED_RESCHED, .Lrestore_regs

; Invoke PREEMPTION
bl preempt_schedule_irq
jl preempt_schedule_irq

; preempt_schedule_irq() always returns with IRQ disabled
#endif
Expand Down
Loading

0 comments on commit 28dce7c

Please sign in to comment.