Skip to content

Commit

Permalink
arch/tile: optimize get_user/put_user and friends
Browse files Browse the repository at this point in the history
Use direct loads and stores for get_user/put_user.

Previously, we would call out to a helper routine that would do the
appropriate thing and then return, handling the possible exception
internally.  Now we inline the load or store, along with a "we succeeded"
indication in a register; if the load or store faults, we write a
"we failed" indication into the same register and then return to the
following instruction.  This is more efficient and gives us more compact
code, as well as being more in line with what other architectures do.

The special futex assembly source file for TILE-Gx also disappears in
this change; we just use the same inlining idiom there as well, putting
the appropriate atomic operations directly into futex_atomic_op_inuser()
(and thus into the FUTEX_WAIT function).

The underlying atomic copy_from_user, copy_to_user functions were
renamed using the (cryptic) x86 convention as copy_from_user_ll and
copy_to_user_ll.

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
  • Loading branch information
Chris Metcalf committed May 25, 2012
1 parent 1efea40 commit 47d632f
Show file tree
Hide file tree
Showing 8 changed files with 241 additions and 315 deletions.
10 changes: 10 additions & 0 deletions arch/tile/include/asm/atomic_32.h
Original file line number Diff line number Diff line change
Expand Up @@ -303,7 +303,14 @@ void __init_atomic_per_cpu(void);
void __atomic_fault_unlock(int *lock_ptr);
#endif

/* Return a pointer to the lock for the given address. */
int *__atomic_hashed_lock(volatile void *v);

/* Private helper routines in lib/atomic_asm_32.S */
/*
 * Result pair returned by value from the helper routines declared below:
 * "val" carries the value handed back by the helper and "err" its error
 * status (users of this struct test err == 0 for success).
 */
struct __get_user {
unsigned long val;
int err;
};
extern struct __get_user __atomic_cmpxchg(volatile int *p,
int *lock, int o, int n);
extern struct __get_user __atomic_xchg(volatile int *p, int *lock, int n);
Expand All @@ -319,6 +326,9 @@ extern u64 __atomic64_xchg_add(volatile u64 *p, int *lock, u64 n);
extern u64 __atomic64_xchg_add_unless(volatile u64 *p,
int *lock, u64 o, u64 n);

/* Return failure from the atomic wrappers. */
struct __get_user __atomic_bad_address(int __user *addr);

#endif /* !__ASSEMBLY__ */

#endif /* _ASM_TILE_ATOMIC_32_H */
143 changes: 98 additions & 45 deletions arch/tile/include/asm/futex.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,29 +28,81 @@
#include <linux/futex.h>
#include <linux/uaccess.h>
#include <linux/errno.h>
#include <asm/atomic.h>

extern struct __get_user futex_set(u32 __user *v, int i);
extern struct __get_user futex_add(u32 __user *v, int n);
extern struct __get_user futex_or(u32 __user *v, int n);
extern struct __get_user futex_andn(u32 __user *v, int n);
extern struct __get_user futex_cmpxchg(u32 __user *v, int o, int n);
/*
* Support macros for futex operations. Do not use these macros directly.
* They assume "ret", "val", "oparg", and "uaddr" in the lexical context.
* __futex_cmpxchg() additionally assumes "oldval".
*/

#ifdef __tilegx__

/*
 * Emit the instruction OP inline, with operands %1 = "val" (output),
 * %3 = "uaddr" and %4 = "oparg", in the same bundle as a "movei" that
 * sets "ret" (%0) to 0, i.e. the "we succeeded" indication.
 *
 * The __ex_table entry pairs label 1 (that bundle) with the fixup code at
 * label 0, which overwrites "ret" with -EFAULT (%5) and jumps to label 9,
 * the point immediately after the bundle.  So if the access at label 1
 * faults, execution resumes after it with ret == -EFAULT.
 *
 * The "+m" (*(uaddr)) operand tells the compiler the user word is both
 * read and written by the asm.
 */
#define __futex_asm(OP) \
asm("1: {" #OP " %1, %3, %4; movei %0, 0 }\n" \
".pushsection .fixup,\"ax\"\n" \
"0: { movei %0, %5; j 9f }\n" \
".section __ex_table,\"a\"\n" \
".quad 1b, 0b\n" \
".popsection\n" \
"9:" \
: "=r" (ret), "=r" (val), "+m" (*(uaddr)) \
: "r" (uaddr), "r" (oparg), "i" (-EFAULT))

/*
 * Each operation plugs one instruction into __futex_asm().
 * __futex_andn() complements "oparg" in place (modifying the caller's
 * variable) and then issues fetchand4.  __futex_cmpxchg() first writes
 * "oldval" into SPR_CMPEXCH_VALUE and then issues cmpexch4 with "oparg"
 * as the instruction's operand.
 */
#define __futex_set() __futex_asm(exch4)
#define __futex_add() __futex_asm(fetchadd4)
#define __futex_or() __futex_asm(fetchor4)
#define __futex_andn() ({ oparg = ~oparg; __futex_asm(fetchand4); })
#define __futex_cmpxchg() \
({ __insn_mtspr(SPR_CMPEXCH_VALUE, oldval); __futex_asm(cmpexch4); })

/*
 * XOR is built from a compare-and-exchange retry loop: read the current
 * user word into a local "oldval", then repeatedly try to replace it with
 * (oldval ^ n) via __futex_cmpxchg(), where "n" is a saved copy of the
 * caller's "oparg" (which is overwritten with the new value each pass).
 *
 * The loop ends when the access faults (ret != 0) or when the value the
 * exchange observed ("val") equals the value we expected ("oldval"),
 * meaning the store took effect.  On a mismatch we must adopt the
 * observed value as the new expectation before retrying; otherwise every
 * retry would compare against the same stale value and could spin until
 * the word happened to return to it.
 *
 * Like the other support macros this assumes "ret", "val", "oparg" and
 * "uaddr" in the lexical context; "oldval" is declared locally so that
 * __futex_cmpxchg() picks it up.
 */
#define __futex_xor() \
({ \
	u32 oldval, n = oparg; \
	if ((ret = __get_user(oldval, uaddr)) == 0) { \
		for (;;) { \
			oparg = oldval ^ n; \
			__futex_cmpxchg(); \
			if (ret != 0 || oldval == val) \
				break; \
			/* Lost the race: retry against the value we saw. */ \
			oldval = val; \
		} \
	} \
})

/* No need to prefetch, since the atomic ops go to the home cache anyway. */
#define __futex_prolog()

#ifndef __tilegx__
extern struct __get_user futex_xor(u32 __user *v, int n);
#else
static inline struct __get_user futex_xor(u32 __user *uaddr, int n)
{
struct __get_user asm_ret = __get_user_4(uaddr);
if (!asm_ret.err) {
int oldval, newval;
do {
oldval = asm_ret.val;
newval = oldval ^ n;
asm_ret = futex_cmpxchg(uaddr, oldval, newval);
} while (asm_ret.err == 0 && oldval != asm_ret.val);

/*
 * Call the helper FN(uaddr, lock, oparg) and unpack the struct __get_user
 * it returns into "val" (gu.val) and "ret" (gu.err).  Besides "ret",
 * "val", "oparg" and "uaddr", this needs "lock" in the lexical context;
 * __futex_prolog() declares it.
 */
#define __futex_call(FN) \
{ \
struct __get_user gu = FN((u32 __force *)uaddr, lock, oparg); \
val = gu.val; \
ret = gu.err; \
}
return asm_ret;
}

/* Each operation calls the matching __atomic_* helper via __futex_call(). */
#define __futex_set() __futex_call(__atomic_xchg)
#define __futex_add() __futex_call(__atomic_xchg_add)
#define __futex_or() __futex_call(__atomic_or)
#define __futex_andn() __futex_call(__atomic_andn)
#define __futex_xor() __futex_call(__atomic_xor)

/*
 * Compare-and-exchange through __atomic_cmpxchg(uaddr, lock, oldval, oparg):
 * "oldval" is the expected value and "oparg" the replacement.  The returned
 * struct is unpacked into "val" and "ret" exactly as in __futex_call().
 * Needs "lock" and "oldval" in the lexical context.
 */
#define __futex_cmpxchg() \
{ \
struct __get_user gu = __atomic_cmpxchg((u32 __force *)uaddr, \
lock, oldval, oparg); \
val = gu.val; \
ret = gu.err; \
}

/*
 * Find the lock pointer for the atomic calls to use, and issue a
 * prefetch to the user address to bring it into cache. Similar to
 * __atomic_setup(), but we can't do a read into the L1 since it might
 * fault; instead we do a prefetch into the L2.
 *
 * Note that this expands to a declaration of the local "lock" (used by
 * __futex_call() and __futex_cmpxchg()) followed by statements, so it
 * introduces a new name into the enclosing scope.
 */
#define __futex_prolog() \
int *lock; \
__insn_prefetch(uaddr); \
lock = __atomic_hashed_lock((int __force *)uaddr)
#endif

static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
Expand All @@ -59,8 +111,12 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
int cmp = (encoded_op >> 24) & 15;
int oparg = (encoded_op << 8) >> 20;
int cmparg = (encoded_op << 20) >> 20;
int ret;
struct __get_user asm_ret;
int uninitialized_var(val), ret;

__futex_prolog();

/* The 32-bit futex code makes this assumption, so validate it here. */
BUILD_BUG_ON(sizeof(atomic_t) != sizeof(int));

if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
oparg = 1 << oparg;
Expand All @@ -71,46 +127,45 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
pagefault_disable();
switch (op) {
case FUTEX_OP_SET:
asm_ret = futex_set(uaddr, oparg);
__futex_set();
break;
case FUTEX_OP_ADD:
asm_ret = futex_add(uaddr, oparg);
__futex_add();
break;
case FUTEX_OP_OR:
asm_ret = futex_or(uaddr, oparg);
__futex_or();
break;
case FUTEX_OP_ANDN:
asm_ret = futex_andn(uaddr, oparg);
__futex_andn();
break;
case FUTEX_OP_XOR:
asm_ret = futex_xor(uaddr, oparg);
__futex_xor();
break;
default:
asm_ret.err = -ENOSYS;
ret = -ENOSYS;
break;
}
pagefault_enable();

ret = asm_ret.err;

if (!ret) {
switch (cmp) {
case FUTEX_OP_CMP_EQ:
ret = (asm_ret.val == cmparg);
ret = (val == cmparg);
break;
case FUTEX_OP_CMP_NE:
ret = (asm_ret.val != cmparg);
ret = (val != cmparg);
break;
case FUTEX_OP_CMP_LT:
ret = (asm_ret.val < cmparg);
ret = (val < cmparg);
break;
case FUTEX_OP_CMP_GE:
ret = (asm_ret.val >= cmparg);
ret = (val >= cmparg);
break;
case FUTEX_OP_CMP_LE:
ret = (asm_ret.val <= cmparg);
ret = (val <= cmparg);
break;
case FUTEX_OP_CMP_GT:
ret = (asm_ret.val > cmparg);
ret = (val > cmparg);
break;
default:
ret = -ENOSYS;
Expand All @@ -120,22 +175,20 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
}

static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
u32 oldval, u32 newval)
u32 oldval, u32 oparg)
{
struct __get_user asm_ret;
int ret, val;

__futex_prolog();

if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
return -EFAULT;

asm_ret = futex_cmpxchg(uaddr, oldval, newval);
*uval = asm_ret.val;
return asm_ret.err;
}
__futex_cmpxchg();

#ifndef __tilegx__
/* Return failure from the atomic wrappers. */
struct __get_user __atomic_bad_address(int __user *addr);
#endif
*uval = val;
return ret;
}

#endif /* !__ASSEMBLY__ */

Expand Down
Loading

0 comments on commit 47d632f

Please sign in to comment.