Skip to content

Commit

Permalink
riscv: Implement arch_cmpxchg128() using Zacas
Browse files Browse the repository at this point in the history
Now that Zacas is supported in the kernel, let's use the double word
atomic version of amocas to improve the SLUB allocator.

Note that we have to select fixed registers, otherwise gcc fails to pick
even registers and then produces a reserved encoding which fails to
assemble.

Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
Reviewed-by: Andrea Parri <parri.andrea@gmail.com>
Link: https://lore.kernel.org/r/20241103145153.105097-8-alexghiti@rivosinc.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
  • Loading branch information
Alexandre Ghiti authored and Palmer Dabbelt committed Nov 11, 2024
1 parent 6116e22 commit f7bd2be
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 0 deletions.
1 change: 1 addition & 0 deletions arch/riscv/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@ config RISCV
select GENERIC_VDSO_TIME_NS if HAVE_GENERIC_VDSO
select HARDIRQS_SW_RESEND
select HAS_IOPORT if MMU
select HAVE_ALIGNED_STRUCT_PAGE
select HAVE_ARCH_AUDITSYSCALL
select HAVE_ARCH_HUGE_VMALLOC if HAVE_ARCH_HUGE_VMAP
select HAVE_ARCH_HUGE_VMAP if MMU && 64BIT
Expand Down
38 changes: 38 additions & 0 deletions arch/riscv/include/asm/cmpxchg.h
Original file line number Diff line number Diff line change
Expand Up @@ -296,6 +296,44 @@
arch_cmpxchg_release((ptr), (o), (n)); \
})

#if defined(CONFIG_64BIT) && defined(CONFIG_RISCV_ISA_ZACAS)

#define system_has_cmpxchg128() riscv_has_extension_unlikely(RISCV_ISA_EXT_ZACAS)

union __u128_halves {
u128 full;
struct {
u64 low, high;
};
};

#define __arch_cmpxchg128(p, o, n, cas_sfx) \
({ \
__typeof__(*(p)) __o = (o); \
union __u128_halves __hn = { .full = (n) }; \
union __u128_halves __ho = { .full = (__o) }; \
register unsigned long t1 asm ("t1") = __hn.low; \
register unsigned long t2 asm ("t2") = __hn.high; \
register unsigned long t3 asm ("t3") = __ho.low; \
register unsigned long t4 asm ("t4") = __ho.high; \
\
__asm__ __volatile__ ( \
" amocas.q" cas_sfx " %0, %z3, %2" \
: "+&r" (t3), "+&r" (t4), "+A" (*(p)) \
: "rJ" (t1), "rJ" (t2) \
: "memory"); \
\
((u128)t4 << 64) | t3; \
})

#define arch_cmpxchg128(ptr, o, n) \
__arch_cmpxchg128((ptr), (o), (n), ".aqrl")

#define arch_cmpxchg128_local(ptr, o, n) \
__arch_cmpxchg128((ptr), (o), (n), "")

#endif /* CONFIG_64BIT && CONFIG_RISCV_ISA_ZACAS */

#ifdef CONFIG_RISCV_ISA_ZAWRS
/*
* Despite wrs.nto being "WRS-with-no-timeout", in the absence of changes to
Expand Down

0 comments on commit f7bd2be

Please sign in to comment.