[ARM] cache align destination pointer when copying memory for some processors

The implementation of the memory copy functions on ARM had a (disabled)
provision for aligning the source pointer before loading registers with
data.  It turns out that aligning the _destination_ pointer is much more
useful, as the read side is already sufficiently helped by the use of
preload.

So this changes the definition of the CALGN() macro to target the
destination pointer instead, and turns it on for Feroceon processors
where the gain is very noticeable.

Signed-off-by: Nicolas Pitre <nico@marvell.com>
Signed-off-by: Lennert Buytenhek <buytenh@marvell.com>
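
To make the idea concrete, here is a minimal C sketch of destination alignment (illustrative only, not the kernel code; the 32-byte granularity matches the #31 mask used by the CALGN prologue in the diff below, and the function and variable names are hypothetical): copy a short head until the destination pointer reaches a cache-line boundary, then do the bulk of the copy in whole-cache-line chunks so that every store fills a complete line.

#include <stddef.h>
#include <string.h>

/*
 * Illustrative sketch: align the *destination* to a 32-byte cache line,
 * then copy whole lines.  The real routines do the bulk loop with
 * ldm/stm bursts and preload (PLD) on the source side.
 */
static void *copy_align_dst(void *dst, const void *src, size_t n)
{
	unsigned char *d = dst;
	const unsigned char *s = src;
	size_t head = (size_t)(-(unsigned long)d & 31);	/* bytes to next 32-byte boundary */

	if (head > n)
		head = n;
	n -= head;
	while (head--)				/* byte copy until d is cache aligned */
		*d++ = *s++;
	for (; n >= 32; n -= 32, d += 32, s += 32)
		memcpy(d, s, 32);		/* stand-in for one ldm/stm burst */
	while (n--)				/* trailing bytes */
		*d++ = *s++;
	return dst;
}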
Nicolas Pitre authored and Lennert Buytenhek committed Jun 22, 2008
1 parent 4c4925c commit 2239aff
Showing 3 changed files with 19 additions and 20 deletions.
12 changes: 2 additions & 10 deletions arch/arm/lib/copy_template.S
@@ -12,14 +12,6 @@
  * published by the Free Software Foundation.
  */
 
-/*
- * This can be used to enable code to cacheline align the source pointer.
- * Experiments on tested architectures (StrongARM and XScale) didn't show
- * this a worthwhile thing to do. That might be different in the future.
- */
-//#define CALGN(code...)	code
-#define CALGN(code...)
-
 /*
  * Theory of operation
  * -------------------
@@ -82,7 +74,7 @@
 		stmfd	sp!, {r5 - r8}
 		blt	5f
 
-	CALGN(	ands	ip, r1, #31	)
+	CALGN(	ands	ip, r0, #31	)
 	CALGN(	rsb	r3, ip, #32	)
 	CALGN(	sbcnes	r4, r3, r2	)	@ C is always set here
 	CALGN(	bcs	2f		)

@@ -168,7 +160,7 @@
 		subs	r2, r2, #28
 		blt	14f
 
-	CALGN(	ands	ip, r1, #31	)
+	CALGN(	ands	ip, r0, #31	)
 	CALGN(	rsb	ip, ip, #32	)
 	CALGN(	sbcnes	r4, ip, r2	)	@ C is always set here
 	CALGN(	subcc	r2, r2, ip	)
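
The functional change in the hunks above (and in the matching memmove.S hunks below) is the switch from r1 to r0 in the ands that measures misalignment. copy_template.S provides the shared body of memcpy() and the related copy routines, and per the ARM calling convention the first three arguments are passed in r0, r1 and r2, so the registers correspond to the familiar C prototypes (the register comments are annotations added here, not part of the source):

	void *memcpy(void *dest /* r0 */, const void *src /* r1 */, size_t n /* r2 */);
	void *memmove(void *dest /* r0 */, const void *src /* r1 */, size_t n /* r2 */);

Masking r0 with #31 therefore measures how far the destination sits from a 32-byte cache-line boundary, where the old, disabled code had measured the source.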
12 changes: 2 additions & 10 deletions arch/arm/lib/memmove.S
@@ -13,14 +13,6 @@
 #include <linux/linkage.h>
 #include <asm/assembler.h>
 
-/*
- * This can be used to enable code to cacheline align the source pointer.
- * Experiments on tested architectures (StrongARM and XScale) didn't show
- * this a worthwhile thing to do. That might be different in the future.
- */
-//#define CALGN(code...)	code
-#define CALGN(code...)
-
 		.text
 
 /*
@@ -55,7 +47,7 @@ ENTRY(memmove)
 		stmfd	sp!, {r5 - r8}
 		blt	5f
 
-	CALGN(	ands	ip, r1, #31	)
+	CALGN(	ands	ip, r0, #31	)
 	CALGN(	sbcnes	r4, ip, r2	)	@ C is always set here
 	CALGN(	bcs	2f		)
 	CALGN(	adr	r4, 6f		)

@@ -139,7 +131,7 @@ ENTRY(memmove)
 		subs	r2, r2, #28
 		blt	14f
 
-	CALGN(	ands	ip, r1, #31	)
+	CALGN(	ands	ip, r0, #31	)
 	CALGN(	sbcnes	r4, ip, r2	)	@ C is always set here
 	CALGN(	subcc	r2, r2, ip	)
 	CALGN(	bcc	15f		)
15 changes: 15 additions & 0 deletions include/asm-arm/assembler.h
@@ -55,6 +55,21 @@
 #define PLD(code...)
 #endif
 
+/*
+ * This can be used to enable code to cacheline align the destination
+ * pointer when bulk writing to memory.  Experiments on StrongARM and
+ * XScale didn't show this a worthwhile thing to do when the cache is not
+ * set to write-allocate (this would need further testing on XScale when WA
+ * is used).
+ *
+ * On Feroceon there is much to gain however, regardless of cache mode.
+ */
+#ifdef CONFIG_CPU_FEROCEON
+#define CALGN(code...) code
+#else
+#define CALGN(code...)
+#endif
+
 /*
  * Enable and disable interrupts
  */
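A brief usage note, following directly from the #ifdef block above: CALGN is a variadic macro that expands either to its argument or to nothing, so on Feroceon a wrapped line such as

	CALGN(	ands	ip, r0, #31	)

in copy_template.S or memmove.S assembles to the plain ands instruction, while on every other processor it disappears at preprocessing time and the alignment prologue adds no code.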
