-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'sparc64-optimized-fls'
Vijay Kumar says: ==================== sparc64: Optimize fls and __fls SPARC provides the lzcnt instruction (with VIS3), which can be used to optimize the fls, __fls and fls64 functions. For systems that support the lzcnt instruction, we now do boot time patching to use the SPARC optimized fls, __fls and fls64 functions. v3->v4: - Fixed a typo. v2->v3: - Using ENTRY(), ENDPROC() for assembler functions. - Removed BITS_PER_LONG from __fls. - Using generic fls64(). - Replaced lzcnt instruction with .word directive. v1->v2: - Fixed delay slot issue. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
- Loading branch information
Showing
7 changed files
with
175 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
/* NG4fls.S: SPARC optimized fls and __fls for T4 and above. | ||
* | ||
* Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved. | ||
*/ | ||
|
||
#include <linux/linkage.h> | ||
|
||
/* LZCNT_O0_G2: hand-encoded instruction word.
 *
 * Per the usage notes at its call sites in this file, the word stands for
 * "lzcnt %o0, %g2" (source %o0, destination %g2).  Because it is emitted
 * with .word instead of a mnemonic, both operands are fixed by the
 * encoding: using other registers requires a different constant.
 */
#define LZCNT_O0_G2 \ | ||
.word 0x85b002e8 | ||
|
||
.text | ||
/* %g2 and %g3 are declared as scratch registers; the routines in this
 * file use them as temporaries and do not preserve them.
 */
.register %g2, #scratch | ||
.register %g3, #scratch | ||
|
||
/* NG4fls: find-last-set implemented with the lzcnt instruction.
 *
 * In:    %o0 = value to scan
 * Out:   %o0 = 64 - lzcnt(%o0), i.e. the 1-based position of the most
 *              significant set bit of the register
 * Clobb: %g2 (leading-zero count), %g3 (constant 64)
 *
 * The subtraction is from 64, so the result is relative to the whole
 * 64-bit register.  NOTE(review): for this to equal a 32-bit fls() the
 * caller presumably passes the argument zero-extended - confirm against
 * the C prototype.  A zero input presumably gives lzcnt == 64 and thus a
 * result of 0 - confirm against the lzcnt definition.
 */
ENTRY(NG4fls) | ||
LZCNT_O0_G2 !lzcnt %o0, %g2 | ||
mov 64, %g3 | ||
retl | ||
/* Delay slot of retl: the result is computed on the way out. */
sub %g3, %g2, %o0 | ||
ENDPROC(NG4fls) | ||
|
||
/* __NG4fls: 0-based index of the most significant set bit, via lzcnt.
 *
 * In:    %o0 = value to scan
 * Out:   %o0 = 63 - lzcnt(%o0) for a non-zero input;
 *              %o0 is returned unchanged (0) for a zero input
 * Clobb: %g2 (leading-zero count), %g3 (constant 63, non-zero path only)
 */
ENTRY(__NG4fls) | ||
/* Zero input: skip the arithmetic and return %o0 as it is. */
brz,pn %o0, 1f | ||
/* Delay slot of brz (not annulled): the count is taken on both paths,
 * but it is only consumed on the fall-through path.
 */
LZCNT_O0_G2 !lzcnt %o0, %g2 | ||
mov 63, %g3 | ||
sub %g3, %g2, %o0 | ||
1: | ||
retl | ||
nop | ||
ENDPROC(__NG4fls) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
/* fls.S: SPARC default fls definition. | ||
* | ||
* SPARC default fls definition, which follows the same algorithm as | ||
* in generic fls(). This function will be boot time patched on T4 | ||
* and onward. | ||
*/ | ||
|
||
#include <linux/linkage.h> | ||
#include <asm/export.h> | ||
|
||
.text | ||
.register %g2, #scratch | ||
.register %g3, #scratch | ||
/* fls: default (non-lzcnt) find-last-set.
 *
 * In:    %o0 = value to scan
 * Out:   %o0 = 1-based position of the most significant set bit within
 *              the low 32 bits, or 0 when %o0 is zero
 * Clobb: %o1, %g2, %g3, integer condition codes
 *
 * Register roles:
 *   %o0 = working value, shifted left each time the tested top bits are
 *         all clear
 *   %o1 = running result; starts at 32 and is reduced by 16, 8, 4, 2, 1
 *   %g2 = copy of the working value used for the mask tests
 *   %g3 = mask for the current step (top 16, 8, 4, then 2 bits)
 *
 * The mask tests branch on %icc and the shifts are 32-bit sll/sra, while
 * the initial zero test (brz) looks at the full register.
 * NOTE(review): this presumably relies on the caller passing a
 * zero-extended 32-bit argument - confirm against the C prototype.
 */
ENTRY(fls) | ||
/* Zero input: go straight to the return with %o1 = 0 (set in the delay
 * slot below).
 */
brz,pn %o0, 6f | ||
mov 0, %o1 | ||
sethi %hi(0xffff0000), %g3 | ||
mov %o0, %g2 | ||
/* Top 16 bits all clear?  Then continue at 8 (shift left by 16). */
andcc %o0, %g3, %g0 | ||
be,pt %icc, 8f | ||
/* Delay slot: start the result at 32; the path through 8 overwrites it
 * with 16.
 */
mov 32, %o1 | ||
/* Top 8 bits set?  Then skip the 8-bit shift and go to the 4-bit step. */
sethi %hi(0xff000000), %g3 | ||
andcc %g2, %g3, %g0 | ||
bne,pt %icc, 3f | ||
/* Delay slot: preload the 4-bit mask that label 3 tests with. */
sethi %hi(0xf0000000), %g3 | ||
sll %o0, 8, %o0 | ||
/* 1: finish the 8-bit step (the shift itself was done by the code that
 * arrives here).
 */
1: | ||
add %o1, -8, %o1 | ||
sra %o0, 0, %o0 | ||
mov %o0, %g2 | ||
/* 2: entry that needs the 4-bit mask loaded. */
2: | ||
sethi %hi(0xf0000000), %g3 | ||
/* 3: 4-bit step; %g3 already holds 0xf0000000. */
3: | ||
andcc %g2, %g3, %g0 | ||
bne,pt %icc, 4f | ||
/* Delay slot: preload the 2-bit mask that label 4 tests with. */
sethi %hi(0xc0000000), %g3 | ||
sll %o0, 4, %o0 | ||
add %o1, -4, %o1 | ||
sra %o0, 0, %o0 | ||
mov %o0, %g2 | ||
/* 4: 2-bit step; %g3 holds 0xc0000000. */
4: | ||
andcc %g2, %g3, %g0 | ||
/* Annulled branch: the shift in the delay slot runs only when the
 * branch is taken, i.e. when the top 2 bits are clear.
 */
be,a,pt %icc, 7f | ||
sll %o0, 2, %o0 | ||
/* 5: final 1-bit step.  xnor with %g0 inverts %o0, so after the srl
 * %o0 is 1 exactly when bit 31 of the working value was clear; that is
 * subtracted from the result.
 */
5: | ||
xnor %g0, %o0, %o0 | ||
srl %o0, 31, %o0 | ||
sub %o1, %o0, %o1 | ||
/* 6: return; the delay slot moves the result into %o0. */
6: | ||
jmp %o7 + 8 | ||
sra %o1, 0, %o0 | ||
/* 7: bookkeeping for the 2-bit shift done in the annulled delay slot,
 * then rejoin the final step at 5.
 */
7: | ||
add %o1, -2, %o1 | ||
ba,pt %xcc, 5b | ||
sra %o0, 0, %o0 | ||
/* 8: the top 16 bits were clear.  Shift them out and redo the 8-bit
 * test with the result restarted at 16.
 */
8: | ||
sll %o0, 16, %o0 | ||
sethi %hi(0xff000000), %g3 | ||
sra %o0, 0, %o0 | ||
mov %o0, %g2 | ||
andcc %g2, %g3, %g0 | ||
bne,pt %icc, 2b | ||
/* Delay slot: executed on both outcomes of the bne above. */
mov 16, %o1 | ||
ba,pt %xcc, 1b | ||
/* Delay slot: the 8-bit shift that label 1 expects to be done already. */
sll %o0, 8, %o0 | ||
ENDPROC(fls) | ||
EXPORT_SYMBOL(fls) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
/* fls64.S: SPARC default __fls definition. | ||
* | ||
* SPARC default __fls definition, which follows the same algorithm as | ||
* in generic __fls(). This function will be boot time patched on T4 | ||
* and onward. | ||
*/ | ||
|
||
#include <linux/linkage.h> | ||
#include <asm/export.h> | ||
|
||
.text | ||
.register %g2, #scratch | ||
.register %g3, #scratch | ||
/* __fls: default (non-lzcnt) 0-based index of the most significant set
 * bit of a 64-bit value.
 *
 * In:    %o0 = value to scan
 * Out:   %o0 = index (0..63) of the most significant set bit; a zero
 *              input falls through every step and yields 0
 * Clobb: %g1, %g2, %g3
 *
 * Register roles:
 *   %o0 = working value, shifted left each time the tested top bits are
 *         all clear
 *   %g1 = running result; starts at 63 (or 31 after the first shift) and
 *         is reduced by 16, 8, 4, 2, 1
 *   %g2 = mask of the top 32, 16, 8, 4, 2 bits, built as -1 << k
 *   %g3 = all-ones seed for the final single-bit mask
 *
 * None of the branches is annulled, so every delay-slot instruction runs
 * on both the taken and the fall-through path.
 */
ENTRY(__fls) | ||
/* Step 32: are any of the top 32 bits set? */
mov -1, %g2 | ||
sllx %g2, 32, %g2 | ||
and %o0, %g2, %g2 | ||
brnz,pt %g2, 1f | ||
/* Delay slot: initial result; replaced by 31 below on fall-through. */
mov 63, %g1 | ||
sllx %o0, 32, %o0 | ||
mov 31, %g1 | ||
/* Step 16. */
1: | ||
mov -1, %g2 | ||
sllx %g2, 48, %g2 | ||
and %o0, %g2, %g2 | ||
brnz,pt %g2, 2f | ||
mov -1, %g2 | ||
sllx %o0, 16, %o0 | ||
add %g1, -16, %g1 | ||
/* Step 8. */
2: | ||
mov -1, %g2 | ||
sllx %g2, 56, %g2 | ||
and %o0, %g2, %g2 | ||
brnz,pt %g2, 3f | ||
/* Delay slot: label 3 does not reload %g2 and relies on this -1. */
mov -1, %g2 | ||
sllx %o0, 8, %o0 | ||
add %g1, -8, %g1 | ||
/* Step 4; %g2 is -1 here, set in the preceding delay slot. */
3: | ||
sllx %g2, 60, %g2 | ||
and %o0, %g2, %g2 | ||
brnz,pt %g2, 4f | ||
/* Delay slot: label 4 does not reload %g2 and relies on this -1. */
mov -1, %g2 | ||
sllx %o0, 4, %o0 | ||
add %g1, -4, %g1 | ||
/* Step 2; %g2 is -1 here, set in the preceding delay slot. */
4: | ||
sllx %g2, 62, %g2 | ||
and %o0, %g2, %g2 | ||
brnz,pt %g2, 5f | ||
/* Delay slot: seeds %g3 for the single-bit mask built at label 5. */
mov -1, %g3 | ||
sllx %o0, 2, %o0 | ||
add %g1, -2, %g1 | ||
/* Step 1: isolate bit 63 of the working value.  movre sets %g2 to 1
 * when that bit is clear (the masked %o0 is zero); otherwise %g2 stays 0.
 * The result is then reduced by %g2.
 */
5: | ||
mov 0, %g2 | ||
sllx %g3, 63, %g3 | ||
and %o0, %g3, %o0 | ||
movre %o0, 1, %g2 | ||
sub %g1, %g2, %g1 | ||
/* Return; the delay slot moves the result into %o0. */
jmp %o7+8 | ||
sra %g1, 0, %o0 | ||
ENDPROC(__fls) | ||
EXPORT_SYMBOL(__fls) |