Skip to content

Commit

Permalink
x86/crc32: update prototype for crc_pcl()
Browse files (browse the repository at this point in the history)
- Change the len parameter from unsigned int to size_t, so that the
  library function which takes a size_t can safely use this code.

- Rename to crc32c_x86_3way(), which is much clearer.

- Move the crc parameter to the front, as this is the usual convention.

Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Link: https://lore.kernel.org/r/20241202010844.144356-12-ebiggers@kernel.org
Signed-off-by: Eric Biggers <ebiggers@google.com>
  • Loading branch information
Eric Biggers committed Dec 2, 2024
1 parent 0f60a8a commit 64e3586
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 35 deletions.
7 changes: 3 additions & 4 deletions arch/x86/crypto/crc32c-intel_glue.c
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,7 @@
*/
#define CRC32C_PCL_BREAKEVEN 512

asmlinkage unsigned int crc_pcl(const u8 *buffer, unsigned int len,
unsigned int crc_init);
asmlinkage u32 crc32c_x86_3way(u32 crc, const u8 *buffer, size_t len);
#endif /* CONFIG_X86_64 */

static u32 crc32c_intel_le_hw_byte(u32 crc, unsigned char const *data, size_t length)
Expand Down Expand Up @@ -159,7 +158,7 @@ static int crc32c_pcl_intel_update(struct shash_desc *desc, const u8 *data,
*/
if (len >= CRC32C_PCL_BREAKEVEN && crypto_simd_usable()) {
kernel_fpu_begin();
*crcp = crc_pcl(data, len, *crcp);
*crcp = crc32c_x86_3way(*crcp, data, len);
kernel_fpu_end();
} else
*crcp = crc32c_intel_le_hw(*crcp, data, len);
Expand All @@ -171,7 +170,7 @@ static int __crc32c_pcl_intel_finup(u32 *crcp, const u8 *data, unsigned int len,
{
if (len >= CRC32C_PCL_BREAKEVEN && crypto_simd_usable()) {
kernel_fpu_begin();
*(__le32 *)out = ~cpu_to_le32(crc_pcl(data, len, *crcp));
*(__le32 *)out = ~cpu_to_le32(crc32c_x86_3way(*crcp, data, len));
kernel_fpu_end();
} else
*(__le32 *)out =
Expand Down
63 changes: 32 additions & 31 deletions arch/x86/crypto/crc32c-pcl-intel-asm_64.S
Original file line number Diff line number Diff line change
Expand Up @@ -52,15 +52,16 @@
# regular CRC code that does not interleave the CRC instructions.
#define SMALL_SIZE 200

# unsigned int crc_pcl(const u8 *buffer, unsigned int len, unsigned int crc_init);
# u32 crc32c_x86_3way(u32 crc, const u8 *buffer, size_t len);

.text
SYM_FUNC_START(crc_pcl)
#define bufp %rdi
#define bufp_d %edi
#define len %esi
#define crc_init %edx
#define crc_init_q %rdx
SYM_FUNC_START(crc32c_x86_3way)
#define crc0 %edi
#define crc0_q %rdi
#define bufp %rsi
#define bufp_d %esi
#define len %rdx
#define len_dw %edx
#define n_misaligned %ecx /* overlaps chunk_bytes! */
#define n_misaligned_q %rcx
#define chunk_bytes %ecx /* overlaps n_misaligned! */
Expand All @@ -85,9 +86,9 @@ SYM_FUNC_START(crc_pcl)
.Ldo_align:
movq (bufp), %rax
add n_misaligned_q, bufp
sub n_misaligned, len
sub n_misaligned_q, len
.Lalign_loop:
crc32b %al, crc_init # compute crc32 of 1-byte
crc32b %al, crc0 # compute crc32 of 1-byte
shr $8, %rax # get next byte
dec n_misaligned
jne .Lalign_loop
Expand All @@ -102,7 +103,7 @@ SYM_FUNC_START(crc_pcl)

.Lpartial_block:
# Compute floor(len / 24) to get num qwords to process from each lane.
imul $2731, len, %eax # 2731 = ceil(2^16 / 24)
imul $2731, len_dw, %eax # 2731 = ceil(2^16 / 24)
shr $16, %eax
jmp .Lcrc_3lanes

Expand All @@ -125,16 +126,16 @@ SYM_FUNC_START(crc_pcl)
# Unroll the loop by a factor of 4 to reduce the overhead of the loop
# bookkeeping instructions, which can compete with crc32q for the ALUs.
.Lcrc_3lanes_4x_loop:
crc32q (bufp), crc_init_q
crc32q (bufp), crc0_q
crc32q (bufp,chunk_bytes_q), crc1
crc32q (bufp,chunk_bytes_q,2), crc2
crc32q 8(bufp), crc_init_q
crc32q 8(bufp), crc0_q
crc32q 8(bufp,chunk_bytes_q), crc1
crc32q 8(bufp,chunk_bytes_q,2), crc2
crc32q 16(bufp), crc_init_q
crc32q 16(bufp), crc0_q
crc32q 16(bufp,chunk_bytes_q), crc1
crc32q 16(bufp,chunk_bytes_q,2), crc2
crc32q 24(bufp), crc_init_q
crc32q 24(bufp), crc0_q
crc32q 24(bufp,chunk_bytes_q), crc1
crc32q 24(bufp,chunk_bytes_q,2), crc2
add $32, bufp
Expand All @@ -146,15 +147,15 @@ SYM_FUNC_START(crc_pcl)
jz .Lcrc_3lanes_last_qword

.Lcrc_3lanes_1x_loop:
crc32q (bufp), crc_init_q
crc32q (bufp), crc0_q
crc32q (bufp,chunk_bytes_q), crc1
crc32q (bufp,chunk_bytes_q,2), crc2
add $8, bufp
dec %eax
jnz .Lcrc_3lanes_1x_loop

.Lcrc_3lanes_last_qword:
crc32q (bufp), crc_init_q
crc32q (bufp), crc0_q
crc32q (bufp,chunk_bytes_q), crc1
# SKIP crc32q (bufp,chunk_bytes_q,2), crc2 ; Don't do this one yet

Expand All @@ -165,9 +166,9 @@ SYM_FUNC_START(crc_pcl)
lea (K_table-8)(%rip), %rax # first entry is for idx 1
pmovzxdq (%rax,chunk_bytes_q), %xmm0 # 2 consts: K1:K2
lea (chunk_bytes,chunk_bytes,2), %eax # chunk_bytes * 3
sub %eax, len # len -= chunk_bytes * 3
sub %rax, len # len -= chunk_bytes * 3

movq crc_init_q, %xmm1 # CRC for block 1
movq crc0_q, %xmm1 # CRC for block 1
pclmulqdq $0x00, %xmm0, %xmm1 # Multiply by K2

movq crc1, %xmm2 # CRC for block 2
Expand All @@ -176,8 +177,8 @@ SYM_FUNC_START(crc_pcl)
pxor %xmm2,%xmm1
movq %xmm1, %rax
xor (bufp,chunk_bytes_q,2), %rax
mov crc2, crc_init_q
crc32 %rax, crc_init_q
mov crc2, crc0_q
crc32 %rax, crc0_q
lea 8(bufp,chunk_bytes_q,2), bufp

################################################################
Expand All @@ -193,34 +194,34 @@ SYM_FUNC_START(crc_pcl)
## 6) Process any remainder without interleaving:
#######################################################################
.Lsmall:
test len, len
test len_dw, len_dw
jz .Ldone
mov len, %eax
mov len_dw, %eax
shr $3, %eax
jz .Ldo_dword
.Ldo_qwords:
crc32q (bufp), crc_init_q
crc32q (bufp), crc0_q
add $8, bufp
dec %eax
jnz .Ldo_qwords
.Ldo_dword:
test $4, len
test $4, len_dw
jz .Ldo_word
crc32l (bufp), crc_init
crc32l (bufp), crc0
add $4, bufp
.Ldo_word:
test $2, len
test $2, len_dw
jz .Ldo_byte
crc32w (bufp), crc_init
crc32w (bufp), crc0
add $2, bufp
.Ldo_byte:
test $1, len
test $1, len_dw
jz .Ldone
crc32b (bufp), crc_init
crc32b (bufp), crc0
.Ldone:
mov crc_init, %eax
mov crc0, %eax
RET
SYM_FUNC_END(crc_pcl)
SYM_FUNC_END(crc32c_x86_3way)

.section .rodata, "a", @progbits
################################################################
Expand Down

0 comments on commit 64e3586

Please sign in to comment.