Skip to content

Commit

Permalink
Merge tag 'crc-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiggers/linux
Browse files Browse the repository at this point in the history

Pull CRC cleanups from Eric Biggers:
 "Simplify the kconfig options for controlling which CRC implementations
  are built into the kernel, as was requested by Linus.

  This means making the option to disable the arch code visible only
  when CONFIG_EXPERT=y, and standardizing on a single generic
  implementation of CRC32"

* tag 'crc-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiggers/linux:
  lib/crc32: remove other generic implementations
  lib/crc: simplify the kconfig options for CRC implementations
  • Loading branch information
Linus Torvalds committed Jan 29, 2025
2 parents af13ff1 + 5e3c1c4 commit fed3819
Show file tree
Hide file tree
Showing 4 changed files with 53 additions and 462 deletions.
118 changes: 13 additions & 105 deletions lib/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -164,34 +164,9 @@ config CRC_T10DIF
config ARCH_HAS_CRC_T10DIF
bool

choice
prompt "CRC-T10DIF implementation"
depends on CRC_T10DIF
default CRC_T10DIF_IMPL_ARCH if ARCH_HAS_CRC_T10DIF
default CRC_T10DIF_IMPL_GENERIC if !ARCH_HAS_CRC_T10DIF
help
This option allows you to override the default choice of CRC-T10DIF
implementation.

config CRC_T10DIF_IMPL_ARCH
bool "Architecture-optimized" if ARCH_HAS_CRC_T10DIF
help
Use the optimized implementation of CRC-T10DIF for the selected
architecture. It is recommended to keep this enabled, as it can
greatly improve CRC-T10DIF performance.

config CRC_T10DIF_IMPL_GENERIC
bool "Generic implementation"
help
Use the generic table-based implementation of CRC-T10DIF. Selecting
this will reduce code size slightly but can greatly reduce CRC-T10DIF
performance.

endchoice

config CRC_T10DIF_ARCH
tristate
default CRC_T10DIF if CRC_T10DIF_IMPL_ARCH
default CRC_T10DIF if ARCH_HAS_CRC_T10DIF && CRC_OPTIMIZATIONS

config CRC64_ROCKSOFT
tristate "CRC calculation for the Rocksoft model CRC64"
Expand Down Expand Up @@ -223,87 +198,9 @@ config CRC32
config ARCH_HAS_CRC32
bool

choice
prompt "CRC32 implementation"
depends on CRC32
default CRC32_IMPL_ARCH_PLUS_SLICEBY8 if ARCH_HAS_CRC32
default CRC32_IMPL_SLICEBY8 if !ARCH_HAS_CRC32
help
This option allows you to override the default choice of CRC32
implementation. Choose the default unless you know that you need one
of the others.

config CRC32_IMPL_ARCH_PLUS_SLICEBY8
bool "Arch-optimized, with fallback to slice-by-8" if ARCH_HAS_CRC32
help
Use architecture-optimized implementation of CRC32. Fall back to
slice-by-8 in cases where the arch-optimized implementation cannot be
used, e.g. if the CPU lacks support for the needed instructions.

This is the default when an arch-optimized implementation exists.

config CRC32_IMPL_ARCH_PLUS_SLICEBY1
bool "Arch-optimized, with fallback to slice-by-1" if ARCH_HAS_CRC32
help
Use architecture-optimized implementation of CRC32, but fall back to
slice-by-1 instead of slice-by-8 in order to reduce the binary size.

config CRC32_IMPL_SLICEBY8
bool "Slice by 8 bytes"
help
Calculate checksum 8 bytes at a time with a clever slicing algorithm.
This is much slower than the architecture-optimized implementation of
CRC32 (if the selected arch has one), but it is portable and is the
fastest implementation when no arch-optimized implementation is
available. It uses an 8KiB lookup table. Most modern processors have
enough cache to hold this table without thrashing the cache.

config CRC32_IMPL_SLICEBY4
bool "Slice by 4 bytes"
help
Calculate checksum 4 bytes at a time with a clever slicing algorithm.
This is a bit slower than slice by 8, but has a smaller 4KiB lookup
table.

Only choose this option if you know what you are doing.

config CRC32_IMPL_SLICEBY1
bool "Slice by 1 byte (Sarwate's algorithm)"
help
Calculate checksum a byte at a time using Sarwate's algorithm. This
is not particularly fast, but has a small 1KiB lookup table.

Only choose this option if you know what you are doing.

config CRC32_IMPL_BIT
bool "Classic Algorithm (one bit at a time)"
help
Calculate checksum one bit at a time. This is VERY slow, but has
no lookup table. This is provided as a debugging option.

Only choose this option if you are debugging crc32.

endchoice

config CRC32_ARCH
tristate
default CRC32 if CRC32_IMPL_ARCH_PLUS_SLICEBY8 || CRC32_IMPL_ARCH_PLUS_SLICEBY1

config CRC32_SLICEBY8
bool
default y if CRC32_IMPL_SLICEBY8 || CRC32_IMPL_ARCH_PLUS_SLICEBY8

config CRC32_SLICEBY4
bool
default y if CRC32_IMPL_SLICEBY4

config CRC32_SARWATE
bool
default y if CRC32_IMPL_SLICEBY1 || CRC32_IMPL_ARCH_PLUS_SLICEBY1

config CRC32_BIT
bool
default y if CRC32_IMPL_BIT
default CRC32 if ARCH_HAS_CRC32 && CRC_OPTIMIZATIONS

config CRC64
tristate "CRC64 functions"
Expand Down Expand Up @@ -343,6 +240,17 @@ config CRC8
when they need to do cyclic redundancy check according CRC8
algorithm. Module will be called crc8.

config CRC_OPTIMIZATIONS
bool "Enable optimized CRC implementations" if EXPERT
default y
help
Disabling this option reduces code size slightly by disabling the
architecture-optimized implementations of any CRC variants that are
enabled. CRC checksumming performance may get much slower.

Keep this enabled unless you're really trying to minimize the size of
the kernel.

config XXHASH
tristate

Expand Down
225 changes: 10 additions & 215 deletions lib/crc32.c
Original file line number Diff line number Diff line change
Expand Up @@ -30,178 +30,27 @@
#include <linux/crc32poly.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/sched.h>
#include "crc32defs.h"

#if CRC_LE_BITS > 8
# define tole(x) ((__force u32) cpu_to_le32(x))
#else
# define tole(x) (x)
#endif

#if CRC_BE_BITS > 8
# define tobe(x) ((__force u32) cpu_to_be32(x))
#else
# define tobe(x) (x)
#endif

#include "crc32table.h"

MODULE_AUTHOR("Matt Domsch <Matt_Domsch@dell.com>");
MODULE_DESCRIPTION("Various CRC32 calculations");
MODULE_LICENSE("GPL");

#if CRC_LE_BITS > 8 || CRC_BE_BITS > 8

/* implements slicing-by-4 or slicing-by-8 algorithm */
static inline u32 __pure
crc32_body(u32 crc, unsigned char const *buf, size_t len, const u32 (*tab)[256])
{
# ifdef __LITTLE_ENDIAN
# define DO_CRC(x) crc = t0[(crc ^ (x)) & 255] ^ (crc >> 8)
# define DO_CRC4 (t3[(q) & 255] ^ t2[(q >> 8) & 255] ^ \
t1[(q >> 16) & 255] ^ t0[(q >> 24) & 255])
# define DO_CRC8 (t7[(q) & 255] ^ t6[(q >> 8) & 255] ^ \
t5[(q >> 16) & 255] ^ t4[(q >> 24) & 255])
# else
# define DO_CRC(x) crc = t0[((crc >> 24) ^ (x)) & 255] ^ (crc << 8)
# define DO_CRC4 (t0[(q) & 255] ^ t1[(q >> 8) & 255] ^ \
t2[(q >> 16) & 255] ^ t3[(q >> 24) & 255])
# define DO_CRC8 (t4[(q) & 255] ^ t5[(q >> 8) & 255] ^ \
t6[(q >> 16) & 255] ^ t7[(q >> 24) & 255])
# endif
const u32 *b;
size_t rem_len;
# ifdef CONFIG_X86
size_t i;
# endif
const u32 *t0=tab[0], *t1=tab[1], *t2=tab[2], *t3=tab[3];
# if CRC_LE_BITS != 32
const u32 *t4 = tab[4], *t5 = tab[5], *t6 = tab[6], *t7 = tab[7];
# endif
u32 q;

/* Align it */
if (unlikely((long)buf & 3 && len)) {
do {
DO_CRC(*buf++);
} while ((--len) && ((long)buf)&3);
}

# if CRC_LE_BITS == 32
rem_len = len & 3;
len = len >> 2;
# else
rem_len = len & 7;
len = len >> 3;
# endif

b = (const u32 *)buf;
# ifdef CONFIG_X86
--b;
for (i = 0; i < len; i++) {
# else
for (--b; len; --len) {
# endif
q = crc ^ *++b; /* use pre increment for speed */
# if CRC_LE_BITS == 32
crc = DO_CRC4;
# else
crc = DO_CRC8;
q = *++b;
crc ^= DO_CRC4;
# endif
}
len = rem_len;
/* And the last few bytes */
if (len) {
u8 *p = (u8 *)(b + 1) - 1;
# ifdef CONFIG_X86
for (i = 0; i < len; i++)
DO_CRC(*++p); /* use pre increment for speed */
# else
do {
DO_CRC(*++p); /* use pre increment for speed */
} while (--len);
# endif
}
return crc;
#undef DO_CRC
#undef DO_CRC4
#undef DO_CRC8
}
#endif


/**
* crc32_le_generic() - Calculate bitwise little-endian Ethernet AUTODIN II
* CRC32/CRC32C
* @crc: seed value for computation. ~0 for Ethernet, sometimes 0 for other
* uses, or the previous crc32/crc32c value if computing incrementally.
* @p: pointer to buffer over which CRC32/CRC32C is run
* @len: length of buffer @p
* @tab: little-endian Ethernet table
* @polynomial: CRC32/CRC32c LE polynomial
*/
static inline u32 __pure crc32_le_generic(u32 crc, unsigned char const *p,
size_t len, const u32 (*tab)[256],
u32 polynomial)
u32 __pure crc32_le_base(u32 crc, const u8 *p, size_t len)
{
#if CRC_LE_BITS == 1
int i;
while (len--) {
crc ^= *p++;
for (i = 0; i < 8; i++)
crc = (crc >> 1) ^ ((crc & 1) ? polynomial : 0);
}
# elif CRC_LE_BITS == 2
while (len--) {
crc ^= *p++;
crc = (crc >> 2) ^ tab[0][crc & 3];
crc = (crc >> 2) ^ tab[0][crc & 3];
crc = (crc >> 2) ^ tab[0][crc & 3];
crc = (crc >> 2) ^ tab[0][crc & 3];
}
# elif CRC_LE_BITS == 4
while (len--) {
crc ^= *p++;
crc = (crc >> 4) ^ tab[0][crc & 15];
crc = (crc >> 4) ^ tab[0][crc & 15];
}
# elif CRC_LE_BITS == 8
/* aka Sarwate algorithm */
while (len--) {
crc ^= *p++;
crc = (crc >> 8) ^ tab[0][crc & 255];
}
# else
crc = (__force u32) __cpu_to_le32(crc);
crc = crc32_body(crc, p, len, tab);
crc = __le32_to_cpu((__force __le32)crc);
#endif
while (len--)
crc = (crc >> 8) ^ crc32table_le[(crc & 255) ^ *p++];
return crc;
}
EXPORT_SYMBOL(crc32_le_base);

#if CRC_LE_BITS == 1
u32 __pure crc32_le_base(u32 crc, const u8 *p, size_t len)
{
return crc32_le_generic(crc, p, len, NULL, CRC32_POLY_LE);
}
u32 __pure crc32c_le_base(u32 crc, const u8 *p, size_t len)
{
return crc32_le_generic(crc, p, len, NULL, CRC32C_POLY_LE);
}
#else
u32 __pure crc32_le_base(u32 crc, const u8 *p, size_t len)
{
return crc32_le_generic(crc, p, len, crc32table_le, CRC32_POLY_LE);
}
u32 __pure crc32c_le_base(u32 crc, const u8 *p, size_t len)
{
return crc32_le_generic(crc, p, len, crc32ctable_le, CRC32C_POLY_LE);
while (len--)
crc = (crc >> 8) ^ crc32ctable_le[(crc & 255) ^ *p++];
return crc;
}
#endif
EXPORT_SYMBOL(crc32_le_base);
EXPORT_SYMBOL(crc32c_le_base);

/*
Expand Down Expand Up @@ -277,64 +126,10 @@ u32 __attribute_const__ __crc32c_le_shift(u32 crc, size_t len)
EXPORT_SYMBOL(crc32_le_shift);
EXPORT_SYMBOL(__crc32c_le_shift);

/**
* crc32_be_generic() - Calculate bitwise big-endian Ethernet AUTODIN II CRC32
* @crc: seed value for computation. ~0 for Ethernet, sometimes 0 for
* other uses, or the previous crc32 value if computing incrementally.
* @p: pointer to buffer over which CRC32 is run
* @len: length of buffer @p
* @tab: big-endian Ethernet table
* @polynomial: CRC32 BE polynomial
*/
static inline u32 __pure crc32_be_generic(u32 crc, unsigned char const *p,
size_t len, const u32 (*tab)[256],
u32 polynomial)
{
#if CRC_BE_BITS == 1
int i;
while (len--) {
crc ^= *p++ << 24;
for (i = 0; i < 8; i++)
crc =
(crc << 1) ^ ((crc & 0x80000000) ? polynomial :
0);
}
# elif CRC_BE_BITS == 2
while (len--) {
crc ^= *p++ << 24;
crc = (crc << 2) ^ tab[0][crc >> 30];
crc = (crc << 2) ^ tab[0][crc >> 30];
crc = (crc << 2) ^ tab[0][crc >> 30];
crc = (crc << 2) ^ tab[0][crc >> 30];
}
# elif CRC_BE_BITS == 4
while (len--) {
crc ^= *p++ << 24;
crc = (crc << 4) ^ tab[0][crc >> 28];
crc = (crc << 4) ^ tab[0][crc >> 28];
}
# elif CRC_BE_BITS == 8
while (len--) {
crc ^= *p++ << 24;
crc = (crc << 8) ^ tab[0][crc >> 24];
}
# else
crc = (__force u32) __cpu_to_be32(crc);
crc = crc32_body(crc, p, len, tab);
crc = __be32_to_cpu((__force __be32)crc);
# endif
return crc;
}

#if CRC_BE_BITS == 1
u32 __pure crc32_be_base(u32 crc, const u8 *p, size_t len)
{
return crc32_be_generic(crc, p, len, NULL, CRC32_POLY_BE);
}
#else
u32 __pure crc32_be_base(u32 crc, const u8 *p, size_t len)
{
return crc32_be_generic(crc, p, len, crc32table_be, CRC32_POLY_BE);
while (len--)
crc = (crc << 8) ^ crc32table_be[(crc >> 24) ^ *p++];
return crc;
}
#endif
EXPORT_SYMBOL(crc32_be_base);
Loading

0 comments on commit fed3819

Please sign in to comment.