Skip to content

Commit

Permalink
zstd: import usptream v1.5.2
Browse files Browse the repository at this point in the history
Updates the kernel's zstd library to v1.5.2, the latest zstd release.
The upstream tag it is updated to is `v1.5.2-kernel`, which contains
several cherry-picked commits on top of the v1.5.2 release which are
required for the kernel update. I will create this tag once the PR is
ready to merge, until then reference the temporary upstream branch
`v1.5.2-kernel-cherrypicks`.

I plan to submit this patch as part of the v6.2 merge window.

I've done basic build testing & testing on x86-64, i386, and aarch64.
I'm merging these patches into my `zstd-next` branch, which is pulled
into `linux-next` for further testing.

I've benchmarked BtrFS with zstd compression on a x86-64 machine, and
saw these results. Decompression speed is a small win across the board.
The lower compression levels 1-4 see both compression speed and
compression ratio wins. The higher compression levels see a small
compression speed loss and about neutral ratio. I expect the lower
compression levels to be used much more heavily than the high
compression levels, so this should be a net win.

Level	CTime	DTime	Ratio
1	-2.95%	-1.1%	-0.7%
3	-3.5%	-1.2%	-0.5%
5	+3.7%	-1.0%	+0.0%
7	+3.2%	-0.9%	+0.0%
9	-4.3%	-0.8%	+0.1%

Signed-off-by: Nick Terrell <terrelln@fb.com>
  • Loading branch information
Nick Terrell committed Oct 24, 2022
1 parent 4782c72 commit 2aa14b1
Show file tree
Hide file tree
Showing 36 changed files with 6,951 additions and 2,605 deletions.
475 changes: 297 additions & 178 deletions include/linux/zstd_lib.h

Large diffs are not rendered by default.

9 changes: 9 additions & 0 deletions lib/zstd/common/bitstream.h
Original file line number Diff line number Diff line change
Expand Up @@ -313,7 +313,16 @@ MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getMiddleBits(size_t bitContainer, U32 c
U32 const regMask = sizeof(bitContainer)*8 - 1;
/* if start > regMask, bitstream is corrupted, and result is undefined */
assert(nbBits < BIT_MASK_SIZE);
/* x86 transform & ((1 << nbBits) - 1) to bzhi instruction, it is better
* than accessing memory. When bmi2 instruction is not present, we consider
* such cpus old (pre-Haswell, 2013) and their performance is not of that
* importance.
*/
#if defined(__x86_64__) || defined(_M_X86)
return (bitContainer >> (start & regMask)) & ((((U64)1) << nbBits) - 1);
#else
return (bitContainer >> (start & regMask)) & BIT_mask[nbBits];
#endif
}

MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
Expand Down
67 changes: 37 additions & 30 deletions lib/zstd/common/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
#ifndef ZSTD_COMPILER_H
#define ZSTD_COMPILER_H

#include "portability_macros.h"

/*-*******************************************************
* Compiler specifics
*********************************************************/
Expand All @@ -34,7 +36,7 @@

/*
On MSVC qsort requires that functions passed into it use the __cdecl calling conversion(CC).
This explictly marks such functions as __cdecl so that the code will still compile
This explicitly marks such functions as __cdecl so that the code will still compile
if a CC other than __cdecl has been made the default.
*/
#define WIN_CDECL
Expand Down Expand Up @@ -70,25 +72,13 @@


/* target attribute */
#ifndef __has_attribute
#define __has_attribute(x) 0 /* Compatibility with non-clang compilers. */
#endif
#define TARGET_ATTRIBUTE(target) __attribute__((__target__(target)))

/* Enable runtime BMI2 dispatch based on the CPU.
* Enabled for clang & gcc >=4.8 on x86 when BMI2 isn't enabled by default.
/* Target attribute for BMI2 dynamic dispatch.
* Enable lzcnt, bmi, and bmi2.
* We test for bmi1 & bmi2. lzcnt is included in bmi1.
*/
#ifndef DYNAMIC_BMI2
#if ((defined(__clang__) && __has_attribute(__target__)) \
|| (defined(__GNUC__) \
&& (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)))) \
&& (defined(__x86_64__) || defined(_M_X86)) \
&& !defined(__BMI2__)
# define DYNAMIC_BMI2 1
#else
# define DYNAMIC_BMI2 0
#endif
#endif
#define BMI2_TARGET_ATTRIBUTE TARGET_ATTRIBUTE("lzcnt,bmi,bmi2")

/* prefetch
* can be disabled, by declaring NO_PREFETCH build macro */
Expand All @@ -115,8 +105,9 @@
}

/* vectorization
* older GCC (pre gcc-4.3 picked as the cutoff) uses a different syntax */
#if !defined(__INTEL_COMPILER) && !defined(__clang__) && defined(__GNUC__)
* older GCC (pre gcc-4.3 picked as the cutoff) uses a different syntax,
* and some compilers, like Intel ICC and MCST LCC, do not support it at all. */
#if !defined(__INTEL_COMPILER) && !defined(__clang__) && defined(__GNUC__) && !defined(__LCC__)
# if (__GNUC__ == 4 && __GNUC_MINOR__ > 3) || (__GNUC__ >= 5)
# define DONT_VECTORIZE __attribute__((optimize("no-tree-vectorize")))
# else
Expand All @@ -134,20 +125,18 @@
#define LIKELY(x) (__builtin_expect((x), 1))
#define UNLIKELY(x) (__builtin_expect((x), 0))

#if __has_builtin(__builtin_unreachable) || (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5)))
# define ZSTD_UNREACHABLE { assert(0), __builtin_unreachable(); }
#else
# define ZSTD_UNREACHABLE { assert(0); }
#endif

/* disable warnings */

/*Like DYNAMIC_BMI2 but for compile time determination of BMI2 support*/


/* compat. with non-clang compilers */
#ifndef __has_builtin
# define __has_builtin(x) 0
#endif

/* compat. with non-clang compilers */
#ifndef __has_feature
# define __has_feature(x) 0
#endif
/* compile time determination of SIMD support */

/* C-language Attributes are added in C23. */
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ > 201710L) && defined(__has_c_attribute)
Expand All @@ -168,10 +157,28 @@
*/
#define ZSTD_FALLTHROUGH fallthrough

/* detects whether we are being compiled under msan */
/*-**************************************************************
* Alignment check
*****************************************************************/

/* this test was initially positioned in mem.h,
* but this file is removed (or replaced) for linux kernel
* so it's now hosted in compiler.h,
* which remains valid for both user & kernel spaces.
*/

#ifndef ZSTD_ALIGNOF
/* covers gcc, clang & MSVC */
/* note : this section must come first, before C11,
* due to a limitation in the kernel source generator */
# define ZSTD_ALIGNOF(T) __alignof(T)

#endif /* ZSTD_ALIGNOF */

/*-**************************************************************
* Sanitizer
*****************************************************************/

/* detects whether we are being compiled under asan */


#endif /* ZSTD_COMPILER_H */
7 changes: 4 additions & 3 deletions lib/zstd/common/entropy_common.c
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,7 @@ static size_t FSE_readNCount_body_default(
}

#if DYNAMIC_BMI2
TARGET_ATTRIBUTE("bmi2") static size_t FSE_readNCount_body_bmi2(
BMI2_TARGET_ATTRIBUTE static size_t FSE_readNCount_body_bmi2(
short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
const void* headerBuffer, size_t hbSize)
{
Expand Down Expand Up @@ -240,6 +240,7 @@ size_t FSE_readNCount(
return FSE_readNCount_bmi2(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize, /* bmi2 */ 0);
}


/*! HUF_readStats() :
Read compact Huffman tree, saved by HUF_writeCTable().
`huffWeight` is destination buffer.
Expand Down Expand Up @@ -293,7 +294,7 @@ HUF_readStats_body(BYTE* huffWeight, size_t hwSize, U32* rankStats,
ZSTD_memset(rankStats, 0, (HUF_TABLELOG_MAX + 1) * sizeof(U32));
weightTotal = 0;
{ U32 n; for (n=0; n<oSize; n++) {
if (huffWeight[n] >= HUF_TABLELOG_MAX) return ERROR(corruption_detected);
if (huffWeight[n] > HUF_TABLELOG_MAX) return ERROR(corruption_detected);
rankStats[huffWeight[n]]++;
weightTotal += (1 << huffWeight[n]) >> 1;
} }
Expand Down Expand Up @@ -331,7 +332,7 @@ static size_t HUF_readStats_body_default(BYTE* huffWeight, size_t hwSize, U32* r
}

#if DYNAMIC_BMI2
static TARGET_ATTRIBUTE("bmi2") size_t HUF_readStats_body_bmi2(BYTE* huffWeight, size_t hwSize, U32* rankStats,
static BMI2_TARGET_ATTRIBUTE size_t HUF_readStats_body_bmi2(BYTE* huffWeight, size_t hwSize, U32* rankStats,
U32* nbSymbolsPtr, U32* tableLogPtr,
const void* src, size_t srcSize,
void* workSpace, size_t wkspSize)
Expand Down
81 changes: 80 additions & 1 deletion lib/zstd/common/error_private.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,10 @@
/* ****************************************
* Dependencies
******************************************/
#include "zstd_deps.h" /* size_t */
#include <linux/zstd_errors.h> /* enum list */
#include "compiler.h"
#include "debug.h"
#include "zstd_deps.h" /* size_t */


/* ****************************************
Expand Down Expand Up @@ -62,5 +64,82 @@ ERR_STATIC const char* ERR_getErrorName(size_t code)
return ERR_getErrorString(ERR_getErrorCode(code));
}

/*
* Ignore: this is an internal helper.
*
* This is a helper function to help force C99-correctness during compilation.
* Under strict compilation modes, variadic macro arguments can't be empty.
* However, variadic function arguments can be. Using a function therefore lets
* us statically check that at least one (string) argument was passed,
* independent of the compilation flags.
*/
static INLINE_KEYWORD UNUSED_ATTR
void _force_has_format_string(const char *format, ...) {
(void)format;
}

/*
* Ignore: this is an internal helper.
*
* We want to force this function invocation to be syntactically correct, but
* we don't want to force runtime evaluation of its arguments.
*/
#define _FORCE_HAS_FORMAT_STRING(...) \
if (0) { \
_force_has_format_string(__VA_ARGS__); \
}

#define ERR_QUOTE(str) #str

/*
* Return the specified error if the condition evaluates to true.
*
* In debug modes, prints additional information.
* In order to do that (particularly, printing the conditional that failed),
* this can't just wrap RETURN_ERROR().
*/
#define RETURN_ERROR_IF(cond, err, ...) \
if (cond) { \
RAWLOG(3, "%s:%d: ERROR!: check %s failed, returning %s", \
__FILE__, __LINE__, ERR_QUOTE(cond), ERR_QUOTE(ERROR(err))); \
_FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
RAWLOG(3, ": " __VA_ARGS__); \
RAWLOG(3, "\n"); \
return ERROR(err); \
}

/*
* Unconditionally return the specified error.
*
* In debug modes, prints additional information.
*/
#define RETURN_ERROR(err, ...) \
do { \
RAWLOG(3, "%s:%d: ERROR!: unconditional check failed, returning %s", \
__FILE__, __LINE__, ERR_QUOTE(ERROR(err))); \
_FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
RAWLOG(3, ": " __VA_ARGS__); \
RAWLOG(3, "\n"); \
return ERROR(err); \
} while(0);

/*
* If the provided expression evaluates to an error code, returns that error code.
*
* In debug modes, prints additional information.
*/
#define FORWARD_IF_ERROR(err, ...) \
do { \
size_t const err_code = (err); \
if (ERR_isError(err_code)) { \
RAWLOG(3, "%s:%d: ERROR!: forwarding error in %s: %s", \
__FILE__, __LINE__, ERR_QUOTE(err), ERR_getErrorName(err_code)); \
_FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
RAWLOG(3, ": " __VA_ARGS__); \
RAWLOG(3, "\n"); \
return err_code; \
} \
} while(0);


#endif /* ERROR_H_MODULE */
3 changes: 2 additions & 1 deletion lib/zstd/common/fse.h
Original file line number Diff line number Diff line change
Expand Up @@ -333,8 +333,9 @@ size_t FSE_buildCTable_rle (FSE_CTable* ct, unsigned char symbolValue);
/* FSE_buildCTable_wksp() :
* Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`).
* `wkspSize` must be >= `FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog)` of `unsigned`.
* See FSE_buildCTable_wksp() for breakdown of workspace usage.
*/
#define FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog) (maxSymbolValue + 2 + (1ull << (tableLog - 2)))
#define FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog) (((maxSymbolValue + 2) + (1ull << (tableLog)))/2 + sizeof(U64)/sizeof(U32) /* additional 8 bytes for potential table overwrite */)
#define FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog) (sizeof(unsigned) * FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog))
size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);

Expand Down
2 changes: 1 addition & 1 deletion lib/zstd/common/fse_decompress.c
Original file line number Diff line number Diff line change
Expand Up @@ -365,7 +365,7 @@ static size_t FSE_decompress_wksp_body_default(void* dst, size_t dstCapacity, co
}

#if DYNAMIC_BMI2
TARGET_ATTRIBUTE("bmi2") static size_t FSE_decompress_wksp_body_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize)
BMI2_TARGET_ATTRIBUTE static size_t FSE_decompress_wksp_body_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize)
{
return FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, 1);
}
Expand Down
Loading

0 comments on commit 2aa14b1

Please sign in to comment.