zstd: Update to upstream version 1.5.6

Release notes:
- https://github.com/facebook/zstd/releases/tag/v1.5.6
Rémi Verschelde 2024-04-05 11:22:55 +02:00
parent f6a78f83aa
commit a88b4a4d56
41 changed files with 4643 additions and 2334 deletions


@ -1034,7 +1034,7 @@ Files extracted from upstream source:
 ## zstd

 - Upstream: https://github.com/facebook/zstd
-- Version: 1.5.5 (63779c798237346c2b245c546c40b72a5a5913fe, 2023)
+- Version: 1.5.6 (794ea1b0afca0f020f4e57b6732332231fb23c70, 2024)
 - License: BSD-3-Clause

 Files extracted from upstream source:


@ -14,7 +14,7 @@
 #define ZSTD_DEPS_NEED_MALLOC
 #include "zstd_deps.h" /* ZSTD_malloc, ZSTD_calloc, ZSTD_free, ZSTD_memset */
-#include "mem.h" /* MEM_STATIC */
+#include "compiler.h" /* MEM_STATIC */
 #define ZSTD_STATIC_LINKING_ONLY
 #include "../zstd.h" /* ZSTD_customMem */


@ -90,19 +90,20 @@ MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC);
 /*-********************************************
 *  bitStream decoding API (read backward)
 **********************************************/
+typedef size_t BitContainerType;
 typedef struct {
-    size_t   bitContainer;
+    BitContainerType bitContainer;
     unsigned bitsConsumed;
     const char* ptr;
     const char* start;
     const char* limitPtr;
 } BIT_DStream_t;

-typedef enum { BIT_DStream_unfinished = 0,
-               BIT_DStream_endOfBuffer = 1,
-               BIT_DStream_completed = 2,
-               BIT_DStream_overflow = 3 } BIT_DStream_status;  /* result of BIT_reloadDStream() */
-               /* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... :( */
+typedef enum { BIT_DStream_unfinished = 0, /* fully refilled */
+               BIT_DStream_endOfBuffer = 1, /* still some bits left in bitstream */
+               BIT_DStream_completed = 2, /* bitstream entirely consumed, bit-exact */
+               BIT_DStream_overflow = 3 /* user requested more bits than present in bitstream */
+             } BIT_DStream_status;  /* result of BIT_reloadDStream() */

 MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize);
 MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits);
@ -112,7 +113,7 @@ MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD);
 /* Start by invoking BIT_initDStream().
 *  A chunk of the bitStream is then stored into a local register.
-*  Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t).
+*  Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (BitContainerType).
 *  You can then retrieve bitFields stored into the local register, **in reverse order**.
 *  Local register is explicitly reloaded from memory by the BIT_reloadDStream() method.
 *  A reload guarantee a minimum of ((8*sizeof(bitD->bitContainer))-7) bits when its result is BIT_DStream_unfinished.
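The usage pattern described in that comment boils down to a short loop. A minimal sketch, illustrative only and not part of the patch; it assumes bit.h and error_private.h are available, and the function name, field count and 5-bit width are made up:

/* Illustrative decode loop for the backward bitstream reader (hypothetical helper). */
static size_t example_readFields(const void* src, size_t srcSize, unsigned nbFields)
{
    BIT_DStream_t bitD;
    size_t sum = 0;
    unsigned n;
    {   size_t const initResult = BIT_initDStream(&bitD, src, srcSize);
        if (ERR_isError(initResult)) return initResult;   /* e.g. srcSize == 0 */
    }
    for (n = 0; n < nbFields; n++) {
        sum += BIT_readBits(&bitD, 5);   /* fields come back in reverse order of writing */
        /* refill the register; while this returns BIT_DStream_unfinished, at least
         * 25 bits (32-bit builds) or 57 bits (64-bit builds) are available */
        if (BIT_reloadDStream(&bitD) == BIT_DStream_overflow)
            return ERROR(corruption_detected);
    }
    if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);  /* expect exact consumption */
    return sum;
}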
@ -162,7 +163,7 @@ MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC,
return 0; return 0;
} }
MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits) FORCE_INLINE_TEMPLATE size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
{ {
#if defined(STATIC_BMI2) && STATIC_BMI2 == 1 && !defined(ZSTD_NO_INTRINSICS) #if defined(STATIC_BMI2) && STATIC_BMI2 == 1 && !defined(ZSTD_NO_INTRINSICS)
return _bzhi_u64(bitContainer, nbBits); return _bzhi_u64(bitContainer, nbBits);
@ -267,22 +268,22 @@ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, si
bitD->bitContainer = *(const BYTE*)(bitD->start); bitD->bitContainer = *(const BYTE*)(bitD->start);
switch(srcSize) switch(srcSize)
{ {
case 7: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16); case 7: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16);
ZSTD_FALLTHROUGH; ZSTD_FALLTHROUGH;
case 6: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24); case 6: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24);
ZSTD_FALLTHROUGH; ZSTD_FALLTHROUGH;
case 5: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32); case 5: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32);
ZSTD_FALLTHROUGH; ZSTD_FALLTHROUGH;
case 4: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[3]) << 24; case 4: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[3]) << 24;
ZSTD_FALLTHROUGH; ZSTD_FALLTHROUGH;
case 3: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[2]) << 16; case 3: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[2]) << 16;
ZSTD_FALLTHROUGH; ZSTD_FALLTHROUGH;
case 2: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[1]) << 8; case 2: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[1]) << 8;
ZSTD_FALLTHROUGH; ZSTD_FALLTHROUGH;
default: break; default: break;
@ -297,12 +298,12 @@ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, si
return srcSize; return srcSize;
} }
MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getUpperBits(size_t bitContainer, U32 const start) FORCE_INLINE_TEMPLATE size_t BIT_getUpperBits(BitContainerType bitContainer, U32 const start)
{ {
return bitContainer >> start; return bitContainer >> start;
} }
MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits) FORCE_INLINE_TEMPLATE size_t BIT_getMiddleBits(BitContainerType bitContainer, U32 const start, U32 const nbBits)
{ {
U32 const regMask = sizeof(bitContainer)*8 - 1; U32 const regMask = sizeof(bitContainer)*8 - 1;
/* if start > regMask, bitstream is corrupted, and result is undefined */ /* if start > regMask, bitstream is corrupted, and result is undefined */
@ -325,7 +326,7 @@ MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getMiddleBits(size_t bitContainer, U32 c
* On 32-bits, maxNbBits==24. * On 32-bits, maxNbBits==24.
* On 64-bits, maxNbBits==56. * On 64-bits, maxNbBits==56.
* @return : value extracted */ * @return : value extracted */
MEM_STATIC FORCE_INLINE_ATTR size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits) FORCE_INLINE_TEMPLATE size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits)
{ {
/* arbitrate between double-shift and shift+mask */ /* arbitrate between double-shift and shift+mask */
#if 1 #if 1
@ -348,7 +349,7 @@ MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits)
return (bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> (((regMask+1)-nbBits) & regMask); return (bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> (((regMask+1)-nbBits) & regMask);
} }
MEM_STATIC FORCE_INLINE_ATTR void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits) FORCE_INLINE_TEMPLATE void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
{ {
bitD->bitsConsumed += nbBits; bitD->bitsConsumed += nbBits;
} }
@ -357,7 +358,7 @@ MEM_STATIC FORCE_INLINE_ATTR void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
* Read (consume) next n bits from local register and update. * Read (consume) next n bits from local register and update.
* Pay attention to not read more than nbBits contained into local register. * Pay attention to not read more than nbBits contained into local register.
* @return : extracted value. */ * @return : extracted value. */
MEM_STATIC FORCE_INLINE_ATTR size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits) FORCE_INLINE_TEMPLATE size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits)
{ {
size_t const value = BIT_lookBits(bitD, nbBits); size_t const value = BIT_lookBits(bitD, nbBits);
BIT_skipBits(bitD, nbBits); BIT_skipBits(bitD, nbBits);
@ -374,6 +375,21 @@ MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits)
     return value;
 }

+/*! BIT_reloadDStream_internal() :
+ *  Simple variant of BIT_reloadDStream(), with two conditions:
+ *  1. bitstream is valid : bitsConsumed <= sizeof(bitD->bitContainer)*8
+ *  2. look window is valid after shifted down : bitD->ptr >= bitD->start
+ */
+MEM_STATIC BIT_DStream_status BIT_reloadDStream_internal(BIT_DStream_t* bitD)
+{
+    assert(bitD->bitsConsumed <= sizeof(bitD->bitContainer)*8);
+    bitD->ptr -= bitD->bitsConsumed >> 3;
+    assert(bitD->ptr >= bitD->start);
+    bitD->bitsConsumed &= 7;
+    bitD->bitContainer = MEM_readLEST(bitD->ptr);
+    return BIT_DStream_unfinished;
+}
+
 /*! BIT_reloadDStreamFast() :
  *  Similar to BIT_reloadDStream(), but with two differences:
  *  1. bitsConsumed <= sizeof(bitD->bitContainer)*8 must hold!
@ -384,31 +400,35 @@ MEM_STATIC BIT_DStream_status BIT_reloadDStreamFast(BIT_DStream_t* bitD)
 {
     if (UNLIKELY(bitD->ptr < bitD->limitPtr))
         return BIT_DStream_overflow;
-    assert(bitD->bitsConsumed <= sizeof(bitD->bitContainer)*8);
-    bitD->ptr -= bitD->bitsConsumed >> 3;
-    bitD->bitsConsumed &= 7;
-    bitD->bitContainer = MEM_readLEST(bitD->ptr);
-    return BIT_DStream_unfinished;
+    return BIT_reloadDStream_internal(bitD);
 }

 /*! BIT_reloadDStream() :
  *  Refill `bitD` from buffer previously set in BIT_initDStream() .
- *  This function is safe, it guarantees it will not read beyond src buffer.
+ *  This function is safe, it guarantees it will not never beyond src buffer.
  * @return : status of `BIT_DStream_t` internal register.
  *           when status == BIT_DStream_unfinished, internal register is filled with at least 25 or 57 bits */
-MEM_STATIC FORCE_INLINE_ATTR BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
+FORCE_INLINE_TEMPLATE BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
 {
-    if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8))  /* overflow detected, like end of stream */
+    /* note : once in overflow mode, a bitstream remains in this mode until it's reset */
+    if (UNLIKELY(bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8))) {
+        static const BitContainerType zeroFilled = 0;
+        bitD->ptr = (const char*)&zeroFilled;  /* aliasing is allowed for char */
+        /* overflow detected, erroneous scenario or end of stream: no update */
         return BIT_DStream_overflow;
+    }

+    assert(bitD->ptr >= bitD->start);
     if (bitD->ptr >= bitD->limitPtr) {
-        return BIT_reloadDStreamFast(bitD);
+        return BIT_reloadDStream_internal(bitD);
     }
     if (bitD->ptr == bitD->start) {
+        /* reached end of bitStream => no update */
         if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer;
         return BIT_DStream_completed;
     }
-    /* start < ptr < limitPtr */
+    /* start < ptr < limitPtr => cautious update */
     {   U32 nbBytes = bitD->bitsConsumed >> 3;
         BIT_DStream_status result = BIT_DStream_unfinished;
         if (bitD->ptr - nbBytes < bitD->start) {


@ -11,6 +11,8 @@
#ifndef ZSTD_COMPILER_H #ifndef ZSTD_COMPILER_H
#define ZSTD_COMPILER_H #define ZSTD_COMPILER_H
#include <stddef.h>
#include "portability_macros.h" #include "portability_macros.h"
/*-******************************************************* /*-*******************************************************
@ -51,12 +53,19 @@
# define WIN_CDECL # define WIN_CDECL
#endif #endif
/* UNUSED_ATTR tells the compiler it is okay if the function is unused. */
#if defined(__GNUC__)
# define UNUSED_ATTR __attribute__((unused))
#else
# define UNUSED_ATTR
#endif
 /**
  * FORCE_INLINE_TEMPLATE is used to define C "templates", which take constant
  * parameters. They must be inlined for the compiler to eliminate the constant
  * branches.
  */
-#define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR
+#define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR UNUSED_ATTR

 /**
  * HINT_INLINE is used to help the compiler generate better code. It is *not*
  * used for "templates", so it can be tweaked based on the compilers
@ -71,14 +80,28 @@
 #if !defined(__clang__) && defined(__GNUC__) && __GNUC__ >= 4 && __GNUC_MINOR__ >= 8 && __GNUC__ < 5
 # define HINT_INLINE static INLINE_KEYWORD
 #else
-# define HINT_INLINE static INLINE_KEYWORD FORCE_INLINE_ATTR
+# define HINT_INLINE FORCE_INLINE_TEMPLATE
 #endif

-/* UNUSED_ATTR tells the compiler it is okay if the function is unused. */
+/* "soft" inline :
+ * The compiler is free to select if it's a good idea to inline or not.
+ * The main objective is to silence compiler warnings
+ * when a defined function in included but not used.
+ *
+ * Note : this macro is prefixed `MEM_` because it used to be provided by `mem.h` unit.
+ * Updating the prefix is probably preferable, but requires a fairly large codemod,
+ * since this name is used everywhere.
+ */
+#ifndef MEM_STATIC  /* already defined in Linux Kernel mem.h */
 #if defined(__GNUC__)
-# define UNUSED_ATTR __attribute__((unused))
+# define MEM_STATIC static __inline UNUSED_ATTR
+#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+# define MEM_STATIC static inline
+#elif defined(_MSC_VER)
+# define MEM_STATIC static __inline
 #else
-# define UNUSED_ATTR
+# define MEM_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */
+#endif
 #endif
/* force no inlining */ /* force no inlining */
@ -109,10 +132,10 @@
/* prefetch /* prefetch
* can be disabled, by declaring NO_PREFETCH build macro */ * can be disabled, by declaring NO_PREFETCH build macro */
#if defined(NO_PREFETCH) #if defined(NO_PREFETCH)
# define PREFETCH_L1(ptr) (void)(ptr) /* disabled */ # define PREFETCH_L1(ptr) do { (void)(ptr); } while (0) /* disabled */
# define PREFETCH_L2(ptr) (void)(ptr) /* disabled */ # define PREFETCH_L2(ptr) do { (void)(ptr); } while (0) /* disabled */
#else #else
# if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) /* _mm_prefetch() is not defined outside of x86/x64 */ # if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) && !defined(_M_ARM64EC) /* _mm_prefetch() is not defined outside of x86/x64 */
# include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */ # include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
# define PREFETCH_L1(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T0) # define PREFETCH_L1(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T0)
# define PREFETCH_L2(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T1) # define PREFETCH_L2(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T1)
@ -120,24 +143,25 @@
 # define PREFETCH_L1(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */)
 # define PREFETCH_L2(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */)
 # elif defined(__aarch64__)
-# define PREFETCH_L1(ptr) __asm__ __volatile__("prfm pldl1keep, %0" ::"Q"(*(ptr)))
-# define PREFETCH_L2(ptr) __asm__ __volatile__("prfm pldl2keep, %0" ::"Q"(*(ptr)))
+# define PREFETCH_L1(ptr) do { __asm__ __volatile__("prfm pldl1keep, %0" ::"Q"(*(ptr))); } while (0)
+# define PREFETCH_L2(ptr) do { __asm__ __volatile__("prfm pldl2keep, %0" ::"Q"(*(ptr))); } while (0)
 # else
-# define PREFETCH_L1(ptr) (void)(ptr)  /* disabled */
-# define PREFETCH_L2(ptr) (void)(ptr)  /* disabled */
+# define PREFETCH_L1(ptr) do { (void)(ptr); } while (0)  /* disabled */
+# define PREFETCH_L2(ptr) do { (void)(ptr); } while (0)  /* disabled */
 # endif
 #endif  /* NO_PREFETCH */

 #define CACHELINE_SIZE 64

-#define PREFETCH_AREA(p, s)  {                            \
-    const char* const _ptr = (const char*)(p);            \
-    size_t const _size = (size_t)(s);                     \
-    size_t _pos;                                          \
-    for (_pos=0; _pos<_size; _pos+=CACHELINE_SIZE) {      \
-        PREFETCH_L2(_ptr + _pos);                         \
-    }                                                     \
-}
+#define PREFETCH_AREA(p, s)                               \
+    do {                                                  \
+        const char* const _ptr = (const char*)(p);        \
+        size_t const _size = (size_t)(s);                 \
+        size_t _pos;                                      \
+        for (_pos=0; _pos<_size; _pos+=CACHELINE_SIZE) {  \
+            PREFETCH_L2(_ptr + _pos);                     \
+        }                                                 \
+    } while (0)
/* vectorization /* vectorization
* older GCC (pre gcc-4.3 picked as the cutoff) uses a different syntax, * older GCC (pre gcc-4.3 picked as the cutoff) uses a different syntax,
@ -166,9 +190,9 @@
#endif #endif
#if __has_builtin(__builtin_unreachable) || (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5))) #if __has_builtin(__builtin_unreachable) || (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5)))
# define ZSTD_UNREACHABLE { assert(0), __builtin_unreachable(); } # define ZSTD_UNREACHABLE do { assert(0), __builtin_unreachable(); } while (0)
#else #else
# define ZSTD_UNREACHABLE { assert(0); } # define ZSTD_UNREACHABLE do { assert(0); } while (0)
#endif #endif
/* disable warnings */ /* disable warnings */
@ -281,6 +305,74 @@
* Sanitizer * Sanitizer
*****************************************************************/ *****************************************************************/
/**
* Zstd relies on pointer overflow in its decompressor.
* We add this attribute to functions that rely on pointer overflow.
*/
#ifndef ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
# if __has_attribute(no_sanitize)
# if !defined(__clang__) && defined(__GNUC__) && __GNUC__ < 8
/* gcc < 8 only has signed-integer-overlow which triggers on pointer overflow */
# define ZSTD_ALLOW_POINTER_OVERFLOW_ATTR __attribute__((no_sanitize("signed-integer-overflow")))
# else
/* older versions of clang [3.7, 5.0) will warn that pointer-overflow is ignored. */
# define ZSTD_ALLOW_POINTER_OVERFLOW_ATTR __attribute__((no_sanitize("pointer-overflow")))
# endif
# else
# define ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
# endif
#endif
/**
* Helper function to perform a wrapped pointer difference without trigging
* UBSAN.
*
* @returns lhs - rhs with wrapping
*/
MEM_STATIC
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
ptrdiff_t ZSTD_wrappedPtrDiff(unsigned char const* lhs, unsigned char const* rhs)
{
return lhs - rhs;
}
/**
* Helper function to perform a wrapped pointer add without triggering UBSAN.
*
* @return ptr + add with wrapping
*/
MEM_STATIC
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
unsigned char const* ZSTD_wrappedPtrAdd(unsigned char const* ptr, ptrdiff_t add)
{
return ptr + add;
}
/**
* Helper function to perform a wrapped pointer subtraction without triggering
* UBSAN.
*
* @return ptr - sub with wrapping
*/
MEM_STATIC
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
unsigned char const* ZSTD_wrappedPtrSub(unsigned char const* ptr, ptrdiff_t sub)
{
return ptr - sub;
}
/**
* Helper function to add to a pointer that works around C's undefined behavior
* of adding 0 to NULL.
*
* @returns `ptr + add` except it defines `NULL + 0 == NULL`.
*/
MEM_STATIC
unsigned char* ZSTD_maybeNullPtrAdd(unsigned char* ptr, ptrdiff_t add)
{
return add > 0 ? ptr + add : ptr;
}
 /* Issue #3240 reports an ASAN failure on an llvm-mingw build. Out of an
  * abundance of caution, disable our custom poisoning on mingw. */
 #ifdef __MINGW32__
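What the pointer helpers earlier in this hunk buy in practice, stated as commentary rather than as part of the patch: the decoder can form wrapped or out-of-range pointers and may add 0 to a NULL pointer on empty frames, which UBSAN flags even though the values are never dereferenced. The function and variable names below are made up; <stddef.h> comes from the include added at the top of this file.

/* Hypothetical call sites showing the intent of the wrappers defined above. */
static size_t example_windowSize(unsigned char const* op, unsigned char const* windowLow)
{
    /* a plain (op - windowLow) can be flagged when the pointers have wrapped;
     * the wrapper performs the same subtraction under the no-sanitize attribute */
    return (size_t)ZSTD_wrappedPtrDiff(op, windowLow);
}

static unsigned char* example_advanceOutput(unsigned char* op, ptrdiff_t produced)
{
    /* op may be NULL with produced == 0 (e.g. an empty frame); NULL + 0 is undefined
     * behavior in C, so the helper only performs the addition for a positive increment */
    return ZSTD_maybeNullPtrAdd(op, produced);
}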


@ -35,6 +35,7 @@ MEM_STATIC ZSTD_cpuid_t ZSTD_cpuid(void) {
U32 f7b = 0; U32 f7b = 0;
U32 f7c = 0; U32 f7c = 0;
#if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86)) #if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))
#if !defined(__clang__)
int reg[4]; int reg[4];
__cpuid((int*)reg, 0); __cpuid((int*)reg, 0);
{ {
@ -50,6 +51,41 @@ MEM_STATIC ZSTD_cpuid_t ZSTD_cpuid(void) {
f7c = (U32)reg[2]; f7c = (U32)reg[2];
} }
} }
#else
/* Clang compiler has a bug (fixed in https://reviews.llvm.org/D101338) in
* which the `__cpuid` intrinsic does not save and restore `rbx` as it needs
* to due to being a reserved register. So in that case, do the `cpuid`
* ourselves. Clang supports inline assembly anyway.
*/
U32 n;
__asm__(
"pushq %%rbx\n\t"
"cpuid\n\t"
"popq %%rbx\n\t"
: "=a"(n)
: "a"(0)
: "rcx", "rdx");
if (n >= 1) {
U32 f1a;
__asm__(
"pushq %%rbx\n\t"
"cpuid\n\t"
"popq %%rbx\n\t"
: "=a"(f1a), "=c"(f1c), "=d"(f1d)
: "a"(1)
:);
}
if (n >= 7) {
__asm__(
"pushq %%rbx\n\t"
"cpuid\n\t"
"movq %%rbx, %%rax\n\t"
"popq %%rbx"
: "=a"(f7b), "=c"(f7c)
: "a"(7), "c"(0)
: "rdx");
}
#endif
#elif defined(__i386__) && defined(__PIC__) && !defined(__clang__) && defined(__GNUC__) #elif defined(__i386__) && defined(__PIC__) && !defined(__clang__) && defined(__GNUC__)
/* The following block like the normal cpuid branch below, but gcc /* The following block like the normal cpuid branch below, but gcc
* reserves ebx for use of its pic register so we must specially * reserves ebx for use of its pic register so we must specially


@ -21,4 +21,10 @@
#include "debug.h" #include "debug.h"
#if !defined(ZSTD_LINUX_KERNEL) || (DEBUGLEVEL>=2)
/* We only use this when DEBUGLEVEL>=2, but we get -Werror=pedantic errors if a
* translation unit is empty. So remove this from Linux kernel builds, but
* otherwise just leave it in.
*/
int g_debuglevel = DEBUGLEVEL; int g_debuglevel = DEBUGLEVEL;
#endif


@ -85,18 +85,27 @@ extern int g_debuglevel; /* the variable is only declared,
                             It's useful when enabling very verbose levels
                             on selective conditions (such as position in src) */

-#  define RAWLOG(l, ...) {                            \
-        if (l<=g_debuglevel) {                        \
-            ZSTD_DEBUG_PRINT(__VA_ARGS__);            \
-    }   }
-#  define DEBUGLOG(l, ...) {                          \
-        if (l<=g_debuglevel) {                        \
-            ZSTD_DEBUG_PRINT(__FILE__ ": " __VA_ARGS__); \
-            ZSTD_DEBUG_PRINT(" \n");                  \
-    }   }
+#  define RAWLOG(l, ...)                   \
+    do {                                   \
+        if (l<=g_debuglevel) {             \
+            ZSTD_DEBUG_PRINT(__VA_ARGS__); \
+        }                                  \
+    } while (0)
+
+#define STRINGIFY(x) #x
+#define TOSTRING(x) STRINGIFY(x)
+#define LINE_AS_STRING TOSTRING(__LINE__)
+
+#  define DEBUGLOG(l, ...)                 \
+    do {                                   \
+        if (l<=g_debuglevel) {             \
+            ZSTD_DEBUG_PRINT(__FILE__ ":" LINE_AS_STRING ": " __VA_ARGS__); \
+            ZSTD_DEBUG_PRINT(" \n");       \
+        }                                  \
+    } while (0)
 #else
-#  define RAWLOG(l, ...)   {}    /* disabled */
-#  define DEBUGLOG(l, ...) {}    /* disabled */
+#  define RAWLOG(l, ...)   do { } while (0)    /* disabled */
+#  define DEBUGLOG(l, ...) do { } while (0)    /* disabled */
 #endif
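Two idioms in this hunk are worth spelling out: STRINGIFY/TOSTRING turn __LINE__ into a string literal that is concatenated into the format string at compile time, and the do { ... } while (0) wrapper, applied to most macros in this update, makes a multi-statement macro behave as a single statement after an unbraced if/else. A self-contained sketch with made-up names, not taken from the patch:

#include <stdio.h>

#define STRINGIFY(x) #x
#define TOSTRING(x) STRINGIFY(x)      /* two levels, so __LINE__ expands before stringification */

#define LOG(...)                                            \
    do {                                                    \
        printf(__FILE__ ":" TOSTRING(__LINE__) ": ");       \
        printf(__VA_ARGS__);                                \
        printf("\n");                                       \
    } while (0)

int main(void)
{
    if (1)
        LOG("value=%d", 42);   /* expands to a single statement, so this if/else stays well-formed */
    else
        LOG("unreachable");
    return 0;
}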


@ -60,8 +60,13 @@ ERR_STATIC unsigned ERR_isError(size_t code) { return (code > ERROR(maxCode)); }
ERR_STATIC ERR_enum ERR_getErrorCode(size_t code) { if (!ERR_isError(code)) return (ERR_enum)0; return (ERR_enum) (0-code); } ERR_STATIC ERR_enum ERR_getErrorCode(size_t code) { if (!ERR_isError(code)) return (ERR_enum)0; return (ERR_enum) (0-code); }
/* check and forward error code */ /* check and forward error code */
-#define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return e
-#define CHECK_F(f)      { CHECK_V_F(_var_err__, f); }
+#define CHECK_V_F(e, f)     \
+    size_t const e = f;     \
+    do {                    \
+        if (ERR_isError(e)) \
+            return e;       \
+    } while (0)
+#define CHECK_F(f) do { CHECK_V_F(_var_err__, f); } while (0)
/*-**************************************** /*-****************************************
@ -95,10 +100,12 @@ void _force_has_format_string(const char *format, ...) {
* We want to force this function invocation to be syntactically correct, but * We want to force this function invocation to be syntactically correct, but
* we don't want to force runtime evaluation of its arguments. * we don't want to force runtime evaluation of its arguments.
*/ */
-#define _FORCE_HAS_FORMAT_STRING(...)        \
-  if (0) {                                   \
-    _force_has_format_string(__VA_ARGS__);   \
-  }
+#define _FORCE_HAS_FORMAT_STRING(...)        \
+  do {                                       \
+    if (0) {                                 \
+      _force_has_format_string(__VA_ARGS__); \
+    }                                        \
+  } while (0)
#define ERR_QUOTE(str) #str #define ERR_QUOTE(str) #str
@ -109,48 +116,50 @@ void _force_has_format_string(const char *format, ...) {
* In order to do that (particularly, printing the conditional that failed), * In order to do that (particularly, printing the conditional that failed),
* this can't just wrap RETURN_ERROR(). * this can't just wrap RETURN_ERROR().
*/ */
-#define RETURN_ERROR_IF(cond, err, ...)                                  \
-  if (cond) {                                                            \
-    RAWLOG(3, "%s:%d: ERROR!: check %s failed, returning %s",            \
-           __FILE__, __LINE__, ERR_QUOTE(cond), ERR_QUOTE(ERROR(err)));  \
-    _FORCE_HAS_FORMAT_STRING(__VA_ARGS__);                               \
-    RAWLOG(3, ": " __VA_ARGS__);                                         \
-    RAWLOG(3, "\n");                                                     \
-    return ERROR(err);                                                   \
-  }
+#define RETURN_ERROR_IF(cond, err, ...)                                    \
+  do {                                                                     \
+    if (cond) {                                                            \
+      RAWLOG(3, "%s:%d: ERROR!: check %s failed, returning %s",            \
+             __FILE__, __LINE__, ERR_QUOTE(cond), ERR_QUOTE(ERROR(err)));  \
+      _FORCE_HAS_FORMAT_STRING(__VA_ARGS__);                               \
+      RAWLOG(3, ": " __VA_ARGS__);                                         \
+      RAWLOG(3, "\n");                                                     \
+      return ERROR(err);                                                   \
+    }                                                                      \
+  } while (0)
/** /**
* Unconditionally return the specified error. * Unconditionally return the specified error.
* *
* In debug modes, prints additional information. * In debug modes, prints additional information.
*/ */
#define RETURN_ERROR(err, ...) \ #define RETURN_ERROR(err, ...) \
do { \ do { \
RAWLOG(3, "%s:%d: ERROR!: unconditional check failed, returning %s", \ RAWLOG(3, "%s:%d: ERROR!: unconditional check failed, returning %s", \
__FILE__, __LINE__, ERR_QUOTE(ERROR(err))); \ __FILE__, __LINE__, ERR_QUOTE(ERROR(err))); \
_FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \ _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
RAWLOG(3, ": " __VA_ARGS__); \ RAWLOG(3, ": " __VA_ARGS__); \
RAWLOG(3, "\n"); \ RAWLOG(3, "\n"); \
return ERROR(err); \ return ERROR(err); \
} while(0); } while(0)
/** /**
* If the provided expression evaluates to an error code, returns that error code. * If the provided expression evaluates to an error code, returns that error code.
* *
* In debug modes, prints additional information. * In debug modes, prints additional information.
*/ */
#define FORWARD_IF_ERROR(err, ...) \ #define FORWARD_IF_ERROR(err, ...) \
do { \ do { \
size_t const err_code = (err); \ size_t const err_code = (err); \
if (ERR_isError(err_code)) { \ if (ERR_isError(err_code)) { \
RAWLOG(3, "%s:%d: ERROR!: forwarding error in %s: %s", \ RAWLOG(3, "%s:%d: ERROR!: forwarding error in %s: %s", \
__FILE__, __LINE__, ERR_QUOTE(err), ERR_getErrorName(err_code)); \ __FILE__, __LINE__, ERR_QUOTE(err), ERR_getErrorName(err_code)); \
_FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \ _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
RAWLOG(3, ": " __VA_ARGS__); \ RAWLOG(3, ": " __VA_ARGS__); \
RAWLOG(3, "\n"); \ RAWLOG(3, "\n"); \
return err_code; \ return err_code; \
} \ } \
} while(0); } while(0)
#if defined (__cplusplus) #if defined (__cplusplus)
} }
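With the do { ... } while (0) form, these macros compose like ordinary statements: a trailing semicolon no longer leaves an empty statement and they are safe inside unbraced if/else. A hedged usage sketch follows; both functions below are hypothetical, while the error names, the macros and ZSTD_memcpy (from zstd_deps.h, assumed included) are the library's own.

/* Hypothetical callee: returns a byte count, or an error code produced with the macros above. */
static size_t example_checkSection(const void* src, size_t srcSize)
{
    (void)src;
    RETURN_ERROR_IF(srcSize == 0, srcSize_wrong, "empty section (%u bytes)", (unsigned)srcSize);
    return srcSize;
}

/* Hypothetical caller combining the three macros. */
static size_t example_copySection(void* dst, size_t dstCapacity, const void* src, size_t srcSize)
{
    RETURN_ERROR_IF(srcSize > dstCapacity, dstSize_tooSmall,
                    "need %u bytes of output space", (unsigned)srcSize);
    if (src == NULL) RETURN_ERROR(GENERIC, "NULL input");
    FORWARD_IF_ERROR(example_checkSection(src, srcSize), "section check failed");
    ZSTD_memcpy(dst, src, srcSize);
    return srcSize;
}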


@ -229,6 +229,7 @@ If there is an error, the function will return an error code, which can be teste
#endif /* FSE_H */ #endif /* FSE_H */
#if defined(FSE_STATIC_LINKING_ONLY) && !defined(FSE_H_FSE_STATIC_LINKING_ONLY) #if defined(FSE_STATIC_LINKING_ONLY) && !defined(FSE_H_FSE_STATIC_LINKING_ONLY)
#define FSE_H_FSE_STATIC_LINKING_ONLY #define FSE_H_FSE_STATIC_LINKING_ONLY
@ -464,13 +465,13 @@ MEM_STATIC void FSE_encodeSymbol(BIT_CStream_t* bitC, FSE_CState_t* statePtr, un
FSE_symbolCompressionTransform const symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol]; FSE_symbolCompressionTransform const symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol];
const U16* const stateTable = (const U16*)(statePtr->stateTable); const U16* const stateTable = (const U16*)(statePtr->stateTable);
U32 const nbBitsOut = (U32)((statePtr->value + symbolTT.deltaNbBits) >> 16); U32 const nbBitsOut = (U32)((statePtr->value + symbolTT.deltaNbBits) >> 16);
BIT_addBits(bitC, statePtr->value, nbBitsOut); BIT_addBits(bitC, (size_t)statePtr->value, nbBitsOut);
statePtr->value = stateTable[ (statePtr->value >> nbBitsOut) + symbolTT.deltaFindState]; statePtr->value = stateTable[ (statePtr->value >> nbBitsOut) + symbolTT.deltaFindState];
} }
MEM_STATIC void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* statePtr) MEM_STATIC void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* statePtr)
{ {
BIT_addBits(bitC, statePtr->value, statePtr->stateLog); BIT_addBits(bitC, (size_t)statePtr->value, statePtr->stateLog);
BIT_flushBits(bitC); BIT_flushBits(bitC);
} }


@ -22,8 +22,7 @@
#define FSE_STATIC_LINKING_ONLY #define FSE_STATIC_LINKING_ONLY
#include "fse.h" #include "fse.h"
#include "error_private.h" #include "error_private.h"
#define ZSTD_DEPS_NEED_MALLOC #include "zstd_deps.h" /* ZSTD_memcpy */
#include "zstd_deps.h"
#include "bits.h" /* ZSTD_highbit32 */ #include "bits.h" /* ZSTD_highbit32 */
@ -84,7 +83,7 @@ static size_t FSE_buildDTable_internal(FSE_DTable* dt, const short* normalizedCo
symbolNext[s] = 1; symbolNext[s] = 1;
} else { } else {
if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0; if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0;
symbolNext[s] = normalizedCounter[s]; symbolNext[s] = (U16)normalizedCounter[s];
} } } } } }
ZSTD_memcpy(dt, &DTableH, sizeof(DTableH)); ZSTD_memcpy(dt, &DTableH, sizeof(DTableH));
} }
@ -99,8 +98,7 @@ static size_t FSE_buildDTable_internal(FSE_DTable* dt, const short* normalizedCo
* all symbols have counts <= 8. We ensure we have 8 bytes at the end of * all symbols have counts <= 8. We ensure we have 8 bytes at the end of
* our buffer to handle the over-write. * our buffer to handle the over-write.
*/ */
{ { U64 const add = 0x0101010101010101ull;
U64 const add = 0x0101010101010101ull;
size_t pos = 0; size_t pos = 0;
U64 sv = 0; U64 sv = 0;
U32 s; U32 s;
@ -111,9 +109,8 @@ static size_t FSE_buildDTable_internal(FSE_DTable* dt, const short* normalizedCo
for (i = 8; i < n; i += 8) { for (i = 8; i < n; i += 8) {
MEM_write64(spread + pos + i, sv); MEM_write64(spread + pos + i, sv);
} }
pos += n; pos += (size_t)n;
} } }
}
/* Now we spread those positions across the table. /* Now we spread those positions across the table.
* The benefit of doing it in two stages is that we avoid the * The benefit of doing it in two stages is that we avoid the
* variable size inner loop, which caused lots of branch misses. * variable size inner loop, which caused lots of branch misses.
@ -232,12 +229,12 @@ FORCE_INLINE_TEMPLATE size_t FSE_decompress_usingDTable_generic(
break; break;
} } } }
return op-ostart; assert(op >= ostart);
return (size_t)(op-ostart);
} }
typedef struct { typedef struct {
short ncount[FSE_MAX_SYMBOL_VALUE + 1]; short ncount[FSE_MAX_SYMBOL_VALUE + 1];
FSE_DTable dtable[1]; /* Dynamically sized */
} FSE_DecompressWksp; } FSE_DecompressWksp;
@ -252,13 +249,18 @@ FORCE_INLINE_TEMPLATE size_t FSE_decompress_wksp_body(
unsigned tableLog; unsigned tableLog;
unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE; unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
FSE_DecompressWksp* const wksp = (FSE_DecompressWksp*)workSpace; FSE_DecompressWksp* const wksp = (FSE_DecompressWksp*)workSpace;
size_t const dtablePos = sizeof(FSE_DecompressWksp) / sizeof(FSE_DTable);
FSE_DTable* const dtable = (FSE_DTable*)workSpace + dtablePos;
DEBUG_STATIC_ASSERT((FSE_MAX_SYMBOL_VALUE + 1) % 2 == 0); FSE_STATIC_ASSERT((FSE_MAX_SYMBOL_VALUE + 1) % 2 == 0);
if (wkspSize < sizeof(*wksp)) return ERROR(GENERIC); if (wkspSize < sizeof(*wksp)) return ERROR(GENERIC);
/* correct offset to dtable depends on this property */
FSE_STATIC_ASSERT(sizeof(FSE_DecompressWksp) % sizeof(FSE_DTable) == 0);
/* normal FSE decoding mode */ /* normal FSE decoding mode */
{ { size_t const NCountLength =
size_t const NCountLength = FSE_readNCount_bmi2(wksp->ncount, &maxSymbolValue, &tableLog, istart, cSrcSize, bmi2); FSE_readNCount_bmi2(wksp->ncount, &maxSymbolValue, &tableLog, istart, cSrcSize, bmi2);
if (FSE_isError(NCountLength)) return NCountLength; if (FSE_isError(NCountLength)) return NCountLength;
if (tableLog > maxLog) return ERROR(tableLog_tooLarge); if (tableLog > maxLog) return ERROR(tableLog_tooLarge);
assert(NCountLength <= cSrcSize); assert(NCountLength <= cSrcSize);
@ -271,16 +273,16 @@ FORCE_INLINE_TEMPLATE size_t FSE_decompress_wksp_body(
workSpace = (BYTE*)workSpace + sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog); workSpace = (BYTE*)workSpace + sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog);
wkspSize -= sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog); wkspSize -= sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog);
CHECK_F( FSE_buildDTable_internal(wksp->dtable, wksp->ncount, maxSymbolValue, tableLog, workSpace, wkspSize) ); CHECK_F( FSE_buildDTable_internal(dtable, wksp->ncount, maxSymbolValue, tableLog, workSpace, wkspSize) );
{ {
const void* ptr = wksp->dtable; const void* ptr = dtable;
const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr; const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr;
const U32 fastMode = DTableH->fastMode; const U32 fastMode = DTableH->fastMode;
/* select fast mode (static) */ /* select fast mode (static) */
if (fastMode) return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, wksp->dtable, 1); if (fastMode) return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, dtable, 1);
return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, wksp->dtable, 0); return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, dtable, 0);
} }
} }


@ -197,9 +197,22 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void
 /** HUF_getNbBitsFromCTable() :
  *  Read nbBits from CTable symbolTable, for symbol `symbolValue` presumed <= HUF_SYMBOLVALUE_MAX
- *  Note 1 : is not inlined, as HUF_CElt definition is private */
+ *  Note 1 : If symbolValue > HUF_readCTableHeader(symbolTable).maxSymbolValue, returns 0
+ *  Note 2 : is not inlined, as HUF_CElt definition is private
+ */
 U32 HUF_getNbBitsFromCTable(const HUF_CElt* symbolTable, U32 symbolValue);

+typedef struct {
+    BYTE tableLog;
+    BYTE maxSymbolValue;
+    BYTE unused[sizeof(size_t) - 2];
+} HUF_CTableHeader;
+
+/** HUF_readCTableHeader() :
+ *  @returns The header from the CTable specifying the tableLog and the maxSymbolValue.
+ */
+HUF_CTableHeader HUF_readCTableHeader(HUF_CElt const* ctable);
+
 /*
  * HUF_decompress() does the following:
  * 1. select the decompression algorithm (X1, X2) based on pre-computed heuristics
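For context on the new header (illustrative only, not part of the patch; assumes huf.h is included and the CTable was filled by the usual HUF_buildCTable_wksp() / HUF_readCTable() path): the first HUF_CElt slot is reinterpreted as this small header, so metadata that 1.5.5 read as (U32)CTable[0] is now fetched through HUF_readCTableHeader().

/* Sketch: reading table metadata through the new header accessor. */
static U32 example_tableLogFromCTable(const HUF_CElt* ctable)
{
    HUF_CTableHeader const header = HUF_readCTableHeader(ctable);
    /* header.maxSymbolValue travels with the table as well, which is what lets
     * HUF_getNbBitsFromCTable() return 0 for out-of-range symbols (Note 1 above). */
    return header.tableLog;
}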


@ -31,15 +31,6 @@ extern "C" {
# include <stdlib.h> /* _byteswap_ulong */ # include <stdlib.h> /* _byteswap_ulong */
# include <intrin.h> /* _byteswap_* */ # include <intrin.h> /* _byteswap_* */
#endif #endif
#if defined(__GNUC__)
# define MEM_STATIC static __inline __attribute__((unused))
#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
# define MEM_STATIC static inline
#elif defined(_MSC_VER)
# define MEM_STATIC static __inline
#else
# define MEM_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */
#endif
/*-************************************************************** /*-**************************************************************
* Basic Types * Basic Types


@ -223,7 +223,7 @@ static int POOL_resize_internal(POOL_ctx* ctx, size_t numThreads)
{ ZSTD_pthread_t* const threadPool = (ZSTD_pthread_t*)ZSTD_customCalloc(numThreads * sizeof(ZSTD_pthread_t), ctx->customMem); { ZSTD_pthread_t* const threadPool = (ZSTD_pthread_t*)ZSTD_customCalloc(numThreads * sizeof(ZSTD_pthread_t), ctx->customMem);
if (!threadPool) return 1; if (!threadPool) return 1;
/* replace existing thread pool */ /* replace existing thread pool */
ZSTD_memcpy(threadPool, ctx->threads, ctx->threadCapacity * sizeof(*threadPool)); ZSTD_memcpy(threadPool, ctx->threads, ctx->threadCapacity * sizeof(ZSTD_pthread_t));
ZSTD_customFree(ctx->threads, ctx->customMem); ZSTD_customFree(ctx->threads, ctx->customMem);
ctx->threads = threadPool; ctx->threads = threadPool;
/* Initialize additional threads */ /* Initialize additional threads */


@ -47,7 +47,7 @@ void POOL_joinJobs(POOL_ctx* ctx);
/*! POOL_resize() : /*! POOL_resize() :
* Expands or shrinks pool's number of threads. * Expands or shrinks pool's number of threads.
* This is more efficient than releasing + creating a new context, * This is more efficient than releasing + creating a new context,
* since it tries to preserve and re-use existing threads. * since it tries to preserve and reuse existing threads.
* `numThreads` must be at least 1. * `numThreads` must be at least 1.
* @return : 0 when resize was successful, * @return : 0 when resize was successful,
* !0 (typically 1) if there is an error. * !0 (typically 1) if there is an error.


@ -68,6 +68,8 @@
/* Mark the internal assembly functions as hidden */ /* Mark the internal assembly functions as hidden */
#ifdef __ELF__ #ifdef __ELF__
# define ZSTD_HIDE_ASM_FUNCTION(func) .hidden func # define ZSTD_HIDE_ASM_FUNCTION(func) .hidden func
#elif defined(__APPLE__)
# define ZSTD_HIDE_ASM_FUNCTION(func) .private_extern func
#else #else
# define ZSTD_HIDE_ASM_FUNCTION(func) # define ZSTD_HIDE_ASM_FUNCTION(func)
#endif #endif


@ -73,10 +73,12 @@ int ZSTD_pthread_create(ZSTD_pthread_t* thread, const void* unused,
ZSTD_thread_params_t thread_param; ZSTD_thread_params_t thread_param;
(void)unused; (void)unused;
if (thread==NULL) return -1;
*thread = NULL;
thread_param.start_routine = start_routine; thread_param.start_routine = start_routine;
thread_param.arg = arg; thread_param.arg = arg;
thread_param.initialized = 0; thread_param.initialized = 0;
*thread = NULL;
/* Setup thread initialization synchronization */ /* Setup thread initialization synchronization */
if(ZSTD_pthread_cond_init(&thread_param.initialized_cond, NULL)) { if(ZSTD_pthread_cond_init(&thread_param.initialized_cond, NULL)) {
@ -91,7 +93,7 @@ int ZSTD_pthread_create(ZSTD_pthread_t* thread, const void* unused,
/* Spawn thread */ /* Spawn thread */
*thread = (HANDLE)_beginthreadex(NULL, 0, worker, &thread_param, 0, NULL); *thread = (HANDLE)_beginthreadex(NULL, 0, worker, &thread_param, 0, NULL);
if (!thread) { if (*thread==NULL) {
ZSTD_pthread_mutex_destroy(&thread_param.initialized_mutex); ZSTD_pthread_mutex_destroy(&thread_param.initialized_mutex);
ZSTD_pthread_cond_destroy(&thread_param.initialized_cond); ZSTD_pthread_cond_destroy(&thread_param.initialized_cond);
return errno; return errno;
@ -137,6 +139,7 @@ int ZSTD_pthread_join(ZSTD_pthread_t thread)
int ZSTD_pthread_mutex_init(ZSTD_pthread_mutex_t* mutex, pthread_mutexattr_t const* attr) int ZSTD_pthread_mutex_init(ZSTD_pthread_mutex_t* mutex, pthread_mutexattr_t const* attr)
{ {
assert(mutex != NULL);
*mutex = (pthread_mutex_t*)ZSTD_malloc(sizeof(pthread_mutex_t)); *mutex = (pthread_mutex_t*)ZSTD_malloc(sizeof(pthread_mutex_t));
if (!*mutex) if (!*mutex)
return 1; return 1;
@ -145,6 +148,7 @@ int ZSTD_pthread_mutex_init(ZSTD_pthread_mutex_t* mutex, pthread_mutexattr_t con
int ZSTD_pthread_mutex_destroy(ZSTD_pthread_mutex_t* mutex) int ZSTD_pthread_mutex_destroy(ZSTD_pthread_mutex_t* mutex)
{ {
assert(mutex != NULL);
if (!*mutex) if (!*mutex)
return 0; return 0;
{ {
@ -156,6 +160,7 @@ int ZSTD_pthread_mutex_destroy(ZSTD_pthread_mutex_t* mutex)
int ZSTD_pthread_cond_init(ZSTD_pthread_cond_t* cond, pthread_condattr_t const* attr) int ZSTD_pthread_cond_init(ZSTD_pthread_cond_t* cond, pthread_condattr_t const* attr)
{ {
assert(cond != NULL);
*cond = (pthread_cond_t*)ZSTD_malloc(sizeof(pthread_cond_t)); *cond = (pthread_cond_t*)ZSTD_malloc(sizeof(pthread_cond_t));
if (!*cond) if (!*cond)
return 1; return 1;
@ -164,6 +169,7 @@ int ZSTD_pthread_cond_init(ZSTD_pthread_cond_t* cond, pthread_condattr_t const*
int ZSTD_pthread_cond_destroy(ZSTD_pthread_cond_t* cond) int ZSTD_pthread_cond_destroy(ZSTD_pthread_cond_t* cond)
{ {
assert(cond != NULL);
if (!*cond) if (!*cond)
return 0; return 0;
{ {


@ -1,24 +1,18 @@
 /*
- * xxHash - Fast Hash algorithm
- * Copyright (c) Meta Platforms, Inc. and affiliates.
- *
- * You can contact the author at :
- * - xxHash homepage: https://cyan4973.github.io/xxHash/
- * - xxHash source repository : https://github.com/Cyan4973/xxHash
+ * xxHash - Extremely Fast Hash algorithm
+ * Copyright (c) Yann Collet - Meta Platforms, Inc
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */

 /*
 * xxhash.c instantiates functions defined in xxhash.h
 */

 #define XXH_STATIC_LINKING_ONLY /* access advanced declarations */
 #define XXH_IMPLEMENTATION /* access definitions */

 #include "xxhash.h"

File diff suppressed because it is too large


@ -178,7 +178,7 @@ static void ZSTD_copy8(void* dst, const void* src) {
ZSTD_memcpy(dst, src, 8); ZSTD_memcpy(dst, src, 8);
#endif #endif
} }
#define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; } #define COPY8(d,s) do { ZSTD_copy8(d,s); d+=8; s+=8; } while (0)
/* Need to use memmove here since the literal buffer can now be located within /* Need to use memmove here since the literal buffer can now be located within
the dst buffer. In circumstances where the op "catches up" to where the the dst buffer. In circumstances where the op "catches up" to where the
@ -198,7 +198,7 @@ static void ZSTD_copy16(void* dst, const void* src) {
ZSTD_memcpy(dst, copy16_buf, 16); ZSTD_memcpy(dst, copy16_buf, 16);
#endif #endif
} }
#define COPY16(d,s) { ZSTD_copy16(d,s); d+=16; s+=16; } #define COPY16(d,s) do { ZSTD_copy16(d,s); d+=16; s+=16; } while (0)
#define WILDCOPY_OVERLENGTH 32 #define WILDCOPY_OVERLENGTH 32
#define WILDCOPY_VECLEN 16 #define WILDCOPY_VECLEN 16
@ -227,7 +227,7 @@ void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e
if (ovtype == ZSTD_overlap_src_before_dst && diff < WILDCOPY_VECLEN) { if (ovtype == ZSTD_overlap_src_before_dst && diff < WILDCOPY_VECLEN) {
/* Handle short offset copies. */ /* Handle short offset copies. */
do { do {
COPY8(op, ip) COPY8(op, ip);
} while (op < oend); } while (op < oend);
} else { } else {
assert(diff >= WILDCOPY_VECLEN || diff <= -WILDCOPY_VECLEN); assert(diff >= WILDCOPY_VECLEN || diff <= -WILDCOPY_VECLEN);
@ -366,13 +366,13 @@ typedef struct {
/*! ZSTD_getcBlockSize() : /*! ZSTD_getcBlockSize() :
* Provides the size of compressed block from block header `src` */ * Provides the size of compressed block from block header `src` */
/* Used by: decompress, fullbench (does not get its definition from here) */ /* Used by: decompress, fullbench */
size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
blockProperties_t* bpPtr); blockProperties_t* bpPtr);
/*! ZSTD_decodeSeqHeaders() : /*! ZSTD_decodeSeqHeaders() :
* decode sequence header from src */ * decode sequence header from src */
/* Used by: decompress, fullbench (does not get its definition from here) */ /* Used by: zstd_decompress_block, fullbench */
size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
const void* src, size_t srcSize); const void* src, size_t srcSize);


@ -25,7 +25,7 @@
#include "../common/error_private.h" #include "../common/error_private.h"
#define ZSTD_DEPS_NEED_MALLOC #define ZSTD_DEPS_NEED_MALLOC
#define ZSTD_DEPS_NEED_MATH64 #define ZSTD_DEPS_NEED_MATH64
#include "../common/zstd_deps.h" /* ZSTD_malloc, ZSTD_free, ZSTD_memcpy, ZSTD_memset */ #include "../common/zstd_deps.h" /* ZSTD_memset */
#include "../common/bits.h" /* ZSTD_highbit32 */ #include "../common/bits.h" /* ZSTD_highbit32 */
@ -225,8 +225,8 @@ size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog)
size_t const maxHeaderSize = (((maxSymbolValue+1) * tableLog size_t const maxHeaderSize = (((maxSymbolValue+1) * tableLog
+ 4 /* bitCount initialized at 4 */ + 4 /* bitCount initialized at 4 */
+ 2 /* first two symbols may use one additional bit each */) / 8) + 2 /* first two symbols may use one additional bit each */) / 8)
+ 1 /* round up to whole nb bytes */ + 1 /* round up to whole nb bytes */
+ 2 /* additional two bytes for bitstream flush */; + 2 /* additional two bytes for bitstream flush */;
return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND; /* maxSymbolValue==0 ? use default */ return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND; /* maxSymbolValue==0 ? use default */
} }
@ -255,7 +255,7 @@ FSE_writeNCount_generic (void* header, size_t headerBufferSize,
/* Init */ /* Init */
remaining = tableSize+1; /* +1 for extra accuracy */ remaining = tableSize+1; /* +1 for extra accuracy */
threshold = tableSize; threshold = tableSize;
nbBits = tableLog+1; nbBits = (int)tableLog+1;
while ((symbol < alphabetSize) && (remaining>1)) { /* stops at 1 */ while ((symbol < alphabetSize) && (remaining>1)) { /* stops at 1 */
if (previousIs0) { if (previousIs0) {
@ -274,7 +274,7 @@ FSE_writeNCount_generic (void* header, size_t headerBufferSize,
} }
while (symbol >= start+3) { while (symbol >= start+3) {
start+=3; start+=3;
bitStream += 3 << bitCount; bitStream += 3U << bitCount;
bitCount += 2; bitCount += 2;
} }
bitStream += (symbol-start) << bitCount; bitStream += (symbol-start) << bitCount;
@ -294,7 +294,7 @@ FSE_writeNCount_generic (void* header, size_t headerBufferSize,
count++; /* +1 for extra accuracy */ count++; /* +1 for extra accuracy */
if (count>=threshold) if (count>=threshold)
count += max; /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */ count += max; /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */
bitStream += count << bitCount; bitStream += (U32)count << bitCount;
bitCount += nbBits; bitCount += nbBits;
bitCount -= (count<max); bitCount -= (count<max);
previousIs0 = (count==1); previousIs0 = (count==1);
@ -322,7 +322,8 @@ FSE_writeNCount_generic (void* header, size_t headerBufferSize,
out[1] = (BYTE)(bitStream>>8); out[1] = (BYTE)(bitStream>>8);
out+= (bitCount+7) /8; out+= (bitCount+7) /8;
return (out-ostart); assert(out >= ostart);
return (size_t)(out-ostart);
} }


@ -220,6 +220,25 @@ static void HUF_setValue(HUF_CElt* elt, size_t value)
} }
} }
HUF_CTableHeader HUF_readCTableHeader(HUF_CElt const* ctable)
{
HUF_CTableHeader header;
ZSTD_memcpy(&header, ctable, sizeof(header));
return header;
}
static void HUF_writeCTableHeader(HUF_CElt* ctable, U32 tableLog, U32 maxSymbolValue)
{
HUF_CTableHeader header;
HUF_STATIC_ASSERT(sizeof(ctable[0]) == sizeof(header));
ZSTD_memset(&header, 0, sizeof(header));
assert(tableLog < 256);
header.tableLog = (BYTE)tableLog;
assert(maxSymbolValue < 256);
header.maxSymbolValue = (BYTE)maxSymbolValue;
ZSTD_memcpy(ctable, &header, sizeof(header));
}
typedef struct { typedef struct {
HUF_CompressWeightsWksp wksp; HUF_CompressWeightsWksp wksp;
BYTE bitsToWeight[HUF_TABLELOG_MAX + 1]; /* precomputed conversion table */ BYTE bitsToWeight[HUF_TABLELOG_MAX + 1]; /* precomputed conversion table */
@ -237,6 +256,9 @@ size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize,
HUF_STATIC_ASSERT(HUF_CTABLE_WORKSPACE_SIZE >= sizeof(HUF_WriteCTableWksp)); HUF_STATIC_ASSERT(HUF_CTABLE_WORKSPACE_SIZE >= sizeof(HUF_WriteCTableWksp));
assert(HUF_readCTableHeader(CTable).maxSymbolValue == maxSymbolValue);
assert(HUF_readCTableHeader(CTable).tableLog == huffLog);
/* check conditions */ /* check conditions */
if (workspaceSize < sizeof(HUF_WriteCTableWksp)) return ERROR(GENERIC); if (workspaceSize < sizeof(HUF_WriteCTableWksp)) return ERROR(GENERIC);
if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge); if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
@ -283,7 +305,9 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void
if (tableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge); if (tableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
if (nbSymbols > *maxSymbolValuePtr+1) return ERROR(maxSymbolValue_tooSmall); if (nbSymbols > *maxSymbolValuePtr+1) return ERROR(maxSymbolValue_tooSmall);
CTable[0] = tableLog; *maxSymbolValuePtr = nbSymbols - 1;
HUF_writeCTableHeader(CTable, tableLog, *maxSymbolValuePtr);
/* Prepare base value per rank */ /* Prepare base value per rank */
{ U32 n, nextRankStart = 0; { U32 n, nextRankStart = 0;
@ -315,7 +339,6 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void
{ U32 n; for (n=0; n<nbSymbols; n++) HUF_setValue(ct + n, valPerRank[HUF_getNbBits(ct[n])]++); } { U32 n; for (n=0; n<nbSymbols; n++) HUF_setValue(ct + n, valPerRank[HUF_getNbBits(ct[n])]++); }
} }
*maxSymbolValuePtr = nbSymbols - 1;
return readSize; return readSize;
} }
@ -323,6 +346,8 @@ U32 HUF_getNbBitsFromCTable(HUF_CElt const* CTable, U32 symbolValue)
{ {
const HUF_CElt* const ct = CTable + 1; const HUF_CElt* const ct = CTable + 1;
assert(symbolValue <= HUF_SYMBOLVALUE_MAX); assert(symbolValue <= HUF_SYMBOLVALUE_MAX);
if (symbolValue > HUF_readCTableHeader(CTable).maxSymbolValue)
return 0;
return (U32)HUF_getNbBits(ct[symbolValue]); return (U32)HUF_getNbBits(ct[symbolValue]);
} }
@ -723,7 +748,8 @@ static void HUF_buildCTableFromTree(HUF_CElt* CTable, nodeElt const* huffNode, i
HUF_setNbBits(ct + huffNode[n].byte, huffNode[n].nbBits); /* push nbBits per symbol, symbol order */ HUF_setNbBits(ct + huffNode[n].byte, huffNode[n].nbBits); /* push nbBits per symbol, symbol order */
for (n=0; n<alphabetSize; n++) for (n=0; n<alphabetSize; n++)
HUF_setValue(ct + n, valPerRank[HUF_getNbBits(ct[n])]++); /* assign value within rank, symbol order */ HUF_setValue(ct + n, valPerRank[HUF_getNbBits(ct[n])]++); /* assign value within rank, symbol order */
CTable[0] = maxNbBits;
HUF_writeCTableHeader(CTable, maxNbBits, maxSymbolValue);
} }
size_t size_t
@ -776,13 +802,20 @@ size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count,
} }
 int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) {
-  HUF_CElt const* ct = CTable + 1;
-  int bad = 0;
-  int s;
-  for (s = 0; s <= (int)maxSymbolValue; ++s) {
-    bad |= (count[s] != 0) & (HUF_getNbBits(ct[s]) == 0);
-  }
-  return !bad;
+    HUF_CTableHeader header = HUF_readCTableHeader(CTable);
+    HUF_CElt const* ct = CTable + 1;
+    int bad = 0;
+    int s;
+
+    assert(header.tableLog <= HUF_TABLELOG_ABSOLUTEMAX);
+
+    if (header.maxSymbolValue < maxSymbolValue)
+        return 0;
+
+    for (s = 0; s <= (int)maxSymbolValue; ++s) {
+        bad |= (count[s] != 0) & (HUF_getNbBits(ct[s]) == 0);
+    }
+    return !bad;
 }
size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); } size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); }
@ -1024,17 +1057,17 @@ HUF_compress1X_usingCTable_internal_body(void* dst, size_t dstSize,
const void* src, size_t srcSize, const void* src, size_t srcSize,
const HUF_CElt* CTable) const HUF_CElt* CTable)
{ {
U32 const tableLog = (U32)CTable[0]; U32 const tableLog = HUF_readCTableHeader(CTable).tableLog;
HUF_CElt const* ct = CTable + 1; HUF_CElt const* ct = CTable + 1;
const BYTE* ip = (const BYTE*) src; const BYTE* ip = (const BYTE*) src;
BYTE* const ostart = (BYTE*)dst; BYTE* const ostart = (BYTE*)dst;
BYTE* const oend = ostart + dstSize; BYTE* const oend = ostart + dstSize;
BYTE* op = ostart;
HUF_CStream_t bitC; HUF_CStream_t bitC;
/* init */ /* init */
if (dstSize < 8) return 0; /* not enough space to compress */ if (dstSize < 8) return 0; /* not enough space to compress */
{ size_t const initErr = HUF_initCStream(&bitC, op, (size_t)(oend-op)); { BYTE* op = ostart;
size_t const initErr = HUF_initCStream(&bitC, op, (size_t)(oend-op));
if (HUF_isError(initErr)) return 0; } if (HUF_isError(initErr)) return 0; }
if (dstSize < HUF_tightCompressBound(srcSize, (size_t)tableLog) || tableLog > 11) if (dstSize < HUF_tightCompressBound(srcSize, (size_t)tableLog) || tableLog > 11)
@ -1255,7 +1288,7 @@ unsigned HUF_optimalTableLog(
{ BYTE* dst = (BYTE*)workSpace + sizeof(HUF_WriteCTableWksp); { BYTE* dst = (BYTE*)workSpace + sizeof(HUF_WriteCTableWksp);
size_t dstSize = wkspSize - sizeof(HUF_WriteCTableWksp); size_t dstSize = wkspSize - sizeof(HUF_WriteCTableWksp);
size_t maxBits, hSize, newSize; size_t hSize, newSize;
const unsigned symbolCardinality = HUF_cardinality(count, maxSymbolValue); const unsigned symbolCardinality = HUF_cardinality(count, maxSymbolValue);
const unsigned minTableLog = HUF_minTableLog(symbolCardinality); const unsigned minTableLog = HUF_minTableLog(symbolCardinality);
size_t optSize = ((size_t) ~0) - 1; size_t optSize = ((size_t) ~0) - 1;
@ -1266,12 +1299,14 @@ unsigned HUF_optimalTableLog(
/* Search until size increases */ /* Search until size increases */
for (optLogGuess = minTableLog; optLogGuess <= maxTableLog; optLogGuess++) { for (optLogGuess = minTableLog; optLogGuess <= maxTableLog; optLogGuess++) {
DEBUGLOG(7, "checking for huffLog=%u", optLogGuess); DEBUGLOG(7, "checking for huffLog=%u", optLogGuess);
maxBits = HUF_buildCTable_wksp(table, count, maxSymbolValue, optLogGuess, workSpace, wkspSize);
if (ERR_isError(maxBits)) continue;
if (maxBits < optLogGuess && optLogGuess > minTableLog) break; { size_t maxBits = HUF_buildCTable_wksp(table, count, maxSymbolValue, optLogGuess, workSpace, wkspSize);
if (ERR_isError(maxBits)) continue;
hSize = HUF_writeCTable_wksp(dst, dstSize, table, maxSymbolValue, (U32)maxBits, workSpace, wkspSize); if (maxBits < optLogGuess && optLogGuess > minTableLog) break;
hSize = HUF_writeCTable_wksp(dst, dstSize, table, maxSymbolValue, (U32)maxBits, workSpace, wkspSize);
}
if (ERR_isError(hSize)) continue; if (ERR_isError(hSize)) continue;
@ -1372,12 +1407,6 @@ HUF_compress_internal (void* dst, size_t dstSize,
huffLog = (U32)maxBits; huffLog = (U32)maxBits;
DEBUGLOG(6, "bit distribution completed (%zu symbols)", showCTableBits(table->CTable + 1, maxSymbolValue+1)); DEBUGLOG(6, "bit distribution completed (%zu symbols)", showCTableBits(table->CTable + 1, maxSymbolValue+1));
} }
/* Zero unused symbols in CTable, so we can check it for validity */
{
size_t const ctableSize = HUF_CTABLE_SIZE_ST(maxSymbolValue);
size_t const unusedSize = sizeof(table->CTable) - ctableSize * sizeof(HUF_CElt);
ZSTD_memset(table->CTable + ctableSize, 0, unusedSize);
}
/* Write table description header */ /* Write table description header */
{ CHECK_V_F(hSize, HUF_writeCTable_wksp(op, dstSize, table->CTable, maxSymbolValue, huffLog, { CHECK_V_F(hSize, HUF_writeCTable_wksp(op, dstSize, table->CTable, maxSymbolValue, huffLog,
@ -1420,7 +1449,7 @@ size_t HUF_compress1X_repeat (void* dst, size_t dstSize,
/* HUF_compress4X_repeat(): /* HUF_compress4X_repeat():
* compress input using 4 streams. * compress input using 4 streams.
* consider skipping quickly * consider skipping quickly
* re-use an existing huffman compression table */ * reuse an existing huffman compression table */
size_t HUF_compress4X_repeat (void* dst, size_t dstSize, size_t HUF_compress4X_repeat (void* dst, size_t dstSize,
const void* src, size_t srcSize, const void* src, size_t srcSize,
unsigned maxSymbolValue, unsigned huffLog, unsigned maxSymbolValue, unsigned huffLog,


@ -178,6 +178,7 @@ static void ZSTD_freeCCtxContent(ZSTD_CCtx* cctx)
size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx) size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx)
{ {
DEBUGLOG(3, "ZSTD_freeCCtx (address: %p)", (void*)cctx);
if (cctx==NULL) return 0; /* support free on NULL */ if (cctx==NULL) return 0; /* support free on NULL */
RETURN_ERROR_IF(cctx->staticSize, memory_allocation, RETURN_ERROR_IF(cctx->staticSize, memory_allocation,
"not compatible with static CCtx"); "not compatible with static CCtx");
@ -649,10 +650,11 @@ static size_t ZSTD_cParam_clampBounds(ZSTD_cParameter cParam, int* value)
return 0; return 0;
} }
#define BOUNDCHECK(cParam, val) { \ #define BOUNDCHECK(cParam, val) \
RETURN_ERROR_IF(!ZSTD_cParam_withinBounds(cParam,val), \ do { \
parameter_outOfBound, "Param out of bounds"); \ RETURN_ERROR_IF(!ZSTD_cParam_withinBounds(cParam,val), \
} parameter_outOfBound, "Param out of bounds"); \
} while (0)
static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param) static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param)
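Wrapping BOUNDCHECK (and CLAMP_TYPE further down) in do { ... } while (0) makes the macro expand to exactly one statement, so it composes safely with an if/else that omits braces. A generic C illustration, not taken from this patch:

    #define CHECK_BRACES(x)  { if (!(x)) return -1; }              /* trailing ';' breaks if/else */
    #define CHECK_DOWHILE(x) do { if (!(x)) return -1; } while (0) /* always exactly one statement */

    static int demo(int v)
    {
        if (v > 0)
            CHECK_DOWHILE(v < 100);  /* fine; CHECK_BRACES(v < 100); here would not compile */
        else
            v = 0;
        return v;
    }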
@ -868,7 +870,7 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
#else #else
FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value), ""); FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value), "");
CCtxParams->nbWorkers = value; CCtxParams->nbWorkers = value;
return CCtxParams->nbWorkers; return (size_t)(CCtxParams->nbWorkers);
#endif #endif
case ZSTD_c_jobSize : case ZSTD_c_jobSize :
@ -892,7 +894,7 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
#else #else
FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(ZSTD_c_overlapLog, &value), ""); FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(ZSTD_c_overlapLog, &value), "");
CCtxParams->overlapLog = value; CCtxParams->overlapLog = value;
return CCtxParams->overlapLog; return (size_t)CCtxParams->overlapLog;
#endif #endif
case ZSTD_c_rsyncable : case ZSTD_c_rsyncable :
@ -902,7 +904,7 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
#else #else
FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(ZSTD_c_overlapLog, &value), ""); FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(ZSTD_c_overlapLog, &value), "");
CCtxParams->rsyncable = value; CCtxParams->rsyncable = value;
return CCtxParams->rsyncable; return (size_t)CCtxParams->rsyncable;
#endif #endif
case ZSTD_c_enableDedicatedDictSearch : case ZSTD_c_enableDedicatedDictSearch :
@ -939,8 +941,10 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
return CCtxParams->ldmParams.hashRateLog; return CCtxParams->ldmParams.hashRateLog;
case ZSTD_c_targetCBlockSize : case ZSTD_c_targetCBlockSize :
if (value!=0) /* 0 ==> default */ if (value!=0) { /* 0 ==> default */
value = MAX(value, ZSTD_TARGETCBLOCKSIZE_MIN);
BOUNDCHECK(ZSTD_c_targetCBlockSize, value); BOUNDCHECK(ZSTD_c_targetCBlockSize, value);
}
CCtxParams->targetCBlockSize = (U32)value; CCtxParams->targetCBlockSize = (U32)value;
return CCtxParams->targetCBlockSize; return CCtxParams->targetCBlockSize;
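With this change a non-zero ZSTD_c_targetCBlockSize request below the minimum is raised to ZSTD_TARGETCBLOCKSIZE_MIN instead of being rejected by the bounds check. A usage sketch (experimental parameter, so ZSTD_STATIC_LINKING_ONLY is assumed before including zstd.h):

    /* Previously this returned a parameter_outOfBound error for small values;
     * now the value is clamped up to ZSTD_TARGETCBLOCKSIZE_MIN. */
    static int requestSmallTargetCBlockSize(ZSTD_CCtx* cctx)
    {
        size_t const r = ZSTD_CCtx_setParameter(cctx, ZSTD_c_targetCBlockSize, 100);
        return ZSTD_isError(r) ? -1 : 0;
    }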
@ -968,7 +972,7 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
case ZSTD_c_validateSequences: case ZSTD_c_validateSequences:
BOUNDCHECK(ZSTD_c_validateSequences, value); BOUNDCHECK(ZSTD_c_validateSequences, value);
CCtxParams->validateSequences = value; CCtxParams->validateSequences = value;
return CCtxParams->validateSequences; return (size_t)CCtxParams->validateSequences;
case ZSTD_c_useBlockSplitter: case ZSTD_c_useBlockSplitter:
BOUNDCHECK(ZSTD_c_useBlockSplitter, value); BOUNDCHECK(ZSTD_c_useBlockSplitter, value);
@ -983,7 +987,7 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
case ZSTD_c_deterministicRefPrefix: case ZSTD_c_deterministicRefPrefix:
BOUNDCHECK(ZSTD_c_deterministicRefPrefix, value); BOUNDCHECK(ZSTD_c_deterministicRefPrefix, value);
CCtxParams->deterministicRefPrefix = !!value; CCtxParams->deterministicRefPrefix = !!value;
return CCtxParams->deterministicRefPrefix; return (size_t)CCtxParams->deterministicRefPrefix;
case ZSTD_c_prefetchCDictTables: case ZSTD_c_prefetchCDictTables:
BOUNDCHECK(ZSTD_c_prefetchCDictTables, value); BOUNDCHECK(ZSTD_c_prefetchCDictTables, value);
@ -993,7 +997,7 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
case ZSTD_c_enableSeqProducerFallback: case ZSTD_c_enableSeqProducerFallback:
BOUNDCHECK(ZSTD_c_enableSeqProducerFallback, value); BOUNDCHECK(ZSTD_c_enableSeqProducerFallback, value);
CCtxParams->enableMatchFinderFallback = value; CCtxParams->enableMatchFinderFallback = value;
return CCtxParams->enableMatchFinderFallback; return (size_t)CCtxParams->enableMatchFinderFallback;
case ZSTD_c_maxBlockSize: case ZSTD_c_maxBlockSize:
if (value!=0) /* 0 ==> default */ if (value!=0) /* 0 ==> default */
@ -1363,7 +1367,6 @@ size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset)
RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
"Reset parameters is only possible during init stage."); "Reset parameters is only possible during init stage.");
ZSTD_clearAllDicts(cctx); ZSTD_clearAllDicts(cctx);
ZSTD_memset(&cctx->externalMatchCtx, 0, sizeof(cctx->externalMatchCtx));
return ZSTD_CCtxParams_reset(&cctx->requestedParams); return ZSTD_CCtxParams_reset(&cctx->requestedParams);
} }
return 0; return 0;
@ -1391,11 +1394,12 @@ size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams)
static ZSTD_compressionParameters static ZSTD_compressionParameters
ZSTD_clampCParams(ZSTD_compressionParameters cParams) ZSTD_clampCParams(ZSTD_compressionParameters cParams)
{ {
# define CLAMP_TYPE(cParam, val, type) { \ # define CLAMP_TYPE(cParam, val, type) \
ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam); \ do { \
if ((int)val<bounds.lowerBound) val=(type)bounds.lowerBound; \ ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam); \
else if ((int)val>bounds.upperBound) val=(type)bounds.upperBound; \ if ((int)val<bounds.lowerBound) val=(type)bounds.lowerBound; \
} else if ((int)val>bounds.upperBound) val=(type)bounds.upperBound; \
} while (0)
# define CLAMP(cParam, val) CLAMP_TYPE(cParam, val, unsigned) # define CLAMP(cParam, val) CLAMP_TYPE(cParam, val, unsigned)
CLAMP(ZSTD_c_windowLog, cParams.windowLog); CLAMP(ZSTD_c_windowLog, cParams.windowLog);
CLAMP(ZSTD_c_chainLog, cParams.chainLog); CLAMP(ZSTD_c_chainLog, cParams.chainLog);
@ -1467,6 +1471,48 @@ ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar,
const U64 maxWindowResize = 1ULL << (ZSTD_WINDOWLOG_MAX-1); const U64 maxWindowResize = 1ULL << (ZSTD_WINDOWLOG_MAX-1);
assert(ZSTD_checkCParams(cPar)==0); assert(ZSTD_checkCParams(cPar)==0);
/* Cascade the selected strategy down to the next-highest one built into
* this binary. */
#ifdef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR
if (cPar.strategy == ZSTD_btultra2) {
cPar.strategy = ZSTD_btultra;
}
if (cPar.strategy == ZSTD_btultra) {
cPar.strategy = ZSTD_btopt;
}
#endif
#ifdef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR
if (cPar.strategy == ZSTD_btopt) {
cPar.strategy = ZSTD_btlazy2;
}
#endif
#ifdef ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR
if (cPar.strategy == ZSTD_btlazy2) {
cPar.strategy = ZSTD_lazy2;
}
#endif
#ifdef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR
if (cPar.strategy == ZSTD_lazy2) {
cPar.strategy = ZSTD_lazy;
}
#endif
#ifdef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR
if (cPar.strategy == ZSTD_lazy) {
cPar.strategy = ZSTD_greedy;
}
#endif
#ifdef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR
if (cPar.strategy == ZSTD_greedy) {
cPar.strategy = ZSTD_dfast;
}
#endif
#ifdef ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR
if (cPar.strategy == ZSTD_dfast) {
cPar.strategy = ZSTD_fast;
cPar.targetLength = 0;
}
#endif
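These ZSTD_EXCLUDE_*_BLOCK_COMPRESSOR defines are build-time switches for trimming binary size; when a strategy's compressor is compiled out, the request silently degrades to the strongest strategy still present. The same cascade restated as a standalone sketch (names hypothetical, not library code):

    typedef enum { S_fast, S_dfast, S_greedy, S_lazy, S_lazy2,
                   S_btlazy2, S_btopt, S_btultra, S_btultra2 } strat_sketch_e;

    static strat_sketch_e cascadeSketch(strat_sketch_e s,
                                        int haveBtUltra, int haveBtOpt, int haveBtLazy2)
    {
        if (!haveBtUltra && (s == S_btultra2 || s == S_btultra)) s = S_btopt;
        if (!haveBtOpt   && s == S_btopt)   s = S_btlazy2;
        if (!haveBtLazy2 && s == S_btlazy2) s = S_lazy2;
        return s;   /* the real chain continues down to S_fast, mirroring the #ifdefs above */
    }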
switch (mode) { switch (mode) {
case ZSTD_cpm_unknown: case ZSTD_cpm_unknown:
case ZSTD_cpm_noAttachDict: case ZSTD_cpm_noAttachDict:
@ -1617,8 +1663,8 @@ ZSTD_sizeof_matchState(const ZSTD_compressionParameters* const cParams,
+ ZSTD_cwksp_aligned_alloc_size((MaxLL+1) * sizeof(U32)) + ZSTD_cwksp_aligned_alloc_size((MaxLL+1) * sizeof(U32))
+ ZSTD_cwksp_aligned_alloc_size((MaxOff+1) * sizeof(U32)) + ZSTD_cwksp_aligned_alloc_size((MaxOff+1) * sizeof(U32))
+ ZSTD_cwksp_aligned_alloc_size((1<<Litbits) * sizeof(U32)) + ZSTD_cwksp_aligned_alloc_size((1<<Litbits) * sizeof(U32))
+ ZSTD_cwksp_aligned_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_match_t)) + ZSTD_cwksp_aligned_alloc_size(ZSTD_OPT_SIZE * sizeof(ZSTD_match_t))
+ ZSTD_cwksp_aligned_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t)); + ZSTD_cwksp_aligned_alloc_size(ZSTD_OPT_SIZE * sizeof(ZSTD_optimal_t));
size_t const lazyAdditionalSpace = ZSTD_rowMatchFinderUsed(cParams->strategy, useRowMatchFinder) size_t const lazyAdditionalSpace = ZSTD_rowMatchFinderUsed(cParams->strategy, useRowMatchFinder)
? ZSTD_cwksp_aligned_alloc_size(hSize) ? ZSTD_cwksp_aligned_alloc_size(hSize)
: 0; : 0;
@ -1707,7 +1753,7 @@ size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params)
* be needed. However, we still allocate two 0-sized buffers, which can * be needed. However, we still allocate two 0-sized buffers, which can
* take space under ASAN. */ * take space under ASAN. */
return ZSTD_estimateCCtxSize_usingCCtxParams_internal( return ZSTD_estimateCCtxSize_usingCCtxParams_internal(
&cParams, &params->ldmParams, 1, useRowMatchFinder, 0, 0, ZSTD_CONTENTSIZE_UNKNOWN, params->useSequenceProducer, params->maxBlockSize); &cParams, &params->ldmParams, 1, useRowMatchFinder, 0, 0, ZSTD_CONTENTSIZE_UNKNOWN, ZSTD_hasExtSeqProd(params), params->maxBlockSize);
} }
size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams) size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams)
@ -1768,7 +1814,7 @@ size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params)
return ZSTD_estimateCCtxSize_usingCCtxParams_internal( return ZSTD_estimateCCtxSize_usingCCtxParams_internal(
&cParams, &params->ldmParams, 1, useRowMatchFinder, inBuffSize, outBuffSize, &cParams, &params->ldmParams, 1, useRowMatchFinder, inBuffSize, outBuffSize,
ZSTD_CONTENTSIZE_UNKNOWN, params->useSequenceProducer, params->maxBlockSize); ZSTD_CONTENTSIZE_UNKNOWN, ZSTD_hasExtSeqProd(params), params->maxBlockSize);
} }
} }
@ -2001,8 +2047,8 @@ ZSTD_reset_matchState(ZSTD_matchState_t* ms,
ms->opt.litLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxLL+1) * sizeof(unsigned)); ms->opt.litLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxLL+1) * sizeof(unsigned));
ms->opt.matchLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxML+1) * sizeof(unsigned)); ms->opt.matchLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxML+1) * sizeof(unsigned));
ms->opt.offCodeFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxOff+1) * sizeof(unsigned)); ms->opt.offCodeFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxOff+1) * sizeof(unsigned));
ms->opt.matchTable = (ZSTD_match_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_match_t)); ms->opt.matchTable = (ZSTD_match_t*)ZSTD_cwksp_reserve_aligned(ws, ZSTD_OPT_SIZE * sizeof(ZSTD_match_t));
ms->opt.priceTable = (ZSTD_optimal_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t)); ms->opt.priceTable = (ZSTD_optimal_t*)ZSTD_cwksp_reserve_aligned(ws, ZSTD_OPT_SIZE * sizeof(ZSTD_optimal_t));
} }
ms->cParams = *cParams; ms->cParams = *cParams;
@ -2074,7 +2120,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
{ size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params->cParams.windowLog), pledgedSrcSize)); { size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params->cParams.windowLog), pledgedSrcSize));
size_t const blockSize = MIN(params->maxBlockSize, windowSize); size_t const blockSize = MIN(params->maxBlockSize, windowSize);
size_t const maxNbSeq = ZSTD_maxNbSeq(blockSize, params->cParams.minMatch, params->useSequenceProducer); size_t const maxNbSeq = ZSTD_maxNbSeq(blockSize, params->cParams.minMatch, ZSTD_hasExtSeqProd(params));
size_t const buffOutSize = (zbuff == ZSTDb_buffered && params->outBufferMode == ZSTD_bm_buffered) size_t const buffOutSize = (zbuff == ZSTDb_buffered && params->outBufferMode == ZSTD_bm_buffered)
? ZSTD_compressBound(blockSize) + 1 ? ZSTD_compressBound(blockSize) + 1
: 0; : 0;
@ -2091,8 +2137,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
size_t const neededSpace = size_t const neededSpace =
ZSTD_estimateCCtxSize_usingCCtxParams_internal( ZSTD_estimateCCtxSize_usingCCtxParams_internal(
&params->cParams, &params->ldmParams, zc->staticSize != 0, params->useRowMatchFinder, &params->cParams, &params->ldmParams, zc->staticSize != 0, params->useRowMatchFinder,
buffInSize, buffOutSize, pledgedSrcSize, params->useSequenceProducer, params->maxBlockSize); buffInSize, buffOutSize, pledgedSrcSize, ZSTD_hasExtSeqProd(params), params->maxBlockSize);
int resizeWorkspace;
FORWARD_IF_ERROR(neededSpace, "cctx size estimate failed!"); FORWARD_IF_ERROR(neededSpace, "cctx size estimate failed!");
@ -2101,7 +2146,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
{ /* Check if workspace is large enough, alloc a new one if needed */ { /* Check if workspace is large enough, alloc a new one if needed */
int const workspaceTooSmall = ZSTD_cwksp_sizeof(ws) < neededSpace; int const workspaceTooSmall = ZSTD_cwksp_sizeof(ws) < neededSpace;
int const workspaceWasteful = ZSTD_cwksp_check_wasteful(ws, neededSpace); int const workspaceWasteful = ZSTD_cwksp_check_wasteful(ws, neededSpace);
resizeWorkspace = workspaceTooSmall || workspaceWasteful; int resizeWorkspace = workspaceTooSmall || workspaceWasteful;
DEBUGLOG(4, "Need %zu B workspace", neededSpace); DEBUGLOG(4, "Need %zu B workspace", neededSpace);
DEBUGLOG(4, "windowSize: %zu - blockSize: %zu", windowSize, blockSize); DEBUGLOG(4, "windowSize: %zu - blockSize: %zu", windowSize, blockSize);
@ -2176,10 +2221,10 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
} }
/* reserve space for block-level external sequences */ /* reserve space for block-level external sequences */
if (params->useSequenceProducer) { if (ZSTD_hasExtSeqProd(params)) {
size_t const maxNbExternalSeq = ZSTD_sequenceBound(blockSize); size_t const maxNbExternalSeq = ZSTD_sequenceBound(blockSize);
zc->externalMatchCtx.seqBufferCapacity = maxNbExternalSeq; zc->extSeqBufCapacity = maxNbExternalSeq;
zc->externalMatchCtx.seqBuffer = zc->extSeqBuf =
(ZSTD_Sequence*)ZSTD_cwksp_reserve_aligned(ws, maxNbExternalSeq * sizeof(ZSTD_Sequence)); (ZSTD_Sequence*)ZSTD_cwksp_reserve_aligned(ws, maxNbExternalSeq * sizeof(ZSTD_Sequence));
} }
@ -2564,7 +2609,7 @@ ZSTD_reduceTable_internal (U32* const table, U32 const size, U32 const reducerVa
assert(size < (1U<<31)); /* can be casted to int */ assert(size < (1U<<31)); /* can be casted to int */
#if ZSTD_MEMORY_SANITIZER && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE) #if ZSTD_MEMORY_SANITIZER && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE)
/* To validate that the table re-use logic is sound, and that we don't /* To validate that the table reuse logic is sound, and that we don't
* access table space that we haven't cleaned, we re-"poison" the table * access table space that we haven't cleaned, we re-"poison" the table
* space every time we mark it dirty. * space every time we mark it dirty.
* *
@ -2992,40 +3037,43 @@ ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_paramS
static const ZSTD_blockCompressor blockCompressor[4][ZSTD_STRATEGY_MAX+1] = { static const ZSTD_blockCompressor blockCompressor[4][ZSTD_STRATEGY_MAX+1] = {
{ ZSTD_compressBlock_fast /* default for 0 */, { ZSTD_compressBlock_fast /* default for 0 */,
ZSTD_compressBlock_fast, ZSTD_compressBlock_fast,
ZSTD_compressBlock_doubleFast, ZSTD_COMPRESSBLOCK_DOUBLEFAST,
ZSTD_compressBlock_greedy, ZSTD_COMPRESSBLOCK_GREEDY,
ZSTD_compressBlock_lazy, ZSTD_COMPRESSBLOCK_LAZY,
ZSTD_compressBlock_lazy2, ZSTD_COMPRESSBLOCK_LAZY2,
ZSTD_compressBlock_btlazy2, ZSTD_COMPRESSBLOCK_BTLAZY2,
ZSTD_compressBlock_btopt, ZSTD_COMPRESSBLOCK_BTOPT,
ZSTD_compressBlock_btultra, ZSTD_COMPRESSBLOCK_BTULTRA,
ZSTD_compressBlock_btultra2 }, ZSTD_COMPRESSBLOCK_BTULTRA2
},
{ ZSTD_compressBlock_fast_extDict /* default for 0 */, { ZSTD_compressBlock_fast_extDict /* default for 0 */,
ZSTD_compressBlock_fast_extDict, ZSTD_compressBlock_fast_extDict,
ZSTD_compressBlock_doubleFast_extDict, ZSTD_COMPRESSBLOCK_DOUBLEFAST_EXTDICT,
ZSTD_compressBlock_greedy_extDict, ZSTD_COMPRESSBLOCK_GREEDY_EXTDICT,
ZSTD_compressBlock_lazy_extDict, ZSTD_COMPRESSBLOCK_LAZY_EXTDICT,
ZSTD_compressBlock_lazy2_extDict, ZSTD_COMPRESSBLOCK_LAZY2_EXTDICT,
ZSTD_compressBlock_btlazy2_extDict, ZSTD_COMPRESSBLOCK_BTLAZY2_EXTDICT,
ZSTD_compressBlock_btopt_extDict, ZSTD_COMPRESSBLOCK_BTOPT_EXTDICT,
ZSTD_compressBlock_btultra_extDict, ZSTD_COMPRESSBLOCK_BTULTRA_EXTDICT,
ZSTD_compressBlock_btultra_extDict }, ZSTD_COMPRESSBLOCK_BTULTRA_EXTDICT
},
{ ZSTD_compressBlock_fast_dictMatchState /* default for 0 */, { ZSTD_compressBlock_fast_dictMatchState /* default for 0 */,
ZSTD_compressBlock_fast_dictMatchState, ZSTD_compressBlock_fast_dictMatchState,
ZSTD_compressBlock_doubleFast_dictMatchState, ZSTD_COMPRESSBLOCK_DOUBLEFAST_DICTMATCHSTATE,
ZSTD_compressBlock_greedy_dictMatchState, ZSTD_COMPRESSBLOCK_GREEDY_DICTMATCHSTATE,
ZSTD_compressBlock_lazy_dictMatchState, ZSTD_COMPRESSBLOCK_LAZY_DICTMATCHSTATE,
ZSTD_compressBlock_lazy2_dictMatchState, ZSTD_COMPRESSBLOCK_LAZY2_DICTMATCHSTATE,
ZSTD_compressBlock_btlazy2_dictMatchState, ZSTD_COMPRESSBLOCK_BTLAZY2_DICTMATCHSTATE,
ZSTD_compressBlock_btopt_dictMatchState, ZSTD_COMPRESSBLOCK_BTOPT_DICTMATCHSTATE,
ZSTD_compressBlock_btultra_dictMatchState, ZSTD_COMPRESSBLOCK_BTULTRA_DICTMATCHSTATE,
ZSTD_compressBlock_btultra_dictMatchState }, ZSTD_COMPRESSBLOCK_BTULTRA_DICTMATCHSTATE
},
{ NULL /* default for 0 */, { NULL /* default for 0 */,
NULL, NULL,
NULL, NULL,
ZSTD_compressBlock_greedy_dedicatedDictSearch, ZSTD_COMPRESSBLOCK_GREEDY_DEDICATEDDICTSEARCH,
ZSTD_compressBlock_lazy_dedicatedDictSearch, ZSTD_COMPRESSBLOCK_LAZY_DEDICATEDDICTSEARCH,
ZSTD_compressBlock_lazy2_dedicatedDictSearch, ZSTD_COMPRESSBLOCK_LAZY2_DEDICATEDDICTSEARCH,
NULL, NULL,
NULL, NULL,
NULL, NULL,
@ -3038,18 +3086,26 @@ ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_paramS
DEBUGLOG(4, "Selected block compressor: dictMode=%d strat=%d rowMatchfinder=%d", (int)dictMode, (int)strat, (int)useRowMatchFinder); DEBUGLOG(4, "Selected block compressor: dictMode=%d strat=%d rowMatchfinder=%d", (int)dictMode, (int)strat, (int)useRowMatchFinder);
if (ZSTD_rowMatchFinderUsed(strat, useRowMatchFinder)) { if (ZSTD_rowMatchFinderUsed(strat, useRowMatchFinder)) {
static const ZSTD_blockCompressor rowBasedBlockCompressors[4][3] = { static const ZSTD_blockCompressor rowBasedBlockCompressors[4][3] = {
{ ZSTD_compressBlock_greedy_row, {
ZSTD_compressBlock_lazy_row, ZSTD_COMPRESSBLOCK_GREEDY_ROW,
ZSTD_compressBlock_lazy2_row }, ZSTD_COMPRESSBLOCK_LAZY_ROW,
{ ZSTD_compressBlock_greedy_extDict_row, ZSTD_COMPRESSBLOCK_LAZY2_ROW
ZSTD_compressBlock_lazy_extDict_row, },
ZSTD_compressBlock_lazy2_extDict_row }, {
{ ZSTD_compressBlock_greedy_dictMatchState_row, ZSTD_COMPRESSBLOCK_GREEDY_EXTDICT_ROW,
ZSTD_compressBlock_lazy_dictMatchState_row, ZSTD_COMPRESSBLOCK_LAZY_EXTDICT_ROW,
ZSTD_compressBlock_lazy2_dictMatchState_row }, ZSTD_COMPRESSBLOCK_LAZY2_EXTDICT_ROW
{ ZSTD_compressBlock_greedy_dedicatedDictSearch_row, },
ZSTD_compressBlock_lazy_dedicatedDictSearch_row, {
ZSTD_compressBlock_lazy2_dedicatedDictSearch_row } ZSTD_COMPRESSBLOCK_GREEDY_DICTMATCHSTATE_ROW,
ZSTD_COMPRESSBLOCK_LAZY_DICTMATCHSTATE_ROW,
ZSTD_COMPRESSBLOCK_LAZY2_DICTMATCHSTATE_ROW
},
{
ZSTD_COMPRESSBLOCK_GREEDY_DEDICATEDDICTSEARCH_ROW,
ZSTD_COMPRESSBLOCK_LAZY_DEDICATEDDICTSEARCH_ROW,
ZSTD_COMPRESSBLOCK_LAZY2_DEDICATEDDICTSEARCH_ROW
}
}; };
DEBUGLOG(4, "Selecting a row-based matchfinder"); DEBUGLOG(4, "Selecting a row-based matchfinder");
assert(useRowMatchFinder != ZSTD_ps_auto); assert(useRowMatchFinder != ZSTD_ps_auto);
@ -3192,7 +3248,7 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
/* External matchfinder + LDM is technically possible, just not implemented yet. /* External matchfinder + LDM is technically possible, just not implemented yet.
* We need to revisit soon and implement it. */ * We need to revisit soon and implement it. */
RETURN_ERROR_IF( RETURN_ERROR_IF(
zc->appliedParams.useSequenceProducer, ZSTD_hasExtSeqProd(&zc->appliedParams),
parameter_combination_unsupported, parameter_combination_unsupported,
"Long-distance matching with external sequence producer enabled is not currently supported." "Long-distance matching with external sequence producer enabled is not currently supported."
); );
@ -3211,7 +3267,7 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
/* External matchfinder + LDM is technically possible, just not implemented yet. /* External matchfinder + LDM is technically possible, just not implemented yet.
* We need to revisit soon and implement it. */ * We need to revisit soon and implement it. */
RETURN_ERROR_IF( RETURN_ERROR_IF(
zc->appliedParams.useSequenceProducer, ZSTD_hasExtSeqProd(&zc->appliedParams),
parameter_combination_unsupported, parameter_combination_unsupported,
"Long-distance matching with external sequence producer enabled is not currently supported." "Long-distance matching with external sequence producer enabled is not currently supported."
); );
@ -3230,18 +3286,18 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
zc->appliedParams.useRowMatchFinder, zc->appliedParams.useRowMatchFinder,
src, srcSize); src, srcSize);
assert(ldmSeqStore.pos == ldmSeqStore.size); assert(ldmSeqStore.pos == ldmSeqStore.size);
} else if (zc->appliedParams.useSequenceProducer) { } else if (ZSTD_hasExtSeqProd(&zc->appliedParams)) {
assert( assert(
zc->externalMatchCtx.seqBufferCapacity >= ZSTD_sequenceBound(srcSize) zc->extSeqBufCapacity >= ZSTD_sequenceBound(srcSize)
); );
assert(zc->externalMatchCtx.mFinder != NULL); assert(zc->appliedParams.extSeqProdFunc != NULL);
{ U32 const windowSize = (U32)1 << zc->appliedParams.cParams.windowLog; { U32 const windowSize = (U32)1 << zc->appliedParams.cParams.windowLog;
size_t const nbExternalSeqs = (zc->externalMatchCtx.mFinder)( size_t const nbExternalSeqs = (zc->appliedParams.extSeqProdFunc)(
zc->externalMatchCtx.mState, zc->appliedParams.extSeqProdState,
zc->externalMatchCtx.seqBuffer, zc->extSeqBuf,
zc->externalMatchCtx.seqBufferCapacity, zc->extSeqBufCapacity,
src, srcSize, src, srcSize,
NULL, 0, /* dict and dictSize, currently not supported */ NULL, 0, /* dict and dictSize, currently not supported */
zc->appliedParams.compressionLevel, zc->appliedParams.compressionLevel,
@ -3249,21 +3305,21 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
); );
size_t const nbPostProcessedSeqs = ZSTD_postProcessSequenceProducerResult( size_t const nbPostProcessedSeqs = ZSTD_postProcessSequenceProducerResult(
zc->externalMatchCtx.seqBuffer, zc->extSeqBuf,
nbExternalSeqs, nbExternalSeqs,
zc->externalMatchCtx.seqBufferCapacity, zc->extSeqBufCapacity,
srcSize srcSize
); );
/* Return early if there is no error, since we don't need to worry about last literals */ /* Return early if there is no error, since we don't need to worry about last literals */
if (!ZSTD_isError(nbPostProcessedSeqs)) { if (!ZSTD_isError(nbPostProcessedSeqs)) {
ZSTD_sequencePosition seqPos = {0,0,0}; ZSTD_sequencePosition seqPos = {0,0,0};
size_t const seqLenSum = ZSTD_fastSequenceLengthSum(zc->externalMatchCtx.seqBuffer, nbPostProcessedSeqs); size_t const seqLenSum = ZSTD_fastSequenceLengthSum(zc->extSeqBuf, nbPostProcessedSeqs);
RETURN_ERROR_IF(seqLenSum > srcSize, externalSequences_invalid, "External sequences imply too large a block!"); RETURN_ERROR_IF(seqLenSum > srcSize, externalSequences_invalid, "External sequences imply too large a block!");
FORWARD_IF_ERROR( FORWARD_IF_ERROR(
ZSTD_copySequencesToSeqStoreExplicitBlockDelim( ZSTD_copySequencesToSeqStoreExplicitBlockDelim(
zc, &seqPos, zc, &seqPos,
zc->externalMatchCtx.seqBuffer, nbPostProcessedSeqs, zc->extSeqBuf, nbPostProcessedSeqs,
src, srcSize, src, srcSize,
zc->appliedParams.searchForExternalRepcodes zc->appliedParams.searchForExternalRepcodes
), ),
@ -3280,9 +3336,11 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
} }
/* Fallback to software matchfinder */ /* Fallback to software matchfinder */
{ ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, { ZSTD_blockCompressor const blockCompressor =
zc->appliedParams.useRowMatchFinder, ZSTD_selectBlockCompressor(
dictMode); zc->appliedParams.cParams.strategy,
zc->appliedParams.useRowMatchFinder,
dictMode);
ms->ldmSeqStore = NULL; ms->ldmSeqStore = NULL;
DEBUGLOG( DEBUGLOG(
5, 5,
@ -3292,9 +3350,10 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize); lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize);
} } } }
} else { /* not long range mode and no external matchfinder */ } else { /* not long range mode and no external matchfinder */
ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(
zc->appliedParams.useRowMatchFinder, zc->appliedParams.cParams.strategy,
dictMode); zc->appliedParams.useRowMatchFinder,
dictMode);
ms->ldmSeqStore = NULL; ms->ldmSeqStore = NULL;
lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize); lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize);
} }
@ -3304,29 +3363,38 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
return ZSTDbss_compress; return ZSTDbss_compress;
} }
static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc) static size_t ZSTD_copyBlockSequences(SeqCollector* seqCollector, const seqStore_t* seqStore, const U32 prevRepcodes[ZSTD_REP_NUM])
{ {
const seqStore_t* seqStore = ZSTD_getSeqStore(zc); const seqDef* inSeqs = seqStore->sequencesStart;
const seqDef* seqStoreSeqs = seqStore->sequencesStart; const size_t nbInSequences = seqStore->sequences - inSeqs;
size_t seqStoreSeqSize = seqStore->sequences - seqStoreSeqs; const size_t nbInLiterals = (size_t)(seqStore->lit - seqStore->litStart);
size_t seqStoreLiteralsSize = (size_t)(seqStore->lit - seqStore->litStart);
size_t literalsRead = 0;
size_t lastLLSize;
ZSTD_Sequence* outSeqs = &zc->seqCollector.seqStart[zc->seqCollector.seqIndex]; ZSTD_Sequence* outSeqs = seqCollector->seqIndex == 0 ? seqCollector->seqStart : seqCollector->seqStart + seqCollector->seqIndex;
const size_t nbOutSequences = nbInSequences + 1;
size_t nbOutLiterals = 0;
repcodes_t repcodes;
size_t i; size_t i;
repcodes_t updatedRepcodes;
assert(zc->seqCollector.seqIndex + 1 < zc->seqCollector.maxSequences); /* Bounds check that we have enough space for every input sequence
/* Ensure we have enough space for last literals "sequence" */ * and the block delimiter
assert(zc->seqCollector.maxSequences >= seqStoreSeqSize + 1); */
ZSTD_memcpy(updatedRepcodes.rep, zc->blockState.prevCBlock->rep, sizeof(repcodes_t)); assert(seqCollector->seqIndex <= seqCollector->maxSequences);
for (i = 0; i < seqStoreSeqSize; ++i) { RETURN_ERROR_IF(
U32 rawOffset = seqStoreSeqs[i].offBase - ZSTD_REP_NUM; nbOutSequences > (size_t)(seqCollector->maxSequences - seqCollector->seqIndex),
outSeqs[i].litLength = seqStoreSeqs[i].litLength; dstSize_tooSmall,
outSeqs[i].matchLength = seqStoreSeqs[i].mlBase + MINMATCH; "Not enough space to copy sequences");
ZSTD_memcpy(&repcodes, prevRepcodes, sizeof(repcodes));
for (i = 0; i < nbInSequences; ++i) {
U32 rawOffset;
outSeqs[i].litLength = inSeqs[i].litLength;
outSeqs[i].matchLength = inSeqs[i].mlBase + MINMATCH;
outSeqs[i].rep = 0; outSeqs[i].rep = 0;
/* Handle the possible single length >= 64K
* There can only be one because we add MINMATCH to every match length,
* and blocks are at most 128K.
*/
if (i == seqStore->longLengthPos) { if (i == seqStore->longLengthPos) {
if (seqStore->longLengthType == ZSTD_llt_literalLength) { if (seqStore->longLengthType == ZSTD_llt_literalLength) {
outSeqs[i].litLength += 0x10000; outSeqs[i].litLength += 0x10000;
@ -3335,41 +3403,55 @@ static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc)
} }
} }
if (seqStoreSeqs[i].offBase <= ZSTD_REP_NUM) { /* Determine the raw offset given the offBase, which may be a repcode. */
/* Derive the correct offset corresponding to a repcode */ if (OFFBASE_IS_REPCODE(inSeqs[i].offBase)) {
outSeqs[i].rep = seqStoreSeqs[i].offBase; const U32 repcode = OFFBASE_TO_REPCODE(inSeqs[i].offBase);
assert(repcode > 0);
outSeqs[i].rep = repcode;
if (outSeqs[i].litLength != 0) { if (outSeqs[i].litLength != 0) {
rawOffset = updatedRepcodes.rep[outSeqs[i].rep - 1]; rawOffset = repcodes.rep[repcode - 1];
} else { } else {
if (outSeqs[i].rep == 3) { if (repcode == 3) {
rawOffset = updatedRepcodes.rep[0] - 1; assert(repcodes.rep[0] > 1);
rawOffset = repcodes.rep[0] - 1;
} else { } else {
rawOffset = updatedRepcodes.rep[outSeqs[i].rep]; rawOffset = repcodes.rep[repcode];
} }
} }
} else {
rawOffset = OFFBASE_TO_OFFSET(inSeqs[i].offBase);
} }
outSeqs[i].offset = rawOffset; outSeqs[i].offset = rawOffset;
/* seqStoreSeqs[i].offset == offCode+1, and ZSTD_updateRep() expects offCode
so we provide seqStoreSeqs[i].offset - 1 */ /* Update repcode history for the sequence */
ZSTD_updateRep(updatedRepcodes.rep, ZSTD_updateRep(repcodes.rep,
seqStoreSeqs[i].offBase, inSeqs[i].offBase,
seqStoreSeqs[i].litLength == 0); inSeqs[i].litLength == 0);
literalsRead += outSeqs[i].litLength;
nbOutLiterals += outSeqs[i].litLength;
} }
/* Insert last literals (if any exist) in the block as a sequence with ml == off == 0. /* Insert last literals (if any exist) in the block as a sequence with ml == off == 0.
* If there are no last literals, then we'll emit (of: 0, ml: 0, ll: 0), which is a marker * If there are no last literals, then we'll emit (of: 0, ml: 0, ll: 0), which is a marker
* for the block boundary, according to the API. * for the block boundary, according to the API.
*/ */
assert(seqStoreLiteralsSize >= literalsRead); assert(nbInLiterals >= nbOutLiterals);
lastLLSize = seqStoreLiteralsSize - literalsRead; {
outSeqs[i].litLength = (U32)lastLLSize; const size_t lastLLSize = nbInLiterals - nbOutLiterals;
outSeqs[i].matchLength = outSeqs[i].offset = outSeqs[i].rep = 0; outSeqs[nbInSequences].litLength = (U32)lastLLSize;
seqStoreSeqSize++; outSeqs[nbInSequences].matchLength = 0;
zc->seqCollector.seqIndex += seqStoreSeqSize; outSeqs[nbInSequences].offset = 0;
assert(nbOutSequences == nbInSequences + 1);
}
seqCollector->seqIndex += nbOutSequences;
assert(seqCollector->seqIndex <= seqCollector->maxSequences);
return 0;
} }
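The rewritten ZSTD_copyBlockSequences maps each stored offBase to a raw offset, handling repcodes via the three-entry history (a repcode of 3 together with litLength == 0 means rep[0] - 1), updates that history per sequence, and appends a final (offset 0, matchLength 0) entry carrying the last literals as the block delimiter. The repcode rule in isolation:

    /* Sketch of the repcode-to-offset rule used above (rep history has 3 entries). */
    static unsigned resolveRepcodeSketch(unsigned repcode /* 1..3 */,
                                         unsigned litLength,
                                         const unsigned rep[3])
    {
        if (litLength != 0) return rep[repcode - 1];
        /* litLength == 0 shifts the mapping by one slot: */
        return (repcode == 3) ? rep[0] - 1 : rep[repcode];
    }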
size_t ZSTD_sequenceBound(size_t srcSize) { size_t ZSTD_sequenceBound(size_t srcSize) {
return (srcSize / ZSTD_MINMATCH_MIN) + 1; const size_t maxNbSeq = (srcSize / ZSTD_MINMATCH_MIN) + 1;
const size_t maxNbDelims = (srcSize / ZSTD_BLOCKSIZE_MAX_MIN) + 1;
return maxNbSeq + maxNbDelims;
} }
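The bound now also reserves one entry per block delimiter, since sequence collection emits a (0, 0, lastLiterals) marker for every block. A rough sizing sketch, assuming ZSTD_MINMATCH_MIN == 3 and ZSTD_BLOCKSIZE_MAX_MIN == (1 << 10) (my reading of zstd.h; check the header for the actual values), with <stdlib.h> and the experimental zstd.h declarations included:

    static ZSTD_Sequence* allocSeqBuffer(size_t srcSize)
    {
        /* For srcSize = 1 MiB:
         *   maxNbSeq    = 1048576 / 3    + 1 = 349526
         *   maxNbDelims = 1048576 / 1024 + 1 =   1025
         *   bound       = 350551 ZSTD_Sequence entries */
        return (ZSTD_Sequence*)malloc(ZSTD_sequenceBound(srcSize) * sizeof(ZSTD_Sequence));
    }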
size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs, size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
@ -3378,6 +3460,16 @@ size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
const size_t dstCapacity = ZSTD_compressBound(srcSize); const size_t dstCapacity = ZSTD_compressBound(srcSize);
void* dst = ZSTD_customMalloc(dstCapacity, ZSTD_defaultCMem); void* dst = ZSTD_customMalloc(dstCapacity, ZSTD_defaultCMem);
SeqCollector seqCollector; SeqCollector seqCollector;
{
int targetCBlockSize;
FORWARD_IF_ERROR(ZSTD_CCtx_getParameter(zc, ZSTD_c_targetCBlockSize, &targetCBlockSize), "");
RETURN_ERROR_IF(targetCBlockSize != 0, parameter_unsupported, "targetCBlockSize != 0");
}
{
int nbWorkers;
FORWARD_IF_ERROR(ZSTD_CCtx_getParameter(zc, ZSTD_c_nbWorkers, &nbWorkers), "");
RETURN_ERROR_IF(nbWorkers != 0, parameter_unsupported, "nbWorkers != 0");
}
RETURN_ERROR_IF(dst == NULL, memory_allocation, "NULL pointer!"); RETURN_ERROR_IF(dst == NULL, memory_allocation, "NULL pointer!");
@ -3387,8 +3479,12 @@ size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
seqCollector.maxSequences = outSeqsSize; seqCollector.maxSequences = outSeqsSize;
zc->seqCollector = seqCollector; zc->seqCollector = seqCollector;
ZSTD_compress2(zc, dst, dstCapacity, src, srcSize); {
ZSTD_customFree(dst, ZSTD_defaultCMem); const size_t ret = ZSTD_compress2(zc, dst, dstCapacity, src, srcSize);
ZSTD_customFree(dst, ZSTD_defaultCMem);
FORWARD_IF_ERROR(ret, "ZSTD_compress2 failed");
}
assert(zc->seqCollector.seqIndex <= ZSTD_sequenceBound(srcSize));
return zc->seqCollector.seqIndex; return zc->seqCollector.seqIndex;
} }
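ZSTD_generateSequences now rejects targetCBlockSize != 0 and nbWorkers != 0 up front, frees the scratch buffer before checking the result, and propagates any ZSTD_compress2 error instead of returning a meaningless count. A usage sketch (experimental API: ZSTD_STATIC_LINKING_ONLY plus <stdlib.h> assumed; error handling abbreviated):

    static size_t collectSequences(const void* src, size_t srcSize, ZSTD_Sequence** seqsOut)
    {
        ZSTD_CCtx* const cctx = ZSTD_createCCtx();
        size_t const capacity = ZSTD_sequenceBound(srcSize);
        ZSTD_Sequence* const seqs = (ZSTD_Sequence*)malloc(capacity * sizeof(ZSTD_Sequence));
        size_t const nbSeqs = ZSTD_generateSequences(cctx, seqs, capacity, src, srcSize);
        ZSTD_freeCCtx(cctx);
        if (ZSTD_isError(nbSeqs)) { free(seqs); return nbSeqs; }
        *seqsOut = seqs;   /* includes one block-delimiter entry per block */
        return nbSeqs;
    }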
@ -3981,8 +4077,9 @@ ZSTD_compressSeqStore_singleBlock(ZSTD_CCtx* zc,
cSeqsSize = 1; cSeqsSize = 1;
} }
/* Sequence collection not supported when block splitting */
if (zc->seqCollector.collectSequences) { if (zc->seqCollector.collectSequences) {
ZSTD_copyBlockSequences(zc); FORWARD_IF_ERROR(ZSTD_copyBlockSequences(&zc->seqCollector, seqStore, dRepOriginal.rep), "copyBlockSequences failed");
ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState); ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState);
return 0; return 0;
} }
@ -4204,6 +4301,7 @@ ZSTD_compressBlock_splitBlock(ZSTD_CCtx* zc,
if (bss == ZSTDbss_noCompress) { if (bss == ZSTDbss_noCompress) {
if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid) if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid)
zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check; zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check;
RETURN_ERROR_IF(zc->seqCollector.collectSequences, sequenceProducer_failed, "Uncompressible block");
cSize = ZSTD_noCompressBlock(dst, dstCapacity, src, srcSize, lastBlock); cSize = ZSTD_noCompressBlock(dst, dstCapacity, src, srcSize, lastBlock);
FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed"); FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed");
DEBUGLOG(4, "ZSTD_compressBlock_splitBlock: Nocompress block"); DEBUGLOG(4, "ZSTD_compressBlock_splitBlock: Nocompress block");
@ -4236,11 +4334,15 @@ ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
{ const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize); { const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize);
FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed"); FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed");
if (bss == ZSTDbss_noCompress) { cSize = 0; goto out; } if (bss == ZSTDbss_noCompress) {
RETURN_ERROR_IF(zc->seqCollector.collectSequences, sequenceProducer_failed, "Uncompressible block");
cSize = 0;
goto out;
}
} }
if (zc->seqCollector.collectSequences) { if (zc->seqCollector.collectSequences) {
ZSTD_copyBlockSequences(zc); FORWARD_IF_ERROR(ZSTD_copyBlockSequences(&zc->seqCollector, ZSTD_getSeqStore(zc), zc->blockState.prevCBlock->rep), "copyBlockSequences failed");
ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState); ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState);
return 0; return 0;
} }
@ -4553,19 +4655,15 @@ size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity)
} }
} }
size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq) void ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq)
{ {
RETURN_ERROR_IF(cctx->stage != ZSTDcs_init, stage_wrong, assert(cctx->stage == ZSTDcs_init);
"wrong cctx stage"); assert(nbSeq == 0 || cctx->appliedParams.ldmParams.enableLdm != ZSTD_ps_enable);
RETURN_ERROR_IF(cctx->appliedParams.ldmParams.enableLdm == ZSTD_ps_enable,
parameter_unsupported,
"incompatible with ldm");
cctx->externSeqStore.seq = seq; cctx->externSeqStore.seq = seq;
cctx->externSeqStore.size = nbSeq; cctx->externSeqStore.size = nbSeq;
cctx->externSeqStore.capacity = nbSeq; cctx->externSeqStore.capacity = nbSeq;
cctx->externSeqStore.pos = 0; cctx->externSeqStore.pos = 0;
cctx->externSeqStore.posInSequence = 0; cctx->externSeqStore.posInSequence = 0;
return 0;
} }
@ -4760,12 +4858,19 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms,
ZSTD_fillHashTable(ms, iend, dtlm, tfp); ZSTD_fillHashTable(ms, iend, dtlm, tfp);
break; break;
case ZSTD_dfast: case ZSTD_dfast:
#ifndef ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR
ZSTD_fillDoubleHashTable(ms, iend, dtlm, tfp); ZSTD_fillDoubleHashTable(ms, iend, dtlm, tfp);
#else
assert(0); /* shouldn't be called: cparams should've been adjusted. */
#endif
break; break;
case ZSTD_greedy: case ZSTD_greedy:
case ZSTD_lazy: case ZSTD_lazy:
case ZSTD_lazy2: case ZSTD_lazy2:
#if !defined(ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR) \
|| !defined(ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR) \
|| !defined(ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR)
assert(srcSize >= HASH_READ_SIZE); assert(srcSize >= HASH_READ_SIZE);
if (ms->dedicatedDictSearch) { if (ms->dedicatedDictSearch) {
assert(ms->chainTable != NULL); assert(ms->chainTable != NULL);
@ -4782,14 +4887,23 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms,
DEBUGLOG(4, "Using chain-based hash table for lazy dict"); DEBUGLOG(4, "Using chain-based hash table for lazy dict");
} }
} }
#else
assert(0); /* shouldn't be called: cparams should've been adjusted. */
#endif
break; break;
case ZSTD_btlazy2: /* we want the dictionary table fully sorted */ case ZSTD_btlazy2: /* we want the dictionary table fully sorted */
case ZSTD_btopt: case ZSTD_btopt:
case ZSTD_btultra: case ZSTD_btultra:
case ZSTD_btultra2: case ZSTD_btultra2:
#if !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR) \
|| !defined(ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR) \
|| !defined(ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR)
assert(srcSize >= HASH_READ_SIZE); assert(srcSize >= HASH_READ_SIZE);
ZSTD_updateTree(ms, iend-HASH_READ_SIZE, iend); ZSTD_updateTree(ms, iend-HASH_READ_SIZE, iend);
#else
assert(0); /* shouldn't be called: cparams should've been adjusted. */
#endif
break; break;
default: default:
@ -4836,11 +4950,10 @@ size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace,
/* We only set the loaded table as valid if it contains all non-zero /* We only set the loaded table as valid if it contains all non-zero
* weights. Otherwise, we set it to check */ * weights. Otherwise, we set it to check */
if (!hasZeroWeights) if (!hasZeroWeights && maxSymbolValue == 255)
bs->entropy.huf.repeatMode = HUF_repeat_valid; bs->entropy.huf.repeatMode = HUF_repeat_valid;
RETURN_ERROR_IF(HUF_isError(hufHeaderSize), dictionary_corrupted, ""); RETURN_ERROR_IF(HUF_isError(hufHeaderSize), dictionary_corrupted, "");
RETURN_ERROR_IF(maxSymbolValue < 255, dictionary_corrupted, "");
dictPtr += hufHeaderSize; dictPtr += hufHeaderSize;
} }
@ -5107,14 +5220,13 @@ static size_t ZSTD_writeEpilogue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity)
{ {
BYTE* const ostart = (BYTE*)dst; BYTE* const ostart = (BYTE*)dst;
BYTE* op = ostart; BYTE* op = ostart;
size_t fhSize = 0;
DEBUGLOG(4, "ZSTD_writeEpilogue"); DEBUGLOG(4, "ZSTD_writeEpilogue");
RETURN_ERROR_IF(cctx->stage == ZSTDcs_created, stage_wrong, "init missing"); RETURN_ERROR_IF(cctx->stage == ZSTDcs_created, stage_wrong, "init missing");
/* special case : empty frame */ /* special case : empty frame */
if (cctx->stage == ZSTDcs_init) { if (cctx->stage == ZSTDcs_init) {
fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, &cctx->appliedParams, 0, 0); size_t fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, &cctx->appliedParams, 0, 0);
FORWARD_IF_ERROR(fhSize, "ZSTD_writeFrameHeader failed"); FORWARD_IF_ERROR(fhSize, "ZSTD_writeFrameHeader failed");
dstCapacity -= fhSize; dstCapacity -= fhSize;
op += fhSize; op += fhSize;
@ -5124,8 +5236,9 @@ static size_t ZSTD_writeEpilogue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity)
if (cctx->stage != ZSTDcs_ending) { if (cctx->stage != ZSTDcs_ending) {
/* write one last empty block, make it the "last" block */ /* write one last empty block, make it the "last" block */
U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw)<<1) + 0; U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw)<<1) + 0;
RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for epilogue"); ZSTD_STATIC_ASSERT(ZSTD_BLOCKHEADERSIZE == 3);
MEM_writeLE32(op, cBlockHeader24); RETURN_ERROR_IF(dstCapacity<3, dstSize_tooSmall, "no room for epilogue");
MEM_writeLE24(op, cBlockHeader24);
op += ZSTD_blockHeaderSize; op += ZSTD_blockHeaderSize;
dstCapacity -= ZSTD_blockHeaderSize; dstCapacity -= ZSTD_blockHeaderSize;
} }
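A zstd block header is 3 bytes (ZSTD_BLOCKHEADERSIZE == 3): lastBlock in bit 0, blockType in bits 1-2, blockSize in the remaining 21 bits, so an empty raw last block is simply the value 1. The epilogue now checks for 3 bytes and writes LE24 instead of over-requiring 4 bytes and writing LE32. Writing that header by hand, for illustration:

    static void writeEmptyRawLastBlockHeader(unsigned char header[3])
    {
        /* lastBlock = 1, blockType = bt_raw = 0, blockSize = 0  =>  value 1, LE24 */
        unsigned const headerValue = 1u + (0u << 1) + (0u << 3);
        header[0] = (unsigned char)( headerValue        & 0xFF);
        header[1] = (unsigned char)((headerValue >> 8)  & 0xFF);
        header[2] = (unsigned char)((headerValue >> 16) & 0xFF);
    }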
@ -5455,7 +5568,7 @@ ZSTD_CDict* ZSTD_createCDict_advanced2(
cctxParams.useRowMatchFinder, cctxParams.enableDedicatedDictSearch, cctxParams.useRowMatchFinder, cctxParams.enableDedicatedDictSearch,
customMem); customMem);
if (ZSTD_isError( ZSTD_initCDict_internal(cdict, if (!cdict || ZSTD_isError( ZSTD_initCDict_internal(cdict,
dict, dictSize, dict, dictSize,
dictLoadMethod, dictContentType, dictLoadMethod, dictContentType,
cctxParams) )) { cctxParams) )) {
@ -5879,7 +5992,7 @@ static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
if (zcs->appliedParams.inBufferMode == ZSTD_bm_stable) { if (zcs->appliedParams.inBufferMode == ZSTD_bm_stable) {
assert(input->pos >= zcs->stableIn_notConsumed); assert(input->pos >= zcs->stableIn_notConsumed);
input->pos -= zcs->stableIn_notConsumed; input->pos -= zcs->stableIn_notConsumed;
ip -= zcs->stableIn_notConsumed; if (ip) ip -= zcs->stableIn_notConsumed;
zcs->stableIn_notConsumed = 0; zcs->stableIn_notConsumed = 0;
} }
if (zcs->appliedParams.inBufferMode == ZSTD_bm_buffered) { if (zcs->appliedParams.inBufferMode == ZSTD_bm_buffered) {
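The added `if (ip)` guard was presumably introduced because `ip` can be NULL when no stable input buffer is provided, and pointer arithmetic on a null pointer is undefined behavior in C even when the offset would later be compensated. A generic illustration, not zstd-specific:

    static const char* rewindInput(const char* ip, size_t notConsumed)
    {
        /* NULL - nonzero is undefined behavior, so only rewind a real pointer. */
        return (ip != NULL) ? ip - notConsumed : NULL;
    }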
@ -6138,7 +6251,7 @@ static size_t ZSTD_CCtx_init_compressStream2(ZSTD_CCtx* cctx,
#ifdef ZSTD_MULTITHREAD #ifdef ZSTD_MULTITHREAD
/* If external matchfinder is enabled, make sure to fail before checking job size (for consistency) */ /* If external matchfinder is enabled, make sure to fail before checking job size (for consistency) */
RETURN_ERROR_IF( RETURN_ERROR_IF(
params.useSequenceProducer == 1 && params.nbWorkers >= 1, ZSTD_hasExtSeqProd(&params) && params.nbWorkers >= 1,
parameter_combination_unsupported, parameter_combination_unsupported,
"External sequence producer isn't supported with nbWorkers >= 1" "External sequence producer isn't supported with nbWorkers >= 1"
); );
@ -6430,7 +6543,7 @@ ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx,
if (cctx->appliedParams.validateSequences) { if (cctx->appliedParams.validateSequences) {
seqPos->posInSrc += litLength + matchLength; seqPos->posInSrc += litLength + matchLength;
FORWARD_IF_ERROR(ZSTD_validateSequence(offBase, matchLength, cctx->appliedParams.cParams.minMatch, seqPos->posInSrc, FORWARD_IF_ERROR(ZSTD_validateSequence(offBase, matchLength, cctx->appliedParams.cParams.minMatch, seqPos->posInSrc,
cctx->appliedParams.cParams.windowLog, dictSize, cctx->appliedParams.useSequenceProducer), cctx->appliedParams.cParams.windowLog, dictSize, ZSTD_hasExtSeqProd(&cctx->appliedParams)),
"Sequence validation failed"); "Sequence validation failed");
} }
RETURN_ERROR_IF(idx - seqPos->idx >= cctx->seqStore.maxNbSeq, externalSequences_invalid, RETURN_ERROR_IF(idx - seqPos->idx >= cctx->seqStore.maxNbSeq, externalSequences_invalid,
@ -6568,7 +6681,7 @@ ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition*
if (cctx->appliedParams.validateSequences) { if (cctx->appliedParams.validateSequences) {
seqPos->posInSrc += litLength + matchLength; seqPos->posInSrc += litLength + matchLength;
FORWARD_IF_ERROR(ZSTD_validateSequence(offBase, matchLength, cctx->appliedParams.cParams.minMatch, seqPos->posInSrc, FORWARD_IF_ERROR(ZSTD_validateSequence(offBase, matchLength, cctx->appliedParams.cParams.minMatch, seqPos->posInSrc,
cctx->appliedParams.cParams.windowLog, dictSize, cctx->appliedParams.useSequenceProducer), cctx->appliedParams.cParams.windowLog, dictSize, ZSTD_hasExtSeqProd(&cctx->appliedParams)),
"Sequence validation failed"); "Sequence validation failed");
} }
DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offBase, matchLength, litLength); DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offBase, matchLength, litLength);
@ -7014,19 +7127,27 @@ ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSizeH
} }
void ZSTD_registerSequenceProducer( void ZSTD_registerSequenceProducer(
ZSTD_CCtx* zc, void* mState, ZSTD_CCtx* zc,
ZSTD_sequenceProducer_F* mFinder void* extSeqProdState,
ZSTD_sequenceProducer_F extSeqProdFunc
) { ) {
if (mFinder != NULL) { assert(zc != NULL);
ZSTD_externalMatchCtx emctx; ZSTD_CCtxParams_registerSequenceProducer(
emctx.mState = mState; &zc->requestedParams, extSeqProdState, extSeqProdFunc
emctx.mFinder = mFinder; );
emctx.seqBuffer = NULL; }
emctx.seqBufferCapacity = 0;
zc->externalMatchCtx = emctx; void ZSTD_CCtxParams_registerSequenceProducer(
zc->requestedParams.useSequenceProducer = 1; ZSTD_CCtx_params* params,
void* extSeqProdState,
ZSTD_sequenceProducer_F extSeqProdFunc
) {
assert(params != NULL);
if (extSeqProdFunc != NULL) {
params->extSeqProdFunc = extSeqProdFunc;
params->extSeqProdState = extSeqProdState;
} else { } else {
ZSTD_memset(&zc->externalMatchCtx, 0, sizeof(zc->externalMatchCtx)); params->extSeqProdFunc = NULL;
zc->requestedParams.useSequenceProducer = 0; params->extSeqProdState = NULL;
} }
} }
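The producer registration now lives in ZSTD_CCtx_params (extSeqProdState / extSeqProdFunc), with ZSTD_CCtxParams_registerSequenceProducer added for setting it on a params object directly, and the old useSequenceProducer flag replaced by ZSTD_hasExtSeqProd(). A minimal registration sketch: the producer signature below is read off the call site earlier in this file, and ZSTD_SEQUENCE_PRODUCER_ERROR is assumed to be the experimental "failed" return value from zstd.h:

    /* A producer that always declines, so zstd falls back to its internal
     * matchfinder when fallback is enabled. A real producer would fill outSeqs. */
    static size_t noopSeqProducer(void* state,
                                  ZSTD_Sequence* outSeqs, size_t outSeqsCapacity,
                                  const void* src, size_t srcSize,
                                  const void* dict, size_t dictSize,
                                  int compressionLevel, size_t windowSize)
    {
        (void)state; (void)outSeqs; (void)outSeqsCapacity; (void)src; (void)srcSize;
        (void)dict; (void)dictSize; (void)compressionLevel; (void)windowSize;
        return ZSTD_SEQUENCE_PRODUCER_ERROR;
    }

    /* At setup time (sketch):
     *   ZSTD_registerSequenceProducer(cctx, NULL, noopSeqProducer);
     *   ZSTD_CCtx_setParameter(cctx, ZSTD_c_enableSeqProducerFallback, 1);
     */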


@ -39,7 +39,7 @@ extern "C" {
It's not a big deal though : candidate will just be sorted again. It's not a big deal though : candidate will just be sorted again.
Additionally, candidate position 1 will be lost. Additionally, candidate position 1 will be lost.
But candidate 1 cannot hide a large tree of candidates, so it's a minimal loss. But candidate 1 cannot hide a large tree of candidates, so it's a minimal loss.
The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table re-use with a different strategy. The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table reuse with a different strategy.
This constant is required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */ This constant is required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */
@ -159,23 +159,24 @@ typedef struct {
UNUSED_ATTR static const rawSeqStore_t kNullRawSeqStore = {NULL, 0, 0, 0, 0}; UNUSED_ATTR static const rawSeqStore_t kNullRawSeqStore = {NULL, 0, 0, 0, 0};
typedef struct { typedef struct {
int price; int price; /* price from beginning of segment to this position */
U32 off; U32 off; /* offset of previous match */
U32 mlen; U32 mlen; /* length of previous match */
U32 litlen; U32 litlen; /* nb of literals since previous match */
U32 rep[ZSTD_REP_NUM]; U32 rep[ZSTD_REP_NUM]; /* offset history after previous match */
} ZSTD_optimal_t; } ZSTD_optimal_t;
typedef enum { zop_dynamic=0, zop_predef } ZSTD_OptPrice_e; typedef enum { zop_dynamic=0, zop_predef } ZSTD_OptPrice_e;
#define ZSTD_OPT_SIZE (ZSTD_OPT_NUM+3)
typedef struct { typedef struct {
/* All tables are allocated inside cctx->workspace by ZSTD_resetCCtx_internal() */ /* All tables are allocated inside cctx->workspace by ZSTD_resetCCtx_internal() */
unsigned* litFreq; /* table of literals statistics, of size 256 */ unsigned* litFreq; /* table of literals statistics, of size 256 */
unsigned* litLengthFreq; /* table of litLength statistics, of size (MaxLL+1) */ unsigned* litLengthFreq; /* table of litLength statistics, of size (MaxLL+1) */
unsigned* matchLengthFreq; /* table of matchLength statistics, of size (MaxML+1) */ unsigned* matchLengthFreq; /* table of matchLength statistics, of size (MaxML+1) */
unsigned* offCodeFreq; /* table of offCode statistics, of size (MaxOff+1) */ unsigned* offCodeFreq; /* table of offCode statistics, of size (MaxOff+1) */
ZSTD_match_t* matchTable; /* list of found matches, of size ZSTD_OPT_NUM+1 */ ZSTD_match_t* matchTable; /* list of found matches, of size ZSTD_OPT_SIZE */
ZSTD_optimal_t* priceTable; /* All positions tracked by optimal parser, of size ZSTD_OPT_NUM+1 */ ZSTD_optimal_t* priceTable; /* All positions tracked by optimal parser, of size ZSTD_OPT_SIZE */
U32 litSum; /* nb of literals */ U32 litSum; /* nb of literals */
U32 litLengthSum; /* nb of litLength codes */ U32 litLengthSum; /* nb of litLength codes */
@ -228,7 +229,7 @@ struct ZSTD_matchState_t {
U32 rowHashLog; /* For row-based matchfinder: Hashlog based on nb of rows in the hashTable.*/ U32 rowHashLog; /* For row-based matchfinder: Hashlog based on nb of rows in the hashTable.*/
BYTE* tagTable; /* For row-based matchFinder: A row-based table containing the hashes and head index. */ BYTE* tagTable; /* For row-based matchFinder: A row-based table containing the hashes and head index. */
U32 hashCache[ZSTD_ROW_HASH_CACHE_SIZE]; /* For row-based matchFinder: a cache of hashes to improve speed */ U32 hashCache[ZSTD_ROW_HASH_CACHE_SIZE]; /* For row-based matchFinder: a cache of hashes to improve speed */
U64 hashSalt; /* For row-based matchFinder: salts the hash for re-use of tag table */ U64 hashSalt; /* For row-based matchFinder: salts the hash for reuse of tag table */
U32 hashSaltEntropy; /* For row-based matchFinder: collects entropy for salt generation */ U32 hashSaltEntropy; /* For row-based matchFinder: collects entropy for salt generation */
U32* hashTable; U32* hashTable;
@ -360,10 +361,11 @@ struct ZSTD_CCtx_params_s {
* if the external matchfinder returns an error code. */ * if the external matchfinder returns an error code. */
int enableMatchFinderFallback; int enableMatchFinderFallback;
/* Indicates whether an external matchfinder has been referenced. /* Parameters for the external sequence producer API.
* Users can't set this externally. * Users set these parameters through ZSTD_registerSequenceProducer().
* It is set internally in ZSTD_registerSequenceProducer(). */ * It is not possible to set these parameters individually through the public API. */
int useSequenceProducer; void* extSeqProdState;
ZSTD_sequenceProducer_F extSeqProdFunc;
/* Adjust the max block size*/ /* Adjust the max block size*/
size_t maxBlockSize; size_t maxBlockSize;
@ -401,14 +403,6 @@ typedef struct {
ZSTD_entropyCTablesMetadata_t entropyMetadata; ZSTD_entropyCTablesMetadata_t entropyMetadata;
} ZSTD_blockSplitCtx; } ZSTD_blockSplitCtx;
/* Context for block-level external matchfinder API */
typedef struct {
void* mState;
ZSTD_sequenceProducer_F* mFinder;
ZSTD_Sequence* seqBuffer;
size_t seqBufferCapacity;
} ZSTD_externalMatchCtx;
struct ZSTD_CCtx_s { struct ZSTD_CCtx_s {
ZSTD_compressionStage_e stage; ZSTD_compressionStage_e stage;
int cParamsChanged; /* == 1 if cParams(except wlog) or compression level are changed in requestedParams. Triggers transmission of new params to ZSTDMT (if available) then reset to 0. */ int cParamsChanged; /* == 1 if cParams(except wlog) or compression level are changed in requestedParams. Triggers transmission of new params to ZSTDMT (if available) then reset to 0. */
@ -479,8 +473,9 @@ struct ZSTD_CCtx_s {
/* Workspace for block splitter */ /* Workspace for block splitter */
ZSTD_blockSplitCtx blockSplitCtx; ZSTD_blockSplitCtx blockSplitCtx;
/* Workspace for external matchfinder */ /* Buffer for output from external sequence producer */
ZSTD_externalMatchCtx externalMatchCtx; ZSTD_Sequence* extSeqBuf;
size_t extSeqBufCapacity;
}; };
typedef enum { ZSTD_dtlm_fast, ZSTD_dtlm_full } ZSTD_dictTableLoadMethod_e; typedef enum { ZSTD_dtlm_fast, ZSTD_dtlm_full } ZSTD_dictTableLoadMethod_e;
@ -1053,7 +1048,9 @@ MEM_STATIC U32 ZSTD_window_needOverflowCorrection(ZSTD_window_t const window,
* The least significant cycleLog bits of the indices must remain the same, * The least significant cycleLog bits of the indices must remain the same,
* which may be 0. Every index up to maxDist in the past must be valid. * which may be 0. Every index up to maxDist in the past must be valid.
*/ */
MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog, MEM_STATIC
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
U32 maxDist, void const* src) U32 maxDist, void const* src)
{ {
/* preemptive overflow correction: /* preemptive overflow correction:
@ -1246,7 +1243,9 @@ MEM_STATIC void ZSTD_window_init(ZSTD_window_t* window) {
* forget about the extDict. Handles overlap of the prefix and extDict. * forget about the extDict. Handles overlap of the prefix and extDict.
* Returns non-zero if the segment is contiguous. * Returns non-zero if the segment is contiguous.
*/ */
MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window, MEM_STATIC
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
U32 ZSTD_window_update(ZSTD_window_t* window,
void const* src, size_t srcSize, void const* src, size_t srcSize,
int forceNonContiguous) int forceNonContiguous)
{ {
@ -1467,11 +1466,10 @@ size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity);
* This cannot be used when long range matching is enabled. * This cannot be used when long range matching is enabled.
* Zstd will use these sequences, and pass the literals to a secondary block * Zstd will use these sequences, and pass the literals to a secondary block
* compressor. * compressor.
* @return : An error code on failure.
* NOTE: seqs are not verified! Invalid sequences can cause out-of-bounds memory * NOTE: seqs are not verified! Invalid sequences can cause out-of-bounds memory
* access and data corruption. * access and data corruption.
*/ */
size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq); void ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq);
/** ZSTD_cycleLog() : /** ZSTD_cycleLog() :
* condition for correct operation : hashLog > 1 */ * condition for correct operation : hashLog > 1 */
@ -1509,6 +1507,10 @@ ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition*
const ZSTD_Sequence* const inSeqs, size_t inSeqsSize, const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
const void* src, size_t blockSize, ZSTD_paramSwitch_e externalRepSearch); const void* src, size_t blockSize, ZSTD_paramSwitch_e externalRepSearch);
/* Returns 1 if an external sequence producer is registered, otherwise returns 0. */
MEM_STATIC int ZSTD_hasExtSeqProd(const ZSTD_CCtx_params* params) {
return params->extSeqProdFunc != NULL;
}
/* =============================================================== /* ===============================================================
* Deprecated definitions that are still used internally to avoid * Deprecated definitions that are still used internally to avoid


@ -76,8 +76,8 @@ ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
} }
{ int const flags = bmi2 ? HUF_flags_bmi2 : 0; { int const flags = bmi2 ? HUF_flags_bmi2 : 0;
const size_t cSize = singleStream ? HUF_compress1X_usingCTable(op, oend-op, literals, litSize, hufTable, flags) const size_t cSize = singleStream ? HUF_compress1X_usingCTable(op, (size_t)(oend-op), literals, litSize, hufTable, flags)
: HUF_compress4X_usingCTable(op, oend-op, literals, litSize, hufTable, flags); : HUF_compress4X_usingCTable(op, (size_t)(oend-op), literals, litSize, hufTable, flags);
op += cSize; op += cSize;
cLitSize += cSize; cLitSize += cSize;
if (cSize == 0 || ERR_isError(cSize)) { if (cSize == 0 || ERR_isError(cSize)) {
@ -102,7 +102,7 @@ ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
switch(lhSize) switch(lhSize)
{ {
case 3: /* 2 - 2 - 10 - 10 */ case 3: /* 2 - 2 - 10 - 10 */
{ U32 const lhc = hType + ((!singleStream) << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<14); { U32 const lhc = hType + ((U32)(!singleStream) << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<14);
MEM_writeLE24(ostart, lhc); MEM_writeLE24(ostart, lhc);
break; break;
} }
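For the 3-byte literal section header ("2-2-10-10") the packed fields are: block type (2 bits), size format (2 bits, where the bit set by !singleStream selects 4 streams), regenerated size (10 bits), and compressed size (10 bits); the only change here is casting the flag before shifting. Unpacking such a header, as a sketch (assumes MEM_readLE24 from mem.h):

    static void readLiteralsHeader3(const unsigned char* lh)
    {
        U32 const lhc        = MEM_readLE24(lh);
        U32 const blockType  =  lhc        & 3;       /* literals block type */
        U32 const sizeFormat = (lhc >> 2)  & 3;       /* 1 => four streams in this layout */
        U32 const regenSize  = (lhc >> 4)  & 0x3FF;   /* decompressed literals size */
        U32 const cLitSize   = (lhc >> 14) & 0x3FF;   /* compressed literals size */
        (void)blockType; (void)sizeFormat; (void)regenSize; (void)cLitSize;
    }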
@ -122,30 +122,30 @@ ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
} }
*entropyWritten = 1; *entropyWritten = 1;
DEBUGLOG(5, "Compressed literals: %u -> %u", (U32)litSize, (U32)(op-ostart)); DEBUGLOG(5, "Compressed literals: %u -> %u", (U32)litSize, (U32)(op-ostart));
return op-ostart; return (size_t)(op-ostart);
} }
static size_t static size_t
ZSTD_seqDecompressedSize(seqStore_t const* seqStore, ZSTD_seqDecompressedSize(seqStore_t const* seqStore,
const seqDef* sequences, size_t nbSeq, const seqDef* sequences, size_t nbSeqs,
size_t litSize, int lastSequence) size_t litSize, int lastSubBlock)
{ {
const seqDef* const sstart = sequences;
const seqDef* const send = sequences + nbSeq;
const seqDef* sp = sstart;
size_t matchLengthSum = 0; size_t matchLengthSum = 0;
size_t litLengthSum = 0; size_t litLengthSum = 0;
(void)(litLengthSum); /* suppress unused variable warning on some environments */ size_t n;
while (send-sp > 0) { for (n=0; n<nbSeqs; n++) {
ZSTD_sequenceLength const seqLen = ZSTD_getSequenceLength(seqStore, sp); const ZSTD_sequenceLength seqLen = ZSTD_getSequenceLength(seqStore, sequences+n);
litLengthSum += seqLen.litLength; litLengthSum += seqLen.litLength;
matchLengthSum += seqLen.matchLength; matchLengthSum += seqLen.matchLength;
sp++;
} }
assert(litLengthSum <= litSize); DEBUGLOG(5, "ZSTD_seqDecompressedSize: %u sequences from %p: %u literals + %u matchlength",
if (!lastSequence) { (unsigned)nbSeqs, (const void*)sequences,
(unsigned)litLengthSum, (unsigned)matchLengthSum);
if (!lastSubBlock)
assert(litLengthSum == litSize); assert(litLengthSum == litSize);
} else
assert(litLengthSum <= litSize);
(void)litLengthSum;
return matchLengthSum + litSize; return matchLengthSum + litSize;
} }
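A worked illustration (not upstream code) of why the last sub-block is allowed litLengthSum < litSize while earlier ones are not:

/* Illustrative only: if the sequences of a sub-block account for litLengthSum == 100
 * literals but the caller passes litSize == 120, the extra 20 bytes are the block's
 * trailing literals, which belong to no sequence. Only the last sub-block may carry
 * them, hence assert(litLengthSum == litSize) for every earlier sub-block. */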
@ -180,14 +180,14 @@ ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables,
/* Sequences Header */ /* Sequences Header */
RETURN_ERROR_IF((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead*/, RETURN_ERROR_IF((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead*/,
dstSize_tooSmall, ""); dstSize_tooSmall, "");
if (nbSeq < 0x7F) if (nbSeq < 128)
*op++ = (BYTE)nbSeq; *op++ = (BYTE)nbSeq;
else if (nbSeq < LONGNBSEQ) else if (nbSeq < LONGNBSEQ)
op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2; op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2;
else else
op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3; op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3;
if (nbSeq==0) { if (nbSeq==0) {
return op - ostart; return (size_t)(op - ostart);
} }
/* seqHead : flags for FSE encoding type */ /* seqHead : flags for FSE encoding type */
@ -209,7 +209,7 @@ ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables,
} }
{ size_t const bitstreamSize = ZSTD_encodeSequences( { size_t const bitstreamSize = ZSTD_encodeSequences(
op, oend - op, op, (size_t)(oend - op),
fseTables->matchlengthCTable, mlCode, fseTables->matchlengthCTable, mlCode,
fseTables->offcodeCTable, ofCode, fseTables->offcodeCTable, ofCode,
fseTables->litlengthCTable, llCode, fseTables->litlengthCTable, llCode,
@ -253,7 +253,7 @@ ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables,
#endif #endif
*entropyWritten = 1; *entropyWritten = 1;
return op - ostart; return (size_t)(op - ostart);
} }
/** ZSTD_compressSubBlock() : /** ZSTD_compressSubBlock() :
@ -279,7 +279,8 @@ static size_t ZSTD_compressSubBlock(const ZSTD_entropyCTables_t* entropy,
litSize, nbSeq, writeLitEntropy, writeSeqEntropy, lastBlock); litSize, nbSeq, writeLitEntropy, writeSeqEntropy, lastBlock);
{ size_t cLitSize = ZSTD_compressSubBlock_literal((const HUF_CElt*)entropy->huf.CTable, { size_t cLitSize = ZSTD_compressSubBlock_literal((const HUF_CElt*)entropy->huf.CTable,
&entropyMetadata->hufMetadata, literals, litSize, &entropyMetadata->hufMetadata, literals, litSize,
op, oend-op, bmi2, writeLitEntropy, litEntropyWritten); op, (size_t)(oend-op),
bmi2, writeLitEntropy, litEntropyWritten);
FORWARD_IF_ERROR(cLitSize, "ZSTD_compressSubBlock_literal failed"); FORWARD_IF_ERROR(cLitSize, "ZSTD_compressSubBlock_literal failed");
if (cLitSize == 0) return 0; if (cLitSize == 0) return 0;
op += cLitSize; op += cLitSize;
@ -289,18 +290,18 @@ static size_t ZSTD_compressSubBlock(const ZSTD_entropyCTables_t* entropy,
sequences, nbSeq, sequences, nbSeq,
llCode, mlCode, ofCode, llCode, mlCode, ofCode,
cctxParams, cctxParams,
op, oend-op, op, (size_t)(oend-op),
bmi2, writeSeqEntropy, seqEntropyWritten); bmi2, writeSeqEntropy, seqEntropyWritten);
FORWARD_IF_ERROR(cSeqSize, "ZSTD_compressSubBlock_sequences failed"); FORWARD_IF_ERROR(cSeqSize, "ZSTD_compressSubBlock_sequences failed");
if (cSeqSize == 0) return 0; if (cSeqSize == 0) return 0;
op += cSeqSize; op += cSeqSize;
} }
/* Write block header */ /* Write block header */
{ size_t cSize = (op-ostart)-ZSTD_blockHeaderSize; { size_t cSize = (size_t)(op-ostart) - ZSTD_blockHeaderSize;
U32 const cBlockHeader24 = lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3); U32 const cBlockHeader24 = lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3);
MEM_writeLE24(ostart, cBlockHeader24); MEM_writeLE24(ostart, cBlockHeader24);
} }
return op-ostart; return (size_t)(op-ostart);
} }
static size_t ZSTD_estimateSubBlockSize_literal(const BYTE* literals, size_t litSize, static size_t ZSTD_estimateSubBlockSize_literal(const BYTE* literals, size_t litSize,
@ -389,7 +390,11 @@ static size_t ZSTD_estimateSubBlockSize_sequences(const BYTE* ofCodeTable,
return cSeqSizeEstimate + sequencesSectionHeaderSize; return cSeqSizeEstimate + sequencesSectionHeaderSize;
} }
static size_t ZSTD_estimateSubBlockSize(const BYTE* literals, size_t litSize, typedef struct {
size_t estLitSize;
size_t estBlockSize;
} EstimatedBlockSize;
static EstimatedBlockSize ZSTD_estimateSubBlockSize(const BYTE* literals, size_t litSize,
const BYTE* ofCodeTable, const BYTE* ofCodeTable,
const BYTE* llCodeTable, const BYTE* llCodeTable,
const BYTE* mlCodeTable, const BYTE* mlCodeTable,
@ -397,15 +402,17 @@ static size_t ZSTD_estimateSubBlockSize(const BYTE* literals, size_t litSize,
const ZSTD_entropyCTables_t* entropy, const ZSTD_entropyCTables_t* entropy,
const ZSTD_entropyCTablesMetadata_t* entropyMetadata, const ZSTD_entropyCTablesMetadata_t* entropyMetadata,
void* workspace, size_t wkspSize, void* workspace, size_t wkspSize,
int writeLitEntropy, int writeSeqEntropy) { int writeLitEntropy, int writeSeqEntropy)
size_t cSizeEstimate = 0; {
cSizeEstimate += ZSTD_estimateSubBlockSize_literal(literals, litSize, EstimatedBlockSize ebs;
&entropy->huf, &entropyMetadata->hufMetadata, ebs.estLitSize = ZSTD_estimateSubBlockSize_literal(literals, litSize,
workspace, wkspSize, writeLitEntropy); &entropy->huf, &entropyMetadata->hufMetadata,
cSizeEstimate += ZSTD_estimateSubBlockSize_sequences(ofCodeTable, llCodeTable, mlCodeTable, workspace, wkspSize, writeLitEntropy);
ebs.estBlockSize = ZSTD_estimateSubBlockSize_sequences(ofCodeTable, llCodeTable, mlCodeTable,
nbSeq, &entropy->fse, &entropyMetadata->fseMetadata, nbSeq, &entropy->fse, &entropyMetadata->fseMetadata,
workspace, wkspSize, writeSeqEntropy); workspace, wkspSize, writeSeqEntropy);
return cSizeEstimate + ZSTD_blockHeaderSize; ebs.estBlockSize += ebs.estLitSize + ZSTD_blockHeaderSize;
return ebs;
} }
static int ZSTD_needSequenceEntropyTables(ZSTD_fseCTablesMetadata_t const* fseMetadata) static int ZSTD_needSequenceEntropyTables(ZSTD_fseCTablesMetadata_t const* fseMetadata)
@ -419,13 +426,56 @@ static int ZSTD_needSequenceEntropyTables(ZSTD_fseCTablesMetadata_t const* fseMe
return 0; return 0;
} }
static size_t countLiterals(seqStore_t const* seqStore, const seqDef* sp, size_t seqCount)
{
size_t n, total = 0;
assert(sp != NULL);
for (n=0; n<seqCount; n++) {
total += ZSTD_getSequenceLength(seqStore, sp+n).litLength;
}
DEBUGLOG(6, "countLiterals for %zu sequences from %p => %zu bytes", seqCount, (const void*)sp, total);
return total;
}
#define BYTESCALE 256
static size_t sizeBlockSequences(const seqDef* sp, size_t nbSeqs,
size_t targetBudget, size_t avgLitCost, size_t avgSeqCost,
int firstSubBlock)
{
size_t n, budget = 0, inSize=0;
/* entropy headers */
size_t const headerSize = (size_t)firstSubBlock * 120 * BYTESCALE; /* generous estimate */
assert(firstSubBlock==0 || firstSubBlock==1);
budget += headerSize;
/* first sequence => at least one sequence*/
budget += sp[0].litLength * avgLitCost + avgSeqCost;
if (budget > targetBudget) return 1;
inSize = sp[0].litLength + (sp[0].mlBase+MINMATCH);
/* loop over sequences */
for (n=1; n<nbSeqs; n++) {
size_t currentCost = sp[n].litLength * avgLitCost + avgSeqCost;
budget += currentCost;
inSize += sp[n].litLength + (sp[n].mlBase+MINMATCH);
/* stop when sub-block budget is reached */
if ( (budget > targetBudget)
/* though continue to expand until the sub-block is deemed compressible */
&& (budget < inSize * BYTESCALE) )
break;
}
return n;
}
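The costs fed into sizeBlockSequences() are fixed-point values expressed in 1/BYTESCALE-of-a-byte units (they are derived further below in ZSTD_compressSubBlock_multi); a worked example with made-up numbers:

/* Illustrative arithmetic only. Suppose the full block is estimated at
 * estLitSize = 1000 bytes for 4000 literals, and the sequences section at
 * estBlockSize - estLitSize = 500 bytes for 200 sequences:
 *   avgLitCost = 1000 * 256 / 4000 =  64   (~0.25 byte per literal)
 *   avgSeqCost =  500 * 256 /  200 = 640   (~2.5  bytes per sequence)
 * A sequence with litLength = 40 then adds 40*64 + 640 = 3200 to the budget,
 * i.e. 3200 / BYTESCALE = 12.5 estimated compressed bytes; the loop stops once
 * the running budget exceeds the per-sub-block target (also scaled by BYTESCALE). */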
/** ZSTD_compressSubBlock_multi() : /** ZSTD_compressSubBlock_multi() :
* Breaks super-block into multiple sub-blocks and compresses them. * Breaks super-block into multiple sub-blocks and compresses them.
* Entropy will be written to the first block. * Entropy will be written into the first block.
* The following blocks will use repeat mode to compress. * The following blocks use repeat_mode to compress.
* All sub-blocks are compressed blocks (no raw or rle blocks). * Sub-blocks are all compressed, except the last one when beneficial.
* @return : compressed size of the super block (which is multiple ZSTD blocks) * @return : compressed size of the super block (which features multiple ZSTD blocks)
* Or 0 if it failed to compress. */ * or 0 if it failed to compress. */
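As a usage note, a minimal sketch (public API, not part of this file) of how a caller ends up in this path: super-block splitting is driven by the ZSTD_c_targetCBlockSize parameter, and values below ZSTD_TARGETCBLOCKSIZE_MIN are raised to that floor inside ZSTD_compressSubBlock_multi() below.

static void example_enableSubBlockSplitting(void)
{
    ZSTD_CCtx* const cctx = ZSTD_createCCtx();
    /* request roughly 2 KB compressed sub-blocks; too-small values get clamped up */
    ZSTD_CCtx_setParameter(cctx, ZSTD_c_targetCBlockSize, 2048);
    /* ... compress as usual with ZSTD_compress2() ... */
    ZSTD_freeCCtx(cctx);
}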
static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr, static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
const ZSTD_compressedBlockState_t* prevCBlock, const ZSTD_compressedBlockState_t* prevCBlock,
ZSTD_compressedBlockState_t* nextCBlock, ZSTD_compressedBlockState_t* nextCBlock,
@ -438,10 +488,12 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
{ {
const seqDef* const sstart = seqStorePtr->sequencesStart; const seqDef* const sstart = seqStorePtr->sequencesStart;
const seqDef* const send = seqStorePtr->sequences; const seqDef* const send = seqStorePtr->sequences;
const seqDef* sp = sstart; const seqDef* sp = sstart; /* tracks progress within seqStorePtr->sequences */
size_t const nbSeqs = (size_t)(send - sstart);
const BYTE* const lstart = seqStorePtr->litStart; const BYTE* const lstart = seqStorePtr->litStart;
const BYTE* const lend = seqStorePtr->lit; const BYTE* const lend = seqStorePtr->lit;
const BYTE* lp = lstart; const BYTE* lp = lstart;
size_t const nbLiterals = (size_t)(lend - lstart);
BYTE const* ip = (BYTE const*)src; BYTE const* ip = (BYTE const*)src;
BYTE const* const iend = ip + srcSize; BYTE const* const iend = ip + srcSize;
BYTE* const ostart = (BYTE*)dst; BYTE* const ostart = (BYTE*)dst;
@ -450,96 +502,152 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
const BYTE* llCodePtr = seqStorePtr->llCode; const BYTE* llCodePtr = seqStorePtr->llCode;
const BYTE* mlCodePtr = seqStorePtr->mlCode; const BYTE* mlCodePtr = seqStorePtr->mlCode;
const BYTE* ofCodePtr = seqStorePtr->ofCode; const BYTE* ofCodePtr = seqStorePtr->ofCode;
size_t targetCBlockSize = cctxParams->targetCBlockSize; size_t const minTarget = ZSTD_TARGETCBLOCKSIZE_MIN; /* enforce minimum size, to reduce undesirable side effects */
size_t litSize, seqCount; size_t const targetCBlockSize = MAX(minTarget, cctxParams->targetCBlockSize);
int writeLitEntropy = entropyMetadata->hufMetadata.hType == set_compressed; int writeLitEntropy = (entropyMetadata->hufMetadata.hType == set_compressed);
int writeSeqEntropy = 1; int writeSeqEntropy = 1;
int lastSequence = 0;
DEBUGLOG(5, "ZSTD_compressSubBlock_multi (litSize=%u, nbSeq=%u)", DEBUGLOG(5, "ZSTD_compressSubBlock_multi (srcSize=%u, litSize=%u, nbSeq=%u)",
(unsigned)(lend-lp), (unsigned)(send-sstart)); (unsigned)srcSize, (unsigned)(lend-lstart), (unsigned)(send-sstart));
litSize = 0; /* let's start by a general estimation for the full block */
seqCount = 0; if (nbSeqs > 0) {
do { EstimatedBlockSize const ebs =
size_t cBlockSizeEstimate = 0; ZSTD_estimateSubBlockSize(lp, nbLiterals,
if (sstart == send) { ofCodePtr, llCodePtr, mlCodePtr, nbSeqs,
lastSequence = 1; &nextCBlock->entropy, entropyMetadata,
} else { workspace, wkspSize,
const seqDef* const sequence = sp + seqCount; writeLitEntropy, writeSeqEntropy);
lastSequence = sequence == send - 1; /* quick estimation */
litSize += ZSTD_getSequenceLength(seqStorePtr, sequence).litLength; size_t const avgLitCost = nbLiterals ? (ebs.estLitSize * BYTESCALE) / nbLiterals : BYTESCALE;
seqCount++; size_t const avgSeqCost = ((ebs.estBlockSize - ebs.estLitSize) * BYTESCALE) / nbSeqs;
const size_t nbSubBlocks = MAX((ebs.estBlockSize + (targetCBlockSize/2)) / targetCBlockSize, 1);
size_t n, avgBlockBudget, blockBudgetSupp=0;
avgBlockBudget = (ebs.estBlockSize * BYTESCALE) / nbSubBlocks;
DEBUGLOG(5, "estimated fullblock size=%u bytes ; avgLitCost=%.2f ; avgSeqCost=%.2f ; targetCBlockSize=%u, nbSubBlocks=%u ; avgBlockBudget=%.0f bytes",
(unsigned)ebs.estBlockSize, (double)avgLitCost/BYTESCALE, (double)avgSeqCost/BYTESCALE,
(unsigned)targetCBlockSize, (unsigned)nbSubBlocks, (double)avgBlockBudget/BYTESCALE);
/* simplification: if the estimate states that the full superblock doesn't compress, just bail out immediately
* this will result in the production of a single uncompressed block covering @srcSize.*/
if (ebs.estBlockSize > srcSize) return 0;
/* compress and write sub-blocks */
assert(nbSubBlocks>0);
for (n=0; n < nbSubBlocks-1; n++) {
/* determine nb of sequences for current sub-block + nbLiterals from next sequence */
size_t const seqCount = sizeBlockSequences(sp, (size_t)(send-sp),
avgBlockBudget + blockBudgetSupp, avgLitCost, avgSeqCost, n==0);
/* if reached last sequence : break to last sub-block (simplification) */
assert(seqCount <= (size_t)(send-sp));
if (sp + seqCount == send) break;
assert(seqCount > 0);
/* compress sub-block */
{ int litEntropyWritten = 0;
int seqEntropyWritten = 0;
size_t litSize = countLiterals(seqStorePtr, sp, seqCount);
const size_t decompressedSize =
ZSTD_seqDecompressedSize(seqStorePtr, sp, seqCount, litSize, 0);
size_t const cSize = ZSTD_compressSubBlock(&nextCBlock->entropy, entropyMetadata,
sp, seqCount,
lp, litSize,
llCodePtr, mlCodePtr, ofCodePtr,
cctxParams,
op, (size_t)(oend-op),
bmi2, writeLitEntropy, writeSeqEntropy,
&litEntropyWritten, &seqEntropyWritten,
0);
FORWARD_IF_ERROR(cSize, "ZSTD_compressSubBlock failed");
/* check compressibility, update state components */
if (cSize > 0 && cSize < decompressedSize) {
DEBUGLOG(5, "Committed sub-block compressing %u bytes => %u bytes",
(unsigned)decompressedSize, (unsigned)cSize);
assert(ip + decompressedSize <= iend);
ip += decompressedSize;
lp += litSize;
op += cSize;
llCodePtr += seqCount;
mlCodePtr += seqCount;
ofCodePtr += seqCount;
/* Entropy only needs to be written once */
if (litEntropyWritten) {
writeLitEntropy = 0;
}
if (seqEntropyWritten) {
writeSeqEntropy = 0;
}
sp += seqCount;
blockBudgetSupp = 0;
} }
/* otherwise : do not compress yet, coalesce current sub-block with following one */
} }
if (lastSequence) { } /* if (nbSeqs > 0) */
assert(lp <= lend);
assert(litSize <= (size_t)(lend - lp)); /* write last block */
litSize = (size_t)(lend - lp); DEBUGLOG(5, "Generate last sub-block: %u sequences remaining", (unsigned)(send - sp));
} { int litEntropyWritten = 0;
/* I think there is an optimization opportunity here. int seqEntropyWritten = 0;
* Calling ZSTD_estimateSubBlockSize for every sequence can be wasteful size_t litSize = (size_t)(lend - lp);
* since it recalculates estimate from scratch. size_t seqCount = (size_t)(send - sp);
* For example, it would recount literal distribution and symbol codes every time. const size_t decompressedSize =
*/ ZSTD_seqDecompressedSize(seqStorePtr, sp, seqCount, litSize, 1);
cBlockSizeEstimate = ZSTD_estimateSubBlockSize(lp, litSize, ofCodePtr, llCodePtr, mlCodePtr, seqCount, size_t const cSize = ZSTD_compressSubBlock(&nextCBlock->entropy, entropyMetadata,
&nextCBlock->entropy, entropyMetadata, sp, seqCount,
workspace, wkspSize, writeLitEntropy, writeSeqEntropy); lp, litSize,
if (cBlockSizeEstimate > targetCBlockSize || lastSequence) { llCodePtr, mlCodePtr, ofCodePtr,
int litEntropyWritten = 0; cctxParams,
int seqEntropyWritten = 0; op, (size_t)(oend-op),
const size_t decompressedSize = ZSTD_seqDecompressedSize(seqStorePtr, sp, seqCount, litSize, lastSequence); bmi2, writeLitEntropy, writeSeqEntropy,
const size_t cSize = ZSTD_compressSubBlock(&nextCBlock->entropy, entropyMetadata, &litEntropyWritten, &seqEntropyWritten,
sp, seqCount, lastBlock);
lp, litSize, FORWARD_IF_ERROR(cSize, "ZSTD_compressSubBlock failed");
llCodePtr, mlCodePtr, ofCodePtr,
cctxParams, /* update pointers, the nb of literals borrowed from next sequence must be preserved */
op, oend-op, if (cSize > 0 && cSize < decompressedSize) {
bmi2, writeLitEntropy, writeSeqEntropy, DEBUGLOG(5, "Last sub-block compressed %u bytes => %u bytes",
&litEntropyWritten, &seqEntropyWritten, (unsigned)decompressedSize, (unsigned)cSize);
lastBlock && lastSequence); assert(ip + decompressedSize <= iend);
FORWARD_IF_ERROR(cSize, "ZSTD_compressSubBlock failed"); ip += decompressedSize;
if (cSize > 0 && cSize < decompressedSize) { lp += litSize;
DEBUGLOG(5, "Committed the sub-block"); op += cSize;
assert(ip + decompressedSize <= iend); llCodePtr += seqCount;
ip += decompressedSize; mlCodePtr += seqCount;
sp += seqCount; ofCodePtr += seqCount;
lp += litSize; /* Entropy only needs to be written once */
op += cSize; if (litEntropyWritten) {
llCodePtr += seqCount; writeLitEntropy = 0;
mlCodePtr += seqCount;
ofCodePtr += seqCount;
litSize = 0;
seqCount = 0;
/* Entropy only needs to be written once */
if (litEntropyWritten) {
writeLitEntropy = 0;
}
if (seqEntropyWritten) {
writeSeqEntropy = 0;
}
} }
if (seqEntropyWritten) {
writeSeqEntropy = 0;
}
sp += seqCount;
} }
} while (!lastSequence); }
if (writeLitEntropy) { if (writeLitEntropy) {
DEBUGLOG(5, "ZSTD_compressSubBlock_multi has literal entropy tables unwritten"); DEBUGLOG(5, "Literal entropy tables were never written");
ZSTD_memcpy(&nextCBlock->entropy.huf, &prevCBlock->entropy.huf, sizeof(prevCBlock->entropy.huf)); ZSTD_memcpy(&nextCBlock->entropy.huf, &prevCBlock->entropy.huf, sizeof(prevCBlock->entropy.huf));
} }
if (writeSeqEntropy && ZSTD_needSequenceEntropyTables(&entropyMetadata->fseMetadata)) { if (writeSeqEntropy && ZSTD_needSequenceEntropyTables(&entropyMetadata->fseMetadata)) {
/* If we haven't written our entropy tables, then we've violated our contract and /* If we haven't written our entropy tables, then we've violated our contract and
* must emit an uncompressed block. * must emit an uncompressed block.
*/ */
DEBUGLOG(5, "ZSTD_compressSubBlock_multi has sequence entropy tables unwritten"); DEBUGLOG(5, "Sequence entropy tables were never written => cancel, emit an uncompressed block");
return 0; return 0;
} }
if (ip < iend) { if (ip < iend) {
size_t const cSize = ZSTD_noCompressBlock(op, oend - op, ip, iend - ip, lastBlock); /* some data left : last part of the block sent uncompressed */
DEBUGLOG(5, "ZSTD_compressSubBlock_multi last sub-block uncompressed, %zu bytes", (size_t)(iend - ip)); size_t const rSize = (size_t)((iend - ip));
size_t const cSize = ZSTD_noCompressBlock(op, (size_t)(oend - op), ip, rSize, lastBlock);
DEBUGLOG(5, "Generate last uncompressed sub-block of %u bytes", (unsigned)(rSize));
FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed"); FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed");
assert(cSize != 0); assert(cSize != 0);
op += cSize; op += cSize;
/* We have to regenerate the repcodes because we've skipped some sequences */ /* We have to regenerate the repcodes because we've skipped some sequences */
if (sp < send) { if (sp < send) {
seqDef const* seq; const seqDef* seq;
repcodes_t rep; repcodes_t rep;
ZSTD_memcpy(&rep, prevCBlock->rep, sizeof(rep)); ZSTD_memcpy(&rep, prevCBlock->rep, sizeof(rep));
for (seq = sstart; seq < sp; ++seq) { for (seq = sstart; seq < sp; ++seq) {
@ -548,14 +656,17 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
ZSTD_memcpy(nextCBlock->rep, &rep, sizeof(rep)); ZSTD_memcpy(nextCBlock->rep, &rep, sizeof(rep));
} }
} }
DEBUGLOG(5, "ZSTD_compressSubBlock_multi compressed");
return op-ostart; DEBUGLOG(5, "ZSTD_compressSubBlock_multi compressed all subBlocks: total compressed size = %u",
(unsigned)(op-ostart));
return (size_t)(op-ostart);
} }
size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc, size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc,
void* dst, size_t dstCapacity, void* dst, size_t dstCapacity,
void const* src, size_t srcSize, const void* src, size_t srcSize,
unsigned lastBlock) { unsigned lastBlock)
{
ZSTD_entropyCTablesMetadata_t entropyMetadata; ZSTD_entropyCTablesMetadata_t entropyMetadata;
FORWARD_IF_ERROR(ZSTD_buildBlockEntropyStats(&zc->seqStore, FORWARD_IF_ERROR(ZSTD_buildBlockEntropyStats(&zc->seqStore,
@ -192,6 +192,7 @@ MEM_STATIC void ZSTD_cwksp_assert_internal_consistency(ZSTD_cwksp* ws) {
{ {
intptr_t const offset = __msan_test_shadow(ws->initOnceStart, intptr_t const offset = __msan_test_shadow(ws->initOnceStart,
(U8*)ZSTD_cwksp_initialAllocStart(ws) - (U8*)ws->initOnceStart); (U8*)ZSTD_cwksp_initialAllocStart(ws) - (U8*)ws->initOnceStart);
(void)offset;
#if defined(ZSTD_MSAN_PRINT) #if defined(ZSTD_MSAN_PRINT)
if(offset!=-1) { if(offset!=-1) {
__msan_print_shadow((U8*)ws->initOnceStart + offset - 8, 32); __msan_print_shadow((U8*)ws->initOnceStart + offset - 8, 32);
@ -433,7 +434,7 @@ MEM_STATIC void* ZSTD_cwksp_reserve_aligned(ZSTD_cwksp* ws, size_t bytes)
/** /**
* Aligned on 64 bytes. These buffers have the special property that * Aligned on 64 bytes. These buffers have the special property that
* their values remain constrained, allowing us to re-use them without * their values remain constrained, allowing us to reuse them without
* memset()-ing them. * memset()-ing them.
*/ */
MEM_STATIC void* ZSTD_cwksp_reserve_table(ZSTD_cwksp* ws, size_t bytes) MEM_STATIC void* ZSTD_cwksp_reserve_table(ZSTD_cwksp* ws, size_t bytes)
@ -525,7 +526,7 @@ MEM_STATIC void ZSTD_cwksp_mark_tables_dirty(ZSTD_cwksp* ws)
DEBUGLOG(4, "cwksp: ZSTD_cwksp_mark_tables_dirty"); DEBUGLOG(4, "cwksp: ZSTD_cwksp_mark_tables_dirty");
#if ZSTD_MEMORY_SANITIZER && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE) #if ZSTD_MEMORY_SANITIZER && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE)
/* To validate that the table re-use logic is sound, and that we don't /* To validate that the table reuse logic is sound, and that we don't
* access table space that we haven't cleaned, we re-"poison" the table * access table space that we haven't cleaned, we re-"poison" the table
* space every time we mark it dirty. * space every time we mark it dirty.
* Since tableValidEnd space and initOnce space may overlap we don't poison * Since tableValidEnd space and initOnce space may overlap we don't poison
@ -602,9 +603,9 @@ MEM_STATIC void ZSTD_cwksp_clear(ZSTD_cwksp* ws) {
DEBUGLOG(4, "cwksp: clearing!"); DEBUGLOG(4, "cwksp: clearing!");
#if ZSTD_MEMORY_SANITIZER && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE) #if ZSTD_MEMORY_SANITIZER && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE)
/* To validate that the context re-use logic is sound, and that we don't /* To validate that the context reuse logic is sound, and that we don't
* access stuff that this compression hasn't initialized, we re-"poison" * access stuff that this compression hasn't initialized, we re-"poison"
* the workspace except for the areas in which we expect memory re-use * the workspace except for the areas in which we expect memory reuse
* without initialization (objects, valid tables area and init once * without initialization (objects, valid tables area and init once
* memory). */ * memory). */
{ {
@ -635,6 +636,15 @@ MEM_STATIC void ZSTD_cwksp_clear(ZSTD_cwksp* ws) {
ZSTD_cwksp_assert_internal_consistency(ws); ZSTD_cwksp_assert_internal_consistency(ws);
} }
MEM_STATIC size_t ZSTD_cwksp_sizeof(const ZSTD_cwksp* ws) {
return (size_t)((BYTE*)ws->workspaceEnd - (BYTE*)ws->workspace);
}
MEM_STATIC size_t ZSTD_cwksp_used(const ZSTD_cwksp* ws) {
return (size_t)((BYTE*)ws->tableEnd - (BYTE*)ws->workspace)
+ (size_t)((BYTE*)ws->workspaceEnd - (BYTE*)ws->allocStart);
}
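Since the workspace hands out tables from the front and buffers/aligned space from the back, ZSTD_cwksp_used() sums both ends; an illustrative reading (numbers invented):

/* Illustrative only: for a 1 MiB workspace where tables occupy 200 KiB at the front
 * (tableEnd - workspace) and buffers occupy 300 KiB at the back (workspaceEnd - allocStart),
 * ZSTD_cwksp_used() reports 500 KiB while ZSTD_cwksp_sizeof() still reports 1 MiB. */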
/** /**
* The provided workspace takes ownership of the buffer [start, start+size). * The provided workspace takes ownership of the buffer [start, start+size).
* Any existing values in the workspace are ignored (the previously managed * Any existing values in the workspace are ignored (the previously managed
@ -666,6 +676,11 @@ MEM_STATIC size_t ZSTD_cwksp_create(ZSTD_cwksp* ws, size_t size, ZSTD_customMem
MEM_STATIC void ZSTD_cwksp_free(ZSTD_cwksp* ws, ZSTD_customMem customMem) { MEM_STATIC void ZSTD_cwksp_free(ZSTD_cwksp* ws, ZSTD_customMem customMem) {
void *ptr = ws->workspace; void *ptr = ws->workspace;
DEBUGLOG(4, "cwksp: freeing workspace"); DEBUGLOG(4, "cwksp: freeing workspace");
#if ZSTD_MEMORY_SANITIZER && !defined(ZSTD_MSAN_DONT_POISON_WORKSPACE)
if (ptr != NULL && customMem.customFree != NULL) {
__msan_unpoison(ptr, ZSTD_cwksp_sizeof(ws));
}
#endif
ZSTD_memset(ws, 0, sizeof(ZSTD_cwksp)); ZSTD_memset(ws, 0, sizeof(ZSTD_cwksp));
ZSTD_customFree(ptr, customMem); ZSTD_customFree(ptr, customMem);
} }
@ -679,15 +694,6 @@ MEM_STATIC void ZSTD_cwksp_move(ZSTD_cwksp* dst, ZSTD_cwksp* src) {
ZSTD_memset(src, 0, sizeof(ZSTD_cwksp)); ZSTD_memset(src, 0, sizeof(ZSTD_cwksp));
} }
MEM_STATIC size_t ZSTD_cwksp_sizeof(const ZSTD_cwksp* ws) {
return (size_t)((BYTE*)ws->workspaceEnd - (BYTE*)ws->workspace);
}
MEM_STATIC size_t ZSTD_cwksp_used(const ZSTD_cwksp* ws) {
return (size_t)((BYTE*)ws->tableEnd - (BYTE*)ws->workspace)
+ (size_t)((BYTE*)ws->workspaceEnd - (BYTE*)ws->allocStart);
}
MEM_STATIC int ZSTD_cwksp_reserve_failed(const ZSTD_cwksp* ws) { MEM_STATIC int ZSTD_cwksp_reserve_failed(const ZSTD_cwksp* ws) {
return ws->allocFailed; return ws->allocFailed;
} }
@ -11,7 +11,11 @@
#include "zstd_compress_internal.h" #include "zstd_compress_internal.h"
#include "zstd_double_fast.h" #include "zstd_double_fast.h"
static void ZSTD_fillDoubleHashTableForCDict(ZSTD_matchState_t* ms, #ifndef ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR
static
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
void ZSTD_fillDoubleHashTableForCDict(ZSTD_matchState_t* ms,
void const* end, ZSTD_dictTableLoadMethod_e dtlm) void const* end, ZSTD_dictTableLoadMethod_e dtlm)
{ {
const ZSTD_compressionParameters* const cParams = &ms->cParams; const ZSTD_compressionParameters* const cParams = &ms->cParams;
@ -47,7 +51,9 @@ static void ZSTD_fillDoubleHashTableForCDict(ZSTD_matchState_t* ms,
} } } }
} }
static void ZSTD_fillDoubleHashTableForCCtx(ZSTD_matchState_t* ms, static
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
void ZSTD_fillDoubleHashTableForCCtx(ZSTD_matchState_t* ms,
void const* end, ZSTD_dictTableLoadMethod_e dtlm) void const* end, ZSTD_dictTableLoadMethod_e dtlm)
{ {
const ZSTD_compressionParameters* const cParams = &ms->cParams; const ZSTD_compressionParameters* const cParams = &ms->cParams;
@ -95,6 +101,7 @@ void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
FORCE_INLINE_TEMPLATE FORCE_INLINE_TEMPLATE
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
size_t ZSTD_compressBlock_doubleFast_noDict_generic( size_t ZSTD_compressBlock_doubleFast_noDict_generic(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize, U32 const mls /* template */) void const* src, size_t srcSize, U32 const mls /* template */)
@ -305,6 +312,7 @@ _match_stored:
FORCE_INLINE_TEMPLATE FORCE_INLINE_TEMPLATE
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic( size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize, void const* src, size_t srcSize,
@ -348,8 +356,8 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
if (ms->prefetchCDictTables) { if (ms->prefetchCDictTables) {
size_t const hashTableBytes = (((size_t)1) << dictCParams->hashLog) * sizeof(U32); size_t const hashTableBytes = (((size_t)1) << dictCParams->hashLog) * sizeof(U32);
size_t const chainTableBytes = (((size_t)1) << dictCParams->chainLog) * sizeof(U32); size_t const chainTableBytes = (((size_t)1) << dictCParams->chainLog) * sizeof(U32);
PREFETCH_AREA(dictHashLong, hashTableBytes) PREFETCH_AREA(dictHashLong, hashTableBytes);
PREFETCH_AREA(dictHashSmall, chainTableBytes) PREFETCH_AREA(dictHashSmall, chainTableBytes);
} }
/* init */ /* init */
@ -589,7 +597,9 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState(
} }
static size_t ZSTD_compressBlock_doubleFast_extDict_generic( static
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
size_t ZSTD_compressBlock_doubleFast_extDict_generic(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize, void const* src, size_t srcSize,
U32 const mls /* template */) U32 const mls /* template */)
@ -756,3 +766,5 @@ size_t ZSTD_compressBlock_doubleFast_extDict(
return ZSTD_compressBlock_doubleFast_extDict_7(ms, seqStore, rep, src, srcSize); return ZSTD_compressBlock_doubleFast_extDict_7(ms, seqStore, rep, src, srcSize);
} }
} }
#endif /* ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR */
@ -18,9 +18,12 @@ extern "C" {
#include "../common/mem.h" /* U32 */ #include "../common/mem.h" /* U32 */
#include "zstd_compress_internal.h" /* ZSTD_CCtx, size_t */ #include "zstd_compress_internal.h" /* ZSTD_CCtx, size_t */
#ifndef ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR
void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms, void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
void const* end, ZSTD_dictTableLoadMethod_e dtlm, void const* end, ZSTD_dictTableLoadMethod_e dtlm,
ZSTD_tableFillPurpose_e tfp); ZSTD_tableFillPurpose_e tfp);
size_t ZSTD_compressBlock_doubleFast( size_t ZSTD_compressBlock_doubleFast(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize); void const* src, size_t srcSize);
@ -31,6 +34,14 @@ size_t ZSTD_compressBlock_doubleFast_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize); void const* src, size_t srcSize);
#define ZSTD_COMPRESSBLOCK_DOUBLEFAST ZSTD_compressBlock_doubleFast
#define ZSTD_COMPRESSBLOCK_DOUBLEFAST_DICTMATCHSTATE ZSTD_compressBlock_doubleFast_dictMatchState
#define ZSTD_COMPRESSBLOCK_DOUBLEFAST_EXTDICT ZSTD_compressBlock_doubleFast_extDict
#else
#define ZSTD_COMPRESSBLOCK_DOUBLEFAST NULL
#define ZSTD_COMPRESSBLOCK_DOUBLEFAST_DICTMATCHSTATE NULL
#define ZSTD_COMPRESSBLOCK_DOUBLEFAST_EXTDICT NULL
#endif /* ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR */
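These NULL fallbacks let dispatch tables be built unconditionally even when a strategy is compiled out (e.g. with -DZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR); a hedged sketch of the pattern, not the actual selector from zstd_compress.c:

typedef size_t (*exampleBlockCompressor_f)(ZSTD_matchState_t*, seqStore_t*,
                                           U32 rep[ZSTD_REP_NUM],
                                           void const* src, size_t srcSize);
/* entries become NULL when the corresponding compressor is excluded,
 * so a caller must check for NULL before dispatching */
static const exampleBlockCompressor_f exampleDFastTable[3] = {
    ZSTD_COMPRESSBLOCK_DOUBLEFAST,
    ZSTD_COMPRESSBLOCK_DOUBLEFAST_DICTMATCHSTATE,
    ZSTD_COMPRESSBLOCK_DOUBLEFAST_EXTDICT,
};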
#if defined (__cplusplus) #if defined (__cplusplus)
} }
@ -11,7 +11,9 @@
#include "zstd_compress_internal.h" /* ZSTD_hashPtr, ZSTD_count, ZSTD_storeSeq */ #include "zstd_compress_internal.h" /* ZSTD_hashPtr, ZSTD_count, ZSTD_storeSeq */
#include "zstd_fast.h" #include "zstd_fast.h"
static void ZSTD_fillHashTableForCDict(ZSTD_matchState_t* ms, static
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
void ZSTD_fillHashTableForCDict(ZSTD_matchState_t* ms,
const void* const end, const void* const end,
ZSTD_dictTableLoadMethod_e dtlm) ZSTD_dictTableLoadMethod_e dtlm)
{ {
@ -46,7 +48,9 @@ static void ZSTD_fillHashTableForCDict(ZSTD_matchState_t* ms,
} } } } } } } }
} }
static void ZSTD_fillHashTableForCCtx(ZSTD_matchState_t* ms, static
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
void ZSTD_fillHashTableForCCtx(ZSTD_matchState_t* ms,
const void* const end, const void* const end,
ZSTD_dictTableLoadMethod_e dtlm) ZSTD_dictTableLoadMethod_e dtlm)
{ {
@ -139,8 +143,9 @@ void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
* *
* This is also the work we do at the beginning to enter the loop initially. * This is also the work we do at the beginning to enter the loop initially.
*/ */
FORCE_INLINE_TEMPLATE size_t FORCE_INLINE_TEMPLATE
ZSTD_compressBlock_fast_noDict_generic( ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
size_t ZSTD_compressBlock_fast_noDict_generic(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize, void const* src, size_t srcSize,
U32 const mls, U32 const hasStep) U32 const mls, U32 const hasStep)
@ -456,6 +461,7 @@ size_t ZSTD_compressBlock_fast(
} }
FORCE_INLINE_TEMPLATE FORCE_INLINE_TEMPLATE
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
size_t ZSTD_compressBlock_fast_dictMatchState_generic( size_t ZSTD_compressBlock_fast_dictMatchState_generic(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize, U32 const mls, U32 const hasStep) void const* src, size_t srcSize, U32 const mls, U32 const hasStep)
@ -502,7 +508,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
if (ms->prefetchCDictTables) { if (ms->prefetchCDictTables) {
size_t const hashTableBytes = (((size_t)1) << dictCParams->hashLog) * sizeof(U32); size_t const hashTableBytes = (((size_t)1) << dictCParams->hashLog) * sizeof(U32);
PREFETCH_AREA(dictHashTable, hashTableBytes) PREFETCH_AREA(dictHashTable, hashTableBytes);
} }
/* init */ /* init */
@ -681,7 +687,9 @@ size_t ZSTD_compressBlock_fast_dictMatchState(
} }
static size_t ZSTD_compressBlock_fast_extDict_generic( static
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
size_t ZSTD_compressBlock_fast_extDict_generic(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize, U32 const mls, U32 const hasStep) void const* src, size_t srcSize, U32 const mls, U32 const hasStep)
{ {
@ -12,6 +12,11 @@
#include "zstd_lazy.h" #include "zstd_lazy.h"
#include "../common/bits.h" /* ZSTD_countTrailingZeros64 */ #include "../common/bits.h" /* ZSTD_countTrailingZeros64 */
#if !defined(ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR) \
|| !defined(ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR) \
|| !defined(ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR) \
|| !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR)
#define kLazySkippingStep 8 #define kLazySkippingStep 8
@ -19,8 +24,9 @@
* Binary Tree search * Binary Tree search
***************************************/ ***************************************/
static void static
ZSTD_updateDUBT(ZSTD_matchState_t* ms, ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
void ZSTD_updateDUBT(ZSTD_matchState_t* ms,
const BYTE* ip, const BYTE* iend, const BYTE* ip, const BYTE* iend,
U32 mls) U32 mls)
{ {
@ -63,8 +69,9 @@ ZSTD_updateDUBT(ZSTD_matchState_t* ms,
* sort one already inserted but unsorted position * sort one already inserted but unsorted position
* assumption : curr >= btlow == (curr - btmask) * assumption : curr >= btlow == (curr - btmask)
* doesn't fail */ * doesn't fail */
static void static
ZSTD_insertDUBT1(const ZSTD_matchState_t* ms, ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
void ZSTD_insertDUBT1(const ZSTD_matchState_t* ms,
U32 curr, const BYTE* inputEnd, U32 curr, const BYTE* inputEnd,
U32 nbCompares, U32 btLow, U32 nbCompares, U32 btLow,
const ZSTD_dictMode_e dictMode) const ZSTD_dictMode_e dictMode)
@ -152,8 +159,9 @@ ZSTD_insertDUBT1(const ZSTD_matchState_t* ms,
} }
static size_t static
ZSTD_DUBT_findBetterDictMatch ( ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
size_t ZSTD_DUBT_findBetterDictMatch (
const ZSTD_matchState_t* ms, const ZSTD_matchState_t* ms,
const BYTE* const ip, const BYTE* const iend, const BYTE* const ip, const BYTE* const iend,
size_t* offsetPtr, size_t* offsetPtr,
@ -230,8 +238,9 @@ ZSTD_DUBT_findBetterDictMatch (
} }
static size_t static
ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms, ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
size_t ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
const BYTE* const ip, const BYTE* const iend, const BYTE* const ip, const BYTE* const iend,
size_t* offBasePtr, size_t* offBasePtr,
U32 const mls, U32 const mls,
@ -381,8 +390,9 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
/** ZSTD_BtFindBestMatch() : Tree updater, providing best match */ /** ZSTD_BtFindBestMatch() : Tree updater, providing best match */
FORCE_INLINE_TEMPLATE size_t FORCE_INLINE_TEMPLATE
ZSTD_BtFindBestMatch( ZSTD_matchState_t* ms, ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
size_t ZSTD_BtFindBestMatch( ZSTD_matchState_t* ms,
const BYTE* const ip, const BYTE* const iLimit, const BYTE* const ip, const BYTE* const iLimit,
size_t* offBasePtr, size_t* offBasePtr,
const U32 mls /* template */, const U32 mls /* template */,
@ -617,7 +627,9 @@ size_t ZSTD_dedicatedDictSearch_lazy_search(size_t* offsetPtr, size_t ml, U32 nb
/* Update chains up to ip (excluded) /* Update chains up to ip (excluded)
Assumption : always within prefix (i.e. not within extDict) */ Assumption : always within prefix (i.e. not within extDict) */
FORCE_INLINE_TEMPLATE U32 ZSTD_insertAndFindFirstIndex_internal( FORCE_INLINE_TEMPLATE
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
U32 ZSTD_insertAndFindFirstIndex_internal(
ZSTD_matchState_t* ms, ZSTD_matchState_t* ms,
const ZSTD_compressionParameters* const cParams, const ZSTD_compressionParameters* const cParams,
const BYTE* ip, U32 const mls, U32 const lazySkipping) const BYTE* ip, U32 const mls, U32 const lazySkipping)
@ -651,6 +663,7 @@ U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip) {
/* inlining is important to hardwire a hot branch (template emulation) */ /* inlining is important to hardwire a hot branch (template emulation) */
FORCE_INLINE_TEMPLATE FORCE_INLINE_TEMPLATE
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
size_t ZSTD_HcFindBestMatch( size_t ZSTD_HcFindBestMatch(
ZSTD_matchState_t* ms, ZSTD_matchState_t* ms,
const BYTE* const ip, const BYTE* const iLimit, const BYTE* const ip, const BYTE* const iLimit,
@ -819,7 +832,9 @@ FORCE_INLINE_TEMPLATE void ZSTD_row_prefetch(U32 const* hashTable, BYTE const* t
* Fill up the hash cache starting at idx, prefetching up to ZSTD_ROW_HASH_CACHE_SIZE entries, * Fill up the hash cache starting at idx, prefetching up to ZSTD_ROW_HASH_CACHE_SIZE entries,
* but not beyond iLimit. * but not beyond iLimit.
*/ */
FORCE_INLINE_TEMPLATE void ZSTD_row_fillHashCache(ZSTD_matchState_t* ms, const BYTE* base, FORCE_INLINE_TEMPLATE
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
void ZSTD_row_fillHashCache(ZSTD_matchState_t* ms, const BYTE* base,
U32 const rowLog, U32 const mls, U32 const rowLog, U32 const mls,
U32 idx, const BYTE* const iLimit) U32 idx, const BYTE* const iLimit)
{ {
@ -845,7 +860,9 @@ FORCE_INLINE_TEMPLATE void ZSTD_row_fillHashCache(ZSTD_matchState_t* ms, const B
* Returns the hash of base + idx, and replaces the hash in the hash cache with the byte at * Returns the hash of base + idx, and replaces the hash in the hash cache with the byte at
* base + idx + ZSTD_ROW_HASH_CACHE_SIZE. Also prefetches the appropriate rows from hashTable and tagTable. * base + idx + ZSTD_ROW_HASH_CACHE_SIZE. Also prefetches the appropriate rows from hashTable and tagTable.
*/ */
FORCE_INLINE_TEMPLATE U32 ZSTD_row_nextCachedHash(U32* cache, U32 const* hashTable, FORCE_INLINE_TEMPLATE
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
U32 ZSTD_row_nextCachedHash(U32* cache, U32 const* hashTable,
BYTE const* tagTable, BYTE const* base, BYTE const* tagTable, BYTE const* base,
U32 idx, U32 const hashLog, U32 idx, U32 const hashLog,
U32 const rowLog, U32 const mls, U32 const rowLog, U32 const mls,
@ -863,10 +880,12 @@ FORCE_INLINE_TEMPLATE U32 ZSTD_row_nextCachedHash(U32* cache, U32 const* hashTab
/* ZSTD_row_update_internalImpl(): /* ZSTD_row_update_internalImpl():
* Updates the hash table with positions starting from updateStartIdx until updateEndIdx. * Updates the hash table with positions starting from updateStartIdx until updateEndIdx.
*/ */
FORCE_INLINE_TEMPLATE void ZSTD_row_update_internalImpl(ZSTD_matchState_t* ms, FORCE_INLINE_TEMPLATE
U32 updateStartIdx, U32 const updateEndIdx, ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
U32 const mls, U32 const rowLog, void ZSTD_row_update_internalImpl(ZSTD_matchState_t* ms,
U32 const rowMask, U32 const useCache) U32 updateStartIdx, U32 const updateEndIdx,
U32 const mls, U32 const rowLog,
U32 const rowMask, U32 const useCache)
{ {
U32* const hashTable = ms->hashTable; U32* const hashTable = ms->hashTable;
BYTE* const tagTable = ms->tagTable; BYTE* const tagTable = ms->tagTable;
@ -892,9 +911,11 @@ FORCE_INLINE_TEMPLATE void ZSTD_row_update_internalImpl(ZSTD_matchState_t* ms,
* Inserts the byte at ip into the appropriate position in the hash table, and updates ms->nextToUpdate. * Inserts the byte at ip into the appropriate position in the hash table, and updates ms->nextToUpdate.
* Skips sections of long matches as is necessary. * Skips sections of long matches as is necessary.
*/ */
FORCE_INLINE_TEMPLATE void ZSTD_row_update_internal(ZSTD_matchState_t* ms, const BYTE* ip, FORCE_INLINE_TEMPLATE
U32 const mls, U32 const rowLog, ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
U32 const rowMask, U32 const useCache) void ZSTD_row_update_internal(ZSTD_matchState_t* ms, const BYTE* ip,
U32 const mls, U32 const rowLog,
U32 const rowMask, U32 const useCache)
{ {
U32 idx = ms->nextToUpdate; U32 idx = ms->nextToUpdate;
const BYTE* const base = ms->window.base; const BYTE* const base = ms->window.base;
@ -1102,20 +1123,21 @@ ZSTD_row_getMatchMask(const BYTE* const tagRow, const BYTE tag, const U32 headGr
/* The high-level approach of the SIMD row based match finder is as follows: /* The high-level approach of the SIMD row based match finder is as follows:
* - Figure out where to insert the new entry: * - Figure out where to insert the new entry:
* - Generate a hash from a byte along with an additional 1-byte "short hash". The additional byte is our "tag" * - Generate a hash for current input position and split it into one byte of tag and `rowHashLog` bits of index.
* - The hashTable is effectively split into groups or "rows" of 16 or 32 entries of U32, and the hash determines * - The hash is salted by a value that changes on every context reset, so when the same table is used
* we will avoid collisions that would otherwise slow us down by introducing phantom matches.
* - The hashTable is effectively split into groups or "rows" of 15 or 31 entries of U32, and the index determines
* which row to insert into. * which row to insert into.
* - Determine the correct position within the row to insert the entry into. Each row of 16 or 32 can * - Determine the correct position within the row to insert the entry into. Each row of 15 or 31 can
* be considered as a circular buffer with a "head" index that resides in the tagTable. * be considered as a circular buffer with a "head" index that resides in the tagTable (overall 16 or 32 bytes
* - Also insert the "tag" into the equivalent row and position in the tagTable. * per row).
* - Note: The tagTable has 17 or 33 1-byte entries per row, due to 16 or 32 tags, and 1 "head" entry. * - Use SIMD to efficiently compare the tags in the tagTable to the 1-byte tag calculated for the position and
* The 17 or 33 entry rows are spaced out to occur every 32 or 64 bytes, respectively,
* for alignment/performance reasons, leaving some bytes unused.
* - Use SIMD to efficiently compare the tags in the tagTable to the 1-byte "short hash" and
* generate a bitfield that we can cycle through to check the collisions in the hash table. * generate a bitfield that we can cycle through to check the collisions in the hash table.
* - Pick the longest match. * - Pick the longest match.
* - Insert the tag into the equivalent row and position in the tagTable.
*/ */
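A minimal sketch of the tag/index split described above (helper name and rowMask handling are illustrative; the real code also mixes in the per-context hash salt):

static void exampleSplitHash(U32 hash, U32 rowLog, U32* rowIdx, BYTE* tag)
{
    /* low ZSTD_ROW_HASH_TAG_BITS bits become the 1-byte tag stored in the tagTable */
    *tag    = (BYTE)(hash & ((1u << ZSTD_ROW_HASH_TAG_BITS) - 1));
    /* the remaining bits select the row of the hashTable */
    *rowIdx = (hash >> ZSTD_ROW_HASH_TAG_BITS) & ((1u << rowLog) - 1);
}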
FORCE_INLINE_TEMPLATE FORCE_INLINE_TEMPLATE
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
size_t ZSTD_RowFindBestMatch( size_t ZSTD_RowFindBestMatch(
ZSTD_matchState_t* ms, ZSTD_matchState_t* ms,
const BYTE* const ip, const BYTE* const iLimit, const BYTE* const ip, const BYTE* const iLimit,
@ -1489,8 +1511,9 @@ FORCE_INLINE_TEMPLATE size_t ZSTD_searchMax(
* Common parser - lazy strategy * Common parser - lazy strategy
*********************************/ *********************************/
FORCE_INLINE_TEMPLATE size_t FORCE_INLINE_TEMPLATE
ZSTD_compressBlock_lazy_generic( ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
size_t ZSTD_compressBlock_lazy_generic(
ZSTD_matchState_t* ms, seqStore_t* seqStore, ZSTD_matchState_t* ms, seqStore_t* seqStore,
U32 rep[ZSTD_REP_NUM], U32 rep[ZSTD_REP_NUM],
const void* src, size_t srcSize, const void* src, size_t srcSize,
@ -1754,29 +1777,10 @@ _storeSequence:
/* Return the last literals size */ /* Return the last literals size */
return (size_t)(iend - anchor); return (size_t)(iend - anchor);
} }
#endif /* build exclusions */
size_t ZSTD_compressBlock_btlazy2( #ifndef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_noDict);
}
size_t ZSTD_compressBlock_lazy2(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_noDict);
}
size_t ZSTD_compressBlock_lazy(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_noDict);
}
size_t ZSTD_compressBlock_greedy( size_t ZSTD_compressBlock_greedy(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize) void const* src, size_t srcSize)
@ -1784,27 +1788,6 @@ size_t ZSTD_compressBlock_greedy(
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_noDict); return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_noDict);
} }
size_t ZSTD_compressBlock_btlazy2_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_dictMatchState);
}
size_t ZSTD_compressBlock_lazy2_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dictMatchState);
}
size_t ZSTD_compressBlock_lazy_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dictMatchState);
}
size_t ZSTD_compressBlock_greedy_dictMatchState( size_t ZSTD_compressBlock_greedy_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize) void const* src, size_t srcSize)
@ -1812,21 +1795,6 @@ size_t ZSTD_compressBlock_greedy_dictMatchState(
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dictMatchState); return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dictMatchState);
} }
size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dedicatedDictSearch);
}
size_t ZSTD_compressBlock_lazy_dedicatedDictSearch(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dedicatedDictSearch);
}
size_t ZSTD_compressBlock_greedy_dedicatedDictSearch( size_t ZSTD_compressBlock_greedy_dedicatedDictSearch(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize) void const* src, size_t srcSize)
@ -1834,21 +1802,6 @@ size_t ZSTD_compressBlock_greedy_dedicatedDictSearch(
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dedicatedDictSearch); return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dedicatedDictSearch);
} }
/* Row-based matchfinder */
size_t ZSTD_compressBlock_lazy2_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_noDict);
}
size_t ZSTD_compressBlock_lazy_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_noDict);
}
size_t ZSTD_compressBlock_greedy_row( size_t ZSTD_compressBlock_greedy_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize) void const* src, size_t srcSize)
@ -1856,11 +1809,48 @@ size_t ZSTD_compressBlock_greedy_row(
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_noDict); return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_noDict);
} }
size_t ZSTD_compressBlock_lazy2_dictMatchState_row( size_t ZSTD_compressBlock_greedy_dictMatchState_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize) void const* src, size_t srcSize)
{ {
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_dictMatchState); return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_dictMatchState);
}
size_t ZSTD_compressBlock_greedy_dedicatedDictSearch_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_dedicatedDictSearch);
}
#endif
#ifndef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR
size_t ZSTD_compressBlock_lazy(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_noDict);
}
size_t ZSTD_compressBlock_lazy_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dictMatchState);
}
size_t ZSTD_compressBlock_lazy_dedicatedDictSearch(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dedicatedDictSearch);
}
size_t ZSTD_compressBlock_lazy_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_noDict);
} }
size_t ZSTD_compressBlock_lazy_dictMatchState_row( size_t ZSTD_compressBlock_lazy_dictMatchState_row(
@ -1870,13 +1860,49 @@ size_t ZSTD_compressBlock_lazy_dictMatchState_row(
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_dictMatchState); return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_dictMatchState);
} }
size_t ZSTD_compressBlock_greedy_dictMatchState_row( size_t ZSTD_compressBlock_lazy_dedicatedDictSearch_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize) void const* src, size_t srcSize)
{ {
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_dictMatchState); return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_dedicatedDictSearch);
}
#endif
#ifndef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR
size_t ZSTD_compressBlock_lazy2(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_noDict);
} }
size_t ZSTD_compressBlock_lazy2_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dictMatchState);
}
size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dedicatedDictSearch);
}
size_t ZSTD_compressBlock_lazy2_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_noDict);
}
size_t ZSTD_compressBlock_lazy2_dictMatchState_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_dictMatchState);
}
size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch_row( size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
@ -1884,22 +1910,30 @@ size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch_row(
{ {
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_dedicatedDictSearch); return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_dedicatedDictSearch);
} }
#endif
size_t ZSTD_compressBlock_lazy_dedicatedDictSearch_row( #ifndef ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR
size_t ZSTD_compressBlock_btlazy2(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize) void const* src, size_t srcSize)
{ {
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_dedicatedDictSearch); return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_noDict);
} }
size_t ZSTD_compressBlock_greedy_dedicatedDictSearch_row( size_t ZSTD_compressBlock_btlazy2_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize) void const* src, size_t srcSize)
{ {
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_dedicatedDictSearch); return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_dictMatchState);
} }
#endif
#if !defined(ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR) \
|| !defined(ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR) \
|| !defined(ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR) \
|| !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR)
FORCE_INLINE_TEMPLATE FORCE_INLINE_TEMPLATE
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
size_t ZSTD_compressBlock_lazy_extDict_generic( size_t ZSTD_compressBlock_lazy_extDict_generic(
ZSTD_matchState_t* ms, seqStore_t* seqStore, ZSTD_matchState_t* ms, seqStore_t* seqStore,
U32 rep[ZSTD_REP_NUM], U32 rep[ZSTD_REP_NUM],
@ -2101,8 +2135,9 @@ _storeSequence:
/* Return the last literals size */ /* Return the last literals size */
return (size_t)(iend - anchor); return (size_t)(iend - anchor);
} }
#endif /* build exclusions */
#ifndef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR
size_t ZSTD_compressBlock_greedy_extDict( size_t ZSTD_compressBlock_greedy_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize) void const* src, size_t srcSize)
@ -2110,6 +2145,15 @@ size_t ZSTD_compressBlock_greedy_extDict(
return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0); return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0);
} }
size_t ZSTD_compressBlock_greedy_extDict_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0);
}
#endif
#ifndef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR
size_t ZSTD_compressBlock_lazy_extDict( size_t ZSTD_compressBlock_lazy_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize) void const* src, size_t srcSize)
@ -2118,29 +2162,6 @@ size_t ZSTD_compressBlock_lazy_extDict(
return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1); return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1);
} }
size_t ZSTD_compressBlock_lazy2_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2);
}
size_t ZSTD_compressBlock_btlazy2_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2);
}
size_t ZSTD_compressBlock_greedy_extDict_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0);
}
size_t ZSTD_compressBlock_lazy_extDict_row( size_t ZSTD_compressBlock_lazy_extDict_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize) void const* src, size_t srcSize)
@ -2148,6 +2169,16 @@ size_t ZSTD_compressBlock_lazy_extDict_row(
{ {
return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1); return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1);
} }
#endif
#ifndef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR
size_t ZSTD_compressBlock_lazy2_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2);
}
size_t ZSTD_compressBlock_lazy2_extDict_row( size_t ZSTD_compressBlock_lazy2_extDict_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
@ -2155,3 +2186,14 @@ size_t ZSTD_compressBlock_lazy2_extDict_row(
{ {
return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2); return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2);
} }
#endif
#ifndef ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR
size_t ZSTD_compressBlock_btlazy2_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2);
}
#endif
View File
@ -27,98 +27,173 @@ extern "C" {
#define ZSTD_ROW_HASH_TAG_BITS 8 /* nb bits to use for the tag */ #define ZSTD_ROW_HASH_TAG_BITS 8 /* nb bits to use for the tag */
#if !defined(ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR) \
|| !defined(ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR) \
|| !defined(ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR) \
|| !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR)
U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip); U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip);
void ZSTD_row_update(ZSTD_matchState_t* const ms, const BYTE* ip); void ZSTD_row_update(ZSTD_matchState_t* const ms, const BYTE* ip);
void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const BYTE* const ip); void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const BYTE* const ip);
void ZSTD_preserveUnsortedMark (U32* const table, U32 const size, U32 const reducerValue); /*! used in ZSTD_reduceIndex(). preemptively increase value of ZSTD_DUBT_UNSORTED_MARK */ void ZSTD_preserveUnsortedMark (U32* const table, U32 const size, U32 const reducerValue); /*! used in ZSTD_reduceIndex(). preemptively increase value of ZSTD_DUBT_UNSORTED_MARK */
#endif
size_t ZSTD_compressBlock_btlazy2( #ifndef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy2(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_greedy( size_t ZSTD_compressBlock_greedy(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize); void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy2_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_greedy_row( size_t ZSTD_compressBlock_greedy_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize); void const* src, size_t srcSize);
size_t ZSTD_compressBlock_btlazy2_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy2_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_greedy_dictMatchState( size_t ZSTD_compressBlock_greedy_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize); void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy2_dictMatchState_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy_dictMatchState_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_greedy_dictMatchState_row( size_t ZSTD_compressBlock_greedy_dictMatchState_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize); void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy_dedicatedDictSearch(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_greedy_dedicatedDictSearch( size_t ZSTD_compressBlock_greedy_dedicatedDictSearch(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize); void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy_dedicatedDictSearch_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_greedy_dedicatedDictSearch_row( size_t ZSTD_compressBlock_greedy_dedicatedDictSearch_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize); void const* src, size_t srcSize);
size_t ZSTD_compressBlock_greedy_extDict( size_t ZSTD_compressBlock_greedy_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize); void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy2_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_greedy_extDict_row( size_t ZSTD_compressBlock_greedy_extDict_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize); void const* src, size_t srcSize);
#define ZSTD_COMPRESSBLOCK_GREEDY ZSTD_compressBlock_greedy
#define ZSTD_COMPRESSBLOCK_GREEDY_ROW ZSTD_compressBlock_greedy_row
#define ZSTD_COMPRESSBLOCK_GREEDY_DICTMATCHSTATE ZSTD_compressBlock_greedy_dictMatchState
#define ZSTD_COMPRESSBLOCK_GREEDY_DICTMATCHSTATE_ROW ZSTD_compressBlock_greedy_dictMatchState_row
#define ZSTD_COMPRESSBLOCK_GREEDY_DEDICATEDDICTSEARCH ZSTD_compressBlock_greedy_dedicatedDictSearch
#define ZSTD_COMPRESSBLOCK_GREEDY_DEDICATEDDICTSEARCH_ROW ZSTD_compressBlock_greedy_dedicatedDictSearch_row
#define ZSTD_COMPRESSBLOCK_GREEDY_EXTDICT ZSTD_compressBlock_greedy_extDict
#define ZSTD_COMPRESSBLOCK_GREEDY_EXTDICT_ROW ZSTD_compressBlock_greedy_extDict_row
#else
#define ZSTD_COMPRESSBLOCK_GREEDY NULL
#define ZSTD_COMPRESSBLOCK_GREEDY_ROW NULL
#define ZSTD_COMPRESSBLOCK_GREEDY_DICTMATCHSTATE NULL
#define ZSTD_COMPRESSBLOCK_GREEDY_DICTMATCHSTATE_ROW NULL
#define ZSTD_COMPRESSBLOCK_GREEDY_DEDICATEDDICTSEARCH NULL
#define ZSTD_COMPRESSBLOCK_GREEDY_DEDICATEDDICTSEARCH_ROW NULL
#define ZSTD_COMPRESSBLOCK_GREEDY_EXTDICT NULL
#define ZSTD_COMPRESSBLOCK_GREEDY_EXTDICT_ROW NULL
#endif
#ifndef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR
size_t ZSTD_compressBlock_lazy(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy_dictMatchState_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy_dedicatedDictSearch(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy_dedicatedDictSearch_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy_extDict_row( size_t ZSTD_compressBlock_lazy_extDict_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize); void const* src, size_t srcSize);
#define ZSTD_COMPRESSBLOCK_LAZY ZSTD_compressBlock_lazy
#define ZSTD_COMPRESSBLOCK_LAZY_ROW ZSTD_compressBlock_lazy_row
#define ZSTD_COMPRESSBLOCK_LAZY_DICTMATCHSTATE ZSTD_compressBlock_lazy_dictMatchState
#define ZSTD_COMPRESSBLOCK_LAZY_DICTMATCHSTATE_ROW ZSTD_compressBlock_lazy_dictMatchState_row
#define ZSTD_COMPRESSBLOCK_LAZY_DEDICATEDDICTSEARCH ZSTD_compressBlock_lazy_dedicatedDictSearch
#define ZSTD_COMPRESSBLOCK_LAZY_DEDICATEDDICTSEARCH_ROW ZSTD_compressBlock_lazy_dedicatedDictSearch_row
#define ZSTD_COMPRESSBLOCK_LAZY_EXTDICT ZSTD_compressBlock_lazy_extDict
#define ZSTD_COMPRESSBLOCK_LAZY_EXTDICT_ROW ZSTD_compressBlock_lazy_extDict_row
#else
#define ZSTD_COMPRESSBLOCK_LAZY NULL
#define ZSTD_COMPRESSBLOCK_LAZY_ROW NULL
#define ZSTD_COMPRESSBLOCK_LAZY_DICTMATCHSTATE NULL
#define ZSTD_COMPRESSBLOCK_LAZY_DICTMATCHSTATE_ROW NULL
#define ZSTD_COMPRESSBLOCK_LAZY_DEDICATEDDICTSEARCH NULL
#define ZSTD_COMPRESSBLOCK_LAZY_DEDICATEDDICTSEARCH_ROW NULL
#define ZSTD_COMPRESSBLOCK_LAZY_EXTDICT NULL
#define ZSTD_COMPRESSBLOCK_LAZY_EXTDICT_ROW NULL
#endif
#ifndef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR
size_t ZSTD_compressBlock_lazy2(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy2_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy2_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy2_dictMatchState_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy2_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy2_extDict_row( size_t ZSTD_compressBlock_lazy2_extDict_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize); void const* src, size_t srcSize);
#define ZSTD_COMPRESSBLOCK_LAZY2 ZSTD_compressBlock_lazy2
#define ZSTD_COMPRESSBLOCK_LAZY2_ROW ZSTD_compressBlock_lazy2_row
#define ZSTD_COMPRESSBLOCK_LAZY2_DICTMATCHSTATE ZSTD_compressBlock_lazy2_dictMatchState
#define ZSTD_COMPRESSBLOCK_LAZY2_DICTMATCHSTATE_ROW ZSTD_compressBlock_lazy2_dictMatchState_row
#define ZSTD_COMPRESSBLOCK_LAZY2_DEDICATEDDICTSEARCH ZSTD_compressBlock_lazy2_dedicatedDictSearch
#define ZSTD_COMPRESSBLOCK_LAZY2_DEDICATEDDICTSEARCH_ROW ZSTD_compressBlock_lazy2_dedicatedDictSearch_row
#define ZSTD_COMPRESSBLOCK_LAZY2_EXTDICT ZSTD_compressBlock_lazy2_extDict
#define ZSTD_COMPRESSBLOCK_LAZY2_EXTDICT_ROW ZSTD_compressBlock_lazy2_extDict_row
#else
#define ZSTD_COMPRESSBLOCK_LAZY2 NULL
#define ZSTD_COMPRESSBLOCK_LAZY2_ROW NULL
#define ZSTD_COMPRESSBLOCK_LAZY2_DICTMATCHSTATE NULL
#define ZSTD_COMPRESSBLOCK_LAZY2_DICTMATCHSTATE_ROW NULL
#define ZSTD_COMPRESSBLOCK_LAZY2_DEDICATEDDICTSEARCH NULL
#define ZSTD_COMPRESSBLOCK_LAZY2_DEDICATEDDICTSEARCH_ROW NULL
#define ZSTD_COMPRESSBLOCK_LAZY2_EXTDICT NULL
#define ZSTD_COMPRESSBLOCK_LAZY2_EXTDICT_ROW NULL
#endif
#ifndef ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR
size_t ZSTD_compressBlock_btlazy2(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_btlazy2_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_btlazy2_extDict( size_t ZSTD_compressBlock_btlazy2_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize); void const* src, size_t srcSize);
#define ZSTD_COMPRESSBLOCK_BTLAZY2 ZSTD_compressBlock_btlazy2
#define ZSTD_COMPRESSBLOCK_BTLAZY2_DICTMATCHSTATE ZSTD_compressBlock_btlazy2_dictMatchState
#define ZSTD_COMPRESSBLOCK_BTLAZY2_EXTDICT ZSTD_compressBlock_btlazy2_extDict
#else
#define ZSTD_COMPRESSBLOCK_BTLAZY2 NULL
#define ZSTD_COMPRESSBLOCK_BTLAZY2_DICTMATCHSTATE NULL
#define ZSTD_COMPRESSBLOCK_BTLAZY2_EXTDICT NULL
#endif
#if defined (__cplusplus) #if defined (__cplusplus)
} }
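The ZSTD_COMPRESSBLOCK_* macros above exist so that the strategy dispatch table in zstd_compress.c keeps a fixed shape even when individual block compressors are compiled out: an excluded entry simply resolves to NULL, and the selection logic must never pick an excluded strategy (the cparams adjustment mentioned elsewhere in this commit guarantees that). A minimal sketch of how such a table might be consumed; compressWithStrategy, blockCompressor_f and strategyIdx are hypothetical names, and the real selector in zstd_compress.c is more involved:

    typedef size_t (*blockCompressor_f)(ZSTD_matchState_t* ms, seqStore_t* seqStore,
                                        U32 rep[ZSTD_REP_NUM],
                                        void const* src, size_t srcSize);

    static size_t compressWithStrategy(unsigned strategyIdx, ZSTD_matchState_t* ms,
                                       seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
                                       void const* src, size_t srcSize)
    {
        static const blockCompressor_f table[4] = {
            ZSTD_COMPRESSBLOCK_GREEDY,    /* NULL when ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR is defined */
            ZSTD_COMPRESSBLOCK_LAZY,
            ZSTD_COMPRESSBLOCK_LAZY2,
            ZSTD_COMPRESSBLOCK_BTLAZY2,
        };
        assert(table[strategyIdx] != NULL);   /* an excluded strategy must never be selected */
        return table[strategyIdx](ms, seqStore, rep, src, srcSize);
    }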
View File
@ -246,7 +246,11 @@ static size_t ZSTD_ldm_fillFastTables(ZSTD_matchState_t* ms,
break; break;
case ZSTD_dfast: case ZSTD_dfast:
#ifndef ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR
ZSTD_fillDoubleHashTable(ms, iend, ZSTD_dtlm_fast, ZSTD_tfp_forCCtx); ZSTD_fillDoubleHashTable(ms, iend, ZSTD_dtlm_fast, ZSTD_tfp_forCCtx);
#else
assert(0); /* shouldn't be called: cparams should've been adjusted. */
#endif
break; break;
case ZSTD_greedy: case ZSTD_greedy:
@ -318,7 +322,9 @@ static void ZSTD_ldm_limitTableUpdate(ZSTD_matchState_t* ms, const BYTE* anchor)
} }
} }
static size_t ZSTD_ldm_generateSequences_internal( static
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
size_t ZSTD_ldm_generateSequences_internal(
ldmState_t* ldmState, rawSeqStore_t* rawSeqStore, ldmState_t* ldmState, rawSeqStore_t* rawSeqStore,
ldmParams_t const* params, void const* src, size_t srcSize) ldmParams_t const* params, void const* src, size_t srcSize)
{ {
@ -689,7 +695,6 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
/* maybeSplitSequence updates rawSeqStore->pos */ /* maybeSplitSequence updates rawSeqStore->pos */
rawSeq const sequence = maybeSplitSequence(rawSeqStore, rawSeq const sequence = maybeSplitSequence(rawSeqStore,
(U32)(iend - ip), minMatch); (U32)(iend - ip), minMatch);
int i;
/* End signal */ /* End signal */
if (sequence.offset == 0) if (sequence.offset == 0)
break; break;
@ -702,6 +707,7 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
/* Run the block compressor */ /* Run the block compressor */
DEBUGLOG(5, "pos %u : calling block compressor on segment of size %u", (unsigned)(ip-istart), sequence.litLength); DEBUGLOG(5, "pos %u : calling block compressor on segment of size %u", (unsigned)(ip-istart), sequence.litLength);
{ {
int i;
size_t const newLitLength = size_t const newLitLength =
blockCompressor(ms, seqStore, rep, ip, sequence.litLength); blockCompressor(ms, seqStore, rep, ip, sequence.litLength);
ip += sequence.litLength; ip += sequence.litLength;
View File
@ -12,6 +12,9 @@
#include "hist.h" #include "hist.h"
#include "zstd_opt.h" #include "zstd_opt.h"
#if !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR) \
|| !defined(ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR) \
|| !defined(ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR)
#define ZSTD_LITFREQ_ADD 2 /* scaling factor for litFreq, so that frequencies adapt faster to new stats */ #define ZSTD_LITFREQ_ADD 2 /* scaling factor for litFreq, so that frequencies adapt faster to new stats */
#define ZSTD_MAX_PRICE (1<<30) #define ZSTD_MAX_PRICE (1<<30)
@ -264,6 +267,7 @@ static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength,
const optState_t* const optPtr, const optState_t* const optPtr,
int optLevel) int optLevel)
{ {
DEBUGLOG(8, "ZSTD_rawLiteralsCost (%u literals)", litLength);
if (litLength == 0) return 0; if (litLength == 0) return 0;
if (!ZSTD_compressedLiterals(optPtr)) if (!ZSTD_compressedLiterals(optPtr))
@ -402,9 +406,11 @@ MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length)
/* Update hashTable3 up to ip (excluded) /* Update hashTable3 up to ip (excluded)
Assumption : always within prefix (i.e. not within extDict) */ Assumption : always within prefix (i.e. not within extDict) */
static U32 ZSTD_insertAndFindFirstIndexHash3 (const ZSTD_matchState_t* ms, static
U32* nextToUpdate3, ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
const BYTE* const ip) U32 ZSTD_insertAndFindFirstIndexHash3 (const ZSTD_matchState_t* ms,
U32* nextToUpdate3,
const BYTE* const ip)
{ {
U32* const hashTable3 = ms->hashTable3; U32* const hashTable3 = ms->hashTable3;
U32 const hashLog3 = ms->hashLog3; U32 const hashLog3 = ms->hashLog3;
@ -431,7 +437,9 @@ static U32 ZSTD_insertAndFindFirstIndexHash3 (const ZSTD_matchState_t* ms,
* @param ip assumed <= iend-8 . * @param ip assumed <= iend-8 .
* @param target The target of ZSTD_updateTree_internal() - we are filling to this position * @param target The target of ZSTD_updateTree_internal() - we are filling to this position
* @return : nb of positions added */ * @return : nb of positions added */
static U32 ZSTD_insertBt1( static
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
U32 ZSTD_insertBt1(
const ZSTD_matchState_t* ms, const ZSTD_matchState_t* ms,
const BYTE* const ip, const BYTE* const iend, const BYTE* const ip, const BYTE* const iend,
U32 const target, U32 const target,
@ -550,6 +558,7 @@ static U32 ZSTD_insertBt1(
} }
FORCE_INLINE_TEMPLATE FORCE_INLINE_TEMPLATE
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
void ZSTD_updateTree_internal( void ZSTD_updateTree_internal(
ZSTD_matchState_t* ms, ZSTD_matchState_t* ms,
const BYTE* const ip, const BYTE* const iend, const BYTE* const ip, const BYTE* const iend,
@ -558,7 +567,7 @@ void ZSTD_updateTree_internal(
const BYTE* const base = ms->window.base; const BYTE* const base = ms->window.base;
U32 const target = (U32)(ip - base); U32 const target = (U32)(ip - base);
U32 idx = ms->nextToUpdate; U32 idx = ms->nextToUpdate;
DEBUGLOG(6, "ZSTD_updateTree_internal, from %u to %u (dictMode:%u)", DEBUGLOG(7, "ZSTD_updateTree_internal, from %u to %u (dictMode:%u)",
idx, target, dictMode); idx, target, dictMode);
while(idx < target) { while(idx < target) {
@ -575,7 +584,9 @@ void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend) {
ZSTD_updateTree_internal(ms, ip, iend, ms->cParams.minMatch, ZSTD_noDict); ZSTD_updateTree_internal(ms, ip, iend, ms->cParams.minMatch, ZSTD_noDict);
} }
FORCE_INLINE_TEMPLATE U32 FORCE_INLINE_TEMPLATE
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
U32
ZSTD_insertBtAndGetAllMatches ( ZSTD_insertBtAndGetAllMatches (
ZSTD_match_t* matches, /* store result (found matches) in this table (presumed large enough) */ ZSTD_match_t* matches, /* store result (found matches) in this table (presumed large enough) */
ZSTD_matchState_t* ms, ZSTD_matchState_t* ms,
@ -816,7 +827,9 @@ typedef U32 (*ZSTD_getAllMatchesFn)(
U32 const ll0, U32 const ll0,
U32 const lengthToBeat); U32 const lengthToBeat);
FORCE_INLINE_TEMPLATE U32 ZSTD_btGetAllMatches_internal( FORCE_INLINE_TEMPLATE
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
U32 ZSTD_btGetAllMatches_internal(
ZSTD_match_t* matches, ZSTD_match_t* matches,
ZSTD_matchState_t* ms, ZSTD_matchState_t* ms,
U32* nextToUpdate3, U32* nextToUpdate3,
@ -1035,11 +1048,6 @@ ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm,
* Optimal parser * Optimal parser
*********************************/ *********************************/
static U32 ZSTD_totalLen(ZSTD_optimal_t sol)
{
return sol.litlen + sol.mlen;
}
#if 0 /* debug */ #if 0 /* debug */
static void static void
@ -1057,7 +1065,13 @@ listStats(const U32* table, int lastEltID)
#endif #endif
FORCE_INLINE_TEMPLATE size_t #define LIT_PRICE(_p) (int)ZSTD_rawLiteralsCost(_p, 1, optStatePtr, optLevel)
#define LL_PRICE(_l) (int)ZSTD_litLengthPrice(_l, optStatePtr, optLevel)
#define LL_INCPRICE(_l) (LL_PRICE(_l) - LL_PRICE(_l-1))
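/* Note: LL_INCPRICE(_l) is the marginal cost of growing a literal run from _l-1 to _l literals.
 * It can be negative when the longer literal length happens to be cheaper to encode,
 * which is exactly what the (LL_INCPRICE(1) < 0) test further down relies on. */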
FORCE_INLINE_TEMPLATE
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
size_t
ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
seqStore_t* seqStore, seqStore_t* seqStore,
U32 rep[ZSTD_REP_NUM], U32 rep[ZSTD_REP_NUM],
@ -1083,10 +1097,10 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
ZSTD_optimal_t* const opt = optStatePtr->priceTable; ZSTD_optimal_t* const opt = optStatePtr->priceTable;
ZSTD_match_t* const matches = optStatePtr->matchTable; ZSTD_match_t* const matches = optStatePtr->matchTable;
ZSTD_optimal_t lastSequence; ZSTD_optimal_t lastStretch;
ZSTD_optLdm_t optLdm; ZSTD_optLdm_t optLdm;
ZSTD_memset(&lastSequence, 0, sizeof(ZSTD_optimal_t)); ZSTD_memset(&lastStretch, 0, sizeof(ZSTD_optimal_t));
optLdm.seqStore = ms->ldmSeqStore ? *ms->ldmSeqStore : kNullRawSeqStore; optLdm.seqStore = ms->ldmSeqStore ? *ms->ldmSeqStore : kNullRawSeqStore;
optLdm.endPosInBlock = optLdm.startPosInBlock = optLdm.offset = 0; optLdm.endPosInBlock = optLdm.startPosInBlock = optLdm.offset = 0;
@ -1108,19 +1122,31 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
U32 const ll0 = !litlen; U32 const ll0 = !litlen;
U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, ip, iend, rep, ll0, minMatch); U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, ip, iend, rep, ll0, minMatch);
ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches, ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
(U32)(ip-istart), (U32)(iend - ip)); (U32)(ip-istart), (U32)(iend-ip));
if (!nbMatches) { ip++; continue; } if (!nbMatches) {
DEBUGLOG(8, "no match found at cPos %u", (unsigned)(ip-istart));
ip++;
continue;
}
/* Match found: let's store this solution, and eventually find more candidates.
* During this forward pass, @opt is used to store stretches,
* defined as "a match followed by N literals".
* Note how this is different from a Sequence, which is "N literals followed by a match".
* Storing stretches allows us to store different match predecessors
* for each literal position part of a literals run. */
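/* Illustration with hypothetical values: an entry opt[10] with mlen==6 and litlen==2
 * describes a stretch whose match of length 6 ends at rPos 8, followed by 2 literals
 * that reach rPos 10. */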
/* initialize opt[0] */ /* initialize opt[0] */
{ U32 i ; for (i=0; i<ZSTD_REP_NUM; i++) opt[0].rep[i] = rep[i]; } opt[0].mlen = 0; /* there are only literals so far */
opt[0].mlen = 0; /* means is_a_literal */
opt[0].litlen = litlen; opt[0].litlen = litlen;
/* We don't need to include the actual price of the literals because /* No need to include the actual price of the literals before the first match
* it is static for the duration of the forward pass, and is included * because it is static for the duration of the forward pass, and is included
* in every price. We include the literal length to avoid negative * in every subsequent price. But, we include the literal length because
* prices when we subtract the previous literal length. * the cost variation of litlen depends on the value of litlen.
*/ */
opt[0].price = (int)ZSTD_litLengthPrice(litlen, optStatePtr, optLevel); opt[0].price = LL_PRICE(litlen);
ZSTD_STATIC_ASSERT(sizeof(opt[0].rep[0]) == sizeof(rep[0]));
ZSTD_memcpy(&opt[0].rep, rep, sizeof(opt[0].rep));
/* large match -> immediate encoding */ /* large match -> immediate encoding */
{ U32 const maxML = matches[nbMatches-1].len; { U32 const maxML = matches[nbMatches-1].len;
@ -1129,82 +1155,106 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
nbMatches, maxML, maxOffBase, (U32)(ip-prefixStart)); nbMatches, maxML, maxOffBase, (U32)(ip-prefixStart));
if (maxML > sufficient_len) { if (maxML > sufficient_len) {
lastSequence.litlen = litlen; lastStretch.litlen = 0;
lastSequence.mlen = maxML; lastStretch.mlen = maxML;
lastSequence.off = maxOffBase; lastStretch.off = maxOffBase;
DEBUGLOG(6, "large match (%u>%u), immediate encoding", DEBUGLOG(6, "large match (%u>%u) => immediate encoding",
maxML, sufficient_len); maxML, sufficient_len);
cur = 0; cur = 0;
last_pos = ZSTD_totalLen(lastSequence); last_pos = maxML;
goto _shortestPath; goto _shortestPath;
} } } }
/* set prices for first matches starting position == 0 */ /* set prices for first matches starting position == 0 */
assert(opt[0].price >= 0); assert(opt[0].price >= 0);
{ U32 const literalsPrice = (U32)opt[0].price + ZSTD_litLengthPrice(0, optStatePtr, optLevel); { U32 pos;
U32 pos;
U32 matchNb; U32 matchNb;
for (pos = 1; pos < minMatch; pos++) { for (pos = 1; pos < minMatch; pos++) {
opt[pos].price = ZSTD_MAX_PRICE; /* mlen, litlen and price will be fixed during forward scanning */ opt[pos].price = ZSTD_MAX_PRICE;
opt[pos].mlen = 0;
opt[pos].litlen = litlen + pos;
} }
for (matchNb = 0; matchNb < nbMatches; matchNb++) { for (matchNb = 0; matchNb < nbMatches; matchNb++) {
U32 const offBase = matches[matchNb].off; U32 const offBase = matches[matchNb].off;
U32 const end = matches[matchNb].len; U32 const end = matches[matchNb].len;
for ( ; pos <= end ; pos++ ) { for ( ; pos <= end ; pos++ ) {
U32 const matchPrice = ZSTD_getMatchPrice(offBase, pos, optStatePtr, optLevel); int const matchPrice = (int)ZSTD_getMatchPrice(offBase, pos, optStatePtr, optLevel);
U32 const sequencePrice = literalsPrice + matchPrice; int const sequencePrice = opt[0].price + matchPrice;
DEBUGLOG(7, "rPos:%u => set initial price : %.2f", DEBUGLOG(7, "rPos:%u => set initial price : %.2f",
pos, ZSTD_fCost((int)sequencePrice)); pos, ZSTD_fCost(sequencePrice));
opt[pos].mlen = pos; opt[pos].mlen = pos;
opt[pos].off = offBase; opt[pos].off = offBase;
opt[pos].litlen = litlen; opt[pos].litlen = 0; /* end of match */
opt[pos].price = (int)sequencePrice; opt[pos].price = sequencePrice + LL_PRICE(0);
} } }
}
last_pos = pos-1; last_pos = pos-1;
opt[pos].price = ZSTD_MAX_PRICE;
} }
} }
/* check further positions */ /* check further positions */
for (cur = 1; cur <= last_pos; cur++) { for (cur = 1; cur <= last_pos; cur++) {
const BYTE* const inr = ip + cur; const BYTE* const inr = ip + cur;
assert(cur < ZSTD_OPT_NUM); assert(cur <= ZSTD_OPT_NUM);
DEBUGLOG(7, "cPos:%zi==rPos:%u", inr-istart, cur) DEBUGLOG(7, "cPos:%zi==rPos:%u", inr-istart, cur);
/* Fix current position with one literal if cheaper */ /* Fix current position with one literal if cheaper */
{ U32 const litlen = (opt[cur-1].mlen == 0) ? opt[cur-1].litlen + 1 : 1; { U32 const litlen = opt[cur-1].litlen + 1;
int const price = opt[cur-1].price int const price = opt[cur-1].price
+ (int)ZSTD_rawLiteralsCost(ip+cur-1, 1, optStatePtr, optLevel) + LIT_PRICE(ip+cur-1)
+ (int)ZSTD_litLengthPrice(litlen, optStatePtr, optLevel) + LL_INCPRICE(litlen);
- (int)ZSTD_litLengthPrice(litlen-1, optStatePtr, optLevel);
assert(price < 1000000000); /* overflow check */ assert(price < 1000000000); /* overflow check */
if (price <= opt[cur].price) { if (price <= opt[cur].price) {
ZSTD_optimal_t const prevMatch = opt[cur];
DEBUGLOG(7, "cPos:%zi==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)", DEBUGLOG(7, "cPos:%zi==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)",
inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), litlen, inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), litlen,
opt[cur-1].rep[0], opt[cur-1].rep[1], opt[cur-1].rep[2]); opt[cur-1].rep[0], opt[cur-1].rep[1], opt[cur-1].rep[2]);
opt[cur].mlen = 0; opt[cur] = opt[cur-1];
opt[cur].off = 0;
opt[cur].litlen = litlen; opt[cur].litlen = litlen;
opt[cur].price = price; opt[cur].price = price;
if ( (optLevel >= 1) /* additional check only for higher modes */
&& (prevMatch.litlen == 0) /* replace a match */
&& (LL_INCPRICE(1) < 0) /* ll1 is cheaper than ll0 */
&& LIKELY(ip + cur < iend)
) {
/* check next position, in case it would be cheaper */
int with1literal = prevMatch.price + LIT_PRICE(ip+cur) + LL_INCPRICE(1);
int withMoreLiterals = price + LIT_PRICE(ip+cur) + LL_INCPRICE(litlen+1);
DEBUGLOG(7, "then at next rPos %u : match+1lit %.2f vs %ulits %.2f",
cur+1, ZSTD_fCost(with1literal), litlen+1, ZSTD_fCost(withMoreLiterals));
if ( (with1literal < withMoreLiterals)
&& (with1literal < opt[cur+1].price) ) {
/* update offset history - before it disappears */
U32 const prev = cur - prevMatch.mlen;
repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, prevMatch.off, opt[prev].litlen==0);
assert(cur >= prevMatch.mlen);
DEBUGLOG(7, "==> match+1lit is cheaper (%.2f < %.2f) (hist:%u,%u,%u) !",
ZSTD_fCost(with1literal), ZSTD_fCost(withMoreLiterals),
newReps.rep[0], newReps.rep[1], newReps.rep[2] );
opt[cur+1] = prevMatch; /* mlen & offbase */
ZSTD_memcpy(opt[cur+1].rep, &newReps, sizeof(repcodes_t));
opt[cur+1].litlen = 1;
opt[cur+1].price = with1literal;
if (last_pos < cur+1) last_pos = cur+1;
}
}
} else { } else {
DEBUGLOG(7, "cPos:%zi==rPos:%u : literal would cost more (%.2f>%.2f) (hist:%u,%u,%u)", DEBUGLOG(7, "cPos:%zi==rPos:%u : literal would cost more (%.2f>%.2f)",
inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price));
opt[cur].rep[0], opt[cur].rep[1], opt[cur].rep[2]);
} }
} }
/* Set the repcodes of the current position. We must do it here /* Offset history is not updated during match comparison.
* because we rely on the repcodes of the 2nd to last sequence being * Do it here, now that the match is selected and confirmed.
* correct to set the next chunks repcodes during the backward
* traversal.
*/ */
ZSTD_STATIC_ASSERT(sizeof(opt[cur].rep) == sizeof(repcodes_t)); ZSTD_STATIC_ASSERT(sizeof(opt[cur].rep) == sizeof(repcodes_t));
assert(cur >= opt[cur].mlen); assert(cur >= opt[cur].mlen);
if (opt[cur].mlen != 0) { if (opt[cur].litlen == 0) {
/* just finished a match => alter offset history */
U32 const prev = cur - opt[cur].mlen; U32 const prev = cur - opt[cur].mlen;
repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, opt[cur].off, opt[cur].litlen==0); repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, opt[cur].off, opt[prev].litlen==0);
ZSTD_memcpy(opt[cur].rep, &newReps, sizeof(repcodes_t)); ZSTD_memcpy(opt[cur].rep, &newReps, sizeof(repcodes_t));
} else {
ZSTD_memcpy(opt[cur].rep, opt[cur - 1].rep, sizeof(repcodes_t));
} }
/* last match must start at a minimum distance of 8 from oend */ /* last match must start at a minimum distance of 8 from oend */
@ -1214,15 +1264,14 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
if ( (optLevel==0) /*static_test*/ if ( (optLevel==0) /*static_test*/
&& (opt[cur+1].price <= opt[cur].price + (BITCOST_MULTIPLIER/2)) ) { && (opt[cur+1].price <= opt[cur].price + (BITCOST_MULTIPLIER/2)) ) {
DEBUGLOG(7, "move to next rPos:%u : price is <=", cur+1); DEBUGLOG(7, "skip current position : next rPos(%u) price is cheaper", cur+1);
continue; /* skip unpromising positions; about ~+6% speed, -0.01 ratio */ continue; /* skip unpromising positions; about ~+6% speed, -0.01 ratio */
} }
assert(opt[cur].price >= 0); assert(opt[cur].price >= 0);
{ U32 const ll0 = (opt[cur].mlen != 0); { U32 const ll0 = (opt[cur].litlen == 0);
U32 const litlen = (opt[cur].mlen == 0) ? opt[cur].litlen : 0; int const previousPrice = opt[cur].price;
U32 const previousPrice = (U32)opt[cur].price; int const basePrice = previousPrice + LL_PRICE(0);
U32 const basePrice = previousPrice + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, inr, iend, opt[cur].rep, ll0, minMatch); U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, inr, iend, opt[cur].rep, ll0, minMatch);
U32 matchNb; U32 matchNb;
@ -1234,18 +1283,17 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
continue; continue;
} }
{ U32 const maxML = matches[nbMatches-1].len; { U32 const longestML = matches[nbMatches-1].len;
DEBUGLOG(7, "cPos:%zi==rPos:%u, found %u matches, of maxLength=%u", DEBUGLOG(7, "cPos:%zi==rPos:%u, found %u matches, of longest ML=%u",
inr-istart, cur, nbMatches, maxML); inr-istart, cur, nbMatches, longestML);
if ( (maxML > sufficient_len) if ( (longestML > sufficient_len)
|| (cur + maxML >= ZSTD_OPT_NUM) ) { || (cur + longestML >= ZSTD_OPT_NUM)
lastSequence.mlen = maxML; || (ip + cur + longestML >= iend) ) {
lastSequence.off = matches[nbMatches-1].off; lastStretch.mlen = longestML;
lastSequence.litlen = litlen; lastStretch.off = matches[nbMatches-1].off;
cur -= (opt[cur].mlen==0) ? opt[cur].litlen : 0; /* last sequence is actually only literals, fix cur to last match - note : may underflow, in which case, it's first sequence, and it's okay */ lastStretch.litlen = 0;
last_pos = cur + ZSTD_totalLen(lastSequence); last_pos = cur + longestML;
if (cur > ZSTD_OPT_NUM) cur = 0; /* underflow => first match */
goto _shortestPath; goto _shortestPath;
} } } }
@ -1257,19 +1305,24 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
U32 mlen; U32 mlen;
DEBUGLOG(7, "testing match %u => offBase=%4u, mlen=%2u, llen=%2u", DEBUGLOG(7, "testing match %u => offBase=%4u, mlen=%2u, llen=%2u",
matchNb, matches[matchNb].off, lastML, litlen); matchNb, matches[matchNb].off, lastML, opt[cur].litlen);
for (mlen = lastML; mlen >= startML; mlen--) { /* scan downward */ for (mlen = lastML; mlen >= startML; mlen--) { /* scan downward */
U32 const pos = cur + mlen; U32 const pos = cur + mlen;
int const price = (int)basePrice + (int)ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel); int const price = basePrice + (int)ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel);
if ((pos > last_pos) || (price < opt[pos].price)) { if ((pos > last_pos) || (price < opt[pos].price)) {
DEBUGLOG(7, "rPos:%u (ml=%2u) => new better price (%.2f<%.2f)", DEBUGLOG(7, "rPos:%u (ml=%2u) => new better price (%.2f<%.2f)",
pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price)); pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price));
while (last_pos < pos) { opt[last_pos+1].price = ZSTD_MAX_PRICE; last_pos++; } /* fill empty positions */ while (last_pos < pos) {
/* fill empty positions, for future comparisons */
last_pos++;
opt[last_pos].price = ZSTD_MAX_PRICE;
opt[last_pos].litlen = !0; /* just needs to be != 0, to mean "not an end of match" */
}
opt[pos].mlen = mlen; opt[pos].mlen = mlen;
opt[pos].off = offset; opt[pos].off = offset;
opt[pos].litlen = litlen; opt[pos].litlen = 0;
opt[pos].price = price; opt[pos].price = price;
} else { } else {
DEBUGLOG(7, "rPos:%u (ml=%2u) => new price is worse (%.2f>=%.2f)", DEBUGLOG(7, "rPos:%u (ml=%2u) => new price is worse (%.2f>=%.2f)",
@ -1277,47 +1330,81 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
if (optLevel==0) break; /* early update abort; gets ~+10% speed for about -0.01 ratio loss */ if (optLevel==0) break; /* early update abort; gets ~+10% speed for about -0.01 ratio loss */
} }
} } } } } }
opt[last_pos+1].price = ZSTD_MAX_PRICE;
} /* for (cur = 1; cur <= last_pos; cur++) */ } /* for (cur = 1; cur <= last_pos; cur++) */
lastSequence = opt[last_pos]; lastStretch = opt[last_pos];
cur = last_pos > ZSTD_totalLen(lastSequence) ? last_pos - ZSTD_totalLen(lastSequence) : 0; /* single sequence, and it starts before `ip` */ assert(cur >= lastStretch.mlen);
assert(cur < ZSTD_OPT_NUM); /* control overflow*/ cur = last_pos - lastStretch.mlen;
_shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */ _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
assert(opt[0].mlen == 0); assert(opt[0].mlen == 0);
assert(last_pos >= lastStretch.mlen);
assert(cur == last_pos - lastStretch.mlen);
/* Set the next chunk's repcodes based on the repcodes of the beginning if (lastStretch.mlen==0) {
* of the last match, and the last sequence. This avoids us having to /* no solution : all matches have been converted into literals */
* update them while traversing the sequences. assert(lastStretch.litlen == (ip - anchor) + last_pos);
*/ ip += last_pos;
if (lastSequence.mlen != 0) { continue;
repcodes_t const reps = ZSTD_newRep(opt[cur].rep, lastSequence.off, lastSequence.litlen==0); }
ZSTD_memcpy(rep, &reps, sizeof(reps)); assert(lastStretch.off > 0);
/* Update offset history */
if (lastStretch.litlen == 0) {
/* finishing on a match : update offset history */
repcodes_t const reps = ZSTD_newRep(opt[cur].rep, lastStretch.off, opt[cur].litlen==0);
ZSTD_memcpy(rep, &reps, sizeof(repcodes_t));
} else { } else {
ZSTD_memcpy(rep, opt[cur].rep, sizeof(repcodes_t)); ZSTD_memcpy(rep, lastStretch.rep, sizeof(repcodes_t));
assert(cur >= lastStretch.litlen);
cur -= lastStretch.litlen;
} }
{ U32 const storeEnd = cur + 1; /* Let's write the shortest path solution.
* It is stored in @opt in reverse order,
* starting from @storeEnd (==cur+2),
* effectively partially overwriting @opt.
* Content is changed too:
* - So far, @opt stored stretches, aka a match followed by literals
* - Now, it will store sequences, aka literals followed by a match
*/
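/* Concretely, during the backward walk below each stored entry keeps its own mlen/off
 * but takes its litlen from the stretch that precedes it, which is what turns
 * match-then-literals stretches into literals-then-match sequences
 * (hence the "litlen will be fixed" notes). */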
{ U32 const storeEnd = cur + 2;
U32 storeStart = storeEnd; U32 storeStart = storeEnd;
U32 seqPos = cur; U32 stretchPos = cur;
DEBUGLOG(6, "start reverse traversal (last_pos:%u, cur:%u)", DEBUGLOG(6, "start reverse traversal (last_pos:%u, cur:%u)",
last_pos, cur); (void)last_pos; last_pos, cur); (void)last_pos;
assert(storeEnd < ZSTD_OPT_NUM); assert(storeEnd < ZSTD_OPT_SIZE);
DEBUGLOG(6, "last sequence copied into pos=%u (llen=%u,mlen=%u,ofc=%u)", DEBUGLOG(6, "last stretch copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
storeEnd, lastSequence.litlen, lastSequence.mlen, lastSequence.off); storeEnd, lastStretch.litlen, lastStretch.mlen, lastStretch.off);
opt[storeEnd] = lastSequence; if (lastStretch.litlen > 0) {
while (seqPos > 0) { /* last "sequence" is unfinished: just a bunch of literals */
U32 const backDist = ZSTD_totalLen(opt[seqPos]); opt[storeEnd].litlen = lastStretch.litlen;
opt[storeEnd].mlen = 0;
storeStart = storeEnd-1;
opt[storeStart] = lastStretch;
} else {
opt[storeEnd] = lastStretch; /* note: litlen will be fixed */
storeStart = storeEnd;
}
while (1) {
ZSTD_optimal_t nextStretch = opt[stretchPos];
opt[storeStart].litlen = nextStretch.litlen;
DEBUGLOG(6, "selected sequence (llen=%u,mlen=%u,ofc=%u)",
opt[storeStart].litlen, opt[storeStart].mlen, opt[storeStart].off);
if (nextStretch.mlen == 0) {
/* reaching beginning of segment */
break;
}
storeStart--; storeStart--;
DEBUGLOG(6, "sequence from rPos=%u copied into pos=%u (llen=%u,mlen=%u,ofc=%u)", opt[storeStart] = nextStretch; /* note: litlen will be fixed */
seqPos, storeStart, opt[seqPos].litlen, opt[seqPos].mlen, opt[seqPos].off); assert(nextStretch.litlen + nextStretch.mlen <= stretchPos);
opt[storeStart] = opt[seqPos]; stretchPos -= nextStretch.litlen + nextStretch.mlen;
seqPos = (seqPos > backDist) ? seqPos - backDist : 0;
} }
/* save sequences */ /* save sequences */
DEBUGLOG(6, "sending selected sequences into seqStore") DEBUGLOG(6, "sending selected sequences into seqStore");
{ U32 storePos; { U32 storePos;
for (storePos=storeStart; storePos <= storeEnd; storePos++) { for (storePos=storeStart; storePos <= storeEnd; storePos++) {
U32 const llen = opt[storePos].litlen; U32 const llen = opt[storePos].litlen;
@ -1339,6 +1426,9 @@ _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
anchor += advance; anchor += advance;
ip = anchor; ip = anchor;
} } } }
DEBUGLOG(7, "new offset history : %u, %u, %u", rep[0], rep[1], rep[2]);
/* update all costs */
ZSTD_setBasePrices(optStatePtr, optLevel); ZSTD_setBasePrices(optStatePtr, optLevel);
} }
} /* while (ip < ilimit) */ } /* while (ip < ilimit) */
@ -1346,21 +1436,27 @@ _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
/* Return the last literals size */ /* Return the last literals size */
return (size_t)(iend - anchor); return (size_t)(iend - anchor);
} }
#endif /* build exclusions */
#ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR
static size_t ZSTD_compressBlock_opt0( static size_t ZSTD_compressBlock_opt0(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode) const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
{ {
return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /* optLevel */, dictMode); return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /* optLevel */, dictMode);
} }
#endif
#ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR
static size_t ZSTD_compressBlock_opt2( static size_t ZSTD_compressBlock_opt2(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode) const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
{ {
return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /* optLevel */, dictMode); return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /* optLevel */, dictMode);
} }
#endif
#ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR
size_t ZSTD_compressBlock_btopt( size_t ZSTD_compressBlock_btopt(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
const void* src, size_t srcSize) const void* src, size_t srcSize)
@ -1368,20 +1464,23 @@ size_t ZSTD_compressBlock_btopt(
DEBUGLOG(5, "ZSTD_compressBlock_btopt"); DEBUGLOG(5, "ZSTD_compressBlock_btopt");
return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_noDict); return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
} }
#endif
#ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR
/* ZSTD_initStats_ultra(): /* ZSTD_initStats_ultra():
* make a first compression pass, just to seed stats with more accurate starting values. * make a first compression pass, just to seed stats with more accurate starting values.
* only works on first block, with no dictionary and no ldm. * only works on first block, with no dictionary and no ldm.
* this function cannot error out, its narrow contract must be respected. * this function cannot error out, its narrow contract must be respected.
*/ */
static void static
ZSTD_initStats_ultra(ZSTD_matchState_t* ms, ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
seqStore_t* seqStore, void ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
U32 rep[ZSTD_REP_NUM], seqStore_t* seqStore,
const void* src, size_t srcSize) U32 rep[ZSTD_REP_NUM],
const void* src, size_t srcSize)
{ {
U32 tmpRep[ZSTD_REP_NUM]; /* updated rep codes will sink here */ U32 tmpRep[ZSTD_REP_NUM]; /* updated rep codes will sink here */
ZSTD_memcpy(tmpRep, rep, sizeof(tmpRep)); ZSTD_memcpy(tmpRep, rep, sizeof(tmpRep));
@ -1425,7 +1524,7 @@ size_t ZSTD_compressBlock_btultra2(
* Consequently, this can only work if no data has been previously loaded in tables, * Consequently, this can only work if no data has been previously loaded in tables,
* aka, no dictionary, no prefix, no ldm preprocessing. * aka, no dictionary, no prefix, no ldm preprocessing.
* The compression ratio gain is generally small (~0.5% on first block), * The compression ratio gain is generally small (~0.5% on first block),
** the cost is 2x cpu time on first block. */ * the cost is 2x cpu time on first block. */
assert(srcSize <= ZSTD_BLOCKSIZE_MAX); assert(srcSize <= ZSTD_BLOCKSIZE_MAX);
if ( (ms->opt.litLengthSum==0) /* first block */ if ( (ms->opt.litLengthSum==0) /* first block */
&& (seqStore->sequences == seqStore->sequencesStart) /* no ldm */ && (seqStore->sequences == seqStore->sequencesStart) /* no ldm */
@ -1438,7 +1537,9 @@ size_t ZSTD_compressBlock_btultra2(
return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict); return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
} }
#endif
#ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR
size_t ZSTD_compressBlock_btopt_dictMatchState( size_t ZSTD_compressBlock_btopt_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
const void* src, size_t srcSize) const void* src, size_t srcSize)
@ -1446,19 +1547,21 @@ size_t ZSTD_compressBlock_btopt_dictMatchState(
return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState); return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
} }
size_t ZSTD_compressBlock_btultra_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
const void* src, size_t srcSize)
{
return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
}
size_t ZSTD_compressBlock_btopt_extDict( size_t ZSTD_compressBlock_btopt_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
const void* src, size_t srcSize) const void* src, size_t srcSize)
{ {
return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_extDict); return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
} }
#endif
#ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR
size_t ZSTD_compressBlock_btultra_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
const void* src, size_t srcSize)
{
return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
}
size_t ZSTD_compressBlock_btultra_extDict( size_t ZSTD_compressBlock_btultra_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
@ -1466,6 +1569,7 @@ size_t ZSTD_compressBlock_btultra_extDict(
{ {
return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_extDict); return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
} }
#endif
/* note : no btultra2 variant for extDict nor dictMatchState, /* note : no btultra2 variant for extDict nor dictMatchState,
* because btultra2 is not meant to work with dictionaries * because btultra2 is not meant to work with dictionaries

View File
#include "zstd_compress_internal.h" #include "zstd_compress_internal.h"
#if !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR) \
|| !defined(ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR) \
|| !defined(ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR)
/* used in ZSTD_loadDictionaryContent() */ /* used in ZSTD_loadDictionaryContent() */
void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend); void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend);
#endif
#ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR
size_t ZSTD_compressBlock_btopt( size_t ZSTD_compressBlock_btopt(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize); void const* src, size_t srcSize);
size_t ZSTD_compressBlock_btultra(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_btultra2(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_btopt_dictMatchState( size_t ZSTD_compressBlock_btopt_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize); void const* src, size_t srcSize);
size_t ZSTD_compressBlock_btultra_dictMatchState( size_t ZSTD_compressBlock_btopt_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize); void const* src, size_t srcSize);
size_t ZSTD_compressBlock_btopt_extDict( #define ZSTD_COMPRESSBLOCK_BTOPT ZSTD_compressBlock_btopt
#define ZSTD_COMPRESSBLOCK_BTOPT_DICTMATCHSTATE ZSTD_compressBlock_btopt_dictMatchState
#define ZSTD_COMPRESSBLOCK_BTOPT_EXTDICT ZSTD_compressBlock_btopt_extDict
#else
#define ZSTD_COMPRESSBLOCK_BTOPT NULL
#define ZSTD_COMPRESSBLOCK_BTOPT_DICTMATCHSTATE NULL
#define ZSTD_COMPRESSBLOCK_BTOPT_EXTDICT NULL
#endif
#ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR
size_t ZSTD_compressBlock_btultra(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_btultra_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize); void const* src, size_t srcSize);
size_t ZSTD_compressBlock_btultra_extDict( size_t ZSTD_compressBlock_btultra_extDict(
@ -48,6 +58,20 @@ size_t ZSTD_compressBlock_btultra_extDict(
/* note : no btultra2 variant for extDict nor dictMatchState, /* note : no btultra2 variant for extDict nor dictMatchState,
* because btultra2 is not meant to work with dictionaries * because btultra2 is not meant to work with dictionaries
* and is only specific for the first block (no prefix) */ * and is only specific for the first block (no prefix) */
size_t ZSTD_compressBlock_btultra2(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
#define ZSTD_COMPRESSBLOCK_BTULTRA ZSTD_compressBlock_btultra
#define ZSTD_COMPRESSBLOCK_BTULTRA_DICTMATCHSTATE ZSTD_compressBlock_btultra_dictMatchState
#define ZSTD_COMPRESSBLOCK_BTULTRA_EXTDICT ZSTD_compressBlock_btultra_extDict
#define ZSTD_COMPRESSBLOCK_BTULTRA2 ZSTD_compressBlock_btultra2
#else
#define ZSTD_COMPRESSBLOCK_BTULTRA NULL
#define ZSTD_COMPRESSBLOCK_BTULTRA_DICTMATCHSTATE NULL
#define ZSTD_COMPRESSBLOCK_BTULTRA_EXTDICT NULL
#define ZSTD_COMPRESSBLOCK_BTULTRA2 NULL
#endif
#if defined (__cplusplus) #if defined (__cplusplus)
} }
View File
@ -15,17 +15,13 @@
#endif #endif
/* ====== Constants ====== */
#define ZSTDMT_OVERLAPLOG_DEFAULT 0
/* ====== Dependencies ====== */ /* ====== Dependencies ====== */
#include "../common/allocations.h" /* ZSTD_customMalloc, ZSTD_customCalloc, ZSTD_customFree */ #include "../common/allocations.h" /* ZSTD_customMalloc, ZSTD_customCalloc, ZSTD_customFree */
#include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memset, INT_MAX, UINT_MAX */ #include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memset, INT_MAX, UINT_MAX */
#include "../common/mem.h" /* MEM_STATIC */ #include "../common/mem.h" /* MEM_STATIC */
#include "../common/pool.h" /* threadpool */ #include "../common/pool.h" /* threadpool */
#include "../common/threading.h" /* mutex */ #include "../common/threading.h" /* mutex */
#include "zstd_compress_internal.h" /* MIN, ERROR, ZSTD_*, ZSTD_highbit32 */ #include "zstd_compress_internal.h" /* MIN, ERROR, ZSTD_*, ZSTD_highbit32 */
#include "zstd_ldm.h" #include "zstd_ldm.h"
#include "zstdmt_compress.h" #include "zstdmt_compress.h"
@ -44,12 +40,13 @@
# include <unistd.h> # include <unistd.h>
# include <sys/times.h> # include <sys/times.h>
# define DEBUG_PRINTHEX(l,p,n) { \ # define DEBUG_PRINTHEX(l,p,n) \
unsigned debug_u; \ do { \
for (debug_u=0; debug_u<(n); debug_u++) \ unsigned debug_u; \
RAWLOG(l, "%02X ", ((const unsigned char*)(p))[debug_u]); \ for (debug_u=0; debug_u<(n); debug_u++) \
RAWLOG(l, " \n"); \ RAWLOG(l, "%02X ", ((const unsigned char*)(p))[debug_u]); \
} RAWLOG(l, " \n"); \
} while (0)
static unsigned long long GetCurrentClockTimeMicroseconds(void) static unsigned long long GetCurrentClockTimeMicroseconds(void)
{ {
@ -61,25 +58,28 @@ static unsigned long long GetCurrentClockTimeMicroseconds(void)
} } } }
#define MUTEX_WAIT_TIME_DLEVEL 6 #define MUTEX_WAIT_TIME_DLEVEL 6
#define ZSTD_PTHREAD_MUTEX_LOCK(mutex) { \ #define ZSTD_PTHREAD_MUTEX_LOCK(mutex) \
if (DEBUGLEVEL >= MUTEX_WAIT_TIME_DLEVEL) { \ do { \
unsigned long long const beforeTime = GetCurrentClockTimeMicroseconds(); \ if (DEBUGLEVEL >= MUTEX_WAIT_TIME_DLEVEL) { \
ZSTD_pthread_mutex_lock(mutex); \ unsigned long long const beforeTime = GetCurrentClockTimeMicroseconds(); \
{ unsigned long long const afterTime = GetCurrentClockTimeMicroseconds(); \ ZSTD_pthread_mutex_lock(mutex); \
unsigned long long const elapsedTime = (afterTime-beforeTime); \ { unsigned long long const afterTime = GetCurrentClockTimeMicroseconds(); \
if (elapsedTime > 1000) { /* or whatever threshold you like; I'm using 1 millisecond here */ \ unsigned long long const elapsedTime = (afterTime-beforeTime); \
DEBUGLOG(MUTEX_WAIT_TIME_DLEVEL, "Thread took %llu microseconds to acquire mutex %s \n", \ if (elapsedTime > 1000) { \
elapsedTime, #mutex); \ /* or whatever threshold you like; I'm using 1 millisecond here */ \
} } \ DEBUGLOG(MUTEX_WAIT_TIME_DLEVEL, \
} else { \ "Thread took %llu microseconds to acquire mutex %s \n", \
ZSTD_pthread_mutex_lock(mutex); \ elapsedTime, #mutex); \
} \ } } \
} } else { \
ZSTD_pthread_mutex_lock(mutex); \
} \
} while (0)
#else #else
# define ZSTD_PTHREAD_MUTEX_LOCK(m) ZSTD_pthread_mutex_lock(m) # define ZSTD_PTHREAD_MUTEX_LOCK(m) ZSTD_pthread_mutex_lock(m)
# define DEBUG_PRINTHEX(l,p,n) {} # define DEBUG_PRINTHEX(l,p,n) do { } while (0)
#endif #endif
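The macro bodies above are rewrapped in do { ... } while (0) instead of bare braces, so that an invocation followed by a semicolon behaves like a single statement. A minimal sketch of why that matters, using placeholder helpers (log_lock, log_write and log_unlock are illustrative, not zstd functions):

    #define LOG_BRACES(msg)   { log_lock(); log_write(msg); log_unlock(); }
    #define LOG_DOWHILE(msg)  do { log_lock(); log_write(msg); log_unlock(); } while (0)

    if (err)
        LOG_DOWHILE("failure");   /* expands to one statement, the 'else' below still binds */
    else
        LOG_DOWHILE("success");

    /* The same code written with LOG_BRACES would not compile: the ';' after the
     * closing '}' terminates the 'if', leaving the 'else' dangling. */

The DEBUG_PRINTHEX fallback changes from {} to do { } while (0) for the same reason.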
@ -100,18 +100,39 @@ typedef struct ZSTDMT_bufferPool_s {
unsigned totalBuffers; unsigned totalBuffers;
unsigned nbBuffers; unsigned nbBuffers;
ZSTD_customMem cMem; ZSTD_customMem cMem;
buffer_t bTable[1]; /* variable size */ buffer_t* buffers;
} ZSTDMT_bufferPool; } ZSTDMT_bufferPool;
static void ZSTDMT_freeBufferPool(ZSTDMT_bufferPool* bufPool)
{
DEBUGLOG(3, "ZSTDMT_freeBufferPool (address:%08X)", (U32)(size_t)bufPool);
if (!bufPool) return; /* compatibility with free on NULL */
if (bufPool->buffers) {
unsigned u;
for (u=0; u<bufPool->totalBuffers; u++) {
DEBUGLOG(4, "free buffer %2u (address:%08X)", u, (U32)(size_t)bufPool->buffers[u].start);
ZSTD_customFree(bufPool->buffers[u].start, bufPool->cMem);
}
ZSTD_customFree(bufPool->buffers, bufPool->cMem);
}
ZSTD_pthread_mutex_destroy(&bufPool->poolMutex);
ZSTD_customFree(bufPool, bufPool->cMem);
}
static ZSTDMT_bufferPool* ZSTDMT_createBufferPool(unsigned maxNbBuffers, ZSTD_customMem cMem) static ZSTDMT_bufferPool* ZSTDMT_createBufferPool(unsigned maxNbBuffers, ZSTD_customMem cMem)
{ {
ZSTDMT_bufferPool* const bufPool = (ZSTDMT_bufferPool*)ZSTD_customCalloc( ZSTDMT_bufferPool* const bufPool =
sizeof(ZSTDMT_bufferPool) + (maxNbBuffers-1) * sizeof(buffer_t), cMem); (ZSTDMT_bufferPool*)ZSTD_customCalloc(sizeof(ZSTDMT_bufferPool), cMem);
if (bufPool==NULL) return NULL; if (bufPool==NULL) return NULL;
if (ZSTD_pthread_mutex_init(&bufPool->poolMutex, NULL)) { if (ZSTD_pthread_mutex_init(&bufPool->poolMutex, NULL)) {
ZSTD_customFree(bufPool, cMem); ZSTD_customFree(bufPool, cMem);
return NULL; return NULL;
} }
bufPool->buffers = (buffer_t*)ZSTD_customCalloc(maxNbBuffers * sizeof(buffer_t), cMem);
if (bufPool->buffers==NULL) {
ZSTDMT_freeBufferPool(bufPool);
return NULL;
}
bufPool->bufferSize = 64 KB; bufPool->bufferSize = 64 KB;
bufPool->totalBuffers = maxNbBuffers; bufPool->totalBuffers = maxNbBuffers;
bufPool->nbBuffers = 0; bufPool->nbBuffers = 0;
@ -119,32 +140,19 @@ static ZSTDMT_bufferPool* ZSTDMT_createBufferPool(unsigned maxNbBuffers, ZSTD_cu
return bufPool; return bufPool;
} }
static void ZSTDMT_freeBufferPool(ZSTDMT_bufferPool* bufPool)
{
unsigned u;
DEBUGLOG(3, "ZSTDMT_freeBufferPool (address:%08X)", (U32)(size_t)bufPool);
if (!bufPool) return; /* compatibility with free on NULL */
for (u=0; u<bufPool->totalBuffers; u++) {
DEBUGLOG(4, "free buffer %2u (address:%08X)", u, (U32)(size_t)bufPool->bTable[u].start);
ZSTD_customFree(bufPool->bTable[u].start, bufPool->cMem);
}
ZSTD_pthread_mutex_destroy(&bufPool->poolMutex);
ZSTD_customFree(bufPool, bufPool->cMem);
}
/* only works at initialization, not during compression */ /* only works at initialization, not during compression */
static size_t ZSTDMT_sizeof_bufferPool(ZSTDMT_bufferPool* bufPool) static size_t ZSTDMT_sizeof_bufferPool(ZSTDMT_bufferPool* bufPool)
{ {
size_t const poolSize = sizeof(*bufPool) size_t const poolSize = sizeof(*bufPool);
+ (bufPool->totalBuffers - 1) * sizeof(buffer_t); size_t const arraySize = bufPool->totalBuffers * sizeof(buffer_t);
unsigned u; unsigned u;
size_t totalBufferSize = 0; size_t totalBufferSize = 0;
ZSTD_pthread_mutex_lock(&bufPool->poolMutex); ZSTD_pthread_mutex_lock(&bufPool->poolMutex);
for (u=0; u<bufPool->totalBuffers; u++) for (u=0; u<bufPool->totalBuffers; u++)
totalBufferSize += bufPool->bTable[u].capacity; totalBufferSize += bufPool->buffers[u].capacity;
ZSTD_pthread_mutex_unlock(&bufPool->poolMutex); ZSTD_pthread_mutex_unlock(&bufPool->poolMutex);
return poolSize + totalBufferSize; return poolSize + arraySize + totalBufferSize;
} }
/* ZSTDMT_setBufferSize() : /* ZSTDMT_setBufferSize() :
@ -187,9 +195,9 @@ static buffer_t ZSTDMT_getBuffer(ZSTDMT_bufferPool* bufPool)
DEBUGLOG(5, "ZSTDMT_getBuffer: bSize = %u", (U32)bufPool->bufferSize); DEBUGLOG(5, "ZSTDMT_getBuffer: bSize = %u", (U32)bufPool->bufferSize);
ZSTD_pthread_mutex_lock(&bufPool->poolMutex); ZSTD_pthread_mutex_lock(&bufPool->poolMutex);
if (bufPool->nbBuffers) { /* try to use an existing buffer */ if (bufPool->nbBuffers) { /* try to use an existing buffer */
buffer_t const buf = bufPool->bTable[--(bufPool->nbBuffers)]; buffer_t const buf = bufPool->buffers[--(bufPool->nbBuffers)];
size_t const availBufferSize = buf.capacity; size_t const availBufferSize = buf.capacity;
bufPool->bTable[bufPool->nbBuffers] = g_nullBuffer; bufPool->buffers[bufPool->nbBuffers] = g_nullBuffer;
if ((availBufferSize >= bSize) & ((availBufferSize>>3) <= bSize)) { if ((availBufferSize >= bSize) & ((availBufferSize>>3) <= bSize)) {
/* large enough, but not too much */ /* large enough, but not too much */
DEBUGLOG(5, "ZSTDMT_getBuffer: provide buffer %u of size %u", DEBUGLOG(5, "ZSTDMT_getBuffer: provide buffer %u of size %u",
@ -250,14 +258,14 @@ static void ZSTDMT_releaseBuffer(ZSTDMT_bufferPool* bufPool, buffer_t buf)
if (buf.start == NULL) return; /* compatible with release on NULL */ if (buf.start == NULL) return; /* compatible with release on NULL */
ZSTD_pthread_mutex_lock(&bufPool->poolMutex); ZSTD_pthread_mutex_lock(&bufPool->poolMutex);
if (bufPool->nbBuffers < bufPool->totalBuffers) { if (bufPool->nbBuffers < bufPool->totalBuffers) {
bufPool->bTable[bufPool->nbBuffers++] = buf; /* stored for later use */ bufPool->buffers[bufPool->nbBuffers++] = buf; /* stored for later use */
DEBUGLOG(5, "ZSTDMT_releaseBuffer: stored buffer of size %u in slot %u", DEBUGLOG(5, "ZSTDMT_releaseBuffer: stored buffer of size %u in slot %u",
(U32)buf.capacity, (U32)(bufPool->nbBuffers-1)); (U32)buf.capacity, (U32)(bufPool->nbBuffers-1));
ZSTD_pthread_mutex_unlock(&bufPool->poolMutex); ZSTD_pthread_mutex_unlock(&bufPool->poolMutex);
return; return;
} }
ZSTD_pthread_mutex_unlock(&bufPool->poolMutex); ZSTD_pthread_mutex_unlock(&bufPool->poolMutex);
/* Reached bufferPool capacity (should not happen) */ /* Reached bufferPool capacity (note: should not happen) */
DEBUGLOG(5, "ZSTDMT_releaseBuffer: pool capacity reached => freeing "); DEBUGLOG(5, "ZSTDMT_releaseBuffer: pool capacity reached => freeing ");
ZSTD_customFree(buf.start, bufPool->cMem); ZSTD_customFree(buf.start, bufPool->cMem);
} }
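
Both pools in this file move from the C89 `xTable[1]` variable-size-struct trick to a separately allocated array (`buffers` / `cctxs`). A simplified sketch of the new pattern, with placeholder `_sketch` types rather than the actual zstd ones:

    #include <stdlib.h>

    typedef struct { void* start; size_t capacity; } buf_sketch;

    typedef struct {
        unsigned    totalBuffers;
        buf_sketch* buffers;      /* own allocation, instead of a trailing buf_sketch[1] */
    } pool_sketch;

    static pool_sketch* pool_create_sketch(unsigned n)
    {
        pool_sketch* const p = (pool_sketch*)calloc(1, sizeof(pool_sketch));
        if (p == NULL) return NULL;
        p->buffers = (buf_sketch*)calloc(n, sizeof(buf_sketch));
        if (p->buffers == NULL) { free(p); return NULL; }
        p->totalBuffers = n;
        return p;   /* total footprint: sizeof(pool_sketch) + n * sizeof(buf_sketch) */
    }

This keeps the struct a fixed size and removes the error-prone `(n-1)*sizeof(...)` arithmetic, which is also why ZSTDMT_sizeof_bufferPool() and ZSTDMT_sizeof_CCtxPool() now add an explicit arraySize term.
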
@ -350,16 +358,20 @@ typedef struct {
int totalCCtx; int totalCCtx;
int availCCtx; int availCCtx;
ZSTD_customMem cMem; ZSTD_customMem cMem;
ZSTD_CCtx* cctx[1]; /* variable size */ ZSTD_CCtx** cctxs;
} ZSTDMT_CCtxPool; } ZSTDMT_CCtxPool;
/* note : all CCtx borrowed from the pool should be released back to the pool _before_ freeing the pool */ /* note : all CCtx borrowed from the pool must be reverted back to the pool _before_ freeing the pool */
static void ZSTDMT_freeCCtxPool(ZSTDMT_CCtxPool* pool) static void ZSTDMT_freeCCtxPool(ZSTDMT_CCtxPool* pool)
{ {
int cid; if (!pool) return;
for (cid=0; cid<pool->totalCCtx; cid++)
ZSTD_freeCCtx(pool->cctx[cid]); /* note : compatible with free on NULL */
ZSTD_pthread_mutex_destroy(&pool->poolMutex); ZSTD_pthread_mutex_destroy(&pool->poolMutex);
if (pool->cctxs) {
int cid;
for (cid=0; cid<pool->totalCCtx; cid++)
ZSTD_freeCCtx(pool->cctxs[cid]); /* free compatible with NULL */
ZSTD_customFree(pool->cctxs, pool->cMem);
}
ZSTD_customFree(pool, pool->cMem); ZSTD_customFree(pool, pool->cMem);
} }
@ -368,19 +380,24 @@ static void ZSTDMT_freeCCtxPool(ZSTDMT_CCtxPool* pool)
static ZSTDMT_CCtxPool* ZSTDMT_createCCtxPool(int nbWorkers, static ZSTDMT_CCtxPool* ZSTDMT_createCCtxPool(int nbWorkers,
ZSTD_customMem cMem) ZSTD_customMem cMem)
{ {
ZSTDMT_CCtxPool* const cctxPool = (ZSTDMT_CCtxPool*) ZSTD_customCalloc( ZSTDMT_CCtxPool* const cctxPool =
sizeof(ZSTDMT_CCtxPool) + (nbWorkers-1)*sizeof(ZSTD_CCtx*), cMem); (ZSTDMT_CCtxPool*) ZSTD_customCalloc(sizeof(ZSTDMT_CCtxPool), cMem);
assert(nbWorkers > 0); assert(nbWorkers > 0);
if (!cctxPool) return NULL; if (!cctxPool) return NULL;
if (ZSTD_pthread_mutex_init(&cctxPool->poolMutex, NULL)) { if (ZSTD_pthread_mutex_init(&cctxPool->poolMutex, NULL)) {
ZSTD_customFree(cctxPool, cMem); ZSTD_customFree(cctxPool, cMem);
return NULL; return NULL;
} }
cctxPool->cMem = cMem;
cctxPool->totalCCtx = nbWorkers; cctxPool->totalCCtx = nbWorkers;
cctxPool->cctxs = (ZSTD_CCtx**)ZSTD_customCalloc(nbWorkers * sizeof(ZSTD_CCtx*), cMem);
if (!cctxPool->cctxs) {
ZSTDMT_freeCCtxPool(cctxPool);
return NULL;
}
cctxPool->cMem = cMem;
cctxPool->cctxs[0] = ZSTD_createCCtx_advanced(cMem);
if (!cctxPool->cctxs[0]) { ZSTDMT_freeCCtxPool(cctxPool); return NULL; }
cctxPool->availCCtx = 1; /* at least one cctx for single-thread mode */ cctxPool->availCCtx = 1; /* at least one cctx for single-thread mode */
cctxPool->cctx[0] = ZSTD_createCCtx_advanced(cMem);
if (!cctxPool->cctx[0]) { ZSTDMT_freeCCtxPool(cctxPool); return NULL; }
DEBUGLOG(3, "cctxPool created, with %u workers", nbWorkers); DEBUGLOG(3, "cctxPool created, with %u workers", nbWorkers);
return cctxPool; return cctxPool;
} }
@ -402,16 +419,16 @@ static size_t ZSTDMT_sizeof_CCtxPool(ZSTDMT_CCtxPool* cctxPool)
{ {
ZSTD_pthread_mutex_lock(&cctxPool->poolMutex); ZSTD_pthread_mutex_lock(&cctxPool->poolMutex);
{ unsigned const nbWorkers = cctxPool->totalCCtx; { unsigned const nbWorkers = cctxPool->totalCCtx;
size_t const poolSize = sizeof(*cctxPool) size_t const poolSize = sizeof(*cctxPool);
+ (nbWorkers-1) * sizeof(ZSTD_CCtx*); size_t const arraySize = cctxPool->totalCCtx * sizeof(ZSTD_CCtx*);
unsigned u;
size_t totalCCtxSize = 0; size_t totalCCtxSize = 0;
unsigned u;
for (u=0; u<nbWorkers; u++) { for (u=0; u<nbWorkers; u++) {
totalCCtxSize += ZSTD_sizeof_CCtx(cctxPool->cctx[u]); totalCCtxSize += ZSTD_sizeof_CCtx(cctxPool->cctxs[u]);
} }
ZSTD_pthread_mutex_unlock(&cctxPool->poolMutex); ZSTD_pthread_mutex_unlock(&cctxPool->poolMutex);
assert(nbWorkers > 0); assert(nbWorkers > 0);
return poolSize + totalCCtxSize; return poolSize + arraySize + totalCCtxSize;
} }
} }
@ -421,7 +438,7 @@ static ZSTD_CCtx* ZSTDMT_getCCtx(ZSTDMT_CCtxPool* cctxPool)
ZSTD_pthread_mutex_lock(&cctxPool->poolMutex); ZSTD_pthread_mutex_lock(&cctxPool->poolMutex);
if (cctxPool->availCCtx) { if (cctxPool->availCCtx) {
cctxPool->availCCtx--; cctxPool->availCCtx--;
{ ZSTD_CCtx* const cctx = cctxPool->cctx[cctxPool->availCCtx]; { ZSTD_CCtx* const cctx = cctxPool->cctxs[cctxPool->availCCtx];
ZSTD_pthread_mutex_unlock(&cctxPool->poolMutex); ZSTD_pthread_mutex_unlock(&cctxPool->poolMutex);
return cctx; return cctx;
} } } }
@ -435,7 +452,7 @@ static void ZSTDMT_releaseCCtx(ZSTDMT_CCtxPool* pool, ZSTD_CCtx* cctx)
if (cctx==NULL) return; /* compatibility with release on NULL */ if (cctx==NULL) return; /* compatibility with release on NULL */
ZSTD_pthread_mutex_lock(&pool->poolMutex); ZSTD_pthread_mutex_lock(&pool->poolMutex);
if (pool->availCCtx < pool->totalCCtx) if (pool->availCCtx < pool->totalCCtx)
pool->cctx[pool->availCCtx++] = cctx; pool->cctxs[pool->availCCtx++] = cctx;
else { else {
/* pool overflow : should not happen, since totalCCtx==nbWorkers */ /* pool overflow : should not happen, since totalCCtx==nbWorkers */
DEBUGLOG(4, "CCtx pool overflow : free cctx"); DEBUGLOG(4, "CCtx pool overflow : free cctx");
@ -601,11 +618,8 @@ static void ZSTDMT_serialState_update(serialState_t* serialState,
ZSTD_pthread_mutex_unlock(&serialState->mutex); ZSTD_pthread_mutex_unlock(&serialState->mutex);
if (seqStore.size > 0) { if (seqStore.size > 0) {
size_t const err = ZSTD_referenceExternalSequences( ZSTD_referenceExternalSequences(jobCCtx, seqStore.seq, seqStore.size);
jobCCtx, seqStore.seq, seqStore.size);
assert(serialState->params.ldmParams.enableLdm == ZSTD_ps_enable); assert(serialState->params.ldmParams.enableLdm == ZSTD_ps_enable);
assert(!ZSTD_isError(err));
(void)err;
} }
} }
@ -657,12 +671,13 @@ typedef struct {
unsigned frameChecksumNeeded; /* used only by mtctx */ unsigned frameChecksumNeeded; /* used only by mtctx */
} ZSTDMT_jobDescription; } ZSTDMT_jobDescription;
#define JOB_ERROR(e) { \ #define JOB_ERROR(e) \
ZSTD_PTHREAD_MUTEX_LOCK(&job->job_mutex); \ do { \
job->cSize = e; \ ZSTD_PTHREAD_MUTEX_LOCK(&job->job_mutex); \
ZSTD_pthread_mutex_unlock(&job->job_mutex); \ job->cSize = e; \
goto _endJob; \ ZSTD_pthread_mutex_unlock(&job->job_mutex); \
} goto _endJob; \
} while (0)
/* ZSTDMT_compressionJob() is a POOL_function type */ /* ZSTDMT_compressionJob() is a POOL_function type */
static void ZSTDMT_compressionJob(void* jobDescription) static void ZSTDMT_compressionJob(void* jobDescription)
@ -1091,7 +1106,7 @@ ZSTD_frameProgression ZSTDMT_getFrameProgression(ZSTDMT_CCtx* mtctx)
{ unsigned jobNb; { unsigned jobNb;
unsigned lastJobNb = mtctx->nextJobID + mtctx->jobReady; assert(mtctx->jobReady <= 1); unsigned lastJobNb = mtctx->nextJobID + mtctx->jobReady; assert(mtctx->jobReady <= 1);
DEBUGLOG(6, "ZSTDMT_getFrameProgression: jobs: from %u to <%u (jobReady:%u)", DEBUGLOG(6, "ZSTDMT_getFrameProgression: jobs: from %u to <%u (jobReady:%u)",
mtctx->doneJobID, lastJobNb, mtctx->jobReady) mtctx->doneJobID, lastJobNb, mtctx->jobReady);
for (jobNb = mtctx->doneJobID ; jobNb < lastJobNb ; jobNb++) { for (jobNb = mtctx->doneJobID ; jobNb < lastJobNb ; jobNb++) {
unsigned const wJobID = jobNb & mtctx->jobIDMask; unsigned const wJobID = jobNb & mtctx->jobIDMask;
ZSTDMT_jobDescription* jobPtr = &mtctx->jobs[wJobID]; ZSTDMT_jobDescription* jobPtr = &mtctx->jobs[wJobID];
@ -34,6 +34,12 @@
* Macros * Macros
****************************************************************/ ****************************************************************/
#ifdef HUF_DISABLE_FAST_DECODE
# define HUF_ENABLE_FAST_DECODE 0
#else
# define HUF_ENABLE_FAST_DECODE 1
#endif
/* These two optional macros force the use one way or another of the two /* These two optional macros force the use one way or another of the two
* Huffman decompression implementations. You can't force in both directions * Huffman decompression implementations. You can't force in both directions
* at the same time. * at the same time.
@ -158,17 +164,18 @@ static size_t HUF_initFastDStream(BYTE const* ip) {
* op [in/out] - The output pointers, must be updated to reflect what is written. * op [in/out] - The output pointers, must be updated to reflect what is written.
* bits [in/out] - The bitstream containers, must be updated to reflect the current state. * bits [in/out] - The bitstream containers, must be updated to reflect the current state.
* dt [in] - The decoding table. * dt [in] - The decoding table.
* ilimit [in] - The input limit, stop when any input pointer is below ilimit. * ilowest [in] - The beginning of the valid range of the input. Decoders may read
* down to this pointer. It may be below iend[0].
* oend [in] - The end of the output stream. op[3] must not cross oend. * oend [in] - The end of the output stream. op[3] must not cross oend.
* iend [in] - The end of each input stream. ip[i] may cross iend[i], * iend [in] - The end of each input stream. ip[i] may cross iend[i],
* as long as it is above ilimit, but that indicates corruption. * as long as it is above ilowest, but that indicates corruption.
*/ */
typedef struct { typedef struct {
BYTE const* ip[4]; BYTE const* ip[4];
BYTE* op[4]; BYTE* op[4];
U64 bits[4]; U64 bits[4];
void const* dt; void const* dt;
BYTE const* ilimit; BYTE const* ilowest;
BYTE* oend; BYTE* oend;
BYTE const* iend[4]; BYTE const* iend[4];
} HUF_DecompressFastArgs; } HUF_DecompressFastArgs;
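
For context on `ilowest` and `iend[4]`: a 4-stream Huffman section starts with a 6-byte jump table holding the first three stream sizes, and the fourth size is implicit. A small endian-independent sketch of how the stream boundaries are derived (simplified from the parsing code below; the `_sketch` names are illustrative):

    #include <stddef.h>
    #include <stdint.h>

    typedef struct { const uint8_t* start[4]; size_t size[4]; } streams_sketch;

    /* Returns 0 on success, -1 if the section is too small or inconsistent. */
    static int parseJumpTable_sketch(streams_sketch* s, const uint8_t* src, size_t srcSize)
    {
        size_t len1, len2, len3;
        if (srcSize < 10) return -1;   /* 6-byte jump table + at least 1 byte per stream */
        len1 = (size_t)src[0] | ((size_t)src[1] << 8);
        len2 = (size_t)src[2] | ((size_t)src[3] << 8);
        len3 = (size_t)src[4] | ((size_t)src[5] << 8);
        if (len1 + len2 + len3 + 6 > srcSize) return -1;
        s->start[0] = src + 6;            s->size[0] = len1;
        s->start[1] = s->start[0] + len1; s->size[1] = len2;
        s->start[2] = s->start[1] + len2; s->size[2] = len3;
        s->start[3] = s->start[2] + len3; s->size[3] = srcSize - 6 - (len1 + len2 + len3);
        return 0;
    }

Each stream is consumed backwards from its end, which is why the fast decoders only need the single lower bound `ilowest` (the start of the section) rather than per-stream start pointers.
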
@ -186,9 +193,9 @@ static size_t HUF_DecompressFastArgs_init(HUF_DecompressFastArgs* args, void* ds
void const* dt = DTable + 1; void const* dt = DTable + 1;
U32 const dtLog = HUF_getDTableDesc(DTable).tableLog; U32 const dtLog = HUF_getDTableDesc(DTable).tableLog;
const BYTE* const ilimit = (const BYTE*)src + 6 + 8; const BYTE* const istart = (const BYTE*)src;
BYTE* const oend = (BYTE*)dst + dstSize; BYTE* const oend = ZSTD_maybeNullPtrAdd((BYTE*)dst, dstSize);
/* The fast decoding loop assumes 64-bit little-endian. /* The fast decoding loop assumes 64-bit little-endian.
* This condition is false on x32. * This condition is false on x32.
@ -196,6 +203,11 @@ static size_t HUF_DecompressFastArgs_init(HUF_DecompressFastArgs* args, void* ds
if (!MEM_isLittleEndian() || MEM_32bits()) if (!MEM_isLittleEndian() || MEM_32bits())
return 0; return 0;
/* Avoid nullptr addition */
if (dstSize == 0)
return 0;
assert(dst != NULL);
/* strict minimum : jump table + 1 byte per stream */ /* strict minimum : jump table + 1 byte per stream */
if (srcSize < 10) if (srcSize < 10)
return ERROR(corruption_detected); return ERROR(corruption_detected);
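
The `oend` computations in this file switch from raw `dst + dstSize` arithmetic to ZSTD_maybeNullPtrAdd() because `dst` may legitimately be NULL when `dstSize == 0`, and pointer arithmetic on NULL is undefined behaviour in C. The real helper lives in zstd's common headers; a plausible shape for it (an assumption, not the verbatim upstream definition) is:

    #include <stddef.h>

    /* Sketch: behaves like ptr + add, except that a NULL ptr with add == 0 is
     * returned unchanged instead of being used in arithmetic.
     * Assumes add is never negative, which holds for buffer-size uses. */
    static void* maybeNullPtrAdd_sketch(void* ptr, ptrdiff_t add)
    {
        return (add > 0) ? (char*)ptr + add : ptr;
    }
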
@ -209,7 +221,6 @@ static size_t HUF_DecompressFastArgs_init(HUF_DecompressFastArgs* args, void* ds
/* Read the jump table. */ /* Read the jump table. */
{ {
const BYTE* const istart = (const BYTE*)src;
size_t const length1 = MEM_readLE16(istart); size_t const length1 = MEM_readLE16(istart);
size_t const length2 = MEM_readLE16(istart+2); size_t const length2 = MEM_readLE16(istart+2);
size_t const length3 = MEM_readLE16(istart+4); size_t const length3 = MEM_readLE16(istart+4);
@ -221,10 +232,8 @@ static size_t HUF_DecompressFastArgs_init(HUF_DecompressFastArgs* args, void* ds
/* HUF_initFastDStream() requires this, and this small of an input /* HUF_initFastDStream() requires this, and this small of an input
* won't benefit from the ASM loop anyways. * won't benefit from the ASM loop anyways.
* length1 must be >= 16 so that ip[0] >= ilimit before the loop
* starts.
*/ */
if (length1 < 16 || length2 < 8 || length3 < 8 || length4 < 8) if (length1 < 8 || length2 < 8 || length3 < 8 || length4 < 8)
return 0; return 0;
if (length4 > srcSize) return ERROR(corruption_detected); /* overflow */ if (length4 > srcSize) return ERROR(corruption_detected); /* overflow */
} }
@ -256,11 +265,12 @@ static size_t HUF_DecompressFastArgs_init(HUF_DecompressFastArgs* args, void* ds
args->bits[2] = HUF_initFastDStream(args->ip[2]); args->bits[2] = HUF_initFastDStream(args->ip[2]);
args->bits[3] = HUF_initFastDStream(args->ip[3]); args->bits[3] = HUF_initFastDStream(args->ip[3]);
/* If ip[] >= ilimit, it is guaranteed to be safe to /* The decoders must be sure to never read beyond ilowest.
* reload bits[]. It may be beyond its section, but is * This is lower than iend[0], but allowing decoders to read
* guaranteed to be valid (>= istart). * down to ilowest can allow an extra iteration or two in the
*/ * fast loop.
args->ilimit = ilimit; */
args->ilowest = istart;
args->oend = oend; args->oend = oend;
args->dt = dt; args->dt = dt;
@ -285,13 +295,31 @@ static size_t HUF_initRemainingDStream(BIT_DStream_t* bit, HUF_DecompressFastArg
assert(sizeof(size_t) == 8); assert(sizeof(size_t) == 8);
bit->bitContainer = MEM_readLEST(args->ip[stream]); bit->bitContainer = MEM_readLEST(args->ip[stream]);
bit->bitsConsumed = ZSTD_countTrailingZeros64(args->bits[stream]); bit->bitsConsumed = ZSTD_countTrailingZeros64(args->bits[stream]);
bit->start = (const char*)args->iend[0]; bit->start = (const char*)args->ilowest;
bit->limitPtr = bit->start + sizeof(size_t); bit->limitPtr = bit->start + sizeof(size_t);
bit->ptr = (const char*)args->ip[stream]; bit->ptr = (const char*)args->ip[stream];
return 0; return 0;
} }
/* Calls X(N) for each stream 0, 1, 2, 3. */
#define HUF_4X_FOR_EACH_STREAM(X) \
do { \
X(0); \
X(1); \
X(2); \
X(3); \
} while (0)
/* Calls X(N, var) for each stream 0, 1, 2, 3. */
#define HUF_4X_FOR_EACH_STREAM_WITH_VAR(X, var) \
do { \
X(0, (var)); \
X(1, (var)); \
X(2, (var)); \
X(3, (var)); \
} while (0)
#ifndef HUF_FORCE_DECOMPRESS_X2 #ifndef HUF_FORCE_DECOMPRESS_X2
@ -500,15 +528,19 @@ HUF_decodeSymbolX1(BIT_DStream_t* Dstream, const HUF_DEltX1* dt, const U32 dtLog
} }
#define HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) \ #define HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) \
*ptr++ = HUF_decodeSymbolX1(DStreamPtr, dt, dtLog) do { *ptr++ = HUF_decodeSymbolX1(DStreamPtr, dt, dtLog); } while (0)
#define HUF_DECODE_SYMBOLX1_1(ptr, DStreamPtr) \ #define HUF_DECODE_SYMBOLX1_1(ptr, DStreamPtr) \
if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \ do { \
HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr); \
} while (0)
#define HUF_DECODE_SYMBOLX1_2(ptr, DStreamPtr) \ #define HUF_DECODE_SYMBOLX1_2(ptr, DStreamPtr) \
if (MEM_64bits()) \ do { \
HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) if (MEM_64bits()) \
HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr); \
} while (0)
HINT_INLINE size_t HINT_INLINE size_t
HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX1* const dt, const U32 dtLog) HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX1* const dt, const U32 dtLog)
@ -546,7 +578,7 @@ HUF_decompress1X1_usingDTable_internal_body(
const HUF_DTable* DTable) const HUF_DTable* DTable)
{ {
BYTE* op = (BYTE*)dst; BYTE* op = (BYTE*)dst;
BYTE* const oend = op + dstSize; BYTE* const oend = ZSTD_maybeNullPtrAdd(op, dstSize);
const void* dtPtr = DTable + 1; const void* dtPtr = DTable + 1;
const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr; const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;
BIT_DStream_t bitD; BIT_DStream_t bitD;
@ -574,6 +606,7 @@ HUF_decompress4X1_usingDTable_internal_body(
{ {
/* Check */ /* Check */
if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */ if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
if (dstSize < 6) return ERROR(corruption_detected); /* stream 4-split doesn't work */
{ const BYTE* const istart = (const BYTE*) cSrc; { const BYTE* const istart = (const BYTE*) cSrc;
BYTE* const ostart = (BYTE*) dst; BYTE* const ostart = (BYTE*) dst;
@ -609,7 +642,7 @@ HUF_decompress4X1_usingDTable_internal_body(
if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */ if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
if (opStart4 > oend) return ERROR(corruption_detected); /* overflow */ if (opStart4 > oend) return ERROR(corruption_detected); /* overflow */
if (dstSize < 6) return ERROR(corruption_detected); /* stream 4-split doesn't work */ assert(dstSize >= 6); /* validated above */
CHECK_F( BIT_initDStream(&bitD1, istart1, length1) ); CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
CHECK_F( BIT_initDStream(&bitD2, istart2, length2) ); CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
CHECK_F( BIT_initDStream(&bitD3, istart3, length3) ); CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
@ -692,7 +725,7 @@ void HUF_decompress4X1_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs*
BYTE* op[4]; BYTE* op[4];
U16 const* const dtable = (U16 const*)args->dt; U16 const* const dtable = (U16 const*)args->dt;
BYTE* const oend = args->oend; BYTE* const oend = args->oend;
BYTE const* const ilimit = args->ilimit; BYTE const* const ilowest = args->ilowest;
/* Copy the arguments to local variables */ /* Copy the arguments to local variables */
ZSTD_memcpy(&bits, &args->bits, sizeof(bits)); ZSTD_memcpy(&bits, &args->bits, sizeof(bits));
@ -705,13 +738,12 @@ void HUF_decompress4X1_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs*
for (;;) { for (;;) {
BYTE* olimit; BYTE* olimit;
int stream; int stream;
int symbol;
/* Assert loop preconditions */ /* Assert loop preconditions */
#ifndef NDEBUG #ifndef NDEBUG
for (stream = 0; stream < 4; ++stream) { for (stream = 0; stream < 4; ++stream) {
assert(op[stream] <= (stream == 3 ? oend : op[stream + 1])); assert(op[stream] <= (stream == 3 ? oend : op[stream + 1]));
assert(ip[stream] >= ilimit); assert(ip[stream] >= ilowest);
} }
#endif #endif
/* Compute olimit */ /* Compute olimit */
@ -721,7 +753,7 @@ void HUF_decompress4X1_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs*
/* Each iteration consumes up to 11 bits * 5 = 55 bits < 7 bytes /* Each iteration consumes up to 11 bits * 5 = 55 bits < 7 bytes
* per stream. * per stream.
*/ */
size_t const iiters = (size_t)(ip[0] - ilimit) / 7; size_t const iiters = (size_t)(ip[0] - ilowest) / 7;
/* We can safely run iters iterations before running bounds checks */ /* We can safely run iters iterations before running bounds checks */
size_t const iters = MIN(oiters, iiters); size_t const iters = MIN(oiters, iiters);
size_t const symbols = iters * 5; size_t const symbols = iters * 5;
@ -732,8 +764,8 @@ void HUF_decompress4X1_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs*
*/ */
olimit = op[3] + symbols; olimit = op[3] + symbols;
/* Exit fast decoding loop once we get close to the end. */ /* Exit fast decoding loop once we reach the end. */
if (op[3] + 20 > olimit) if (op[3] == olimit)
break; break;
/* Exit the decoding loop if any input pointer has crossed the /* Exit the decoding loop if any input pointer has crossed the
@ -752,27 +784,42 @@ void HUF_decompress4X1_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs*
} }
#endif #endif
#define HUF_4X1_DECODE_SYMBOL(_stream, _symbol) \
do { \
int const index = (int)(bits[(_stream)] >> 53); \
int const entry = (int)dtable[index]; \
bits[(_stream)] <<= (entry & 0x3F); \
op[(_stream)][(_symbol)] = (BYTE)((entry >> 8) & 0xFF); \
} while (0)
#define HUF_4X1_RELOAD_STREAM(_stream) \
do { \
int const ctz = ZSTD_countTrailingZeros64(bits[(_stream)]); \
int const nbBits = ctz & 7; \
int const nbBytes = ctz >> 3; \
op[(_stream)] += 5; \
ip[(_stream)] -= nbBytes; \
bits[(_stream)] = MEM_read64(ip[(_stream)]) | 1; \
bits[(_stream)] <<= nbBits; \
} while (0)
/* Manually unroll the loop because compilers don't consistently
* unroll the inner loops, which destroys performance.
*/
do { do {
/* Decode 5 symbols in each of the 4 streams */ /* Decode 5 symbols in each of the 4 streams */
for (symbol = 0; symbol < 5; ++symbol) { HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 0);
for (stream = 0; stream < 4; ++stream) { HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 1);
int const index = (int)(bits[stream] >> 53); HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 2);
int const entry = (int)dtable[index]; HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 3);
bits[stream] <<= (entry & 63); HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 4);
op[stream][symbol] = (BYTE)((entry >> 8) & 0xFF);
} /* Reload each of the 4 the bitstreams */
} HUF_4X_FOR_EACH_STREAM(HUF_4X1_RELOAD_STREAM);
/* Reload the bitstreams */
for (stream = 0; stream < 4; ++stream) {
int const ctz = ZSTD_countTrailingZeros64(bits[stream]);
int const nbBits = ctz & 7;
int const nbBytes = ctz >> 3;
op[stream] += 5;
ip[stream] -= nbBytes;
bits[stream] = MEM_read64(ip[stream]) | 1;
bits[stream] <<= nbBits;
}
} while (op[3] < olimit); } while (op[3] < olimit);
#undef HUF_4X1_DECODE_SYMBOL
#undef HUF_4X1_RELOAD_STREAM
} }
_out: _out:
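
HUF_4X1_RELOAD_STREAM above works because every refill ORs a sentinel `1` into bit 0 of the 64-bit container: decoding shifts bits out of the top, the sentinel climbs by the same amount, and counting trailing zeros afterwards recovers exactly how many bits were consumed. A standalone illustration of that bookkeeping (demo code, not the zstd implementation):

    #include <assert.h>
    #include <stdint.h>

    /* Portable trailing-zero count for the demo; zstd uses ZSTD_countTrailingZeros64. */
    static int ctz64_sketch(uint64_t v)
    {
        int n = 0;
        assert(v != 0);
        while ((v & 1) == 0) { v >>= 1; n++; }
        return n;
    }

    int main(void)
    {
        /* Refill: read 64 bits (dummy payload here) and plant the sentinel in bit 0. */
        uint64_t bits = 0xDEADBEEFCAFEF00DULL | 1;

        /* "Decode" three symbols: each one shifts its bit-length out of the top. */
        bits <<= 11;
        bits <<= 7;
        bits <<= 5;

        /* The sentinel now sits at bit 23, so trailing zeros == bits consumed. */
        assert(ctz64_sketch(bits) == 11 + 7 + 5);

        /* The reload step then moves the input pointer back by consumed/8 bytes,
         * rereads 64 bits, replants the sentinel, and shifts by consumed%8,
         * keeping the backwards bitstream bit-exact across refills. */
        return 0;
    }
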
@ -797,8 +844,8 @@ HUF_decompress4X1_usingDTable_internal_fast(
HUF_DecompressFastLoopFn loopFn) HUF_DecompressFastLoopFn loopFn)
{ {
void const* dt = DTable + 1; void const* dt = DTable + 1;
const BYTE* const iend = (const BYTE*)cSrc + 6; BYTE const* const ilowest = (BYTE const*)cSrc;
BYTE* const oend = (BYTE*)dst + dstSize; BYTE* const oend = ZSTD_maybeNullPtrAdd((BYTE*)dst, dstSize);
HUF_DecompressFastArgs args; HUF_DecompressFastArgs args;
{ size_t const ret = HUF_DecompressFastArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable); { size_t const ret = HUF_DecompressFastArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable);
FORWARD_IF_ERROR(ret, "Failed to init fast loop args"); FORWARD_IF_ERROR(ret, "Failed to init fast loop args");
@ -806,18 +853,22 @@ HUF_decompress4X1_usingDTable_internal_fast(
return 0; return 0;
} }
assert(args.ip[0] >= args.ilimit); assert(args.ip[0] >= args.ilowest);
loopFn(&args); loopFn(&args);
/* Our loop guarantees that ip[] >= ilimit and that we haven't /* Our loop guarantees that ip[] >= ilowest and that we haven't
* overwritten any op[]. * overwritten any op[].
*/ */
assert(args.ip[0] >= iend); assert(args.ip[0] >= ilowest);
assert(args.ip[1] >= iend); assert(args.ip[0] >= ilowest);
assert(args.ip[2] >= iend); assert(args.ip[1] >= ilowest);
assert(args.ip[3] >= iend); assert(args.ip[2] >= ilowest);
assert(args.ip[3] >= ilowest);
assert(args.op[3] <= oend); assert(args.op[3] <= oend);
(void)iend;
assert(ilowest == args.ilowest);
assert(ilowest + 6 == args.iend[0]);
(void)ilowest;
/* finish bit streams one by one. */ /* finish bit streams one by one. */
{ size_t const segmentSize = (dstSize+3) / 4; { size_t const segmentSize = (dstSize+3) / 4;
@ -868,7 +919,7 @@ static size_t HUF_decompress4X1_usingDTable_internal(void* dst, size_t dstSize,
} }
#endif #endif
if (!(flags & HUF_flags_disableFast)) { if (HUF_ENABLE_FAST_DECODE && !(flags & HUF_flags_disableFast)) {
size_t const ret = HUF_decompress4X1_usingDTable_internal_fast(dst, dstSize, cSrc, cSrcSize, DTable, loopFn); size_t const ret = HUF_decompress4X1_usingDTable_internal_fast(dst, dstSize, cSrc, cSrcSize, DTable, loopFn);
if (ret != 0) if (ret != 0)
return ret; return ret;
@ -1239,15 +1290,19 @@ HUF_decodeLastSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, c
} }
#define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \ #define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \
ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog) do { ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog); } while (0)
#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \ #define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \
if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \ do { \
ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog) if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog); \
} while (0)
#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \ #define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
if (MEM_64bits()) \ do { \
ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog) if (MEM_64bits()) \
ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog); \
} while (0)
HINT_INLINE size_t HINT_INLINE size_t
HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd, HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd,
@ -1307,7 +1362,7 @@ HUF_decompress1X2_usingDTable_internal_body(
/* decode */ /* decode */
{ BYTE* const ostart = (BYTE*) dst; { BYTE* const ostart = (BYTE*) dst;
BYTE* const oend = ostart + dstSize; BYTE* const oend = ZSTD_maybeNullPtrAdd(ostart, dstSize);
const void* const dtPtr = DTable+1; /* force compiler to not use strict-aliasing */ const void* const dtPtr = DTable+1; /* force compiler to not use strict-aliasing */
const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr; const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
DTableDesc const dtd = HUF_getDTableDesc(DTable); DTableDesc const dtd = HUF_getDTableDesc(DTable);
@ -1332,6 +1387,7 @@ HUF_decompress4X2_usingDTable_internal_body(
const HUF_DTable* DTable) const HUF_DTable* DTable)
{ {
if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */ if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
if (dstSize < 6) return ERROR(corruption_detected); /* stream 4-split doesn't work */
{ const BYTE* const istart = (const BYTE*) cSrc; { const BYTE* const istart = (const BYTE*) cSrc;
BYTE* const ostart = (BYTE*) dst; BYTE* const ostart = (BYTE*) dst;
@ -1367,7 +1423,7 @@ HUF_decompress4X2_usingDTable_internal_body(
if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */ if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
if (opStart4 > oend) return ERROR(corruption_detected); /* overflow */ if (opStart4 > oend) return ERROR(corruption_detected); /* overflow */
if (dstSize < 6) return ERROR(corruption_detected); /* stream 4-split doesn't work */ assert(dstSize >= 6 /* validated above */);
CHECK_F( BIT_initDStream(&bitD1, istart1, length1) ); CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
CHECK_F( BIT_initDStream(&bitD2, istart2, length2) ); CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
CHECK_F( BIT_initDStream(&bitD3, istart3, length3) ); CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
@ -1472,7 +1528,7 @@ void HUF_decompress4X2_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs*
BYTE* op[4]; BYTE* op[4];
BYTE* oend[4]; BYTE* oend[4];
HUF_DEltX2 const* const dtable = (HUF_DEltX2 const*)args->dt; HUF_DEltX2 const* const dtable = (HUF_DEltX2 const*)args->dt;
BYTE const* const ilimit = args->ilimit; BYTE const* const ilowest = args->ilowest;
/* Copy the arguments to local registers. */ /* Copy the arguments to local registers. */
ZSTD_memcpy(&bits, &args->bits, sizeof(bits)); ZSTD_memcpy(&bits, &args->bits, sizeof(bits));
@ -1490,13 +1546,12 @@ void HUF_decompress4X2_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs*
for (;;) { for (;;) {
BYTE* olimit; BYTE* olimit;
int stream; int stream;
int symbol;
/* Assert loop preconditions */ /* Assert loop preconditions */
#ifndef NDEBUG #ifndef NDEBUG
for (stream = 0; stream < 4; ++stream) { for (stream = 0; stream < 4; ++stream) {
assert(op[stream] <= oend[stream]); assert(op[stream] <= oend[stream]);
assert(ip[stream] >= ilimit); assert(ip[stream] >= ilowest);
} }
#endif #endif
/* Compute olimit */ /* Compute olimit */
@ -1509,7 +1564,7 @@ void HUF_decompress4X2_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs*
* We also know that each input pointer is >= ip[0]. So we can run * We also know that each input pointer is >= ip[0]. So we can run
* iters loops before running out of input. * iters loops before running out of input.
*/ */
size_t iters = (size_t)(ip[0] - ilimit) / 7; size_t iters = (size_t)(ip[0] - ilowest) / 7;
/* Each iteration can produce up to 10 bytes of output per stream. /* Each iteration can produce up to 10 bytes of output per stream.
* Each output stream may advance at different rates. So take the * Each output stream may advance at different rates. So take the
* minimum number of safe iterations among all the output streams. * minimum number of safe iterations among all the output streams.
@ -1527,8 +1582,8 @@ void HUF_decompress4X2_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs*
*/ */
olimit = op[3] + (iters * 5); olimit = op[3] + (iters * 5);
/* Exit the fast decoding loop if we are too close to the end. */ /* Exit the fast decoding loop once we reach the end. */
if (op[3] + 10 > olimit) if (op[3] == olimit)
break; break;
/* Exit the decoding loop if any input pointer has crossed the /* Exit the decoding loop if any input pointer has crossed the
@ -1547,54 +1602,58 @@ void HUF_decompress4X2_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs*
} }
#endif #endif
#define HUF_4X2_DECODE_SYMBOL(_stream, _decode3) \
do { \
if ((_decode3) || (_stream) != 3) { \
int const index = (int)(bits[(_stream)] >> 53); \
HUF_DEltX2 const entry = dtable[index]; \
MEM_write16(op[(_stream)], entry.sequence); \
bits[(_stream)] <<= (entry.nbBits) & 0x3F; \
op[(_stream)] += (entry.length); \
} \
} while (0)
#define HUF_4X2_RELOAD_STREAM(_stream) \
do { \
HUF_4X2_DECODE_SYMBOL(3, 1); \
{ \
int const ctz = ZSTD_countTrailingZeros64(bits[(_stream)]); \
int const nbBits = ctz & 7; \
int const nbBytes = ctz >> 3; \
ip[(_stream)] -= nbBytes; \
bits[(_stream)] = MEM_read64(ip[(_stream)]) | 1; \
bits[(_stream)] <<= nbBits; \
} \
} while (0)
/* Manually unroll the loop because compilers don't consistently
* unroll the inner loops, which destroys performance.
*/
do { do {
/* Do 5 table lookups for each of the first 3 streams */ /* Decode 5 symbols from each of the first 3 streams.
for (symbol = 0; symbol < 5; ++symbol) { * The final stream will be decoded during the reload phase
for (stream = 0; stream < 3; ++stream) { * to reduce register pressure.
int const index = (int)(bits[stream] >> 53); */
HUF_DEltX2 const entry = dtable[index]; HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0);
MEM_write16(op[stream], entry.sequence); HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0);
bits[stream] <<= (entry.nbBits); HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0);
op[stream] += (entry.length); HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0);
} HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0);
}
/* Do 1 table lookup from the final stream */ /* Decode one symbol from the final stream */
{ HUF_4X2_DECODE_SYMBOL(3, 1);
int const index = (int)(bits[3] >> 53);
HUF_DEltX2 const entry = dtable[index]; /* Decode 4 symbols from the final stream & reload bitstreams.
MEM_write16(op[3], entry.sequence); * The final stream is reloaded last, meaning that all 5 symbols
bits[3] <<= (entry.nbBits); * are decoded from the final stream before it is reloaded.
op[3] += (entry.length); */
} HUF_4X_FOR_EACH_STREAM(HUF_4X2_RELOAD_STREAM);
/* Do 4 table lookups from the final stream & reload bitstreams */
for (stream = 0; stream < 4; ++stream) {
/* Do a table lookup from the final stream.
* This is interleaved with the reloading to reduce register
* pressure. This shouldn't be necessary, but compilers can
* struggle with codegen with high register pressure.
*/
{
int const index = (int)(bits[3] >> 53);
HUF_DEltX2 const entry = dtable[index];
MEM_write16(op[3], entry.sequence);
bits[3] <<= (entry.nbBits);
op[3] += (entry.length);
}
/* Reload the bistreams. The final bitstream must be reloaded
* after the 5th symbol was decoded.
*/
{
int const ctz = ZSTD_countTrailingZeros64(bits[stream]);
int const nbBits = ctz & 7;
int const nbBytes = ctz >> 3;
ip[stream] -= nbBytes;
bits[stream] = MEM_read64(ip[stream]) | 1;
bits[stream] <<= nbBits;
}
}
} while (op[3] < olimit); } while (op[3] < olimit);
} }
#undef HUF_4X2_DECODE_SYMBOL
#undef HUF_4X2_RELOAD_STREAM
_out: _out:
/* Save the final values of each of the state variables back to args. */ /* Save the final values of each of the state variables back to args. */
@ -1611,8 +1670,8 @@ HUF_decompress4X2_usingDTable_internal_fast(
const HUF_DTable* DTable, const HUF_DTable* DTable,
HUF_DecompressFastLoopFn loopFn) { HUF_DecompressFastLoopFn loopFn) {
void const* dt = DTable + 1; void const* dt = DTable + 1;
const BYTE* const iend = (const BYTE*)cSrc + 6; const BYTE* const ilowest = (const BYTE*)cSrc;
BYTE* const oend = (BYTE*)dst + dstSize; BYTE* const oend = ZSTD_maybeNullPtrAdd((BYTE*)dst, dstSize);
HUF_DecompressFastArgs args; HUF_DecompressFastArgs args;
{ {
size_t const ret = HUF_DecompressFastArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable); size_t const ret = HUF_DecompressFastArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable);
@ -1621,16 +1680,19 @@ HUF_decompress4X2_usingDTable_internal_fast(
return 0; return 0;
} }
assert(args.ip[0] >= args.ilimit); assert(args.ip[0] >= args.ilowest);
loopFn(&args); loopFn(&args);
/* note : op4 already verified within main loop */ /* note : op4 already verified within main loop */
assert(args.ip[0] >= iend); assert(args.ip[0] >= ilowest);
assert(args.ip[1] >= iend); assert(args.ip[1] >= ilowest);
assert(args.ip[2] >= iend); assert(args.ip[2] >= ilowest);
assert(args.ip[3] >= iend); assert(args.ip[3] >= ilowest);
assert(args.op[3] <= oend); assert(args.op[3] <= oend);
(void)iend;
assert(ilowest == args.ilowest);
assert(ilowest + 6 == args.iend[0]);
(void)ilowest;
/* finish bitStreams one by one */ /* finish bitStreams one by one */
{ {
@ -1679,7 +1741,7 @@ static size_t HUF_decompress4X2_usingDTable_internal(void* dst, size_t dstSize,
} }
#endif #endif
if (!(flags & HUF_flags_disableFast)) { if (HUF_ENABLE_FAST_DECODE && !(flags & HUF_flags_disableFast)) {
size_t const ret = HUF_decompress4X2_usingDTable_internal_fast(dst, dstSize, cSrc, cSrcSize, DTable, loopFn); size_t const ret = HUF_decompress4X2_usingDTable_internal_fast(dst, dstSize, cSrc, cSrcSize, DTable, loopFn);
if (ret != 0) if (ret != 0)
return ret; return ret;
@ -10,11 +10,32 @@
#include "../common/portability_macros.h" #include "../common/portability_macros.h"
#if defined(__ELF__) && defined(__GNUC__)
/* Stack marking /* Stack marking
* ref: https://wiki.gentoo.org/wiki/Hardened/GNU_stack_quickstart * ref: https://wiki.gentoo.org/wiki/Hardened/GNU_stack_quickstart
*/ */
#if defined(__ELF__) && defined(__GNUC__)
.section .note.GNU-stack,"",%progbits .section .note.GNU-stack,"",%progbits
#if defined(__aarch64__)
/* Mark that this assembly supports BTI & PAC, because it is empty for aarch64.
* See: https://github.com/facebook/zstd/issues/3841
* See: https://gcc.godbolt.org/z/sqr5T4ffK
* See: https://lore.kernel.org/linux-arm-kernel/20200429211641.9279-8-broonie@kernel.org/
* See: https://reviews.llvm.org/D62609
*/
.pushsection .note.gnu.property, "a"
.p2align 3
.long 4 /* size of the name - "GNU\0" */
.long 0x10 /* size of descriptor */
.long 0x5 /* NT_GNU_PROPERTY_TYPE_0 */
.asciz "GNU"
.long 0xc0000000 /* pr_type - GNU_PROPERTY_AARCH64_FEATURE_1_AND */
.long 4 /* pr_datasz - 4 bytes */
.long 3 /* pr_data - GNU_PROPERTY_AARCH64_FEATURE_1_BTI | GNU_PROPERTY_AARCH64_FEATURE_1_PAC */
.p2align 3 /* pr_padding - bring everything to 8 byte alignment */
.popsection
#endif
#endif #endif
#if ZSTD_ENABLE_ASM_X86_64_BMI2 #if ZSTD_ENABLE_ASM_X86_64_BMI2
@ -131,7 +152,7 @@ HUF_decompress4X1_usingDTable_internal_fast_asm_loop:
movq 88(%rax), %bits3 movq 88(%rax), %bits3
movq 96(%rax), %dtable movq 96(%rax), %dtable
push %rax /* argument */ push %rax /* argument */
push 104(%rax) /* ilimit */ push 104(%rax) /* ilowest */
push 112(%rax) /* oend */ push 112(%rax) /* oend */
push %olimit /* olimit space */ push %olimit /* olimit space */
@ -156,11 +177,11 @@ HUF_decompress4X1_usingDTable_internal_fast_asm_loop:
shrq $2, %r15 shrq $2, %r15
movq %ip0, %rax /* rax = ip0 */ movq %ip0, %rax /* rax = ip0 */
movq 40(%rsp), %rdx /* rdx = ilimit */ movq 40(%rsp), %rdx /* rdx = ilowest */
subq %rdx, %rax /* rax = ip0 - ilimit */ subq %rdx, %rax /* rax = ip0 - ilowest */
movq %rax, %rbx /* rbx = ip0 - ilimit */ movq %rax, %rbx /* rbx = ip0 - ilowest */
/* rdx = (ip0 - ilimit) / 7 */ /* rdx = (ip0 - ilowest) / 7 */
movabsq $2635249153387078803, %rdx movabsq $2635249153387078803, %rdx
mulq %rdx mulq %rdx
subq %rdx, %rbx subq %rdx, %rbx
@ -183,9 +204,8 @@ HUF_decompress4X1_usingDTable_internal_fast_asm_loop:
/* If (op3 + 20 > olimit) */ /* If (op3 + 20 > olimit) */
movq %op3, %rax /* rax = op3 */ movq %op3, %rax /* rax = op3 */
addq $20, %rax /* rax = op3 + 20 */ cmpq %rax, %olimit /* op3 == olimit */
cmpq %rax, %olimit /* op3 + 20 > olimit */ je .L_4X1_exit
jb .L_4X1_exit
/* If (ip1 < ip0) go to exit */ /* If (ip1 < ip0) go to exit */
cmpq %ip0, %ip1 cmpq %ip0, %ip1
@ -316,7 +336,7 @@ HUF_decompress4X1_usingDTable_internal_fast_asm_loop:
/* Restore stack (oend & olimit) */ /* Restore stack (oend & olimit) */
pop %rax /* olimit */ pop %rax /* olimit */
pop %rax /* oend */ pop %rax /* oend */
pop %rax /* ilimit */ pop %rax /* ilowest */
pop %rax /* arg */ pop %rax /* arg */
/* Save ip / op / bits */ /* Save ip / op / bits */
@ -387,7 +407,7 @@ HUF_decompress4X2_usingDTable_internal_fast_asm_loop:
movq 96(%rax), %dtable movq 96(%rax), %dtable
push %rax /* argument */ push %rax /* argument */
push %rax /* olimit */ push %rax /* olimit */
push 104(%rax) /* ilimit */ push 104(%rax) /* ilowest */
movq 112(%rax), %rax movq 112(%rax), %rax
push %rax /* oend3 */ push %rax /* oend3 */
@ -414,9 +434,9 @@ HUF_decompress4X2_usingDTable_internal_fast_asm_loop:
/* We can consume up to 7 input bytes each iteration. */ /* We can consume up to 7 input bytes each iteration. */
movq %ip0, %rax /* rax = ip0 */ movq %ip0, %rax /* rax = ip0 */
movq 40(%rsp), %rdx /* rdx = ilimit */ movq 40(%rsp), %rdx /* rdx = ilowest */
subq %rdx, %rax /* rax = ip0 - ilimit */ subq %rdx, %rax /* rax = ip0 - ilowest */
movq %rax, %r15 /* r15 = ip0 - ilimit */ movq %rax, %r15 /* r15 = ip0 - ilowest */
/* rdx = rax / 7 */ /* rdx = rax / 7 */
movabsq $2635249153387078803, %rdx movabsq $2635249153387078803, %rdx
@ -426,7 +446,7 @@ HUF_decompress4X2_usingDTable_internal_fast_asm_loop:
addq %r15, %rdx addq %r15, %rdx
shrq $2, %rdx shrq $2, %rdx
/* r15 = (ip0 - ilimit) / 7 */ /* r15 = (ip0 - ilowest) / 7 */
movq %rdx, %r15 movq %rdx, %r15
/* r15 = min(r15, min(oend0 - op0, oend1 - op1, oend2 - op2, oend3 - op3) / 10) */ /* r15 = min(r15, min(oend0 - op0, oend1 - op1, oend2 - op2, oend3 - op3) / 10) */
@ -467,9 +487,8 @@ HUF_decompress4X2_usingDTable_internal_fast_asm_loop:
/* If (op3 + 10 > olimit) */ /* If (op3 + 10 > olimit) */
movq %op3, %rax /* rax = op3 */ movq %op3, %rax /* rax = op3 */
addq $10, %rax /* rax = op3 + 10 */ cmpq %rax, %olimit /* op3 == olimit */
cmpq %rax, %olimit /* op3 + 10 > olimit */ je .L_4X2_exit
jb .L_4X2_exit
/* If (ip1 < ip0) go to exit */ /* If (ip1 < ip0) go to exit */
cmpq %ip0, %ip1 cmpq %ip0, %ip1
@ -537,7 +556,7 @@ HUF_decompress4X2_usingDTable_internal_fast_asm_loop:
pop %rax /* oend1 */ pop %rax /* oend1 */
pop %rax /* oend2 */ pop %rax /* oend2 */
pop %rax /* oend3 */ pop %rax /* oend3 */
pop %rax /* ilimit */ pop %rax /* ilowest */
pop %rax /* olimit */ pop %rax /* olimit */
pop %rax /* arg */ pop %rax /* arg */
@ -55,18 +55,19 @@
/*-******************************************************* /*-*******************************************************
* Dependencies * Dependencies
*********************************************************/ *********************************************************/
#include "../common/allocations.h" /* ZSTD_customMalloc, ZSTD_customCalloc, ZSTD_customFree */
#include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */ #include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
#include "../common/allocations.h" /* ZSTD_customMalloc, ZSTD_customCalloc, ZSTD_customFree */
#include "../common/error_private.h"
#include "../common/zstd_internal.h" /* blockProperties_t */
#include "../common/mem.h" /* low level memory routines */ #include "../common/mem.h" /* low level memory routines */
#include "../common/bits.h" /* ZSTD_highbit32 */
#define FSE_STATIC_LINKING_ONLY #define FSE_STATIC_LINKING_ONLY
#include "../common/fse.h" #include "../common/fse.h"
#include "../common/huf.h" #include "../common/huf.h"
#include "../common/xxhash.h" /* XXH64_reset, XXH64_update, XXH64_digest, XXH64 */ #include "../common/xxhash.h" /* XXH64_reset, XXH64_update, XXH64_digest, XXH64 */
#include "../common/zstd_internal.h" /* blockProperties_t */
#include "zstd_decompress_internal.h" /* ZSTD_DCtx */ #include "zstd_decompress_internal.h" /* ZSTD_DCtx */
#include "zstd_ddict.h" /* ZSTD_DDictDictContent */ #include "zstd_ddict.h" /* ZSTD_DDictDictContent */
#include "zstd_decompress_block.h" /* ZSTD_decompressBlock_internal */ #include "zstd_decompress_block.h" /* ZSTD_decompressBlock_internal */
#include "../common/bits.h" /* ZSTD_highbit32 */
#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1) #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
# include "../legacy/zstd_legacy.h" # include "../legacy/zstd_legacy.h"
@ -245,6 +246,7 @@ static void ZSTD_DCtx_resetParameters(ZSTD_DCtx* dctx)
dctx->forceIgnoreChecksum = ZSTD_d_validateChecksum; dctx->forceIgnoreChecksum = ZSTD_d_validateChecksum;
dctx->refMultipleDDicts = ZSTD_rmd_refSingleDDict; dctx->refMultipleDDicts = ZSTD_rmd_refSingleDDict;
dctx->disableHufAsm = 0; dctx->disableHufAsm = 0;
dctx->maxBlockSizeParam = 0;
} }
static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx) static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx)
@ -265,6 +267,7 @@ static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx)
#endif #endif
dctx->noForwardProgress = 0; dctx->noForwardProgress = 0;
dctx->oversizedDuration = 0; dctx->oversizedDuration = 0;
dctx->isFrameDecompression = 1;
#if DYNAMIC_BMI2 #if DYNAMIC_BMI2
dctx->bmi2 = ZSTD_cpuSupportsBmi2(); dctx->bmi2 = ZSTD_cpuSupportsBmi2();
#endif #endif
@ -726,17 +729,17 @@ static ZSTD_frameSizeInfo ZSTD_errorFrameSizeInfo(size_t ret)
return frameSizeInfo; return frameSizeInfo;
} }
static ZSTD_frameSizeInfo ZSTD_findFrameSizeInfo(const void* src, size_t srcSize) static ZSTD_frameSizeInfo ZSTD_findFrameSizeInfo(const void* src, size_t srcSize, ZSTD_format_e format)
{ {
ZSTD_frameSizeInfo frameSizeInfo; ZSTD_frameSizeInfo frameSizeInfo;
ZSTD_memset(&frameSizeInfo, 0, sizeof(ZSTD_frameSizeInfo)); ZSTD_memset(&frameSizeInfo, 0, sizeof(ZSTD_frameSizeInfo));
#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
if (ZSTD_isLegacy(src, srcSize)) if (format == ZSTD_f_zstd1 && ZSTD_isLegacy(src, srcSize))
return ZSTD_findFrameSizeInfoLegacy(src, srcSize); return ZSTD_findFrameSizeInfoLegacy(src, srcSize);
#endif #endif
if ((srcSize >= ZSTD_SKIPPABLEHEADERSIZE) if (format == ZSTD_f_zstd1 && (srcSize >= ZSTD_SKIPPABLEHEADERSIZE)
&& (MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { && (MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
frameSizeInfo.compressedSize = readSkippableFrameSize(src, srcSize); frameSizeInfo.compressedSize = readSkippableFrameSize(src, srcSize);
assert(ZSTD_isError(frameSizeInfo.compressedSize) || assert(ZSTD_isError(frameSizeInfo.compressedSize) ||
@ -750,7 +753,7 @@ static ZSTD_frameSizeInfo ZSTD_findFrameSizeInfo(const void* src, size_t srcSize
ZSTD_frameHeader zfh; ZSTD_frameHeader zfh;
/* Extract Frame Header */ /* Extract Frame Header */
{ size_t const ret = ZSTD_getFrameHeader(&zfh, src, srcSize); { size_t const ret = ZSTD_getFrameHeader_advanced(&zfh, src, srcSize, format);
if (ZSTD_isError(ret)) if (ZSTD_isError(ret))
return ZSTD_errorFrameSizeInfo(ret); return ZSTD_errorFrameSizeInfo(ret);
if (ret > 0) if (ret > 0)
@ -793,15 +796,17 @@ static ZSTD_frameSizeInfo ZSTD_findFrameSizeInfo(const void* src, size_t srcSize
} }
} }
static size_t ZSTD_findFrameCompressedSize_advanced(const void *src, size_t srcSize, ZSTD_format_e format) {
ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize, format);
return frameSizeInfo.compressedSize;
}
/** ZSTD_findFrameCompressedSize() : /** ZSTD_findFrameCompressedSize() :
* compatible with legacy mode * See docs in zstd.h
* `src` must point to the start of a ZSTD frame, ZSTD legacy frame, or skippable frame * Note: compatible with legacy mode */
* `srcSize` must be at least as large as the frame contained
* @return : the compressed size of the frame starting at `src` */
size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize) size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize)
{ {
ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize); return ZSTD_findFrameCompressedSize_advanced(src, srcSize, ZSTD_f_zstd1);
return frameSizeInfo.compressedSize;
} }
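
ZSTD_findFrameCompressedSize() (now a thin wrapper over an internal _advanced variant that also takes a ZSTD_format_e) is typically used to step over concatenated frames in a buffer. A small usage sketch against the public API; the `_sketch` name is illustrative:

    #include <stdio.h>
    #include <zstd.h>

    /* Sketch: walk a buffer holding one or more concatenated zstd frames
     * (skippable frames included) and report each frame's compressed size. */
    static int listFrames_sketch(const void* src, size_t srcSize)
    {
        const char* ip = (const char*)src;
        while (srcSize > 0) {
            size_t const frameSize = ZSTD_findFrameCompressedSize(ip, srcSize);
            if (ZSTD_isError(frameSize)) {
                fprintf(stderr, "not a valid frame: %s\n", ZSTD_getErrorName(frameSize));
                return -1;
            }
            printf("frame: %zu compressed bytes\n", frameSize);
            ip      += frameSize;
            srcSize -= frameSize;
        }
        return 0;
    }
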
/** ZSTD_decompressBound() : /** ZSTD_decompressBound() :
@ -815,7 +820,7 @@ unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize)
unsigned long long bound = 0; unsigned long long bound = 0;
/* Iterate over each frame */ /* Iterate over each frame */
while (srcSize > 0) { while (srcSize > 0) {
ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize); ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize, ZSTD_f_zstd1);
size_t const compressedSize = frameSizeInfo.compressedSize; size_t const compressedSize = frameSizeInfo.compressedSize;
unsigned long long const decompressedBound = frameSizeInfo.decompressedBound; unsigned long long const decompressedBound = frameSizeInfo.decompressedBound;
if (ZSTD_isError(compressedSize) || decompressedBound == ZSTD_CONTENTSIZE_ERROR) if (ZSTD_isError(compressedSize) || decompressedBound == ZSTD_CONTENTSIZE_ERROR)
@ -835,7 +840,7 @@ size_t ZSTD_decompressionMargin(void const* src, size_t srcSize)
/* Iterate over each frame */ /* Iterate over each frame */
while (srcSize > 0) { while (srcSize > 0) {
ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize); ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize, ZSTD_f_zstd1);
size_t const compressedSize = frameSizeInfo.compressedSize; size_t const compressedSize = frameSizeInfo.compressedSize;
unsigned long long const decompressedBound = frameSizeInfo.decompressedBound; unsigned long long const decompressedBound = frameSizeInfo.decompressedBound;
ZSTD_frameHeader zfh; ZSTD_frameHeader zfh;
@ -971,6 +976,10 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
ip += frameHeaderSize; remainingSrcSize -= frameHeaderSize; ip += frameHeaderSize; remainingSrcSize -= frameHeaderSize;
} }
/* Shrink the blockSizeMax if enabled */
if (dctx->maxBlockSizeParam != 0)
dctx->fParams.blockSizeMax = MIN(dctx->fParams.blockSizeMax, (unsigned)dctx->maxBlockSizeParam);
/* Loop on each block */ /* Loop on each block */
while (1) { while (1) {
BYTE* oBlockEnd = oend; BYTE* oBlockEnd = oend;
@ -1003,7 +1012,8 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
switch(blockProperties.blockType) switch(blockProperties.blockType)
{ {
case bt_compressed: case bt_compressed:
decodedSize = ZSTD_decompressBlock_internal(dctx, op, (size_t)(oBlockEnd-op), ip, cBlockSize, /* frame */ 1, not_streaming); assert(dctx->isFrameDecompression == 1);
decodedSize = ZSTD_decompressBlock_internal(dctx, op, (size_t)(oBlockEnd-op), ip, cBlockSize, not_streaming);
break; break;
case bt_raw : case bt_raw :
/* Use oend instead of oBlockEnd because this function is safe to overlap. It uses memmove. */ /* Use oend instead of oBlockEnd because this function is safe to overlap. It uses memmove. */
@ -1016,12 +1026,14 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
default: default:
RETURN_ERROR(corruption_detected, "invalid block type"); RETURN_ERROR(corruption_detected, "invalid block type");
} }
FORWARD_IF_ERROR(decodedSize, "Block decompression failure");
if (ZSTD_isError(decodedSize)) return decodedSize; DEBUGLOG(5, "Decompressed block of dSize = %u", (unsigned)decodedSize);
if (dctx->validateChecksum) if (dctx->validateChecksum) {
XXH64_update(&dctx->xxhState, op, decodedSize); XXH64_update(&dctx->xxhState, op, decodedSize);
if (decodedSize != 0) }
if (decodedSize) /* support dst = NULL,0 */ {
op += decodedSize; op += decodedSize;
}
assert(ip != NULL); assert(ip != NULL);
ip += cBlockSize; ip += cBlockSize;
remainingSrcSize -= cBlockSize; remainingSrcSize -= cBlockSize;
@ -1051,7 +1063,9 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
return (size_t)(op-ostart); return (size_t)(op-ostart);
} }
static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx, static
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
void* dst, size_t dstCapacity, void* dst, size_t dstCapacity,
const void* src, size_t srcSize, const void* src, size_t srcSize,
const void* dict, size_t dictSize, const void* dict, size_t dictSize,
@ -1071,7 +1085,7 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
while (srcSize >= ZSTD_startingInputLength(dctx->format)) { while (srcSize >= ZSTD_startingInputLength(dctx->format)) {
#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
if (ZSTD_isLegacy(src, srcSize)) { if (dctx->format == ZSTD_f_zstd1 && ZSTD_isLegacy(src, srcSize)) {
size_t decodedSize; size_t decodedSize;
size_t const frameSize = ZSTD_findFrameCompressedSizeLegacy(src, srcSize); size_t const frameSize = ZSTD_findFrameCompressedSizeLegacy(src, srcSize);
if (ZSTD_isError(frameSize)) return frameSize; if (ZSTD_isError(frameSize)) return frameSize;
@ -1081,6 +1095,15 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
decodedSize = ZSTD_decompressLegacy(dst, dstCapacity, src, frameSize, dict, dictSize); decodedSize = ZSTD_decompressLegacy(dst, dstCapacity, src, frameSize, dict, dictSize);
if (ZSTD_isError(decodedSize)) return decodedSize; if (ZSTD_isError(decodedSize)) return decodedSize;
{
unsigned long long const expectedSize = ZSTD_getFrameContentSize(src, srcSize);
RETURN_ERROR_IF(expectedSize == ZSTD_CONTENTSIZE_ERROR, corruption_detected, "Corrupted frame header!");
if (expectedSize != ZSTD_CONTENTSIZE_UNKNOWN) {
RETURN_ERROR_IF(expectedSize != decodedSize, corruption_detected,
"Frame header size does not match decoded size!");
}
}
assert(decodedSize <= dstCapacity); assert(decodedSize <= dstCapacity);
dst = (BYTE*)dst + decodedSize; dst = (BYTE*)dst + decodedSize;
dstCapacity -= decodedSize; dstCapacity -= decodedSize;
@ -1092,7 +1115,7 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
} }
#endif #endif
if (srcSize >= 4) { if (dctx->format == ZSTD_f_zstd1 && srcSize >= 4) {
U32 const magicNumber = MEM_readLE32(src); U32 const magicNumber = MEM_readLE32(src);
DEBUGLOG(5, "reading magic number %08X", (unsigned)magicNumber); DEBUGLOG(5, "reading magic number %08X", (unsigned)magicNumber);
if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
@ -1319,7 +1342,8 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
{ {
case bt_compressed: case bt_compressed:
DEBUGLOG(5, "ZSTD_decompressContinue: case bt_compressed"); DEBUGLOG(5, "ZSTD_decompressContinue: case bt_compressed");
rSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 1, is_streaming); assert(dctx->isFrameDecompression == 1);
rSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, is_streaming);
dctx->expected = 0; /* Streaming not supported */ dctx->expected = 0; /* Streaming not supported */
break; break;
case bt_raw : case bt_raw :
@ -1388,6 +1412,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
case ZSTDds_decodeSkippableHeader: case ZSTDds_decodeSkippableHeader:
assert(src != NULL); assert(src != NULL);
assert(srcSize <= ZSTD_SKIPPABLEHEADERSIZE); assert(srcSize <= ZSTD_SKIPPABLEHEADERSIZE);
assert(dctx->format != ZSTD_f_zstd1_magicless);
ZSTD_memcpy(dctx->headerBuffer + (ZSTD_SKIPPABLEHEADERSIZE - srcSize), src, srcSize); /* complete skippable header */ ZSTD_memcpy(dctx->headerBuffer + (ZSTD_SKIPPABLEHEADERSIZE - srcSize), src, srcSize); /* complete skippable header */
dctx->expected = MEM_readLE32(dctx->headerBuffer + ZSTD_FRAMEIDSIZE); /* note : dctx->expected can grow seriously large, beyond local buffer size */ dctx->expected = MEM_readLE32(dctx->headerBuffer + ZSTD_FRAMEIDSIZE); /* note : dctx->expected can grow seriously large, beyond local buffer size */
dctx->stage = ZSTDds_skipFrame; dctx->stage = ZSTDds_skipFrame;
@@ -1548,6 +1573,7 @@ size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx)
dctx->litEntropy = dctx->fseEntropy = 0; dctx->litEntropy = dctx->fseEntropy = 0;
dctx->dictID = 0; dctx->dictID = 0;
dctx->bType = bt_reserved; dctx->bType = bt_reserved;
dctx->isFrameDecompression = 1;
ZSTD_STATIC_ASSERT(sizeof(dctx->entropy.rep) == sizeof(repStartValue)); ZSTD_STATIC_ASSERT(sizeof(dctx->entropy.rep) == sizeof(repStartValue));
ZSTD_memcpy(dctx->entropy.rep, repStartValue, sizeof(repStartValue)); /* initial repcodes */ ZSTD_memcpy(dctx->entropy.rep, repStartValue, sizeof(repStartValue)); /* initial repcodes */
dctx->LLTptr = dctx->entropy.LLTable; dctx->LLTptr = dctx->entropy.LLTable;
@@ -1819,6 +1845,10 @@ ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam)
bounds.lowerBound = 0; bounds.lowerBound = 0;
bounds.upperBound = 1; bounds.upperBound = 1;
return bounds; return bounds;
case ZSTD_d_maxBlockSize:
bounds.lowerBound = ZSTD_BLOCKSIZE_MAX_MIN;
bounds.upperBound = ZSTD_BLOCKSIZE_MAX;
return bounds;
default:; default:;
} }
@@ -1863,6 +1893,9 @@ size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int* value
case ZSTD_d_disableHuffmanAssembly: case ZSTD_d_disableHuffmanAssembly:
*value = (int)dctx->disableHufAsm; *value = (int)dctx->disableHufAsm;
return 0; return 0;
case ZSTD_d_maxBlockSize:
*value = dctx->maxBlockSizeParam;
return 0;
default:; default:;
} }
RETURN_ERROR(parameter_unsupported, ""); RETURN_ERROR(parameter_unsupported, "");
@@ -1900,6 +1933,10 @@ size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter dParam, int value
CHECK_DBOUNDS(ZSTD_d_disableHuffmanAssembly, value); CHECK_DBOUNDS(ZSTD_d_disableHuffmanAssembly, value);
dctx->disableHufAsm = value != 0; dctx->disableHufAsm = value != 0;
return 0; return 0;
case ZSTD_d_maxBlockSize:
if (value != 0) CHECK_DBOUNDS(ZSTD_d_maxBlockSize, value);
dctx->maxBlockSizeParam = value;
return 0;
default:; default:;
} }
RETURN_ERROR(parameter_unsupported, ""); RETURN_ERROR(parameter_unsupported, "");
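For context, a minimal sketch of how a caller could use the new ZSTD_d_maxBlockSize parameter to bound decoder buffer sizes; ZSTD_d_maxBlockSize is an experimental parameter, so this assumes ZSTD_STATIC_LINKING_ONLY, and the 16 KiB figure is an arbitrary example:

#define ZSTD_STATIC_LINKING_ONLY   /* ZSTD_d_maxBlockSize is an experimental parameter */
#include <zstd.h>

/* Create a DCtx that only accepts frames whose blocks are at most 16 KiB,
 * trading compatibility with larger-block frames for smaller streaming buffers.
 * Accepted range is [ZSTD_BLOCKSIZE_MAX_MIN, ZSTD_BLOCKSIZE_MAX]; 0 restores the default. */
static ZSTD_DCtx* createSmallBlockDCtx(void)
{
    ZSTD_DCtx* const dctx = ZSTD_createDCtx();
    if (dctx == NULL) return NULL;
    if (ZSTD_isError(ZSTD_DCtx_setParameter(dctx, ZSTD_d_maxBlockSize, 16 * 1024))) {
        ZSTD_freeDCtx(dctx);
        return NULL;
    }
    return dctx;
}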
@@ -1911,6 +1948,7 @@ size_t ZSTD_DCtx_reset(ZSTD_DCtx* dctx, ZSTD_ResetDirective reset)
|| (reset == ZSTD_reset_session_and_parameters) ) { || (reset == ZSTD_reset_session_and_parameters) ) {
dctx->streamStage = zdss_init; dctx->streamStage = zdss_init;
dctx->noForwardProgress = 0; dctx->noForwardProgress = 0;
dctx->isFrameDecompression = 1;
} }
if ( (reset == ZSTD_reset_parameters) if ( (reset == ZSTD_reset_parameters)
|| (reset == ZSTD_reset_session_and_parameters) ) { || (reset == ZSTD_reset_session_and_parameters) ) {
@@ -1927,11 +1965,17 @@ size_t ZSTD_sizeof_DStream(const ZSTD_DStream* dctx)
return ZSTD_sizeof_DCtx(dctx); return ZSTD_sizeof_DCtx(dctx);
} }
size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize) static size_t ZSTD_decodingBufferSize_internal(unsigned long long windowSize, unsigned long long frameContentSize, size_t blockSizeMax)
{ {
size_t const blockSize = (size_t) MIN(windowSize, ZSTD_BLOCKSIZE_MAX); size_t const blockSize = MIN((size_t)MIN(windowSize, ZSTD_BLOCKSIZE_MAX), blockSizeMax);
/* space is needed to store the litbuffer after the output of a given block without stomping the extDict of a previous run, as well as to cover both windows against wildcopy*/ /* We need blockSize + WILDCOPY_OVERLENGTH worth of buffer so that if a block
unsigned long long const neededRBSize = windowSize + blockSize + ZSTD_BLOCKSIZE_MAX + (WILDCOPY_OVERLENGTH * 2); * ends at windowSize + WILDCOPY_OVERLENGTH + 1 bytes, we can start writing
* the block at the beginning of the output buffer, and maintain a full window.
*
* We need another blockSize worth of buffer so that we can store split
* literals at the end of the block without overwriting the extDict window.
*/
unsigned long long const neededRBSize = windowSize + (blockSize * 2) + (WILDCOPY_OVERLENGTH * 2);
unsigned long long const neededSize = MIN(frameContentSize, neededRBSize); unsigned long long const neededSize = MIN(frameContentSize, neededRBSize);
size_t const minRBSize = (size_t) neededSize; size_t const minRBSize = (size_t) neededSize;
RETURN_ERROR_IF((unsigned long long)minRBSize != neededSize, RETURN_ERROR_IF((unsigned long long)minRBSize != neededSize,
@@ -1939,6 +1983,11 @@ size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long
return minRBSize; return minRBSize;
} }
size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize)
{
return ZSTD_decodingBufferSize_internal(windowSize, frameContentSize, ZSTD_BLOCKSIZE_MAX);
}
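A quick numeric check of the sizing rule above, assuming the default 128 KiB block size and WILDCOPY_OVERLENGTH == 32 as defined elsewhere in this library; ZSTD_decodingBufferSize_min() is an experimental zstd.h entry point:

#define ZSTD_STATIC_LINKING_ONLY   /* ZSTD_decodingBufferSize_min is experimental */
#include <stdio.h>
#include <zstd.h>

int main(void)
{
    /* 1 MiB window, unknown content size, 128 KiB blocks:
     * windowSize + 2*blockSize + 2*WILDCOPY_OVERLENGTH
     * = 1048576 + 2*131072 + 2*32 = 1310784 bytes */
    size_t const needed = ZSTD_decodingBufferSize_min(1ULL << 20, ZSTD_CONTENTSIZE_UNKNOWN);
    printf("min streaming output buffer: %zu bytes\n", needed);
    return 0;
}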
size_t ZSTD_estimateDStreamSize(size_t windowSize) size_t ZSTD_estimateDStreamSize(size_t windowSize)
{ {
size_t const blockSize = MIN(windowSize, ZSTD_BLOCKSIZE_MAX); size_t const blockSize = MIN(windowSize, ZSTD_BLOCKSIZE_MAX);
@@ -2134,12 +2183,12 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
if (zds->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN if (zds->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN
&& zds->fParams.frameType != ZSTD_skippableFrame && zds->fParams.frameType != ZSTD_skippableFrame
&& (U64)(size_t)(oend-op) >= zds->fParams.frameContentSize) { && (U64)(size_t)(oend-op) >= zds->fParams.frameContentSize) {
size_t const cSize = ZSTD_findFrameCompressedSize(istart, (size_t)(iend-istart)); size_t const cSize = ZSTD_findFrameCompressedSize_advanced(istart, (size_t)(iend-istart), zds->format);
if (cSize <= (size_t)(iend-istart)) { if (cSize <= (size_t)(iend-istart)) {
/* shortcut : using single-pass mode */ /* shortcut : using single-pass mode */
size_t const decompressedSize = ZSTD_decompress_usingDDict(zds, op, (size_t)(oend-op), istart, cSize, ZSTD_getDDict(zds)); size_t const decompressedSize = ZSTD_decompress_usingDDict(zds, op, (size_t)(oend-op), istart, cSize, ZSTD_getDDict(zds));
if (ZSTD_isError(decompressedSize)) return decompressedSize; if (ZSTD_isError(decompressedSize)) return decompressedSize;
DEBUGLOG(4, "shortcut to single-pass ZSTD_decompress_usingDDict()") DEBUGLOG(4, "shortcut to single-pass ZSTD_decompress_usingDDict()");
assert(istart != NULL); assert(istart != NULL);
ip = istart + cSize; ip = istart + cSize;
op = op ? op + decompressedSize : op; /* can occur if frameContentSize = 0 (empty frame) */ op = op ? op + decompressedSize : op; /* can occur if frameContentSize = 0 (empty frame) */
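For context, a minimal sketch of walking concatenated frames with ZSTD_findFrameCompressedSize(), the same primitive the single-pass shortcut above relies on; countFrames() is a hypothetical helper and assumes standard zstd-format frames:

#include <zstd.h>

/* Count how many complete zstd (or skippable) frames are stored back-to-back
 * in a buffer, advancing by the exact compressed size of each frame. */
static size_t countFrames(const void* src, size_t srcSize)
{
    const char* ip = (const char*)src;
    size_t remaining = srcSize;
    size_t nbFrames = 0;
    while (remaining > 0) {
        size_t const frameSize = ZSTD_findFrameCompressedSize(ip, remaining);
        if (ZSTD_isError(frameSize) || frameSize > remaining) break;
        ip += frameSize;
        remaining -= frameSize;
        nbFrames++;
    }
    return nbFrames;
}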
@@ -2161,7 +2210,8 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
DEBUGLOG(4, "Consume header"); DEBUGLOG(4, "Consume header");
FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDDict(zds, ZSTD_getDDict(zds)), ""); FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDDict(zds, ZSTD_getDDict(zds)), "");
if ((MEM_readLE32(zds->headerBuffer) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */ if (zds->format == ZSTD_f_zstd1
&& (MEM_readLE32(zds->headerBuffer) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */
zds->expected = MEM_readLE32(zds->headerBuffer + ZSTD_FRAMEIDSIZE); zds->expected = MEM_readLE32(zds->headerBuffer + ZSTD_FRAMEIDSIZE);
zds->stage = ZSTDds_skipFrame; zds->stage = ZSTDds_skipFrame;
} else { } else {
@@ -2177,11 +2227,13 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
zds->fParams.windowSize = MAX(zds->fParams.windowSize, 1U << ZSTD_WINDOWLOG_ABSOLUTEMIN); zds->fParams.windowSize = MAX(zds->fParams.windowSize, 1U << ZSTD_WINDOWLOG_ABSOLUTEMIN);
RETURN_ERROR_IF(zds->fParams.windowSize > zds->maxWindowSize, RETURN_ERROR_IF(zds->fParams.windowSize > zds->maxWindowSize,
frameParameter_windowTooLarge, ""); frameParameter_windowTooLarge, "");
if (zds->maxBlockSizeParam != 0)
zds->fParams.blockSizeMax = MIN(zds->fParams.blockSizeMax, (unsigned)zds->maxBlockSizeParam);
/* Adapt buffer sizes to frame header instructions */ /* Adapt buffer sizes to frame header instructions */
{ size_t const neededInBuffSize = MAX(zds->fParams.blockSizeMax, 4 /* frame checksum */); { size_t const neededInBuffSize = MAX(zds->fParams.blockSizeMax, 4 /* frame checksum */);
size_t const neededOutBuffSize = zds->outBufferMode == ZSTD_bm_buffered size_t const neededOutBuffSize = zds->outBufferMode == ZSTD_bm_buffered
? ZSTD_decodingBufferSize_min(zds->fParams.windowSize, zds->fParams.frameContentSize) ? ZSTD_decodingBufferSize_internal(zds->fParams.windowSize, zds->fParams.frameContentSize, zds->fParams.blockSizeMax)
: 0; : 0;
ZSTD_DCtx_updateOversizedDuration(zds, neededInBuffSize, neededOutBuffSize); ZSTD_DCtx_updateOversizedDuration(zds, neededInBuffSize, neededOutBuffSize);

View File

@@ -51,6 +51,13 @@ static void ZSTD_copy4(void* dst, const void* src) { ZSTD_memcpy(dst, src, 4); }
* Block decoding * Block decoding
***************************************************************/ ***************************************************************/
static size_t ZSTD_blockSizeMax(ZSTD_DCtx const* dctx)
{
size_t const blockSizeMax = dctx->isFrameDecompression ? dctx->fParams.blockSizeMax : ZSTD_BLOCKSIZE_MAX;
assert(blockSizeMax <= ZSTD_BLOCKSIZE_MAX);
return blockSizeMax;
}
/*! ZSTD_getcBlockSize() : /*! ZSTD_getcBlockSize() :
* Provides the size of compressed block from block header `src` */ * Provides the size of compressed block from block header `src` */
size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
@@ -73,41 +80,49 @@ size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
static void ZSTD_allocateLiteralsBuffer(ZSTD_DCtx* dctx, void* const dst, const size_t dstCapacity, const size_t litSize, static void ZSTD_allocateLiteralsBuffer(ZSTD_DCtx* dctx, void* const dst, const size_t dstCapacity, const size_t litSize,
const streaming_operation streaming, const size_t expectedWriteSize, const unsigned splitImmediately) const streaming_operation streaming, const size_t expectedWriteSize, const unsigned splitImmediately)
{ {
if (streaming == not_streaming && dstCapacity > ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH + litSize + WILDCOPY_OVERLENGTH) size_t const blockSizeMax = ZSTD_blockSizeMax(dctx);
{ assert(litSize <= blockSizeMax);
/* room for litbuffer to fit without read faulting */ assert(dctx->isFrameDecompression || streaming == not_streaming);
dctx->litBuffer = (BYTE*)dst + ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH; assert(expectedWriteSize <= blockSizeMax);
if (streaming == not_streaming && dstCapacity > blockSizeMax + WILDCOPY_OVERLENGTH + litSize + WILDCOPY_OVERLENGTH) {
/* If we aren't streaming, we can just put the literals after the output
* of the current block. We don't need to worry about overwriting the
* extDict of our window, because it doesn't exist.
* So if we have space after the end of the block, just put it there.
*/
dctx->litBuffer = (BYTE*)dst + blockSizeMax + WILDCOPY_OVERLENGTH;
dctx->litBufferEnd = dctx->litBuffer + litSize; dctx->litBufferEnd = dctx->litBuffer + litSize;
dctx->litBufferLocation = ZSTD_in_dst; dctx->litBufferLocation = ZSTD_in_dst;
} } else if (litSize <= ZSTD_LITBUFFEREXTRASIZE) {
else if (litSize > ZSTD_LITBUFFEREXTRASIZE) /* Literals fit entirely within the extra buffer, put them there to avoid
{ * having to split the literals.
/* won't fit in litExtraBuffer, so it will be split between end of dst and extra buffer */ */
dctx->litBuffer = dctx->litExtraBuffer;
dctx->litBufferEnd = dctx->litBuffer + litSize;
dctx->litBufferLocation = ZSTD_not_in_dst;
} else {
assert(blockSizeMax > ZSTD_LITBUFFEREXTRASIZE);
/* Literals must be split between the output block and the extra lit
* buffer. We fill the extra lit buffer with the tail of the literals,
* and put the rest of the literals at the end of the block, with
* WILDCOPY_OVERLENGTH of buffer room to allow for overreads.
* This MUST not write more than our maxBlockSize beyond dst, because in
* streaming mode, that could overwrite part of our extDict window.
*/
if (splitImmediately) { if (splitImmediately) {
/* won't fit in litExtraBuffer, so it will be split between end of dst and extra buffer */ /* won't fit in litExtraBuffer, so it will be split between end of dst and extra buffer */
dctx->litBuffer = (BYTE*)dst + expectedWriteSize - litSize + ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH; dctx->litBuffer = (BYTE*)dst + expectedWriteSize - litSize + ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH;
dctx->litBufferEnd = dctx->litBuffer + litSize - ZSTD_LITBUFFEREXTRASIZE; dctx->litBufferEnd = dctx->litBuffer + litSize - ZSTD_LITBUFFEREXTRASIZE;
} } else {
else {
/* initially this will be stored entirely in dst during huffman decoding, it will partially be shifted to litExtraBuffer after */ /* initially this will be stored entirely in dst during huffman decoding, it will partially be shifted to litExtraBuffer after */
dctx->litBuffer = (BYTE*)dst + expectedWriteSize - litSize; dctx->litBuffer = (BYTE*)dst + expectedWriteSize - litSize;
dctx->litBufferEnd = (BYTE*)dst + expectedWriteSize; dctx->litBufferEnd = (BYTE*)dst + expectedWriteSize;
} }
dctx->litBufferLocation = ZSTD_split; dctx->litBufferLocation = ZSTD_split;
} assert(dctx->litBufferEnd <= (BYTE*)dst + expectedWriteSize);
else
{
/* fits entirely within litExtraBuffer, so no split is necessary */
dctx->litBuffer = dctx->litExtraBuffer;
dctx->litBufferEnd = dctx->litBuffer + litSize;
dctx->litBufferLocation = ZSTD_not_in_dst;
} }
} }
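Restated for readers following the reordered branches above, the placement rule reduces to (illustrative summary of the logic, not additional code):

/*
 * not streaming && dstCapacity > blockSizeMax + 2*WILDCOPY_OVERLENGTH + litSize
 *     -> literals stored in dst, just past the current block's output   (ZSTD_in_dst)
 * litSize <= ZSTD_LITBUFFEREXTRASIZE
 *     -> literals stored entirely in litExtraBuffer                     (ZSTD_not_in_dst)
 * otherwise
 *     -> tail of literals in litExtraBuffer, head at the end of dst     (ZSTD_split)
 */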
/* Hidden declaration for fullbench */
size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
const void* src, size_t srcSize,
void* dst, size_t dstCapacity, const streaming_operation streaming);
/*! ZSTD_decodeLiteralsBlock() : /*! ZSTD_decodeLiteralsBlock() :
* Where it is possible to do so without being stomped by the output during decompression, the literals block will be stored * Where it is possible to do so without being stomped by the output during decompression, the literals block will be stored
* in the dstBuffer. If there is room to do so, it will be stored in full in the excess dst space after where the current * in the dstBuffer. If there is room to do so, it will be stored in full in the excess dst space after where the current
@@ -116,7 +131,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
* *
* @return : nb of bytes read from src (< srcSize ) * @return : nb of bytes read from src (< srcSize )
* note : symbol not declared but exposed for fullbench */ * note : symbol not declared but exposed for fullbench */
size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, static size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
const void* src, size_t srcSize, /* note : srcSize < BLOCKSIZE */ const void* src, size_t srcSize, /* note : srcSize < BLOCKSIZE */
void* dst, size_t dstCapacity, const streaming_operation streaming) void* dst, size_t dstCapacity, const streaming_operation streaming)
{ {
@@ -125,6 +140,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
{ const BYTE* const istart = (const BYTE*) src; { const BYTE* const istart = (const BYTE*) src;
symbolEncodingType_e const litEncType = (symbolEncodingType_e)(istart[0] & 3); symbolEncodingType_e const litEncType = (symbolEncodingType_e)(istart[0] & 3);
size_t const blockSizeMax = ZSTD_blockSizeMax(dctx);
switch(litEncType) switch(litEncType)
{ {
@@ -140,7 +156,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
U32 const lhlCode = (istart[0] >> 2) & 3; U32 const lhlCode = (istart[0] >> 2) & 3;
U32 const lhc = MEM_readLE32(istart); U32 const lhc = MEM_readLE32(istart);
size_t hufSuccess; size_t hufSuccess;
size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity); size_t expectedWriteSize = MIN(blockSizeMax, dstCapacity);
int const flags = 0 int const flags = 0
| (ZSTD_DCtx_get_bmi2(dctx) ? HUF_flags_bmi2 : 0) | (ZSTD_DCtx_get_bmi2(dctx) ? HUF_flags_bmi2 : 0)
| (dctx->disableHufAsm ? HUF_flags_disableAsm : 0); | (dctx->disableHufAsm ? HUF_flags_disableAsm : 0);
@@ -167,7 +183,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
break; break;
} }
RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled"); RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, ""); RETURN_ERROR_IF(litSize > blockSizeMax, corruption_detected, "");
if (!singleStream) if (!singleStream)
RETURN_ERROR_IF(litSize < MIN_LITERALS_FOR_4_STREAMS, literals_headerWrong, RETURN_ERROR_IF(litSize < MIN_LITERALS_FOR_4_STREAMS, literals_headerWrong,
"Not enough literals (%zu) for the 4-streams mode (min %u)", "Not enough literals (%zu) for the 4-streams mode (min %u)",
@@ -214,10 +230,12 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
} }
if (dctx->litBufferLocation == ZSTD_split) if (dctx->litBufferLocation == ZSTD_split)
{ {
assert(litSize > ZSTD_LITBUFFEREXTRASIZE);
ZSTD_memcpy(dctx->litExtraBuffer, dctx->litBufferEnd - ZSTD_LITBUFFEREXTRASIZE, ZSTD_LITBUFFEREXTRASIZE); ZSTD_memcpy(dctx->litExtraBuffer, dctx->litBufferEnd - ZSTD_LITBUFFEREXTRASIZE, ZSTD_LITBUFFEREXTRASIZE);
ZSTD_memmove(dctx->litBuffer + ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH, dctx->litBuffer, litSize - ZSTD_LITBUFFEREXTRASIZE); ZSTD_memmove(dctx->litBuffer + ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH, dctx->litBuffer, litSize - ZSTD_LITBUFFEREXTRASIZE);
dctx->litBuffer += ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH; dctx->litBuffer += ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH;
dctx->litBufferEnd -= WILDCOPY_OVERLENGTH; dctx->litBufferEnd -= WILDCOPY_OVERLENGTH;
assert(dctx->litBufferEnd <= (BYTE*)dst + blockSizeMax);
} }
RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected, ""); RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected, "");
@@ -232,7 +250,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
case set_basic: case set_basic:
{ size_t litSize, lhSize; { size_t litSize, lhSize;
U32 const lhlCode = ((istart[0]) >> 2) & 3; U32 const lhlCode = ((istart[0]) >> 2) & 3;
size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity); size_t expectedWriteSize = MIN(blockSizeMax, dstCapacity);
switch(lhlCode) switch(lhlCode)
{ {
case 0: case 2: default: /* note : default is impossible, since lhlCode into [0..3] */ case 0: case 2: default: /* note : default is impossible, since lhlCode into [0..3] */
@@ -251,6 +269,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
} }
RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled"); RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
RETURN_ERROR_IF(litSize > blockSizeMax, corruption_detected, "");
RETURN_ERROR_IF(expectedWriteSize < litSize, dstSize_tooSmall, ""); RETURN_ERROR_IF(expectedWriteSize < litSize, dstSize_tooSmall, "");
ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 1); ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 1);
if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) { /* risk reading beyond src buffer with wildcopy */ if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) { /* risk reading beyond src buffer with wildcopy */
@@ -279,7 +298,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
case set_rle: case set_rle:
{ U32 const lhlCode = ((istart[0]) >> 2) & 3; { U32 const lhlCode = ((istart[0]) >> 2) & 3;
size_t litSize, lhSize; size_t litSize, lhSize;
size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity); size_t expectedWriteSize = MIN(blockSizeMax, dstCapacity);
switch(lhlCode) switch(lhlCode)
{ {
case 0: case 2: default: /* note : default is impossible, since lhlCode into [0..3] */ case 0: case 2: default: /* note : default is impossible, since lhlCode into [0..3] */
@@ -298,7 +317,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
break; break;
} }
RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled"); RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, ""); RETURN_ERROR_IF(litSize > blockSizeMax, corruption_detected, "");
RETURN_ERROR_IF(expectedWriteSize < litSize, dstSize_tooSmall, ""); RETURN_ERROR_IF(expectedWriteSize < litSize, dstSize_tooSmall, "");
ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 1); ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 1);
if (dctx->litBufferLocation == ZSTD_split) if (dctx->litBufferLocation == ZSTD_split)
@@ -320,6 +339,18 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
} }
} }
/* Hidden declaration for fullbench */
size_t ZSTD_decodeLiteralsBlock_wrapper(ZSTD_DCtx* dctx,
const void* src, size_t srcSize,
void* dst, size_t dstCapacity);
size_t ZSTD_decodeLiteralsBlock_wrapper(ZSTD_DCtx* dctx,
const void* src, size_t srcSize,
void* dst, size_t dstCapacity)
{
dctx->isFrameDecompression = 0;
return ZSTD_decodeLiteralsBlock(dctx, src, srcSize, dst, dstCapacity, not_streaming);
}
/* Default FSE distribution tables. /* Default FSE distribution tables.
* These are pre-calculated FSE decoding tables using default distributions as defined in specification : * These are pre-calculated FSE decoding tables using default distributions as defined in specification :
* https://github.com/facebook/zstd/blob/release/doc/zstd_compression_format.md#default-distributions * https://github.com/facebook/zstd/blob/release/doc/zstd_compression_format.md#default-distributions
@@ -675,11 +706,6 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
/* SeqHead */ /* SeqHead */
nbSeq = *ip++; nbSeq = *ip++;
if (!nbSeq) {
*nbSeqPtr=0;
RETURN_ERROR_IF(srcSize != 1, srcSize_wrong, "");
return 1;
}
if (nbSeq > 0x7F) { if (nbSeq > 0x7F) {
if (nbSeq == 0xFF) { if (nbSeq == 0xFF) {
RETURN_ERROR_IF(ip+2 > iend, srcSize_wrong, ""); RETURN_ERROR_IF(ip+2 > iend, srcSize_wrong, "");
@@ -692,8 +718,16 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
} }
*nbSeqPtr = nbSeq; *nbSeqPtr = nbSeq;
if (nbSeq == 0) {
/* No sequence : section ends immediately */
RETURN_ERROR_IF(ip != iend, corruption_detected,
"extraneous data present in the Sequences section");
return (size_t)(ip - istart);
}
/* FSE table descriptors */ /* FSE table descriptors */
RETURN_ERROR_IF(ip+1 > iend, srcSize_wrong, ""); /* minimum possible size: 1 byte for symbol encoding types */ RETURN_ERROR_IF(ip+1 > iend, srcSize_wrong, ""); /* minimum possible size: 1 byte for symbol encoding types */
RETURN_ERROR_IF(*ip & 3, corruption_detected, ""); /* The last field, Reserved, must be all-zeroes. */
{ symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6); { symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6);
symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3); symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3);
symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3); symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3);
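For context, an illustrative parse of the compression-modes byte that the new Reserved-bits check above validates; the field layout follows the zstd format specification, and the names used here (parseModesByte and the mode enum) are hypothetical:

/* The byte after the sequence count packs four 2-bit fields:
 * bits 7-6 Literals_Lengths_Mode, 5-4 Offsets_Mode, 3-2 Match_Lengths_Mode,
 * bits 1-0 Reserved, which must be zero. */
typedef enum { Predefined_Mode = 0, RLE_Mode = 1, FSE_Compressed_Mode = 2, Repeat_Mode = 3 } SeqMode;

static int parseModesByte(unsigned char b, SeqMode* ll, SeqMode* of, SeqMode* ml)
{
    *ll = (SeqMode)(b >> 6);
    *of = (SeqMode)((b >> 4) & 3);
    *ml = (SeqMode)((b >> 2) & 3);
    return (b & 3) == 0;   /* 1 = valid, 0 = corrupted (Reserved bits set) */
}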
@@ -840,7 +874,7 @@ static void ZSTD_safecopy(BYTE* op, const BYTE* const oend_w, BYTE const* ip, pt
/* ZSTD_safecopyDstBeforeSrc(): /* ZSTD_safecopyDstBeforeSrc():
* This version allows overlap with dst before src, or handles the non-overlap case with dst after src * This version allows overlap with dst before src, or handles the non-overlap case with dst after src
* Kept separate from more common ZSTD_safecopy case to avoid performance impact to the safecopy common case */ * Kept separate from more common ZSTD_safecopy case to avoid performance impact to the safecopy common case */
static void ZSTD_safecopyDstBeforeSrc(BYTE* op, BYTE const* ip, ptrdiff_t length) { static void ZSTD_safecopyDstBeforeSrc(BYTE* op, const BYTE* ip, ptrdiff_t length) {
ptrdiff_t const diff = op - ip; ptrdiff_t const diff = op - ip;
BYTE* const oend = op + length; BYTE* const oend = op + length;
@@ -869,6 +903,7 @@ static void ZSTD_safecopyDstBeforeSrc(BYTE* op, BYTE const* ip, ptrdiff_t length
* to be optimized for many small sequences, since those fall into ZSTD_execSequence(). * to be optimized for many small sequences, since those fall into ZSTD_execSequence().
*/ */
FORCE_NOINLINE FORCE_NOINLINE
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
size_t ZSTD_execSequenceEnd(BYTE* op, size_t ZSTD_execSequenceEnd(BYTE* op,
BYTE* const oend, seq_t sequence, BYTE* const oend, seq_t sequence,
const BYTE** litPtr, const BYTE* const litLimit, const BYTE** litPtr, const BYTE* const litLimit,
@@ -916,6 +951,7 @@ size_t ZSTD_execSequenceEnd(BYTE* op,
* This version is intended to be used during instances where the litBuffer is still split. It is kept separate to avoid performance impact for the good case. * This version is intended to be used during instances where the litBuffer is still split. It is kept separate to avoid performance impact for the good case.
*/ */
FORCE_NOINLINE FORCE_NOINLINE
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
size_t ZSTD_execSequenceEndSplitLitBuffer(BYTE* op, size_t ZSTD_execSequenceEndSplitLitBuffer(BYTE* op,
BYTE* const oend, const BYTE* const oend_w, seq_t sequence, BYTE* const oend, const BYTE* const oend_w, seq_t sequence,
const BYTE** litPtr, const BYTE* const litLimit, const BYTE** litPtr, const BYTE* const litLimit,
@@ -961,6 +997,7 @@ size_t ZSTD_execSequenceEndSplitLitBuffer(BYTE* op,
} }
HINT_INLINE HINT_INLINE
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
size_t ZSTD_execSequence(BYTE* op, size_t ZSTD_execSequence(BYTE* op,
BYTE* const oend, seq_t sequence, BYTE* const oend, seq_t sequence,
const BYTE** litPtr, const BYTE* const litLimit, const BYTE** litPtr, const BYTE* const litLimit,
@@ -1059,6 +1096,7 @@ size_t ZSTD_execSequence(BYTE* op,
} }
HINT_INLINE HINT_INLINE
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
size_t ZSTD_execSequenceSplitLitBuffer(BYTE* op, size_t ZSTD_execSequenceSplitLitBuffer(BYTE* op,
BYTE* const oend, const BYTE* const oend_w, seq_t sequence, BYTE* const oend, const BYTE* const oend_w, seq_t sequence,
const BYTE** litPtr, const BYTE* const litLimit, const BYTE** litPtr, const BYTE* const litLimit,
@@ -1181,14 +1219,20 @@ ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, U16
typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e; typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e;
/**
* ZSTD_decodeSequence():
* @p longOffsets : tells the decoder to reload more bits while decoding large offsets
* only used in 32-bit mode
* @return : Sequence (litL + matchL + offset)
*/
FORCE_INLINE_TEMPLATE seq_t FORCE_INLINE_TEMPLATE seq_t
ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets) ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets, const int isLastSeq)
{ {
seq_t seq; seq_t seq;
/* /*
* ZSTD_seqSymbol is a structure with a total of 64 bits wide. So it can be * ZSTD_seqSymbol is a 64 bits wide structure.
* loaded in one operation and extracted its fields by simply shifting or * It can be loaded in one operation
* bit-extracting on aarch64. * and its fields extracted by simply shifting or bit-extracting on aarch64.
* GCC doesn't recognize this and generates more unnecessary ldr/ldrb/ldrh * GCC doesn't recognize this and generates more unnecessary ldr/ldrb/ldrh
* operations that cause performance drop. This can be avoided by using this * operations that cause performance drop. This can be avoided by using this
* ZSTD_memcpy hack. * ZSTD_memcpy hack.
@@ -1261,7 +1305,7 @@ ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
} else { } else {
offset = ofBase + ll0 + BIT_readBitsFast(&seqState->DStream, 1); offset = ofBase + ll0 + BIT_readBitsFast(&seqState->DStream, 1);
{ size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset]; { size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */ temp -= !temp; /* 0 is not valid: input corrupted => force offset to -1 => corruption detected at execSequence */
if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1]; if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
seqState->prevOffset[1] = seqState->prevOffset[0]; seqState->prevOffset[1] = seqState->prevOffset[0];
seqState->prevOffset[0] = offset = temp; seqState->prevOffset[0] = offset = temp;
@@ -1288,17 +1332,22 @@ ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u", DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u",
(U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset); (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
ZSTD_updateFseStateWithDInfo(&seqState->stateLL, &seqState->DStream, llNext, llnbBits); /* <= 9 bits */ if (!isLastSeq) {
ZSTD_updateFseStateWithDInfo(&seqState->stateML, &seqState->DStream, mlNext, mlnbBits); /* <= 9 bits */ /* don't update FSE state for last Sequence */
if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */ ZSTD_updateFseStateWithDInfo(&seqState->stateLL, &seqState->DStream, llNext, llnbBits); /* <= 9 bits */
ZSTD_updateFseStateWithDInfo(&seqState->stateOffb, &seqState->DStream, ofNext, ofnbBits); /* <= 8 bits */ ZSTD_updateFseStateWithDInfo(&seqState->stateML, &seqState->DStream, mlNext, mlnbBits); /* <= 9 bits */
if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */
ZSTD_updateFseStateWithDInfo(&seqState->stateOffb, &seqState->DStream, ofNext, ofnbBits); /* <= 8 bits */
BIT_reloadDStream(&seqState->DStream);
}
} }
return seq; return seq;
} }
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
MEM_STATIC int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefixStart, BYTE const* oLitEnd) #if DEBUGLEVEL >= 1
static int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefixStart, BYTE const* oLitEnd)
{ {
size_t const windowSize = dctx->fParams.windowSize; size_t const windowSize = dctx->fParams.windowSize;
/* No dictionary used. */ /* No dictionary used. */
@@ -1312,30 +1361,33 @@ MEM_STATIC int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefix
/* Dictionary is active. */ /* Dictionary is active. */
return 1; return 1;
} }
#endif
MEM_STATIC void ZSTD_assertValidSequence( static void ZSTD_assertValidSequence(
ZSTD_DCtx const* dctx, ZSTD_DCtx const* dctx,
BYTE const* op, BYTE const* oend, BYTE const* op, BYTE const* oend,
seq_t const seq, seq_t const seq,
BYTE const* prefixStart, BYTE const* virtualStart) BYTE const* prefixStart, BYTE const* virtualStart)
{ {
#if DEBUGLEVEL >= 1 #if DEBUGLEVEL >= 1
size_t const windowSize = dctx->fParams.windowSize; if (dctx->isFrameDecompression) {
size_t const sequenceSize = seq.litLength + seq.matchLength; size_t const windowSize = dctx->fParams.windowSize;
BYTE const* const oLitEnd = op + seq.litLength; size_t const sequenceSize = seq.litLength + seq.matchLength;
DEBUGLOG(6, "Checking sequence: litL=%u matchL=%u offset=%u", BYTE const* const oLitEnd = op + seq.litLength;
(U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset); DEBUGLOG(6, "Checking sequence: litL=%u matchL=%u offset=%u",
assert(op <= oend); (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
assert((size_t)(oend - op) >= sequenceSize); assert(op <= oend);
assert(sequenceSize <= ZSTD_BLOCKSIZE_MAX); assert((size_t)(oend - op) >= sequenceSize);
if (ZSTD_dictionaryIsActive(dctx, prefixStart, oLitEnd)) { assert(sequenceSize <= ZSTD_blockSizeMax(dctx));
size_t const dictSize = (size_t)((char const*)dctx->dictContentEndForFuzzing - (char const*)dctx->dictContentBeginForFuzzing); if (ZSTD_dictionaryIsActive(dctx, prefixStart, oLitEnd)) {
/* Offset must be within the dictionary. */ size_t const dictSize = (size_t)((char const*)dctx->dictContentEndForFuzzing - (char const*)dctx->dictContentBeginForFuzzing);
assert(seq.offset <= (size_t)(oLitEnd - virtualStart)); /* Offset must be within the dictionary. */
assert(seq.offset <= windowSize + dictSize); assert(seq.offset <= (size_t)(oLitEnd - virtualStart));
} else { assert(seq.offset <= windowSize + dictSize);
/* Offset must be within our window. */ } else {
assert(seq.offset <= windowSize); /* Offset must be within our window. */
assert(seq.offset <= windowSize);
}
} }
#else #else
(void)dctx, (void)op, (void)oend, (void)seq, (void)prefixStart, (void)virtualStart; (void)dctx, (void)op, (void)oend, (void)seq, (void)prefixStart, (void)virtualStart;
@@ -1351,23 +1403,21 @@ DONT_VECTORIZE
ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx, ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx,
void* dst, size_t maxDstSize, void* dst, size_t maxDstSize,
const void* seqStart, size_t seqSize, int nbSeq, const void* seqStart, size_t seqSize, int nbSeq,
const ZSTD_longOffset_e isLongOffset, const ZSTD_longOffset_e isLongOffset)
const int frame)
{ {
const BYTE* ip = (const BYTE*)seqStart; const BYTE* ip = (const BYTE*)seqStart;
const BYTE* const iend = ip + seqSize; const BYTE* const iend = ip + seqSize;
BYTE* const ostart = (BYTE*)dst; BYTE* const ostart = (BYTE*)dst;
BYTE* const oend = ostart + maxDstSize; BYTE* const oend = ZSTD_maybeNullPtrAdd(ostart, maxDstSize);
BYTE* op = ostart; BYTE* op = ostart;
const BYTE* litPtr = dctx->litPtr; const BYTE* litPtr = dctx->litPtr;
const BYTE* litBufferEnd = dctx->litBufferEnd; const BYTE* litBufferEnd = dctx->litBufferEnd;
const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart); const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
const BYTE* const vBase = (const BYTE*) (dctx->virtualStart); const BYTE* const vBase = (const BYTE*) (dctx->virtualStart);
const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd); const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
DEBUGLOG(5, "ZSTD_decompressSequences_bodySplitLitBuffer"); DEBUGLOG(5, "ZSTD_decompressSequences_bodySplitLitBuffer (%i seqs)", nbSeq);
(void)frame;
/* Regen sequences */ /* Literals are split between internal buffer & output buffer */
if (nbSeq) { if (nbSeq) {
seqState_t seqState; seqState_t seqState;
dctx->fseEntropy = 1; dctx->fseEntropy = 1;
@@ -1386,8 +1436,7 @@ ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx,
BIT_DStream_completed < BIT_DStream_overflow); BIT_DStream_completed < BIT_DStream_overflow);
/* decompress without overrunning litPtr begins */ /* decompress without overrunning litPtr begins */
{ { seq_t sequence = {0,0,0}; /* some static analyzers believe that @sequence is not initialized (it necessarily is, since the for(;;) loop has at least one iteration) */
seq_t sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
/* Align the decompression loop to 32 + 16 bytes. /* Align the decompression loop to 32 + 16 bytes.
* *
* zstd compiled with gcc-9 on an Intel i9-9900k shows 10% decompression * zstd compiled with gcc-9 on an Intel i9-9900k shows 10% decompression
@@ -1449,27 +1498,26 @@ ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx,
#endif #endif
/* Handle the initial state where litBuffer is currently split between dst and litExtraBuffer */ /* Handle the initial state where litBuffer is currently split between dst and litExtraBuffer */
for (; litPtr + sequence.litLength <= dctx->litBufferEnd; ) { for ( ; nbSeq; nbSeq--) {
size_t const oneSeqSize = ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequence.litLength - WILDCOPY_OVERLENGTH, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd); sequence = ZSTD_decodeSequence(&seqState, isLongOffset, nbSeq==1);
if (litPtr + sequence.litLength > dctx->litBufferEnd) break;
{ size_t const oneSeqSize = ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequence.litLength - WILDCOPY_OVERLENGTH, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
assert(!ZSTD_isError(oneSeqSize)); assert(!ZSTD_isError(oneSeqSize));
if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase); ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
#endif #endif
if (UNLIKELY(ZSTD_isError(oneSeqSize))) if (UNLIKELY(ZSTD_isError(oneSeqSize)))
return oneSeqSize; return oneSeqSize;
DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize); DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
op += oneSeqSize; op += oneSeqSize;
if (UNLIKELY(!--nbSeq)) } }
break; DEBUGLOG(6, "reached: (litPtr + sequence.litLength > dctx->litBufferEnd)");
BIT_reloadDStream(&(seqState.DStream));
sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
}
/* If there are more sequences, they will need to read literals from litExtraBuffer; copy over the remainder from dst and update litPtr and litEnd */ /* If there are more sequences, they will need to read literals from litExtraBuffer; copy over the remainder from dst and update litPtr and litEnd */
if (nbSeq > 0) { if (nbSeq > 0) {
const size_t leftoverLit = dctx->litBufferEnd - litPtr; const size_t leftoverLit = dctx->litBufferEnd - litPtr;
if (leftoverLit) DEBUGLOG(6, "There are %i sequences left, and %zu/%zu literals left in buffer", nbSeq, leftoverLit, sequence.litLength);
{ if (leftoverLit) {
RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer"); RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");
ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit); ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit);
sequence.litLength -= leftoverLit; sequence.litLength -= leftoverLit;
@@ -1478,24 +1526,22 @@ ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx,
litPtr = dctx->litExtraBuffer; litPtr = dctx->litExtraBuffer;
litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE; litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
dctx->litBufferLocation = ZSTD_not_in_dst; dctx->litBufferLocation = ZSTD_not_in_dst;
{ { size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
assert(!ZSTD_isError(oneSeqSize)); assert(!ZSTD_isError(oneSeqSize));
if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase); ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
#endif #endif
if (UNLIKELY(ZSTD_isError(oneSeqSize))) if (UNLIKELY(ZSTD_isError(oneSeqSize)))
return oneSeqSize; return oneSeqSize;
DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize); DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
op += oneSeqSize; op += oneSeqSize;
if (--nbSeq)
BIT_reloadDStream(&(seqState.DStream));
} }
nbSeq--;
} }
} }
if (nbSeq > 0) /* there is remaining lit from extra buffer */ if (nbSeq > 0) {
{ /* there is remaining lit from extra buffer */
#if defined(__GNUC__) && defined(__x86_64__) #if defined(__GNUC__) && defined(__x86_64__)
__asm__(".p2align 6"); __asm__(".p2align 6");
@@ -1514,35 +1560,34 @@ ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx,
# endif # endif
#endif #endif
for (; ; ) { for ( ; nbSeq ; nbSeq--) {
seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset); seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, nbSeq==1);
size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd); size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
assert(!ZSTD_isError(oneSeqSize)); assert(!ZSTD_isError(oneSeqSize));
if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase); ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
#endif #endif
if (UNLIKELY(ZSTD_isError(oneSeqSize))) if (UNLIKELY(ZSTD_isError(oneSeqSize)))
return oneSeqSize; return oneSeqSize;
DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize); DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
op += oneSeqSize; op += oneSeqSize;
if (UNLIKELY(!--nbSeq))
break;
BIT_reloadDStream(&(seqState.DStream));
} }
} }
/* check if reached exact end */ /* check if reached exact end */
DEBUGLOG(5, "ZSTD_decompressSequences_bodySplitLitBuffer: after decode loop, remaining nbSeq : %i", nbSeq); DEBUGLOG(5, "ZSTD_decompressSequences_bodySplitLitBuffer: after decode loop, remaining nbSeq : %i", nbSeq);
RETURN_ERROR_IF(nbSeq, corruption_detected, ""); RETURN_ERROR_IF(nbSeq, corruption_detected, "");
RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, ""); DEBUGLOG(5, "bitStream : start=%p, ptr=%p, bitsConsumed=%u", seqState.DStream.start, seqState.DStream.ptr, seqState.DStream.bitsConsumed);
RETURN_ERROR_IF(!BIT_endOfDStream(&seqState.DStream), corruption_detected, "");
/* save reps for next block */ /* save reps for next block */
{ U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); } { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
} }
/* last literal segment */ /* last literal segment */
if (dctx->litBufferLocation == ZSTD_split) /* split hasn't been reached yet, first get dst then copy litExtraBuffer */ if (dctx->litBufferLocation == ZSTD_split) {
{ /* split hasn't been reached yet, first get dst then copy litExtraBuffer */
size_t const lastLLSize = litBufferEnd - litPtr; size_t const lastLLSize = (size_t)(litBufferEnd - litPtr);
DEBUGLOG(6, "copy last literals from segment : %u", (U32)lastLLSize);
RETURN_ERROR_IF(lastLLSize > (size_t)(oend - op), dstSize_tooSmall, ""); RETURN_ERROR_IF(lastLLSize > (size_t)(oend - op), dstSize_tooSmall, "");
if (op != NULL) { if (op != NULL) {
ZSTD_memmove(op, litPtr, lastLLSize); ZSTD_memmove(op, litPtr, lastLLSize);
@@ -1552,15 +1597,17 @@ ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx,
litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE; litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
dctx->litBufferLocation = ZSTD_not_in_dst; dctx->litBufferLocation = ZSTD_not_in_dst;
} }
{ size_t const lastLLSize = litBufferEnd - litPtr; /* copy last literals from internal buffer */
{ size_t const lastLLSize = (size_t)(litBufferEnd - litPtr);
DEBUGLOG(6, "copy last literals from internal buffer : %u", (U32)lastLLSize);
RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, ""); RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
if (op != NULL) { if (op != NULL) {
ZSTD_memcpy(op, litPtr, lastLLSize); ZSTD_memcpy(op, litPtr, lastLLSize);
op += lastLLSize; op += lastLLSize;
} } }
}
return op-ostart; DEBUGLOG(6, "decoded block of size %u bytes", (U32)(op - ostart));
return (size_t)(op - ostart);
} }
FORCE_INLINE_TEMPLATE size_t FORCE_INLINE_TEMPLATE size_t
@ -1568,13 +1615,12 @@ DONT_VECTORIZE
ZSTD_decompressSequences_body(ZSTD_DCtx* dctx, ZSTD_decompressSequences_body(ZSTD_DCtx* dctx,
void* dst, size_t maxDstSize, void* dst, size_t maxDstSize,
const void* seqStart, size_t seqSize, int nbSeq, const void* seqStart, size_t seqSize, int nbSeq,
const ZSTD_longOffset_e isLongOffset, const ZSTD_longOffset_e isLongOffset)
const int frame)
{ {
const BYTE* ip = (const BYTE*)seqStart; const BYTE* ip = (const BYTE*)seqStart;
const BYTE* const iend = ip + seqSize; const BYTE* const iend = ip + seqSize;
BYTE* const ostart = (BYTE*)dst; BYTE* const ostart = (BYTE*)dst;
BYTE* const oend = dctx->litBufferLocation == ZSTD_not_in_dst ? ostart + maxDstSize : dctx->litBuffer; BYTE* const oend = dctx->litBufferLocation == ZSTD_not_in_dst ? ZSTD_maybeNullPtrAdd(ostart, maxDstSize) : dctx->litBuffer;
BYTE* op = ostart; BYTE* op = ostart;
const BYTE* litPtr = dctx->litPtr; const BYTE* litPtr = dctx->litPtr;
const BYTE* const litEnd = litPtr + dctx->litSize; const BYTE* const litEnd = litPtr + dctx->litSize;
@@ -1582,7 +1628,6 @@ ZSTD_decompressSequences_body(ZSTD_DCtx* dctx,
const BYTE* const vBase = (const BYTE*)(dctx->virtualStart); const BYTE* const vBase = (const BYTE*)(dctx->virtualStart);
const BYTE* const dictEnd = (const BYTE*)(dctx->dictEnd); const BYTE* const dictEnd = (const BYTE*)(dctx->dictEnd);
DEBUGLOG(5, "ZSTD_decompressSequences_body: nbSeq = %d", nbSeq); DEBUGLOG(5, "ZSTD_decompressSequences_body: nbSeq = %d", nbSeq);
(void)frame;
/* Regen sequences */ /* Regen sequences */
if (nbSeq) { if (nbSeq) {
@@ -1597,11 +1642,6 @@ ZSTD_decompressSequences_body(ZSTD_DCtx* dctx,
ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr); ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
assert(dst != NULL); assert(dst != NULL);
ZSTD_STATIC_ASSERT(
BIT_DStream_unfinished < BIT_DStream_completed &&
BIT_DStream_endOfBuffer < BIT_DStream_completed &&
BIT_DStream_completed < BIT_DStream_overflow);
#if defined(__GNUC__) && defined(__x86_64__) #if defined(__GNUC__) && defined(__x86_64__)
__asm__(".p2align 6"); __asm__(".p2align 6");
__asm__("nop"); __asm__("nop");
@@ -1616,73 +1656,70 @@ ZSTD_decompressSequences_body(ZSTD_DCtx* dctx,
# endif # endif
#endif #endif
for ( ; ; ) { for ( ; nbSeq ; nbSeq--) {
seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset); seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, nbSeq==1);
size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd); size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd);
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
assert(!ZSTD_isError(oneSeqSize)); assert(!ZSTD_isError(oneSeqSize));
if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase); ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
#endif #endif
if (UNLIKELY(ZSTD_isError(oneSeqSize))) if (UNLIKELY(ZSTD_isError(oneSeqSize)))
return oneSeqSize; return oneSeqSize;
DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize); DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
op += oneSeqSize; op += oneSeqSize;
if (UNLIKELY(!--nbSeq))
break;
BIT_reloadDStream(&(seqState.DStream));
} }
/* check if reached exact end */ /* check if reached exact end */
DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq); assert(nbSeq == 0);
RETURN_ERROR_IF(nbSeq, corruption_detected, ""); RETURN_ERROR_IF(!BIT_endOfDStream(&seqState.DStream), corruption_detected, "");
RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, "");
/* save reps for next block */ /* save reps for next block */
{ U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); } { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
} }
/* last literal segment */ /* last literal segment */
{ size_t const lastLLSize = litEnd - litPtr; { size_t const lastLLSize = (size_t)(litEnd - litPtr);
DEBUGLOG(6, "copy last literals : %u", (U32)lastLLSize);
RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, ""); RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
if (op != NULL) { if (op != NULL) {
ZSTD_memcpy(op, litPtr, lastLLSize); ZSTD_memcpy(op, litPtr, lastLLSize);
op += lastLLSize; op += lastLLSize;
} } }
}
return op-ostart; DEBUGLOG(6, "decoded block of size %u bytes", (U32)(op - ostart));
return (size_t)(op - ostart);
} }
static size_t static size_t
ZSTD_decompressSequences_default(ZSTD_DCtx* dctx, ZSTD_decompressSequences_default(ZSTD_DCtx* dctx,
void* dst, size_t maxDstSize, void* dst, size_t maxDstSize,
const void* seqStart, size_t seqSize, int nbSeq, const void* seqStart, size_t seqSize, int nbSeq,
const ZSTD_longOffset_e isLongOffset, const ZSTD_longOffset_e isLongOffset)
const int frame)
{ {
return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
} }
static size_t static size_t
ZSTD_decompressSequencesSplitLitBuffer_default(ZSTD_DCtx* dctx, ZSTD_decompressSequencesSplitLitBuffer_default(ZSTD_DCtx* dctx,
void* dst, size_t maxDstSize, void* dst, size_t maxDstSize,
const void* seqStart, size_t seqSize, int nbSeq, const void* seqStart, size_t seqSize, int nbSeq,
const ZSTD_longOffset_e isLongOffset, const ZSTD_longOffset_e isLongOffset)
const int frame)
{ {
return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
} }
#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */ #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
FORCE_INLINE_TEMPLATE size_t FORCE_INLINE_TEMPLATE
ZSTD_prefetchMatch(size_t prefetchPos, seq_t const sequence,
size_t ZSTD_prefetchMatch(size_t prefetchPos, seq_t const sequence,
const BYTE* const prefixStart, const BYTE* const dictEnd) const BYTE* const prefixStart, const BYTE* const dictEnd)
{ {
prefetchPos += sequence.litLength; prefetchPos += sequence.litLength;
{ const BYTE* const matchBase = (sequence.offset > prefetchPos) ? dictEnd : prefixStart; { const BYTE* const matchBase = (sequence.offset > prefetchPos) ? dictEnd : prefixStart;
const BYTE* const match = matchBase + prefetchPos - sequence.offset; /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted. /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted.
* No consequence though : memory address is only used for prefetching, not for dereferencing */ * No consequence though : memory address is only used for prefetching, not for dereferencing */
const BYTE* const match = ZSTD_wrappedPtrSub(ZSTD_wrappedPtrAdd(matchBase, prefetchPos), sequence.offset);
PREFETCH_L1(match); PREFETCH_L1(match+CACHELINE_SIZE); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */ PREFETCH_L1(match); PREFETCH_L1(match+CACHELINE_SIZE); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
} }
return prefetchPos + sequence.matchLength; return prefetchPos + sequence.matchLength;
@@ -1697,20 +1734,18 @@ ZSTD_decompressSequencesLong_body(
ZSTD_DCtx* dctx, ZSTD_DCtx* dctx,
void* dst, size_t maxDstSize, void* dst, size_t maxDstSize,
const void* seqStart, size_t seqSize, int nbSeq, const void* seqStart, size_t seqSize, int nbSeq,
const ZSTD_longOffset_e isLongOffset, const ZSTD_longOffset_e isLongOffset)
const int frame)
{ {
const BYTE* ip = (const BYTE*)seqStart; const BYTE* ip = (const BYTE*)seqStart;
const BYTE* const iend = ip + seqSize; const BYTE* const iend = ip + seqSize;
BYTE* const ostart = (BYTE*)dst; BYTE* const ostart = (BYTE*)dst;
BYTE* const oend = dctx->litBufferLocation == ZSTD_in_dst ? dctx->litBuffer : ostart + maxDstSize; BYTE* const oend = dctx->litBufferLocation == ZSTD_in_dst ? dctx->litBuffer : ZSTD_maybeNullPtrAdd(ostart, maxDstSize);
BYTE* op = ostart; BYTE* op = ostart;
const BYTE* litPtr = dctx->litPtr; const BYTE* litPtr = dctx->litPtr;
const BYTE* litBufferEnd = dctx->litBufferEnd; const BYTE* litBufferEnd = dctx->litBufferEnd;
const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart); const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
const BYTE* const dictStart = (const BYTE*) (dctx->virtualStart); const BYTE* const dictStart = (const BYTE*) (dctx->virtualStart);
const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd); const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
(void)frame;
/* Regen sequences */ /* Regen sequences */
if (nbSeq) { if (nbSeq) {
@@ -1735,20 +1770,17 @@ ZSTD_decompressSequencesLong_body(
ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr); ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
/* prepare in advance */ /* prepare in advance */
for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && (seqNb<seqAdvance); seqNb++) { for (seqNb=0; seqNb<seqAdvance; seqNb++) {
seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset); seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, seqNb == nbSeq-1);
prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd); prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
sequences[seqNb] = sequence; sequences[seqNb] = sequence;
} }
RETURN_ERROR_IF(seqNb<seqAdvance, corruption_detected, "");
/* decompress without stomping litBuffer */ /* decompress without stomping litBuffer */
for (; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (seqNb < nbSeq); seqNb++) { for (; seqNb < nbSeq; seqNb++) {
seq_t sequence = ZSTD_decodeSequence(&seqState, isLongOffset); seq_t sequence = ZSTD_decodeSequence(&seqState, isLongOffset, seqNb == nbSeq-1);
size_t oneSeqSize;
if (dctx->litBufferLocation == ZSTD_split && litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength > dctx->litBufferEnd) if (dctx->litBufferLocation == ZSTD_split && litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength > dctx->litBufferEnd) {
{
/* lit buffer is reaching split point, empty out the first buffer and transition to litExtraBuffer */ /* lit buffer is reaching split point, empty out the first buffer and transition to litExtraBuffer */
const size_t leftoverLit = dctx->litBufferEnd - litPtr; const size_t leftoverLit = dctx->litBufferEnd - litPtr;
if (leftoverLit) if (leftoverLit)
@@ -1761,26 +1793,26 @@ ZSTD_decompressSequencesLong_body(
litPtr = dctx->litExtraBuffer; litPtr = dctx->litExtraBuffer;
litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE; litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
dctx->litBufferLocation = ZSTD_not_in_dst; dctx->litBufferLocation = ZSTD_not_in_dst;
oneSeqSize = ZSTD_execSequence(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd); { size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
assert(!ZSTD_isError(oneSeqSize)); assert(!ZSTD_isError(oneSeqSize));
if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart); ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
#endif #endif
if (ZSTD_isError(oneSeqSize)) return oneSeqSize; if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd); prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
sequences[seqNb & STORED_SEQS_MASK] = sequence; sequences[seqNb & STORED_SEQS_MASK] = sequence;
op += oneSeqSize; op += oneSeqSize;
} } }
else else
{ {
/* lit buffer is either wholly contained in first or second split, or not split at all*/ /* lit buffer is either wholly contained in first or second split, or not split at all*/
oneSeqSize = dctx->litBufferLocation == ZSTD_split ? size_t const oneSeqSize = dctx->litBufferLocation == ZSTD_split ?
ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength - WILDCOPY_OVERLENGTH, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd) : ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength - WILDCOPY_OVERLENGTH, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd) :
ZSTD_execSequence(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd); ZSTD_execSequence(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
assert(!ZSTD_isError(oneSeqSize)); assert(!ZSTD_isError(oneSeqSize));
if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart); ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
#endif #endif
if (ZSTD_isError(oneSeqSize)) return oneSeqSize; if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
@ -1789,17 +1821,15 @@ ZSTD_decompressSequencesLong_body(
op += oneSeqSize; op += oneSeqSize;
} }
} }
RETURN_ERROR_IF(seqNb<nbSeq, corruption_detected, ""); RETURN_ERROR_IF(!BIT_endOfDStream(&seqState.DStream), corruption_detected, "");
/* finish queue */ /* finish queue */
seqNb -= seqAdvance; seqNb -= seqAdvance;
for ( ; seqNb<nbSeq ; seqNb++) { for ( ; seqNb<nbSeq ; seqNb++) {
seq_t *sequence = &(sequences[seqNb&STORED_SEQS_MASK]); seq_t *sequence = &(sequences[seqNb&STORED_SEQS_MASK]);
if (dctx->litBufferLocation == ZSTD_split && litPtr + sequence->litLength > dctx->litBufferEnd) if (dctx->litBufferLocation == ZSTD_split && litPtr + sequence->litLength > dctx->litBufferEnd) {
{
const size_t leftoverLit = dctx->litBufferEnd - litPtr; const size_t leftoverLit = dctx->litBufferEnd - litPtr;
if (leftoverLit) if (leftoverLit) {
{
RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer"); RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");
ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit); ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit);
sequence->litLength -= leftoverLit; sequence->litLength -= leftoverLit;
@ -1808,11 +1838,10 @@ ZSTD_decompressSequencesLong_body(
litPtr = dctx->litExtraBuffer; litPtr = dctx->litExtraBuffer;
litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE; litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
dctx->litBufferLocation = ZSTD_not_in_dst; dctx->litBufferLocation = ZSTD_not_in_dst;
{ { size_t const oneSeqSize = ZSTD_execSequence(op, oend, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
size_t const oneSeqSize = ZSTD_execSequence(op, oend, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
assert(!ZSTD_isError(oneSeqSize)); assert(!ZSTD_isError(oneSeqSize));
if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart); ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);
#endif #endif
if (ZSTD_isError(oneSeqSize)) return oneSeqSize; if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
op += oneSeqSize; op += oneSeqSize;
@ -1825,7 +1854,7 @@ ZSTD_decompressSequencesLong_body(
ZSTD_execSequence(op, oend, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd); ZSTD_execSequence(op, oend, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
assert(!ZSTD_isError(oneSeqSize)); assert(!ZSTD_isError(oneSeqSize));
if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart); ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);
#endif #endif
if (ZSTD_isError(oneSeqSize)) return oneSeqSize; if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
op += oneSeqSize; op += oneSeqSize;
@ -1837,8 +1866,7 @@ ZSTD_decompressSequencesLong_body(
} }
/* last literal segment */ /* last literal segment */
if (dctx->litBufferLocation == ZSTD_split) /* first deplete literal buffer in dst, then copy litExtraBuffer */ if (dctx->litBufferLocation == ZSTD_split) { /* first deplete literal buffer in dst, then copy litExtraBuffer */
{
size_t const lastLLSize = litBufferEnd - litPtr; size_t const lastLLSize = litBufferEnd - litPtr;
RETURN_ERROR_IF(lastLLSize > (size_t)(oend - op), dstSize_tooSmall, ""); RETURN_ERROR_IF(lastLLSize > (size_t)(oend - op), dstSize_tooSmall, "");
if (op != NULL) { if (op != NULL) {
@ -1856,17 +1884,16 @@ ZSTD_decompressSequencesLong_body(
} }
} }
return op-ostart; return (size_t)(op - ostart);
} }
static size_t static size_t
ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx, ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx,
void* dst, size_t maxDstSize, void* dst, size_t maxDstSize,
const void* seqStart, size_t seqSize, int nbSeq, const void* seqStart, size_t seqSize, int nbSeq,
const ZSTD_longOffset_e isLongOffset, const ZSTD_longOffset_e isLongOffset)
const int frame)
{ {
return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
} }
#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */ #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
@ -1880,20 +1907,18 @@ DONT_VECTORIZE
ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx, ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx,
void* dst, size_t maxDstSize, void* dst, size_t maxDstSize,
const void* seqStart, size_t seqSize, int nbSeq, const void* seqStart, size_t seqSize, int nbSeq,
const ZSTD_longOffset_e isLongOffset, const ZSTD_longOffset_e isLongOffset)
const int frame)
{ {
return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
} }
static BMI2_TARGET_ATTRIBUTE size_t static BMI2_TARGET_ATTRIBUTE size_t
DONT_VECTORIZE DONT_VECTORIZE
ZSTD_decompressSequencesSplitLitBuffer_bmi2(ZSTD_DCtx* dctx, ZSTD_decompressSequencesSplitLitBuffer_bmi2(ZSTD_DCtx* dctx,
void* dst, size_t maxDstSize, void* dst, size_t maxDstSize,
const void* seqStart, size_t seqSize, int nbSeq, const void* seqStart, size_t seqSize, int nbSeq,
const ZSTD_longOffset_e isLongOffset, const ZSTD_longOffset_e isLongOffset)
const int frame)
{ {
return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
} }
#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */ #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
@ -1902,10 +1927,9 @@ static BMI2_TARGET_ATTRIBUTE size_t
ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx, ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx,
void* dst, size_t maxDstSize, void* dst, size_t maxDstSize,
const void* seqStart, size_t seqSize, int nbSeq, const void* seqStart, size_t seqSize, int nbSeq,
const ZSTD_longOffset_e isLongOffset, const ZSTD_longOffset_e isLongOffset)
const int frame)
{ {
return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
} }
#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */ #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
@ -1915,37 +1939,34 @@ typedef size_t (*ZSTD_decompressSequences_t)(
ZSTD_DCtx* dctx, ZSTD_DCtx* dctx,
void* dst, size_t maxDstSize, void* dst, size_t maxDstSize,
const void* seqStart, size_t seqSize, int nbSeq, const void* seqStart, size_t seqSize, int nbSeq,
const ZSTD_longOffset_e isLongOffset, const ZSTD_longOffset_e isLongOffset);
const int frame);
#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
static size_t static size_t
ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
const void* seqStart, size_t seqSize, int nbSeq, const void* seqStart, size_t seqSize, int nbSeq,
const ZSTD_longOffset_e isLongOffset, const ZSTD_longOffset_e isLongOffset)
const int frame)
{ {
DEBUGLOG(5, "ZSTD_decompressSequences"); DEBUGLOG(5, "ZSTD_decompressSequences");
#if DYNAMIC_BMI2 #if DYNAMIC_BMI2
if (ZSTD_DCtx_get_bmi2(dctx)) { if (ZSTD_DCtx_get_bmi2(dctx)) {
return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
} }
#endif #endif
return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
} }
static size_t static size_t
ZSTD_decompressSequencesSplitLitBuffer(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, ZSTD_decompressSequencesSplitLitBuffer(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
const void* seqStart, size_t seqSize, int nbSeq, const void* seqStart, size_t seqSize, int nbSeq,
const ZSTD_longOffset_e isLongOffset, const ZSTD_longOffset_e isLongOffset)
const int frame)
{ {
DEBUGLOG(5, "ZSTD_decompressSequencesSplitLitBuffer"); DEBUGLOG(5, "ZSTD_decompressSequencesSplitLitBuffer");
#if DYNAMIC_BMI2 #if DYNAMIC_BMI2
if (ZSTD_DCtx_get_bmi2(dctx)) { if (ZSTD_DCtx_get_bmi2(dctx)) {
return ZSTD_decompressSequencesSplitLitBuffer_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); return ZSTD_decompressSequencesSplitLitBuffer_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
} }
#endif #endif
return ZSTD_decompressSequencesSplitLitBuffer_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); return ZSTD_decompressSequencesSplitLitBuffer_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
} }
#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */ #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
@ -1960,16 +1981,15 @@ static size_t
ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx, ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx,
void* dst, size_t maxDstSize, void* dst, size_t maxDstSize,
const void* seqStart, size_t seqSize, int nbSeq, const void* seqStart, size_t seqSize, int nbSeq,
const ZSTD_longOffset_e isLongOffset, const ZSTD_longOffset_e isLongOffset)
const int frame)
{ {
DEBUGLOG(5, "ZSTD_decompressSequencesLong"); DEBUGLOG(5, "ZSTD_decompressSequencesLong");
#if DYNAMIC_BMI2 #if DYNAMIC_BMI2
if (ZSTD_DCtx_get_bmi2(dctx)) { if (ZSTD_DCtx_get_bmi2(dctx)) {
return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
} }
#endif #endif
return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
} }
#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */ #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
@ -2051,20 +2071,20 @@ static size_t ZSTD_maxShortOffset(void)
size_t size_t
ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
void* dst, size_t dstCapacity, void* dst, size_t dstCapacity,
const void* src, size_t srcSize, const int frame, const streaming_operation streaming) const void* src, size_t srcSize, const streaming_operation streaming)
{ /* blockType == blockCompressed */ { /* blockType == blockCompressed */
const BYTE* ip = (const BYTE*)src; const BYTE* ip = (const BYTE*)src;
DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize); DEBUGLOG(5, "ZSTD_decompressBlock_internal (cSize : %u)", (unsigned)srcSize);
/* Note : the wording of the specification /* Note : the wording of the specification
* allows compressed block to be sized exactly ZSTD_BLOCKSIZE_MAX. * allows compressed block to be sized exactly ZSTD_blockSizeMax(dctx).
* This generally does not happen, as it makes little sense, * This generally does not happen, as it makes little sense,
* since an uncompressed block would feature same size and have no decompression cost. * since an uncompressed block would feature same size and have no decompression cost.
* Also, note that decoder from reference libzstd before < v1.5.4 * Also, note that decoder from reference libzstd before < v1.5.4
* would consider this edge case as an error. * would consider this edge case as an error.
* As a consequence, avoid generating compressed blocks of size ZSTD_BLOCKSIZE_MAX * As a consequence, avoid generating compressed blocks of size ZSTD_blockSizeMax(dctx)
* for broader compatibility with the deployed ecosystem of zstd decoders */ * for broader compatibility with the deployed ecosystem of zstd decoders */
RETURN_ERROR_IF(srcSize > ZSTD_BLOCKSIZE_MAX, srcSize_wrong, ""); RETURN_ERROR_IF(srcSize > ZSTD_blockSizeMax(dctx), srcSize_wrong, "");
/* Decode literals section */ /* Decode literals section */
{ size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize, dst, dstCapacity, streaming); { size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize, dst, dstCapacity, streaming);
@ -2079,8 +2099,8 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
/* Compute the maximum block size, which must also work when !frame and fParams are unset. /* Compute the maximum block size, which must also work when !frame and fParams are unset.
* Additionally, take the min with dstCapacity to ensure that the totalHistorySize fits in a size_t. * Additionally, take the min with dstCapacity to ensure that the totalHistorySize fits in a size_t.
*/ */
size_t const blockSizeMax = MIN(dstCapacity, (frame ? dctx->fParams.blockSizeMax : ZSTD_BLOCKSIZE_MAX)); size_t const blockSizeMax = MIN(dstCapacity, ZSTD_blockSizeMax(dctx));
size_t const totalHistorySize = ZSTD_totalHistorySize((BYTE*)dst + blockSizeMax, (BYTE const*)dctx->virtualStart); size_t const totalHistorySize = ZSTD_totalHistorySize(ZSTD_maybeNullPtrAdd((BYTE*)dst, blockSizeMax), (BYTE const*)dctx->virtualStart);
/* isLongOffset must be true if there are long offsets. /* isLongOffset must be true if there are long offsets.
* Offsets are long if they are larger than ZSTD_maxShortOffset(). * Offsets are long if they are larger than ZSTD_maxShortOffset().
* We don't expect that to be the case in 64-bit mode. * We don't expect that to be the case in 64-bit mode.
@ -2145,21 +2165,22 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
{ {
#endif #endif
#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame); return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset);
#endif #endif
} }
#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
/* else */ /* else */
if (dctx->litBufferLocation == ZSTD_split) if (dctx->litBufferLocation == ZSTD_split)
return ZSTD_decompressSequencesSplitLitBuffer(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame); return ZSTD_decompressSequencesSplitLitBuffer(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset);
else else
return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame); return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset);
#endif #endif
} }
} }
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize) void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize)
{ {
if (dst != dctx->previousDstEnd && dstSize > 0) { /* not contiguous */ if (dst != dctx->previousDstEnd && dstSize > 0) { /* not contiguous */
@ -2176,8 +2197,10 @@ size_t ZSTD_decompressBlock_deprecated(ZSTD_DCtx* dctx,
const void* src, size_t srcSize) const void* src, size_t srcSize)
{ {
size_t dSize; size_t dSize;
dctx->isFrameDecompression = 0;
ZSTD_checkContinuity(dctx, dst, dstCapacity); ZSTD_checkContinuity(dctx, dst, dstCapacity);
dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 0, not_streaming); dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, not_streaming);
FORWARD_IF_ERROR(dSize, "");
dctx->previousDstEnd = (char*)dst + dSize; dctx->previousDstEnd = (char*)dst + dSize;
return dSize; return dSize;
} }


@ -47,7 +47,7 @@ typedef enum {
*/ */
size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
void* dst, size_t dstCapacity, void* dst, size_t dstCapacity,
const void* src, size_t srcSize, const int frame, const streaming_operation streaming); const void* src, size_t srcSize, const streaming_operation streaming);
/* ZSTD_buildFSETable() : /* ZSTD_buildFSETable() :
* generate FSE decoding table for one symbol (ll, ml or off) * generate FSE decoding table for one symbol (ll, ml or off)


@ -153,6 +153,7 @@ struct ZSTD_DCtx_s
size_t litSize; size_t litSize;
size_t rleSize; size_t rleSize;
size_t staticSize; size_t staticSize;
int isFrameDecompression;
#if DYNAMIC_BMI2 != 0 #if DYNAMIC_BMI2 != 0
int bmi2; /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */ int bmi2; /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */
#endif #endif
@ -166,6 +167,7 @@ struct ZSTD_DCtx_s
ZSTD_DDictHashSet* ddictSet; /* Hash set for multiple ddicts */ ZSTD_DDictHashSet* ddictSet; /* Hash set for multiple ddicts */
ZSTD_refMultipleDDicts_e refMultipleDDicts; /* User specified: if == 1, will allow references to multiple DDicts. Default == 0 (disabled) */ ZSTD_refMultipleDDicts_e refMultipleDDicts; /* User specified: if == 1, will allow references to multiple DDicts. Default == 0 (disabled) */
int disableHufAsm; int disableHufAsm;
int maxBlockSizeParam;
/* streaming */ /* streaming */
ZSTD_dStreamStage streamStage; ZSTD_dStreamStage streamStage;

thirdparty/zstd/zstd.h

@ -106,7 +106,7 @@ extern "C" {
/*------ Version ------*/ /*------ Version ------*/
#define ZSTD_VERSION_MAJOR 1 #define ZSTD_VERSION_MAJOR 1
#define ZSTD_VERSION_MINOR 5 #define ZSTD_VERSION_MINOR 5
#define ZSTD_VERSION_RELEASE 5 #define ZSTD_VERSION_RELEASE 6
#define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE) #define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE)
/*! ZSTD_versionNumber() : /*! ZSTD_versionNumber() :
@ -228,7 +228,7 @@ ZSTDLIB_API size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize)
* for example to size a static array on stack. * for example to size a static array on stack.
* Will produce constant value 0 if srcSize too large. * Will produce constant value 0 if srcSize too large.
*/ */
#define ZSTD_MAX_INPUT_SIZE ((sizeof(size_t)==8) ? 0xFF00FF00FF00FF00LLU : 0xFF00FF00U) #define ZSTD_MAX_INPUT_SIZE ((sizeof(size_t)==8) ? 0xFF00FF00FF00FF00ULL : 0xFF00FF00U)
#define ZSTD_COMPRESSBOUND(srcSize) (((size_t)(srcSize) >= ZSTD_MAX_INPUT_SIZE) ? 0 : (srcSize) + ((srcSize)>>8) + (((srcSize) < (128<<10)) ? (((128<<10) - (srcSize)) >> 11) /* margin, from 64 to 0 */ : 0)) /* this formula ensures that bound(A) + bound(B) <= bound(A+B) as long as A and B >= 128 KB */ #define ZSTD_COMPRESSBOUND(srcSize) (((size_t)(srcSize) >= ZSTD_MAX_INPUT_SIZE) ? 0 : (srcSize) + ((srcSize)>>8) + (((srcSize) < (128<<10)) ? (((128<<10) - (srcSize)) >> 11) /* margin, from 64 to 0 */ : 0)) /* this formula ensures that bound(A) + bound(B) <= bound(A+B) as long as A and B >= 128 KB */
ZSTDLIB_API size_t ZSTD_compressBound(size_t srcSize); /*!< maximum compressed size in worst case single-pass scenario */ ZSTDLIB_API size_t ZSTD_compressBound(size_t srcSize); /*!< maximum compressed size in worst case single-pass scenario */
/* ZSTD_isError() : /* ZSTD_isError() :
@ -249,7 +249,7 @@ ZSTDLIB_API int ZSTD_defaultCLevel(void); /*!< default compres
/*= Compression context /*= Compression context
* When compressing many times, * When compressing many times,
* it is recommended to allocate a context just once, * it is recommended to allocate a context just once,
* and re-use it for each successive compression operation. * and reuse it for each successive compression operation.
* This will make workload friendlier for system's memory. * This will make workload friendlier for system's memory.
* Note : re-using context is just a speed / resource optimization. * Note : re-using context is just a speed / resource optimization.
* It doesn't change the compression ratio, which remains identical. * It doesn't change the compression ratio, which remains identical.
@ -262,9 +262,9 @@ ZSTDLIB_API size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx); /* accept NULL pointer *
/*! ZSTD_compressCCtx() : /*! ZSTD_compressCCtx() :
* Same as ZSTD_compress(), using an explicit ZSTD_CCtx. * Same as ZSTD_compress(), using an explicit ZSTD_CCtx.
* Important : in order to behave similarly to `ZSTD_compress()`, * Important : in order to mirror `ZSTD_compress()` behavior,
* this function compresses at requested compression level, * this function compresses at the requested compression level,
* __ignoring any other parameter__ . * __ignoring any other advanced parameter__ .
* If any advanced parameter was set using the advanced API, * If any advanced parameter was set using the advanced API,
* they will all be reset. Only `compressionLevel` remains. * they will all be reset. Only `compressionLevel` remains.
*/ */
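Not part of the diff: a minimal sketch of the context-reuse advice above, allocating one ZSTD_CCtx and reusing it for several independent one-shot compressions with ZSTD_compressCCtx(). The function name, buffer handling and the level value 3 are illustrative placeholders.

#include <stdio.h>
#include <stdlib.h>
#include <zstd.h>

/* Sketch: one context, reused across several one-shot compressions. */
static int compress_many(const void* const* srcs, const size_t* srcSizes, size_t count)
{
    ZSTD_CCtx* const cctx = ZSTD_createCCtx();
    if (cctx == NULL) return -1;
    for (size_t i = 0; i < count; i++) {
        size_t const bound = ZSTD_compressBound(srcSizes[i]);
        void* const dst = malloc(bound);
        if (dst == NULL) { ZSTD_freeCCtx(cctx); return -1; }
        /* compresses at level 3 only; any advanced parameters previously set are reset */
        size_t const cSize = ZSTD_compressCCtx(cctx, dst, bound, srcs[i], srcSizes[i], 3);
        if (ZSTD_isError(cSize)) {
            fprintf(stderr, "compression failed: %s\n", ZSTD_getErrorName(cSize));
            free(dst); ZSTD_freeCCtx(cctx); return -1;
        }
        /* ... use dst[0..cSize) here ... */
        free(dst);
    }
    ZSTD_freeCCtx(cctx);
    return 0;
}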
@ -276,7 +276,7 @@ ZSTDLIB_API size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx,
/*= Decompression context /*= Decompression context
* When decompressing many times, * When decompressing many times,
* it is recommended to allocate a context only once, * it is recommended to allocate a context only once,
* and re-use it for each successive compression operation. * and reuse it for each successive compression operation.
* This will make workload friendlier for system's memory. * This will make workload friendlier for system's memory.
* Use one context per thread for parallel execution. */ * Use one context per thread for parallel execution. */
typedef struct ZSTD_DCtx_s ZSTD_DCtx; typedef struct ZSTD_DCtx_s ZSTD_DCtx;
@ -286,7 +286,7 @@ ZSTDLIB_API size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx); /* accept NULL pointer *
/*! ZSTD_decompressDCtx() : /*! ZSTD_decompressDCtx() :
* Same as ZSTD_decompress(), * Same as ZSTD_decompress(),
* requires an allocated ZSTD_DCtx. * requires an allocated ZSTD_DCtx.
* Compatible with sticky parameters. * Compatible with sticky parameters (see below).
*/ */
ZSTDLIB_API size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx, ZSTDLIB_API size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx,
void* dst, size_t dstCapacity, void* dst, size_t dstCapacity,
@ -302,12 +302,12 @@ ZSTDLIB_API size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx,
* using ZSTD_CCtx_set*() functions. * using ZSTD_CCtx_set*() functions.
* Pushed parameters are sticky : they are valid for next compressed frame, and any subsequent frame. * Pushed parameters are sticky : they are valid for next compressed frame, and any subsequent frame.
* "sticky" parameters are applicable to `ZSTD_compress2()` and `ZSTD_compressStream*()` ! * "sticky" parameters are applicable to `ZSTD_compress2()` and `ZSTD_compressStream*()` !
* __They do not apply to "simple" one-shot variants such as ZSTD_compressCCtx()__ . * __They do not apply to one-shot variants such as ZSTD_compressCCtx()__ .
* *
* It's possible to reset all parameters to "default" using ZSTD_CCtx_reset(). * It's possible to reset all parameters to "default" using ZSTD_CCtx_reset().
* *
* This API supersedes all other "advanced" API entry points in the experimental section. * This API supersedes all other "advanced" API entry points in the experimental section.
* In the future, we expect to remove from experimental API entry points which are redundant with this API. * In the future, we expect to remove API entry points from experimental which are redundant with this API.
*/ */
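As a hedged illustration of the sticky-parameter behavior described above (not from the diff): parameters pushed with ZSTD_CCtx_set*() persist across frames on the same context until reset. The function name, the message-based input and the chosen level/checksum values are placeholders.

#include <string.h>
#include <zstd.h>

/* Sketch: sticky parameters persist across successive frames on the same context. */
static size_t two_frames(void* dst, size_t dstCapacity, const char* msg)
{
    ZSTD_CCtx* const cctx = ZSTD_createCCtx();
    size_t written = 0;
    if (cctx == NULL) return (size_t)-1;
    ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 19);
    ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1);
    /* both frames use level 19 + checksum; nothing needs to be re-set for the 2nd frame */
    for (int i = 0; i < 2; i++) {
        size_t const r = ZSTD_compress2(cctx, (char*)dst + written, dstCapacity - written,
                                        msg, strlen(msg));
        if (ZSTD_isError(r)) { ZSTD_freeCCtx(cctx); return r; }
        written += r;
    }
    ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters);  /* back to defaults */
    ZSTD_freeCCtx(cctx);
    return written;
}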
@ -390,6 +390,19 @@ typedef enum {
* The higher the value of selected strategy, the more complex it is, * The higher the value of selected strategy, the more complex it is,
* resulting in stronger and slower compression. * resulting in stronger and slower compression.
* Special: value 0 means "use default strategy". */ * Special: value 0 means "use default strategy". */
ZSTD_c_targetCBlockSize=130, /* v1.5.6+
* Attempts to fit compressed block size into approximately targetCBlockSize.
* Bound by ZSTD_TARGETCBLOCKSIZE_MIN and ZSTD_TARGETCBLOCKSIZE_MAX.
* Note that it's not a guarantee, just a convergence target (default:0).
* No target when targetCBlockSize == 0.
* This is helpful in low bandwidth streaming environments to improve end-to-end latency,
* when a client can make use of partial documents (a prominent example being Chrome).
* Note: this parameter is stable since v1.5.6.
* It was present as an experimental parameter in earlier versions,
* but using it with earlier library versions is not recommended
* due to massive performance regressions.
*/
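A hedged usage sketch for the newly stabilized parameter (not part of the diff): the target 1400 is chosen only because it sits just above ZSTD_TARGETCBLOCKSIZE_MIN (1340 in v1.5.6); the function name and level are placeholders.

#include <zstd.h>

/* Sketch: cap emitted block sizes near one transport MTU for low-latency streaming. */
static void set_low_latency_params(ZSTD_CCtx* cctx)
{
    size_t r = ZSTD_CCtx_setParameter(cctx, ZSTD_c_targetCBlockSize, 1400);
    if (ZSTD_isError(r)) { /* parameter out of bounds or unsupported by this build */ }
    r = ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 3);
    (void)r;
}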
/* LDM mode parameters */ /* LDM mode parameters */
ZSTD_c_enableLongDistanceMatching=160, /* Enable long distance matching. ZSTD_c_enableLongDistanceMatching=160, /* Enable long distance matching.
* This parameter is designed to improve compression ratio * This parameter is designed to improve compression ratio
@ -469,7 +482,6 @@ typedef enum {
* ZSTD_c_forceMaxWindow * ZSTD_c_forceMaxWindow
* ZSTD_c_forceAttachDict * ZSTD_c_forceAttachDict
* ZSTD_c_literalCompressionMode * ZSTD_c_literalCompressionMode
* ZSTD_c_targetCBlockSize
* ZSTD_c_srcSizeHint * ZSTD_c_srcSizeHint
* ZSTD_c_enableDedicatedDictSearch * ZSTD_c_enableDedicatedDictSearch
* ZSTD_c_stableInBuffer * ZSTD_c_stableInBuffer
@ -490,7 +502,7 @@ typedef enum {
ZSTD_c_experimentalParam3=1000, ZSTD_c_experimentalParam3=1000,
ZSTD_c_experimentalParam4=1001, ZSTD_c_experimentalParam4=1001,
ZSTD_c_experimentalParam5=1002, ZSTD_c_experimentalParam5=1002,
ZSTD_c_experimentalParam6=1003, /* was ZSTD_c_experimentalParam6=1003; is now ZSTD_c_targetCBlockSize */
ZSTD_c_experimentalParam7=1004, ZSTD_c_experimentalParam7=1004,
ZSTD_c_experimentalParam8=1005, ZSTD_c_experimentalParam8=1005,
ZSTD_c_experimentalParam9=1006, ZSTD_c_experimentalParam9=1006,
@ -575,6 +587,7 @@ ZSTDLIB_API size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset);
/*! ZSTD_compress2() : /*! ZSTD_compress2() :
* Behave the same as ZSTD_compressCCtx(), but compression parameters are set using the advanced API. * Behave the same as ZSTD_compressCCtx(), but compression parameters are set using the advanced API.
* (note that this entry point doesn't even expose a compression level parameter).
* ZSTD_compress2() always starts a new frame. * ZSTD_compress2() always starts a new frame.
* Should cctx hold data from a previously unfinished frame, everything about it is forgotten. * Should cctx hold data from a previously unfinished frame, everything about it is forgotten.
* - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*() * - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*()
@ -618,6 +631,7 @@ typedef enum {
* ZSTD_d_forceIgnoreChecksum * ZSTD_d_forceIgnoreChecksum
* ZSTD_d_refMultipleDDicts * ZSTD_d_refMultipleDDicts
* ZSTD_d_disableHuffmanAssembly * ZSTD_d_disableHuffmanAssembly
* ZSTD_d_maxBlockSize
* Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them.
* note : never ever use experimentalParam? names directly * note : never ever use experimentalParam? names directly
*/ */
@ -625,7 +639,8 @@ typedef enum {
ZSTD_d_experimentalParam2=1001, ZSTD_d_experimentalParam2=1001,
ZSTD_d_experimentalParam3=1002, ZSTD_d_experimentalParam3=1002,
ZSTD_d_experimentalParam4=1003, ZSTD_d_experimentalParam4=1003,
ZSTD_d_experimentalParam5=1004 ZSTD_d_experimentalParam5=1004,
ZSTD_d_experimentalParam6=1005
} ZSTD_dParameter; } ZSTD_dParameter;
@ -680,14 +695,14 @@ typedef struct ZSTD_outBuffer_s {
* A ZSTD_CStream object is required to track streaming operation. * A ZSTD_CStream object is required to track streaming operation.
* Use ZSTD_createCStream() and ZSTD_freeCStream() to create/release resources. * Use ZSTD_createCStream() and ZSTD_freeCStream() to create/release resources.
* ZSTD_CStream objects can be reused multiple times on consecutive compression operations. * ZSTD_CStream objects can be reused multiple times on consecutive compression operations.
* It is recommended to re-use ZSTD_CStream since it will play nicer with system's memory, by re-using already allocated memory. * It is recommended to reuse ZSTD_CStream since it will play nicer with system's memory, by re-using already allocated memory.
* *
* For parallel execution, use one separate ZSTD_CStream per thread. * For parallel execution, use one separate ZSTD_CStream per thread.
* *
* note : since v1.3.0, ZSTD_CStream and ZSTD_CCtx are the same thing. * note : since v1.3.0, ZSTD_CStream and ZSTD_CCtx are the same thing.
* *
* Parameters are sticky : when starting a new compression on the same context, * Parameters are sticky : when starting a new compression on the same context,
* it will re-use the same sticky parameters as previous compression session. * it will reuse the same sticky parameters as previous compression session.
* When in doubt, it's recommended to fully initialize the context before usage. * When in doubt, it's recommended to fully initialize the context before usage.
* Use ZSTD_CCtx_reset() to reset the context and ZSTD_CCtx_setParameter(), * Use ZSTD_CCtx_reset() to reset the context and ZSTD_CCtx_setParameter(),
* ZSTD_CCtx_setPledgedSrcSize(), or ZSTD_CCtx_loadDictionary() and friends to * ZSTD_CCtx_setPledgedSrcSize(), or ZSTD_CCtx_loadDictionary() and friends to
@ -776,6 +791,11 @@ typedef enum {
* only ZSTD_e_end or ZSTD_e_flush operations are allowed. * only ZSTD_e_end or ZSTD_e_flush operations are allowed.
* Before starting a new compression job, or changing compression parameters, * Before starting a new compression job, or changing compression parameters,
* it is required to fully flush internal buffers. * it is required to fully flush internal buffers.
* - note: if an operation ends with an error, it may leave @cctx in an undefined state.
* Therefore, it's UB to invoke ZSTD_compressStream2() or ZSTD_compressStream() on such a state.
* In order to be reused after an error, a state must be reset,
* which can be done explicitly (ZSTD_CCtx_reset()),
* or is sometimes implied by methods starting a new compression job (ZSTD_initCStream(), ZSTD_compressCCtx())
*/ */
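A small sketch (not from the diff) of the error-handling note above: one compression step that resets the context whenever ZSTD_compressStream2() reports an error, so the same cctx can safely be reused for a fresh job. The wrapper name is a placeholder.

#include <zstd.h>

/* Sketch: a single compression step honoring the reset-after-error rule. */
static size_t compress_step(ZSTD_CCtx* cctx, ZSTD_outBuffer* out, ZSTD_inBuffer* in)
{
    size_t const r = ZSTD_compressStream2(cctx, out, in, ZSTD_e_continue);
    if (ZSTD_isError(r)) {
        ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only);  /* state is undefined after an error */
    }
    return r;
}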
ZSTDLIB_API size_t ZSTD_compressStream2( ZSTD_CCtx* cctx, ZSTDLIB_API size_t ZSTD_compressStream2( ZSTD_CCtx* cctx,
ZSTD_outBuffer* output, ZSTD_outBuffer* output,
@ -835,7 +855,7 @@ ZSTDLIB_API size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output);
* *
* A ZSTD_DStream object is required to track streaming operations. * A ZSTD_DStream object is required to track streaming operations.
* Use ZSTD_createDStream() and ZSTD_freeDStream() to create/release resources. * Use ZSTD_createDStream() and ZSTD_freeDStream() to create/release resources.
* ZSTD_DStream objects can be re-used multiple times. * ZSTD_DStream objects can be reused multiple times.
* *
* Use ZSTD_initDStream() to start a new decompression operation. * Use ZSTD_initDStream() to start a new decompression operation.
* @return : recommended first input size * @return : recommended first input size
@ -889,6 +909,12 @@ ZSTDLIB_API size_t ZSTD_initDStream(ZSTD_DStream* zds);
* @return : 0 when a frame is completely decoded and fully flushed, * @return : 0 when a frame is completely decoded and fully flushed,
* or an error code, which can be tested using ZSTD_isError(), * or an error code, which can be tested using ZSTD_isError(),
* or any other value > 0, which means there is some decoding or flushing to do to complete current frame. * or any other value > 0, which means there is some decoding or flushing to do to complete current frame.
*
* Note: when an operation returns with an error code, the @zds state may be left in an undefined state.
* It's UB to invoke `ZSTD_decompressStream()` on such a state.
* In order to reuse such a state, it must first be reset,
* which can be done explicitly (`ZSTD_DCtx_reset()`),
* or is implied for operations starting some new decompression job (`ZSTD_initDStream`, `ZSTD_decompressDCtx()`, `ZSTD_decompress_usingDict()`)
*/ */
ZSTDLIB_API size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input); ZSTDLIB_API size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
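Not part of the diff: a minimal decompression step mirroring the note above. It returns ZSTD_decompressStream()'s raw result (0 = frame fully decoded and flushed, >0 = more work, or an error code) and resets the DCtx on error before handing it back; the function name is a placeholder.

#include <zstd.h>

/* Sketch: one decompression step with reset-on-error, per the note above. */
static size_t decompress_step(ZSTD_DCtx* dctx, ZSTD_outBuffer* out, ZSTD_inBuffer* in)
{
    size_t const ret = ZSTD_decompressStream(dctx, out, in);
    if (ZSTD_isError(ret)) {
        /* the state is undefined after an error: reset before any further use */
        ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only);
    }
    return ret;
}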
@ -1021,7 +1047,7 @@ ZSTDLIB_API unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize);
* *
* This API allows dictionaries to be used with ZSTD_compress2(), * This API allows dictionaries to be used with ZSTD_compress2(),
* ZSTD_compressStream2(), and ZSTD_decompressDCtx(). * ZSTD_compressStream2(), and ZSTD_decompressDCtx().
* Dictionaries are sticky, they remain valid when same context is re-used, * Dictionaries are sticky, they remain valid when same context is reused,
* they only reset when the context is reset * they only reset when the context is reset
* with ZSTD_reset_parameters or ZSTD_reset_session_and_parameters. * with ZSTD_reset_parameters or ZSTD_reset_session_and_parameters.
* In contrast, Prefixes are single-use. * In contrast, Prefixes are single-use.
@ -1239,7 +1265,7 @@ ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
#define ZSTD_LDM_HASHRATELOG_MAX (ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN) #define ZSTD_LDM_HASHRATELOG_MAX (ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN)
/* Advanced parameter bounds */ /* Advanced parameter bounds */
#define ZSTD_TARGETCBLOCKSIZE_MIN 64 #define ZSTD_TARGETCBLOCKSIZE_MIN 1340 /* suitable to fit into an ethernet / wifi / 4G transport frame */
#define ZSTD_TARGETCBLOCKSIZE_MAX ZSTD_BLOCKSIZE_MAX #define ZSTD_TARGETCBLOCKSIZE_MAX ZSTD_BLOCKSIZE_MAX
#define ZSTD_SRCSIZEHINT_MIN 0 #define ZSTD_SRCSIZEHINT_MIN 0
#define ZSTD_SRCSIZEHINT_MAX INT_MAX #define ZSTD_SRCSIZEHINT_MAX INT_MAX
@ -1527,25 +1553,38 @@ typedef enum {
ZSTDLIB_STATIC_API size_t ZSTD_sequenceBound(size_t srcSize); ZSTDLIB_STATIC_API size_t ZSTD_sequenceBound(size_t srcSize);
/*! ZSTD_generateSequences() : /*! ZSTD_generateSequences() :
* WARNING: This function is meant for debugging and informational purposes ONLY!
* Its implementation is flawed, and it will be deleted in a future version.
* It is not guaranteed to succeed, as there are several cases where it will give
* up and fail. You should NOT use this function in production code.
*
* This function is deprecated, and will be removed in a future version.
*
* Generate sequences using ZSTD_compress2(), given a source buffer. * Generate sequences using ZSTD_compress2(), given a source buffer.
* *
* @param zc The compression context to be used for ZSTD_compress2(). Set any
* compression parameters you need on this context.
* @param outSeqs The output sequences buffer of size @p outSeqsSize
* @param outSeqsSize The size of the output sequences buffer.
* ZSTD_sequenceBound(srcSize) is an upper bound on the number
* of sequences that can be generated.
* @param src The source buffer to generate sequences from of size @p srcSize.
* @param srcSize The size of the source buffer.
*
* Each block will end with a dummy sequence * Each block will end with a dummy sequence
* with offset == 0, matchLength == 0, and litLength == length of last literals. * with offset == 0, matchLength == 0, and litLength == length of last literals.
* litLength may be == 0, and if so, then the sequence of (of: 0 ml: 0 ll: 0) * litLength may be == 0, and if so, then the sequence of (of: 0 ml: 0 ll: 0)
* simply acts as a block delimiter. * simply acts as a block delimiter.
* *
* @zc can be used to insert custom compression params. * @returns The number of sequences generated, necessarily less than
* This function invokes ZSTD_compress2(). * ZSTD_sequenceBound(srcSize), or an error code that can be checked
* * with ZSTD_isError().
* The output of this function can be fed into ZSTD_compressSequences() with CCtx
* setting of ZSTD_c_blockDelimiters as ZSTD_sf_explicitBlockDelimiters
* @return : number of sequences generated
*/ */
ZSTD_DEPRECATED("For debugging only, will be replaced by ZSTD_extractSequences()")
ZSTDLIB_STATIC_API size_t ZSTDLIB_STATIC_API size_t
ZSTD_generateSequences( ZSTD_CCtx* zc, ZSTD_generateSequences(ZSTD_CCtx* zc,
ZSTD_Sequence* outSeqs, size_t outSeqsSize, ZSTD_Sequence* outSeqs, size_t outSeqsSize,
const void* src, size_t srcSize); const void* src, size_t srcSize);
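For illustration only (the function is deprecated and debugging-only, as the comment above stresses): a sketch of extracting the sequences zstd would emit for a buffer, sizing the output with ZSTD_sequenceBound(). Requires the static-linking API; expect a deprecation warning when compiling. Function name and level are placeholders.

#define ZSTD_STATIC_LINKING_ONLY
#include <stdlib.h>
#include <zstd.h>

/* Sketch (debugging only): dump the sequences zstd would generate for `src`. */
static size_t dump_sequences(const void* src, size_t srcSize)
{
    ZSTD_CCtx* const cctx = ZSTD_createCCtx();
    size_t const maxSeqs = ZSTD_sequenceBound(srcSize);
    ZSTD_Sequence* const seqs = (ZSTD_Sequence*)malloc(maxSeqs * sizeof(ZSTD_Sequence));
    size_t nbSeqs = 0;
    if (cctx && seqs) {
        ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 5);
        nbSeqs = ZSTD_generateSequences(cctx, seqs, maxSeqs, src, srcSize);
        /* on success, each block ends with a (of:0, ml:0, ll:n) delimiter sequence */
    }
    free(seqs);
    ZSTD_freeCCtx(cctx);
    return nbSeqs;   /* sequence count, or an error code (check with ZSTD_isError) */
}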
/*! ZSTD_mergeBlockDelimiters() : /*! ZSTD_mergeBlockDelimiters() :
* Given an array of ZSTD_Sequence, remove all sequences that represent block delimiters/last literals * Given an array of ZSTD_Sequence, remove all sequences that represent block delimiters/last literals
@ -1640,56 +1679,59 @@ ZSTDLIB_API unsigned ZSTD_isSkippableFrame(const void* buffer, size_t size);
/*! ZSTD_estimate*() : /*! ZSTD_estimate*() :
* These functions make it possible to estimate memory usage * These functions make it possible to estimate memory usage
* of a future {D,C}Ctx, before its creation. * of a future {D,C}Ctx, before its creation.
* This is useful in combination with ZSTD_initStatic(),
* which makes it possible to employ a static buffer for ZSTD_CCtx* state.
* *
* ZSTD_estimateCCtxSize() will provide a memory budget large enough * ZSTD_estimateCCtxSize() will provide a memory budget large enough
* for any compression level up to selected one. * to compress data of any size using one-shot compression ZSTD_compressCCtx() or ZSTD_compress2()
* Note : Unlike ZSTD_estimateCStreamSize*(), this estimate * associated with any compression level up to max specified one.
* does not include space for a window buffer.
* Therefore, the estimation is only guaranteed for single-shot compressions, not streaming.
* The estimate will assume the input may be arbitrarily large, * The estimate will assume the input may be arbitrarily large,
* which is the worst case. * which is the worst case.
* *
* Note that this size estimate is specific to one-shot compression;
* it is not valid for streaming (see ZSTD_estimateCStreamSize*())
* nor for other potential ways of using a ZSTD_CCtx* state.
*
* When srcSize can be bound by a known and rather "small" value, * When srcSize can be bound by a known and rather "small" value,
* this fact can be used to provide a tighter estimation * this knowledge can be used to provide a tighter budget estimation
* because the CCtx compression context will need less memory. * because the ZSTD_CCtx* state will need less memory for small inputs.
* This tighter estimation can be provided by more advanced functions * This tighter estimation can be provided by employing more advanced functions
* ZSTD_estimateCCtxSize_usingCParams(), which can be used in tandem with ZSTD_getCParams(), * ZSTD_estimateCCtxSize_usingCParams(), which can be used in tandem with ZSTD_getCParams(),
* and ZSTD_estimateCCtxSize_usingCCtxParams(), which can be used in tandem with ZSTD_CCtxParams_setParameter(). * and ZSTD_estimateCCtxSize_usingCCtxParams(), which can be used in tandem with ZSTD_CCtxParams_setParameter().
* Both can be used to estimate memory using custom compression parameters and arbitrary srcSize limits. * Both can be used to estimate memory using custom compression parameters and arbitrary srcSize limits.
* *
* Note : only single-threaded compression is supported. * Note : only single-threaded compression is supported.
* ZSTD_estimateCCtxSize_usingCCtxParams() will return an error code if ZSTD_c_nbWorkers is >= 1. * ZSTD_estimateCCtxSize_usingCCtxParams() will return an error code if ZSTD_c_nbWorkers is >= 1.
*
* Note 2 : ZSTD_estimateCCtxSize* functions are not compatible with the Block-Level Sequence Producer API at this time.
* Size estimates assume that no external sequence producer is registered.
*/ */
ZSTDLIB_STATIC_API size_t ZSTD_estimateCCtxSize(int compressionLevel); ZSTDLIB_STATIC_API size_t ZSTD_estimateCCtxSize(int maxCompressionLevel);
ZSTDLIB_STATIC_API size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams); ZSTDLIB_STATIC_API size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams);
ZSTDLIB_STATIC_API size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params); ZSTDLIB_STATIC_API size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params);
ZSTDLIB_STATIC_API size_t ZSTD_estimateDCtxSize(void); ZSTDLIB_STATIC_API size_t ZSTD_estimateDCtxSize(void);
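A hedged sketch of the static-buffer pattern the comment above refers to (not from the diff): budget a one-shot context with ZSTD_estimateCCtxSize() and carve it out of a caller-provided buffer with ZSTD_initStaticCCtx(). The helper name and ownership convention are placeholders.

#define ZSTD_STATIC_LINKING_ONLY
#include <stdlib.h>
#include <zstd.h>

/* Sketch: place a one-shot CCtx inside a caller-owned workspace. */
static ZSTD_CCtx* make_static_cctx(int maxLevel, void** workspaceOut)
{
    size_t const budget = ZSTD_estimateCCtxSize(maxLevel);  /* worst case, any srcSize */
    void* const workspace = malloc(budget);                  /* malloc alignment suffices */
    ZSTD_CCtx* const cctx = workspace ? ZSTD_initStaticCCtx(workspace, budget) : NULL;
    *workspaceOut = workspace;   /* caller frees this; the cctx lives inside it */
    return cctx;
}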
/*! ZSTD_estimateCStreamSize() : /*! ZSTD_estimateCStreamSize() :
* ZSTD_estimateCStreamSize() will provide a budget large enough for any compression level up to selected one. * ZSTD_estimateCStreamSize() will provide a memory budget large enough for streaming compression
* It will also consider src size to be arbitrarily "large", which is worst case. * using any compression level up to the max specified one.
* It will also consider src size to be arbitrarily "large", which is a worst case scenario.
* If srcSize is known to always be small, ZSTD_estimateCStreamSize_usingCParams() can provide a tighter estimation. * If srcSize is known to always be small, ZSTD_estimateCStreamSize_usingCParams() can provide a tighter estimation.
* ZSTD_estimateCStreamSize_usingCParams() can be used in tandem with ZSTD_getCParams() to create cParams from compressionLevel. * ZSTD_estimateCStreamSize_usingCParams() can be used in tandem with ZSTD_getCParams() to create cParams from compressionLevel.
* ZSTD_estimateCStreamSize_usingCCtxParams() can be used in tandem with ZSTD_CCtxParams_setParameter(). Only single-threaded compression is supported. This function will return an error code if ZSTD_c_nbWorkers is >= 1. * ZSTD_estimateCStreamSize_usingCCtxParams() can be used in tandem with ZSTD_CCtxParams_setParameter(). Only single-threaded compression is supported. This function will return an error code if ZSTD_c_nbWorkers is >= 1.
* Note : CStream size estimation is only correct for single-threaded compression. * Note : CStream size estimation is only correct for single-threaded compression.
* ZSTD_DStream memory budget depends on window Size. * ZSTD_estimateCStreamSize_usingCCtxParams() will return an error code if ZSTD_c_nbWorkers is >= 1.
* Note 2 : ZSTD_estimateCStreamSize* functions are not compatible with the Block-Level Sequence Producer API at this time.
* Size estimates assume that no external sequence producer is registered.
*
* ZSTD_DStream memory budget depends on frame's window Size.
* This information can be passed manually, using ZSTD_estimateDStreamSize, * This information can be passed manually, using ZSTD_estimateDStreamSize,
* or deducted from a valid frame Header, using ZSTD_estimateDStreamSize_fromFrame(); * or deducted from a valid frame Header, using ZSTD_estimateDStreamSize_fromFrame();
* Any frame requesting a window size larger than max specified one will be rejected.
* Note : if streaming is init with function ZSTD_init?Stream_usingDict(), * Note : if streaming is init with function ZSTD_init?Stream_usingDict(),
* an internal ?Dict will be created, which additional size is not estimated here. * an internal ?Dict will be created, which additional size is not estimated here.
* In this case, get total size by adding ZSTD_estimate?DictSize * In this case, get total size by adding ZSTD_estimate?DictSize
* Note 2 : only single-threaded compression is supported.
* ZSTD_estimateCStreamSize_usingCCtxParams() will return an error code if ZSTD_c_nbWorkers is >= 1.
* Note 3 : ZSTD_estimateCStreamSize* functions are not compatible with the Block-Level Sequence Producer API at this time.
* Size estimates assume that no external sequence producer is registered.
*/ */
ZSTDLIB_STATIC_API size_t ZSTD_estimateCStreamSize(int compressionLevel); ZSTDLIB_STATIC_API size_t ZSTD_estimateCStreamSize(int maxCompressionLevel);
ZSTDLIB_STATIC_API size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams); ZSTDLIB_STATIC_API size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams);
ZSTDLIB_STATIC_API size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params); ZSTDLIB_STATIC_API size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params);
ZSTDLIB_STATIC_API size_t ZSTD_estimateDStreamSize(size_t windowSize); ZSTDLIB_STATIC_API size_t ZSTD_estimateDStreamSize(size_t maxWindowSize);
ZSTDLIB_STATIC_API size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize); ZSTDLIB_STATIC_API size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize);
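Not part of the diff: a small sketch of deriving the DStream budget from a real frame header instead of guessing a maximum window size. `frameStart` is assumed to hold at least the frame header bytes; the function name is a placeholder.

#define ZSTD_STATIC_LINKING_ONLY
#include <zstd.h>

/* Sketch: size the decompression stream from the frame's own window size. */
static size_t dstream_budget(const void* frameStart, size_t available)
{
    size_t const need = ZSTD_estimateDStreamSize_fromFrame(frameStart, available);
    return ZSTD_isError(need) ? 0 : need;   /* 0 here: header incomplete or invalid */
}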
/*! ZSTD_estimate?DictSize() : /*! ZSTD_estimate?DictSize() :
@ -1946,11 +1988,6 @@ ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const vo
*/ */
#define ZSTD_c_literalCompressionMode ZSTD_c_experimentalParam5 #define ZSTD_c_literalCompressionMode ZSTD_c_experimentalParam5
/* Tries to fit compressed block size to be around targetCBlockSize.
* No target when targetCBlockSize == 0.
* There is no guarantee on compressed block size (default:0) */
#define ZSTD_c_targetCBlockSize ZSTD_c_experimentalParam6
/* User's best guess of source size. /* User's best guess of source size.
* Hint is not valid when srcSizeHint == 0. * Hint is not valid when srcSizeHint == 0.
* There is no guarantee that hint is close to actual source size, * There is no guarantee that hint is close to actual source size,
@ -2430,6 +2467,22 @@ ZSTDLIB_STATIC_API size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, ZSTD_dParamete
*/ */
#define ZSTD_d_disableHuffmanAssembly ZSTD_d_experimentalParam5 #define ZSTD_d_disableHuffmanAssembly ZSTD_d_experimentalParam5
/* ZSTD_d_maxBlockSize
* Allowed values are between 1KB and ZSTD_BLOCKSIZE_MAX (128KB).
* The default is ZSTD_BLOCKSIZE_MAX, and setting to 0 will set to the default.
*
* Forces the decompressor to reject blocks whose content size is
* larger than the configured maxBlockSize. When maxBlockSize is
* larger than the windowSize, the windowSize is used instead.
* This saves memory on the decoder when you know all blocks are small.
*
* This option is typically used in conjunction with ZSTD_c_maxBlockSize.
*
* WARNING: This causes the decoder to reject otherwise valid frames
* that have block sizes larger than the configured maxBlockSize.
*/
#define ZSTD_d_maxBlockSize ZSTD_d_experimentalParam6
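A hedged sketch of the pairing suggested above (not from the diff): the producer limits block sizes with ZSTD_c_maxBlockSize and the consumer enforces the same bound with ZSTD_d_maxBlockSize, letting the decoder allocate smaller buffers. The 16 KB value and function name are placeholders.

#define ZSTD_STATIC_LINKING_ONLY
#include <zstd.h>

/* Sketch: matched compressor/decompressor settings for memory-constrained decoding. */
static void use_small_blocks(ZSTD_CCtx* cctx, ZSTD_DCtx* dctx)
{
    int const kMaxBlock = 16 * 1024;   /* 16 KB, within [1KB, ZSTD_BLOCKSIZE_MAX] */
    ZSTD_CCtx_setParameter(cctx, ZSTD_c_maxBlockSize, kMaxBlock);
    ZSTD_DCtx_setParameter(dctx, ZSTD_d_maxBlockSize, kMaxBlock);
}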
/*! ZSTD_DCtx_setFormat() : /*! ZSTD_DCtx_setFormat() :
* This function is REDUNDANT. Prefer ZSTD_DCtx_setParameter(). * This function is REDUNDANT. Prefer ZSTD_DCtx_setParameter().
@ -2557,7 +2610,7 @@ size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs,
* explicitly specified. * explicitly specified.
* *
* start a new frame, using same parameters from previous frame. * start a new frame, using same parameters from previous frame.
* This is typically useful to skip dictionary loading stage, since it will re-use it in-place. * This is typically useful to skip dictionary loading stage, since it will reuse it in-place.
* Note that zcs must be init at least once before using ZSTD_resetCStream(). * Note that zcs must be init at least once before using ZSTD_resetCStream().
* If pledgedSrcSize is not known at reset time, use macro ZSTD_CONTENTSIZE_UNKNOWN. * If pledgedSrcSize is not known at reset time, use macro ZSTD_CONTENTSIZE_UNKNOWN.
* If pledgedSrcSize > 0, its value must be correct, as it will be written in header, and controlled at the end. * If pledgedSrcSize > 0, its value must be correct, as it will be written in header, and controlled at the end.
@ -2633,7 +2686,7 @@ ZSTDLIB_STATIC_API size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const Z
* *
* ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only);
* *
* re-use decompression parameters from previous init; saves dictionary loading * reuse decompression parameters from previous init; saves dictionary loading
*/ */
ZSTD_DEPRECATED("use ZSTD_DCtx_reset, see zstd.h for detailed instructions") ZSTD_DEPRECATED("use ZSTD_DCtx_reset, see zstd.h for detailed instructions")
ZSTDLIB_STATIC_API size_t ZSTD_resetDStream(ZSTD_DStream* zds); ZSTDLIB_STATIC_API size_t ZSTD_resetDStream(ZSTD_DStream* zds);
@ -2765,7 +2818,7 @@ ZSTDLIB_STATIC_API size_t ZSTD_resetDStream(ZSTD_DStream* zds);
#define ZSTD_SEQUENCE_PRODUCER_ERROR ((size_t)(-1)) #define ZSTD_SEQUENCE_PRODUCER_ERROR ((size_t)(-1))
typedef size_t ZSTD_sequenceProducer_F ( typedef size_t (*ZSTD_sequenceProducer_F) (
void* sequenceProducerState, void* sequenceProducerState,
ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, ZSTD_Sequence* outSeqs, size_t outSeqsCapacity,
const void* src, size_t srcSize, const void* src, size_t srcSize,
@ -2797,7 +2850,23 @@ ZSTDLIB_STATIC_API void
ZSTD_registerSequenceProducer( ZSTD_registerSequenceProducer(
ZSTD_CCtx* cctx, ZSTD_CCtx* cctx,
void* sequenceProducerState, void* sequenceProducerState,
ZSTD_sequenceProducer_F* sequenceProducer ZSTD_sequenceProducer_F sequenceProducer
);
/*! ZSTD_CCtxParams_registerSequenceProducer() :
* Same as ZSTD_registerSequenceProducer(), but operates on ZSTD_CCtx_params.
* This is used for accurate size estimation with ZSTD_estimateCCtxSize_usingCCtxParams(),
* which is needed when creating a ZSTD_CCtx with ZSTD_initStaticCCtx().
*
* If you are using the external sequence producer API in a scenario where ZSTD_initStaticCCtx()
* is required, then this function is for you. Otherwise, you probably don't need it.
*
* See tests/zstreamtest.c for example usage. */
ZSTDLIB_STATIC_API void
ZSTD_CCtxParams_registerSequenceProducer(
ZSTD_CCtx_params* params,
void* sequenceProducerState,
ZSTD_sequenceProducer_F sequenceProducer
); );
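An illustrative sketch of the registration calls above (not part of the diff). It assumes the full ZSTD_sequenceProducer_F signature from upstream zstd.h, whose trailing dict/compressionLevel/windowSize parameters are not visible in this hunk; the producer below always gives up, which only succeeds if fallback is enabled (ZSTD_c_enableSeqProducerFallback). Names are placeholders.

#define ZSTD_STATIC_LINKING_ONLY
#include <zstd.h>

/* Sketch: a do-nothing external sequence producer (assumed full signature). */
static size_t nullProducer(void* state,
                           ZSTD_Sequence* outSeqs, size_t outSeqsCapacity,
                           const void* src, size_t srcSize,
                           const void* dict, size_t dictSize,
                           int compressionLevel, size_t windowSize)
{
    (void)state; (void)outSeqs; (void)outSeqsCapacity; (void)src; (void)srcSize;
    (void)dict; (void)dictSize; (void)compressionLevel; (void)windowSize;
    return ZSTD_SEQUENCE_PRODUCER_ERROR;   /* signal failure; zstd falls back only if enabled */
}

/* Register on a live cctx, and on a params object so that
 * ZSTD_estimateCCtxSize_usingCCtxParams() accounts for it (new in v1.5.6). */
static void hook_producer(ZSTD_CCtx* cctx, ZSTD_CCtx_params* params)
{
    ZSTD_registerSequenceProducer(cctx, NULL, nullProducer);
    ZSTD_CCtxParams_registerSequenceProducer(params, NULL, nullProducer);
}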
@ -2820,7 +2889,7 @@ ZSTD_registerSequenceProducer(
A ZSTD_CCtx object is required to track streaming operations. A ZSTD_CCtx object is required to track streaming operations.
Use ZSTD_createCCtx() / ZSTD_freeCCtx() to manage resource. Use ZSTD_createCCtx() / ZSTD_freeCCtx() to manage resource.
ZSTD_CCtx object can be re-used multiple times within successive compression operations. ZSTD_CCtx object can be reused multiple times within successive compression operations.
Start by initializing a context. Start by initializing a context.
Use ZSTD_compressBegin(), or ZSTD_compressBegin_usingDict() for dictionary compression. Use ZSTD_compressBegin(), or ZSTD_compressBegin_usingDict() for dictionary compression.
@ -2841,7 +2910,7 @@ ZSTD_registerSequenceProducer(
It's possible to use srcSize==0, in which case, it will write a final empty block to end the frame. It's possible to use srcSize==0, in which case, it will write a final empty block to end the frame.
Without last block mark, frames are considered unfinished (hence corrupted) by compliant decoders. Without last block mark, frames are considered unfinished (hence corrupted) by compliant decoders.
`ZSTD_CCtx` object can be re-used (ZSTD_compressBegin()) to compress again. `ZSTD_CCtx` object can be reused (ZSTD_compressBegin()) to compress again.
*/ */
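A minimal sketch of the buffer-less flow described above (not from the diff, illustrative only): begin a frame, feed one chunk, then end the frame with an empty final block as the text permits. It assumes the caller sized `dst` with ZSTD_compressBound() and omits multi-chunk bookkeeping; the function name and level are placeholders.

#define ZSTD_STATIC_LINKING_ONLY
#include <zstd.h>

/* Sketch: begin / continue / end, writing one frame into a single dst buffer. */
static size_t bufferless_compress(ZSTD_CCtx* cctx,
                                  void* dst, size_t dstCapacity,
                                  const void* chunk, size_t chunkSize)
{
    size_t pos = 0;
    size_t r = ZSTD_compressBegin(cctx, 3);                  /* start a frame at level 3 */
    if (ZSTD_isError(r)) return r;
    r = ZSTD_compressContinue(cctx, (char*)dst + pos, dstCapacity - pos, chunk, chunkSize);
    if (ZSTD_isError(r)) return r;
    pos += r;
    r = ZSTD_compressEnd(cctx, (char*)dst + pos, dstCapacity - pos, NULL, 0); /* empty last block + epilogue */
    if (ZSTD_isError(r)) return r;
    return pos + r;                                          /* total frame size */
}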
/*===== Buffer-less streaming compression functions =====*/ /*===== Buffer-less streaming compression functions =====*/
@ -2873,7 +2942,7 @@ size_t ZSTD_compressBegin_usingCDict_advanced(ZSTD_CCtx* const cctx, const ZSTD_
A ZSTD_DCtx object is required to track streaming operations. A ZSTD_DCtx object is required to track streaming operations.
Use ZSTD_createDCtx() / ZSTD_freeDCtx() to manage it. Use ZSTD_createDCtx() / ZSTD_freeDCtx() to manage it.
A ZSTD_DCtx object can be re-used multiple times. A ZSTD_DCtx object can be reused multiple times.
First typical operation is to retrieve frame parameters, using ZSTD_getFrameHeader(). First typical operation is to retrieve frame parameters, using ZSTD_getFrameHeader().
Frame header is extracted from the beginning of compressed frame, so providing only the frame's beginning is enough. Frame header is extracted from the beginning of compressed frame, so providing only the frame's beginning is enough.