25 changes: 25 additions & 0 deletions lib/common/bits.h
@@ -172,4 +172,29 @@ MEM_STATIC unsigned ZSTD_highbit32(U32 val) /* compress, dictBuilder, decodeCorpus */
return 31 - ZSTD_countLeadingZeros32(val);
}

/* ZSTD_rotateRight_*():
* Rotates a bitfield to the right by "count" bits.
* https://en.wikipedia.org/w/index.php?title=Circular_shift&oldid=991635599#Implementing_circular_shifts
*/
FORCE_INLINE_TEMPLATE
U64 ZSTD_rotateRight_U64(U64 const value, U32 count) {
assert(count < 64);
count &= 0x3F; /* for fickle pattern recognition */
return (value >> count) | (U64)(value << ((0U - count) & 0x3F));
}

[Review thread on the rotate helpers]
Reviewer: minor: do you need all 3 variants? Only ZSTD_rotateRight_U64() seems in use.
Author: All 3 variants are currently in use in ZSTD_row_matchMaskGroupWidth. I opted to move all of them here from zstd_lazy.c instead of just one, as it's the more natural place for them in any case.

FORCE_INLINE_TEMPLATE
U32 ZSTD_rotateRight_U32(U32 const value, U32 count) {
assert(count < 32);
count &= 0x1F; /* for fickle pattern recognition */
return (value >> count) | (U32)(value << ((0U - count) & 0x1F));
}

FORCE_INLINE_TEMPLATE
U16 ZSTD_rotateRight_U16(U16 const value, U32 count) {
assert(count < 16);
count &= 0x0F; /* for fickle pattern recognition */
return (value >> count) | (U16)(value << ((0U - count) & 0x0F));
}
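
Aside: the masked-complement shift in these helpers is what the "fickle pattern recognition" comments refer to. A plain (value << (64 - count)) is undefined behavior when count == 0, while the ((0U - count) & 0x3F) form keeps every shift amount in range and still matches the rotate idiom that compilers typically lower to a single ror instruction. A minimal standalone sketch of the difference (rotr64_naive and rotr64_safe are illustrative names, not part of the patch):

#include <assert.h>
#include <stdint.h>

/* Naive rotate: shifts by (64 - 0) == 64 when count == 0, which is UB in C. */
static uint64_t rotr64_naive(uint64_t v, unsigned count) {
    return (v >> count) | (v << (64 - count));
}

/* Same shape as ZSTD_rotateRight_U64: masking the negated count keeps every
 * shift amount in [0, 63], so count == 0 degrades to the identity. */
static uint64_t rotr64_safe(uint64_t v, unsigned count) {
    count &= 0x3F;
    return (v >> count) | (v << ((0U - count) & 0x3F));
}

int main(void) {
    uint64_t const x = 0x0123456789ABCDEFULL;
    assert(rotr64_safe(x, 0) == x);                   /* safe at count == 0 */
    assert(rotr64_safe(x, 8) == rotr64_naive(x, 8));  /* agree for count > 0 */
    return 0;
}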

#endif /* ZSTD_BITS_H */
4 changes: 4 additions & 0 deletions lib/common/compiler.h
@@ -311,6 +311,10 @@ void __msan_poison(const volatile void *a, size_t size);
/* Returns the offset of the first (at least partially) poisoned byte in the
memory range, or -1 if the whole range is good. */
intptr_t __msan_test_shadow(const volatile void *x, size_t size);

/* Print shadow and origin for the memory range to stderr in a human-readable
format. */
void __msan_print_shadow(const volatile void *x, size_t size);
#endif
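
__msan_print_shadow() is an existing MemorySanitizer interface function; the declaration above merely makes it available for debugging poisoned workspace memory. A hedged usage sketch, assuming clang with -fsanitize=memory (the buffer and sizes are made up):

#include <sanitizer/msan_interface.h>
#include <stdlib.h>

int main(void) {
    char* buf = (char*)malloc(16);  /* heap memory starts out poisoned under MSan */
    buf[0] = 'x';                   /* initializing a byte clears its shadow */
    __msan_print_shadow(buf, 16);   /* dumps shadow/origin for all 16 bytes to stderr */
    free(buf);
    return 0;
}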

#if ZSTD_ADDRESS_SANITIZER && !defined(ZSTD_ASAN_DONT_POISON_WORKSPACE)
122 changes: 73 additions & 49 deletions lib/compress/zstd_compress.c
@@ -27,7 +27,7 @@
#include "zstd_opt.h"
#include "zstd_ldm.h"
#include "zstd_compress_superblock.h"
#include "../common/bits.h" /* ZSTD_highbit32 */
#include "../common/bits.h" /* ZSTD_highbit32, ZSTD_rotateRight_U64 */

/* ***************************************************************
* Tuning parameters
@@ -1884,6 +1884,19 @@ typedef enum {
ZSTD_resetTarget_CCtx
} ZSTD_resetTarget_e;

/* Mixes bits in a 64-bit value, based on XXH3_rrmxmx */
static U64 ZSTD_bitmix(U64 val, U64 len) {
val ^= ZSTD_rotateRight_U64(val, 49) ^ ZSTD_rotateRight_U64(val, 24);
val *= 0x9FB21C651E98DF25ULL;
val ^= (val >> 35) + len;
val *= 0x9FB21C651E98DF25ULL;
return val ^ (val >> 28);
}

/* Mixes in the hashSalt and hashSaltEntropy to create a new hashSalt */
static void ZSTD_advanceHashSalt(ZSTD_matchState_t* ms) {
ms->hashSalt = ZSTD_bitmix(ms->hashSalt, 8) ^ ZSTD_bitmix((U64) ms->hashSaltEntropy, 4);
}
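
The rrmxmx-style construction matters here because successive salts should look unrelated even though the update is deterministic: flipping a single input bit should flip roughly half of the output bits. A standalone sanity sketch (the avalanche check is illustrative, not part of the patch; __builtin_popcountll assumes gcc/clang):

#include <stdint.h>
#include <stdio.h>

static uint64_t rotr(uint64_t v, unsigned c) { return (v >> c) | (v << ((0U - c) & 0x3F)); }

/* Same structure as ZSTD_bitmix above. */
static uint64_t bitmix(uint64_t val, uint64_t len) {
    val ^= rotr(val, 49) ^ rotr(val, 24);
    val *= 0x9FB21C651E98DF25ULL;
    val ^= (val >> 35) + len;
    val *= 0x9FB21C651E98DF25ULL;
    return val ^ (val >> 28);
}

int main(void) {
    uint64_t const base = bitmix(0xDEADBEEFULL, 8);
    for (unsigned i = 0; i < 64; i += 16) {  /* flip one input bit at a time */
        uint64_t const flipped = bitmix(0xDEADBEEFULL ^ (1ULL << i), 8);
        printf("bit %2u: %d output bits differ\n", i,
               __builtin_popcountll(base ^ flipped));  /* expect values near 32 */
    }
    return 0;
}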

static size_t
ZSTD_reset_matchState(ZSTD_matchState_t* ms,
@@ -1932,6 +1945,27 @@ ZSTD_reset_matchState(ZSTD_matchState_t* ms,
ZSTD_cwksp_clean_tables(ws);
}

if (ZSTD_rowMatchFinderUsed(cParams->strategy, useRowMatchFinder)) {
/* Row match finder needs an additional table of hashes ("tags") */
size_t const tagTableSize = hSize * sizeof(U16);
/* We want to generate a new salt whenever we reset a CCtx, but we always want to use
* 0 when we reset a CDict */
if (forWho == ZSTD_resetTarget_CCtx) {
ms->tagTable = (U16 *) ZSTD_cwksp_reserve_aligned_init_once(ws, tagTableSize);
ZSTD_advanceHashSalt(ms);
} else {
/* When we are not salting we want to always memset the memory */
ms->tagTable = (U16 *) ZSTD_cwksp_reserve_aligned(ws, tagTableSize);
ZSTD_memset(ms->tagTable, 0, tagTableSize);
ms->hashSalt = 0;
}
{ /* Switch to 32-entry rows if searchLog is 5 (or more) */
U32 const rowLog = BOUNDED(4, cParams->searchLog, 6);
assert(cParams->hashLog >= rowLog);
ms->rowHashLog = cParams->hashLog - rowLog;
}
}
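
For concreteness, a sketch of the row geometry this computes; the parameter values (hashLog=17, searchLog=5) are illustrative, and BOUNDED is re-derived here rather than taken from the zstd headers:

#include <assert.h>
#include <stdio.h>

#define BOUNDED(min, val, max) ((val) < (min) ? (min) : ((val) > (max) ? (max) : (val)))

int main(void) {
    unsigned const hashLog = 17, searchLog = 5;          /* illustrative cParams */
    unsigned const rowLog = BOUNDED(4u, searchLog, 6u);  /* searchLog 5 -> 32-entry rows */
    assert(hashLog >= rowLog);
    {
        unsigned const rowHashLog = hashLog - rowLog;
        printf("rowLog=%u (%u tags per row), rowHashLog=%u (%u rows)\n",
               rowLog, 1u << rowLog, rowHashLog, 1u << rowHashLog);
        /* prints: rowLog=5 (32 tags per row), rowHashLog=12 (4096 rows) */
    }
    return 0;
}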

/* opt parser space */
if ((forWho == ZSTD_resetTarget_CCtx) && (cParams->strategy >= ZSTD_btopt)) {
DEBUGLOG(4, "reserving optimal parser space");
@@ -1943,19 +1977,6 @@ ZSTD_reset_matchState(ZSTD_matchState_t* ms,
ms->opt.priceTable = (ZSTD_optimal_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t));
}

if (ZSTD_rowMatchFinderUsed(cParams->strategy, useRowMatchFinder)) {
{ /* Row match finder needs an additional table of hashes ("tags") */
size_t const tagTableSize = hSize*sizeof(U16);
ms->tagTable = (U16*)ZSTD_cwksp_reserve_aligned(ws, tagTableSize);
if (ms->tagTable) ZSTD_memset(ms->tagTable, 0, tagTableSize);
}
{ /* Switch to 32-entry rows if searchLog is 5 (or more) */
U32 const rowLog = BOUNDED(4, cParams->searchLog, 6);
assert(cParams->hashLog >= rowLog);
ms->rowHashLog = cParams->hashLog - rowLog;
}
}

ms->cParams = *cParams;

RETURN_ERROR_IF(ZSTD_cwksp_reserve_failed(ws), memory_allocation,
@@ -2102,13 +2123,46 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,

ZSTD_reset_compressedBlockState(zc->blockState.prevCBlock);

FORWARD_IF_ERROR(ZSTD_reset_matchState(
&zc->blockState.matchState,
ws,
&params->cParams,
params->useRowMatchFinder,
crp,
needsIndexReset,
ZSTD_resetTarget_CCtx), "");

zc->seqStore.sequencesStart = (seqDef*)ZSTD_cwksp_reserve_aligned(ws, maxNbSeq * sizeof(seqDef));

/* ldm hash table */
if (params->ldmParams.enableLdm == ZSTD_ps_enable) {
/* TODO: avoid memset? */
size_t const ldmHSize = ((size_t)1) << params->ldmParams.hashLog;
zc->ldmState.hashTable = (ldmEntry_t*)ZSTD_cwksp_reserve_aligned(ws, ldmHSize * sizeof(ldmEntry_t));
ZSTD_memset(zc->ldmState.hashTable, 0, ldmHSize * sizeof(ldmEntry_t));
zc->ldmSequences = (rawSeq*)ZSTD_cwksp_reserve_aligned(ws, maxNbLdmSeq * sizeof(rawSeq));
zc->maxNbLdmSequences = maxNbLdmSeq;

ZSTD_window_init(&zc->ldmState.window);
zc->ldmState.loadedDictEnd = 0;
}

/* reserve space for block-level external sequences */
if (params->useSequenceProducer) {
size_t const maxNbExternalSeq = ZSTD_sequenceBound(blockSize);
zc->externalMatchCtx.seqBufferCapacity = maxNbExternalSeq;
zc->externalMatchCtx.seqBuffer =
(ZSTD_Sequence*)ZSTD_cwksp_reserve_aligned(ws, maxNbExternalSeq * sizeof(ZSTD_Sequence));
}

/* buffers */

/* ZSTD_wildcopy() is used to copy into the literals buffer,
* so we have to oversize the buffer by WILDCOPY_OVERLENGTH bytes.
*/
zc->seqStore.litStart = ZSTD_cwksp_reserve_buffer(ws, blockSize + WILDCOPY_OVERLENGTH);
zc->seqStore.maxNbLit = blockSize;

/* buffers */
zc->bufferedPolicy = zbuff;
zc->inBuffSize = buffInSize;
zc->inBuff = (char*)ZSTD_cwksp_reserve_buffer(ws, buffInSize);
Expand All @@ -2131,40 +2185,9 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
zc->seqStore.llCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE));
zc->seqStore.mlCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE));
zc->seqStore.ofCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE));
zc->seqStore.sequencesStart = (seqDef*)ZSTD_cwksp_reserve_aligned(ws, maxNbSeq * sizeof(seqDef));

FORWARD_IF_ERROR(ZSTD_reset_matchState(
&zc->blockState.matchState,
ws,
&params->cParams,
params->useRowMatchFinder,
crp,
needsIndexReset,
ZSTD_resetTarget_CCtx), "");

/* ldm hash table */
if (params->ldmParams.enableLdm == ZSTD_ps_enable) {
/* TODO: avoid memset? */
size_t const ldmHSize = ((size_t)1) << params->ldmParams.hashLog;
zc->ldmState.hashTable = (ldmEntry_t*)ZSTD_cwksp_reserve_aligned(ws, ldmHSize * sizeof(ldmEntry_t));
ZSTD_memset(zc->ldmState.hashTable, 0, ldmHSize * sizeof(ldmEntry_t));
zc->ldmSequences = (rawSeq*)ZSTD_cwksp_reserve_aligned(ws, maxNbLdmSeq * sizeof(rawSeq));
zc->maxNbLdmSequences = maxNbLdmSeq;

ZSTD_window_init(&zc->ldmState.window);
zc->ldmState.loadedDictEnd = 0;
}

/* reserve space for block-level external sequences */
if (params->useSequenceProducer) {
size_t const maxNbExternalSeq = ZSTD_sequenceBound(blockSize);
zc->externalMatchCtx.seqBufferCapacity = maxNbExternalSeq;
zc->externalMatchCtx.seqBuffer =
(ZSTD_Sequence*)ZSTD_cwksp_reserve_aligned(ws, maxNbExternalSeq * sizeof(ZSTD_Sequence));
}

DEBUGLOG(3, "wksp: finished allocating, %zd bytes remain available", ZSTD_cwksp_available_space(ws));
assert(ZSTD_cwksp_estimated_space_within_bounds(ws, neededSpace, resizeWorkspace));
assert(ZSTD_cwksp_estimated_space_within_bounds(ws, neededSpace));

zc->initialized = 1;

@@ -2341,8 +2364,9 @@ static size_t ZSTD_resetCCtx_byCopyingCDict(ZSTD_CCtx* cctx,
if (ZSTD_rowMatchFinderUsed(cdict_cParams->strategy, cdict->useRowMatchFinder)) {
size_t const tagTableSize = hSize*sizeof(U16);
ZSTD_memcpy(cctx->blockState.matchState.tagTable,
cdict->matchState.tagTable,
tagTableSize);
cdict->matchState.tagTable,
tagTableSize);
cctx->blockState.matchState.hashSalt = cdict->matchState.hashSalt;
}
}

51 changes: 39 additions & 12 deletions lib/compress/zstd_compress_internal.h
@@ -228,6 +228,8 @@ struct ZSTD_matchState_t {
U32 rowHashLog; /* For row-based matchfinder: Hashlog based on nb of rows in the hashTable.*/
U16* tagTable; /* For row-based matchFinder: A row-based table containing the hashes and head index. */
U32 hashCache[ZSTD_ROW_HASH_CACHE_SIZE]; /* For row-based matchFinder: a cache of hashes to improve speed */
U64 hashSalt; /* For row-based matchFinder: salts the hash for re-use of tag table */
U32 hashSaltEntropy; /* For row-based matchFinder: collects entropy for salt generation */

U32* hashTable;
U32* hashTable3;
@@ -787,28 +789,35 @@ ZSTD_count_2segments(const BYTE* ip, const BYTE* match,
* Hashes
***************************************/
static const U32 prime3bytes = 506832829U;
static U32 ZSTD_hash3(U32 u, U32 h) { assert(h <= 32); return ((u << (32-24)) * prime3bytes) >> (32-h) ; }
MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h); } /* only in zstd_opt.h */
static U32 ZSTD_hash3(U32 u, U32 h, U32 s) { assert(h <= 32); return (((u << (32-24)) * prime3bytes) ^ s) >> (32-h) ; }
MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h, 0); } /* only in zstd_opt.h */
MEM_STATIC size_t ZSTD_hash3PtrS(const void* ptr, U32 h, U32 s) { return ZSTD_hash3(MEM_readLE32(ptr), h, s); }

static const U32 prime4bytes = 2654435761U;
static U32 ZSTD_hash4(U32 u, U32 h) { assert(h <= 32); return (u * prime4bytes) >> (32-h) ; }
static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_readLE32(ptr), h); }
static U32 ZSTD_hash4(U32 u, U32 h, U32 s) { assert(h <= 32); return ((u * prime4bytes) ^ s) >> (32-h) ; }
static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_readLE32(ptr), h, 0); }
static size_t ZSTD_hash4PtrS(const void* ptr, U32 h, U32 s) { return ZSTD_hash4(MEM_readLE32(ptr), h, s); }

static const U64 prime5bytes = 889523592379ULL;
static size_t ZSTD_hash5(U64 u, U32 h) { assert(h <= 64); return (size_t)(((u << (64-40)) * prime5bytes) >> (64-h)) ; }
static size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(MEM_readLE64(p), h); }
static size_t ZSTD_hash5(U64 u, U32 h, U64 s) { assert(h <= 64); return (size_t)((((u << (64-40)) * prime5bytes) ^ s) >> (64-h)) ; }
static size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(MEM_readLE64(p), h, 0); }
static size_t ZSTD_hash5PtrS(const void* p, U32 h, U64 s) { return ZSTD_hash5(MEM_readLE64(p), h, s); }

static const U64 prime6bytes = 227718039650203ULL;
static size_t ZSTD_hash6(U64 u, U32 h) { assert(h <= 64); return (size_t)(((u << (64-48)) * prime6bytes) >> (64-h)) ; }
static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h); }
static size_t ZSTD_hash6(U64 u, U32 h, U64 s) { assert(h <= 64); return (size_t)((((u << (64-48)) * prime6bytes) ^ s) >> (64-h)) ; }
static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h, 0); }
static size_t ZSTD_hash6PtrS(const void* p, U32 h, U64 s) { return ZSTD_hash6(MEM_readLE64(p), h, s); }

static const U64 prime7bytes = 58295818150454627ULL;
static size_t ZSTD_hash7(U64 u, U32 h) { assert(h <= 64); return (size_t)(((u << (64-56)) * prime7bytes) >> (64-h)) ; }
static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_readLE64(p), h); }
static size_t ZSTD_hash7(U64 u, U32 h, U64 s) { assert(h <= 64); return (size_t)((((u << (64-56)) * prime7bytes) ^ s) >> (64-h)) ; }
static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_readLE64(p), h, 0); }
static size_t ZSTD_hash7PtrS(const void* p, U32 h, U64 s) { return ZSTD_hash7(MEM_readLE64(p), h, s); }

static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL;
static size_t ZSTD_hash8(U64 u, U32 h) { assert(h <= 64); return (size_t)(((u) * prime8bytes) >> (64-h)) ; }
static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h); }
static size_t ZSTD_hash8(U64 u, U32 h, U64 s) { assert(h <= 64); return (size_t)((((u) * prime8bytes) ^ s) >> (64-h)) ; }
static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h, 0); }
static size_t ZSTD_hash8PtrS(const void* p, U32 h, U64 s) { return ZSTD_hash8(MEM_readLE64(p), h, s); }


MEM_STATIC FORCE_INLINE_ATTR
size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)
@@ -828,6 +837,24 @@ size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)
}
}

MEM_STATIC FORCE_INLINE_ATTR
size_t ZSTD_hashPtrSalted(const void* p, U32 hBits, U32 mls, const U64 hashSalt) {
/* Although some of these hashes do support hBits up to 64, some do not.
* To be on the safe side, always avoid hBits > 32. */
assert(hBits <= 32);

switch(mls)
{
default:
case 4: return ZSTD_hash4PtrS(p, hBits, (U32)hashSalt);
case 5: return ZSTD_hash5PtrS(p, hBits, hashSalt);
case 6: return ZSTD_hash6PtrS(p, hBits, hashSalt);
case 7: return ZSTD_hash7PtrS(p, hBits, hashSalt);
case 8: return ZSTD_hash8PtrS(p, hBits, hashSalt);
}
}

[Review comment on the case 4 salt truncation]
Reviewer: I would rather recommend using the upper part of hashSalt ((U32)(hashSalt >> 32)), as it's less predictable than the lower part. An alternative approach could be to make the evolution of hashSalt's lower bits less predictable.
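
The point of threading a salt through these hashes: after ZSTD_advanceHashSalt(), the same input bytes land in different tag-table slots, so tags written under the previous salt behave as noise rather than false matches, and the table can be reused without a memset. A hedged standalone sketch reusing the 8-byte variant's structure (hash8_salted is a stand-in; the inputs and hBits are made up, and the plain memcpy ignores the little-endian read that MEM_readLE64 performs):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

static size_t hash8_salted(uint64_t u, unsigned hBits, uint64_t salt) {
    return (size_t)(((u * 0xCF1BBCDCB7A56463ULL) ^ salt) >> (64 - hBits));
}

int main(void) {
    uint64_t bytes;
    memcpy(&bytes, "abcdefgh", 8);  /* stand-in for MEM_readLE64(p) */
    unsigned const hBits = 16;      /* illustrative table size: 2^16 slots */
    printf("salt = 0:     %zu\n", hash8_salted(bytes, hBits, 0));
    printf("salt = mixed: %zu\n", hash8_salted(bytes, hBits, 0x9E3779B97F4A7C15ULL));
    /* Same input, different salt -> (almost always) a different slot. */
    return 0;
}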


/** ZSTD_ipow() :
* Return base^exponent.
*/
Expand Down
Loading