
Commit f8c7d1d

Restores unaligned buffer allocations, but changes the allocation order so that they are allocated after the aligned buffers
1 parent 7e0bcf7 commit f8c7d1d
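
In sketch form, the reservation order this commit establishes in ZSTD_resetCCtx_internal looks like this (order only, condensed from the diff below):

    /* aligned allocations are reserved first ... */
    zc->seqStore.sequencesStart = (seqDef*)ZSTD_cwksp_reserve_aligned(ws, maxNbSeq * sizeof(seqDef));
    /* ... and the unaligned buffers, restored by this commit, now come last: */
    zc->seqStore.litStart = ZSTD_cwksp_reserve_buffer(ws, blockSize + WILDCOPY_OVERLENGTH);
    zc->inBuff  = (char*)ZSTD_cwksp_reserve_buffer(ws, buffInSize);
    zc->outBuff = (char*)ZSTD_cwksp_reserve_buffer(ws, buffOutSize);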

3 files changed: 77 additions & 65 deletions

lib/compress/zstd_compress.c

Lines changed: 51 additions & 43 deletions
@@ -1653,9 +1653,9 @@ static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal(
     size_t const windowSize = (size_t) BOUNDED(1ULL, 1ULL << cParams->windowLog, pledgedSrcSize);
     size_t const blockSize = MIN(ZSTD_resolveMaxBlockSize(maxBlockSize), windowSize);
     size_t const maxNbSeq = ZSTD_maxNbSeq(blockSize, cParams->minMatch, useSequenceProducer);
-    size_t const tokenSpace = ZSTD_cwksp_aligned_alloc_size(WILDCOPY_OVERLENGTH + blockSize)
+    size_t const tokenSpace = ZSTD_cwksp_alloc_size(WILDCOPY_OVERLENGTH + blockSize)
                             + ZSTD_cwksp_aligned_alloc_size(maxNbSeq * sizeof(seqDef))
-                            + 3 * ZSTD_cwksp_aligned_alloc_size(maxNbSeq * sizeof(BYTE));
+                            + 3 * ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(BYTE));
     size_t const entropySpace = ZSTD_cwksp_alloc_size(ENTROPY_WORKSPACE_SIZE);
     size_t const blockStateSpace = 2 * ZSTD_cwksp_alloc_size(sizeof(ZSTD_compressedBlockState_t));
     size_t const matchStateSize = ZSTD_sizeof_matchState(cParams, useRowMatchFinder, /* enableDedicatedDictSearch */ 0, /* forCCtx */ 1);
@@ -1666,21 +1666,28 @@ static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal(
         ZSTD_cwksp_aligned_alloc_size(maxNbLdmSeq * sizeof(rawSeq)) : 0;


-    size_t const bufferSpace = ZSTD_cwksp_aligned_alloc_size(buffInSize)
-                             + ZSTD_cwksp_aligned_alloc_size(buffOutSize);
+    size_t const bufferSpace = ZSTD_cwksp_alloc_size(buffInSize)
+                             + ZSTD_cwksp_alloc_size(buffOutSize);

-    size_t const cctxSpace = isStatic ? ZSTD_cwksp_aligned_alloc_size(sizeof(ZSTD_CCtx)) : 0;
+    size_t const cctxSpace = isStatic ? ZSTD_cwksp_alloc_size(sizeof(ZSTD_CCtx)) : 0;

     size_t const maxNbExternalSeq = ZSTD_sequenceBound(blockSize);
     size_t const externalSeqSpace = useSequenceProducer
         ? ZSTD_cwksp_aligned_alloc_size(maxNbExternalSeq * sizeof(ZSTD_Sequence))
         : 0;

-    size_t const objectsSpace = cctxSpace + entropySpace + blockStateSpace;
-    size_t const tableSpace = ldmSpace + matchStateSize;
-    size_t const alignedSpace = tokenSpace + bufferSpace + externalSeqSpace + ldmSeqSpace;
-
-    size_t const neededSpace = objectsSpace + tableSpace + alignedSpace;
+    size_t const neededSpace =
+        cctxSpace +
+        entropySpace +
+        blockStateSpace +
+        ldmSpace +
+        ldmSeqSpace +
+        matchStateSize +
+        tokenSpace +
+        bufferSpace +
+        externalSeqSpace;
+
+    DEBUGLOG(5, "estimate workspace : %u", (U32)neededSpace);
     return neededSpace;
 }

@@ -2125,38 +2132,8 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
             needsIndexReset,
             ZSTD_resetTarget_CCtx), "");

-        /* ZSTD_wildcopy() is used to copy into the literals buffer,
-         * so we have to oversize the buffer by WILDCOPY_OVERLENGTH bytes.
-         */
-        zc->seqStore.litStart = (BYTE*)ZSTD_cwksp_reserve_aligned(ws, blockSize + WILDCOPY_OVERLENGTH);
-        zc->seqStore.maxNbLit = blockSize;
-
-        /* buffers */
-        zc->bufferedPolicy = zbuff;
-        zc->inBuffSize = buffInSize;
-        zc->inBuff = (char*)ZSTD_cwksp_reserve_aligned(ws, buffInSize);
-        zc->outBuffSize = buffOutSize;
-        zc->outBuff = (char*)ZSTD_cwksp_reserve_aligned(ws, buffOutSize);
-
-        /* ldm bucketOffsets table */
-        if (params->ldmParams.enableLdm == ZSTD_ps_enable) {
-            /* TODO: avoid memset? */
-            size_t const numBuckets =
-                  ((size_t)1) << (params->ldmParams.hashLog -
-                                  params->ldmParams.bucketSizeLog);
-            zc->ldmState.bucketOffsets = (BYTE*)ZSTD_cwksp_reserve_aligned(ws, numBuckets);
-            ZSTD_memset(zc->ldmState.bucketOffsets, 0, numBuckets);
-        }
-
-        /* sequences storage */
-        ZSTD_referenceExternalSequences(zc, NULL, 0);
-        zc->seqStore.maxNbSeq = maxNbSeq;
-        zc->seqStore.llCode = (BYTE*)ZSTD_cwksp_reserve_aligned(ws, maxNbSeq * sizeof(BYTE));
-        zc->seqStore.mlCode = (BYTE*)ZSTD_cwksp_reserve_aligned(ws, maxNbSeq * sizeof(BYTE));
-        zc->seqStore.ofCode = (BYTE*)ZSTD_cwksp_reserve_aligned(ws, maxNbSeq * sizeof(BYTE));
         zc->seqStore.sequencesStart = (seqDef*)ZSTD_cwksp_reserve_aligned(ws, maxNbSeq * sizeof(seqDef));

-
         /* ldm hash table */
         if (params->ldmParams.enableLdm == ZSTD_ps_enable) {
             /* TODO: avoid memset? */
@@ -2178,8 +2155,39 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
                 (ZSTD_Sequence*)ZSTD_cwksp_reserve_aligned(ws, maxNbExternalSeq * sizeof(ZSTD_Sequence));
         }

+        /* buffers */
+
+        /* ZSTD_wildcopy() is used to copy into the literals buffer,
+         * so we have to oversize the buffer by WILDCOPY_OVERLENGTH bytes.
+         */
+        zc->seqStore.litStart = ZSTD_cwksp_reserve_buffer(ws, blockSize + WILDCOPY_OVERLENGTH);
+        zc->seqStore.maxNbLit = blockSize;
+
+        zc->bufferedPolicy = zbuff;
+        zc->inBuffSize = buffInSize;
+        zc->inBuff = (char*)ZSTD_cwksp_reserve_buffer(ws, buffInSize);
+        zc->outBuffSize = buffOutSize;
+        zc->outBuff = (char*)ZSTD_cwksp_reserve_buffer(ws, buffOutSize);
+
+        /* ldm bucketOffsets table */
+        if (params->ldmParams.enableLdm == ZSTD_ps_enable) {
+            /* TODO: avoid memset? */
+            size_t const numBuckets =
+                  ((size_t)1) << (params->ldmParams.hashLog -
+                                  params->ldmParams.bucketSizeLog);
+            zc->ldmState.bucketOffsets = ZSTD_cwksp_reserve_buffer(ws, numBuckets);
+            ZSTD_memset(zc->ldmState.bucketOffsets, 0, numBuckets);
+        }
+
+        /* sequences storage */
+        ZSTD_referenceExternalSequences(zc, NULL, 0);
+        zc->seqStore.maxNbSeq = maxNbSeq;
+        zc->seqStore.llCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE));
+        zc->seqStore.mlCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE));
+        zc->seqStore.ofCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE));
+
         DEBUGLOG(3, "wksp: finished allocating, %zd bytes remain available", ZSTD_cwksp_available_space(ws));
-        assert(ZSTD_cwksp_estimated_space_within_bounds(ws, neededSpace, resizeWorkspace));
+        assert(ZSTD_cwksp_estimated_space_within_bounds(ws, neededSpace));

         zc->initialized = 1;

@@ -5210,7 +5218,7 @@ size_t ZSTD_estimateCDictSize_advanced(
          + ZSTD_sizeof_matchState(&cParams, ZSTD_resolveRowMatchFinderMode(ZSTD_ps_auto, &cParams),
                                   /* enableDedicatedDictSearch */ 1, /* forCCtx */ 0)
          + (dictLoadMethod == ZSTD_dlm_byRef ? 0
-            : ZSTD_cwksp_aligned_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void *))));
+            : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void *))));
 }

 size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel)
@@ -5295,7 +5303,7 @@ static ZSTD_CDict* ZSTD_createCDict_advanced_internal(size_t dictSize,
             ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE) +
             ZSTD_sizeof_matchState(&cParams, useRowMatchFinder, enableDedicatedDictSearch, /* forCCtx */ 0) +
             (dictLoadMethod == ZSTD_dlm_byRef ? 0
-             : ZSTD_cwksp_aligned_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void*))));
+             : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void*))));
         void* const workspace = ZSTD_customMalloc(workspaceSize, customMem);
         ZSTD_cwksp ws;
         ZSTD_CDict* cdict;

lib/compress/zstd_cwksp.h

Lines changed: 24 additions & 20 deletions
@@ -47,7 +47,8 @@ extern "C" {
 typedef enum {
     ZSTD_cwksp_alloc_objects,
     ZSTD_cwksp_alloc_aligned_init_once,
-    ZSTD_cwksp_alloc_aligned
+    ZSTD_cwksp_alloc_aligned,
+    ZSTD_cwksp_alloc_buffers
 } ZSTD_cwksp_alloc_phase_e;

 /**
@@ -101,7 +102,7 @@ typedef enum {
  * Workspace Layout:
  *
  * [                        ... workspace ...                           ]
- * [objects][tables ... ->] free space [<- ... aligned][<- ... init once]
+ * [objects][tables ->] free space [<- buffers][<- aligned][<- init once]
  *
  * The various objects that live in the workspace are divided into the
  * following categories, and are allocated separately:
@@ -138,6 +139,9 @@ typedef enum {
  *   location before reading from it.
  *   Buffers are aligned to 64 bytes.
  *
+ * - Buffers: these buffers are used for various purposes that don't require
+ *   any alignment or initialization before they're used. This means they can
+ *   be moved around at no cost for a new compression.
  *
  * Allocating Memory:
  *
@@ -147,6 +151,7 @@ typedef enum {
  * 1. Objects
  * 2. Init once / Tables
  * 3. Aligned / Tables
+ * 4. Buffers / Tables
  *
  * Attempts to reserve objects of different types out of order will fail.
  */
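
A minimal usage sketch of this ordering (illustrative only: ZSTD_cwksp_init, ZSTD_cwksp_reserve_object and ZSTD_cwksp_reserve_table are the pre-existing cwksp API rather than part of this diff, and the size variables are placeholders):

    ZSTD_cwksp ws;
    ZSTD_cwksp_init(&ws, mem, memSize, ZSTD_cwksp_static_alloc);
    void* const obj   = ZSTD_cwksp_reserve_object(&ws, objBytes);       /* 1. objects */
    void* const table = ZSTD_cwksp_reserve_table(&ws, tableBytes);      /* 2. tables */
    void* const algn  = ZSTD_cwksp_reserve_aligned(&ws, alignedBytes);  /* 3. aligned */
    BYTE* const buf   = ZSTD_cwksp_reserve_buffer(&ws, bufBytes);       /* 4. buffers, the new final phase */
    /* a further ZSTD_cwksp_reserve_object() call here would fail: phases only advance */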
@@ -183,7 +188,6 @@ MEM_STATIC void ZSTD_cwksp_assert_internal_consistency(ZSTD_cwksp* ws) {
     assert(ws->allocStart <= ws->workspaceEnd);
     assert(ws->initOnceStart <= ZSTD_cwksp_initialAllocStart(ws));
     assert(ws->workspace <= ws->initOnceStart);
-    assert((size_t)ws->allocStart % ZSTD_CWKSP_ALIGNMENT_BYTES == 0);
 #if ZSTD_MEMORY_SANITIZER
     {
         intptr_t const offset = __msan_test_shadow(ws->initOnceStart,
@@ -242,11 +246,10 @@ MEM_STATIC size_t ZSTD_cwksp_aligned_alloc_size(size_t size) {
  * for internal purposes (currently only alignment).
  */
 MEM_STATIC size_t ZSTD_cwksp_slack_space_required(void) {
-    /* For alignment, the wksp will always allocate an additional ZSTD_CWKSP_ALIGNMENT_BYTES
-     * bytes to align the beginning of tables section, this will ensure that tables section
-     * and aligned buffers sections are aligned.
+    /* For alignment, the wksp will always allocate an additional 2*ZSTD_CWKSP_ALIGNMENT_BYTES
+     * bytes to align the beginning of the tables section and the end of the buffers section.
      */
-    size_t const slackSpace = ZSTD_CWKSP_ALIGNMENT_BYTES;
+    size_t const slackSpace = ZSTD_CWKSP_ALIGNMENT_BYTES * 2;
     return slackSpace;
 }

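As a worked check (assuming ZSTD_CWKSP_ALIGNMENT_BYTES == 64, as documented for ZSTD_cwksp_reserve_aligned below):

    /* slack doubles from 64 to 128 bytes: one alignment gap before the
     * tables section plus one at the end of the buffers section */
    assert(ZSTD_cwksp_slack_space_required() == 128);
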
@@ -311,7 +314,7 @@ ZSTD_cwksp_internal_advance_phase(ZSTD_cwksp* ws, ZSTD_cwksp_alloc_phase_e phase
 {
     assert(phase >= ws->phase);
     if (phase > ws->phase) {
-        /* Going from allocating objects to allocating aligned / tables */
+        /* Going from allocating objects to allocating initOnce / tables */
         if (ws->phase < ZSTD_cwksp_alloc_aligned_init_once &&
             phase >= ZSTD_cwksp_alloc_aligned_init_once) {
             ws->tableValidEnd = ws->objectEnd;
@@ -377,6 +380,13 @@ ZSTD_cwksp_reserve_internal(ZSTD_cwksp* ws, size_t bytes, ZSTD_cwksp_alloc_phase
     return alloc;
 }

+/**
+ * Reserves and returns unaligned memory.
+ */
+MEM_STATIC BYTE* ZSTD_cwksp_reserve_buffer(ZSTD_cwksp* ws, size_t bytes)
+{
+    return (BYTE*)ZSTD_cwksp_reserve_internal(ws, bytes, ZSTD_cwksp_alloc_buffers);
+}

 /**
  * Reserves and returns memory sized on and aligned on ZSTD_CWKSP_ALIGNMENT_BYTES (64 bytes).
@@ -673,7 +683,7 @@ MEM_STATIC size_t ZSTD_cwksp_sizeof(const ZSTD_cwksp* ws) {

 MEM_STATIC size_t ZSTD_cwksp_used(const ZSTD_cwksp* ws) {
     return (size_t)((BYTE*)ws->tableEnd - (BYTE*)ws->workspace)
-        + (size_t)((BYTE*)ws->workspaceEnd - (BYTE*)ws->allocStart);
+         + (size_t)((BYTE*)ws->workspaceEnd - (BYTE*)ws->allocStart);
 }

 MEM_STATIC int ZSTD_cwksp_reserve_failed(const ZSTD_cwksp* ws) {
@@ -688,17 +698,11 @@ MEM_STATIC int ZSTD_cwksp_reserve_failed(const ZSTD_cwksp* ws) {
  * Returns if the estimated space needed for a wksp is within an acceptable limit of the
  * actual amount of space used.
  */
-MEM_STATIC int ZSTD_cwksp_estimated_space_within_bounds(const ZSTD_cwksp* const ws,
-                                                        size_t const estimatedSpace, int resizedWorkspace) {
-    if (resizedWorkspace) {
-        /* Resized/newly allocated wksp should have exact bounds up to one alignment */
-        return (estimatedSpace - 64) <= ZSTD_cwksp_used(ws) && ZSTD_cwksp_used(ws) <= estimatedSpace;
-    } else {
-        /* Due to alignment, when reusing a workspace, we can actually consume 63 fewer or more bytes
-         * than estimatedSpace. See the comments in zstd_cwksp.h for details.
-         */
-        return (ZSTD_cwksp_used(ws) >= estimatedSpace - 64) && (ZSTD_cwksp_used(ws) <= estimatedSpace + 63);
-    }
+MEM_STATIC int ZSTD_cwksp_estimated_space_within_bounds(const ZSTD_cwksp* const ws, size_t const estimatedSpace) {
+    /* We have one alignment gap between objects and tables, and another between tables and buffers,
+     * so actual usage can differ from the estimate by up to twice the alignment bytes. */
+    return (estimatedSpace - ZSTD_cwksp_slack_space_required()) <= ZSTD_cwksp_used(ws) &&
+           ZSTD_cwksp_used(ws) <= estimatedSpace;
 }

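Spelled out under the same 64-byte alignment assumption, the relaxed check accepts exactly this range (a worked illustration, not code from the commit):

    /* estimatedSpace - 128 <= ZSTD_cwksp_used(ws) <= estimatedSpace */
    assert(ZSTD_cwksp_estimated_space_within_bounds(ws, neededSpace)); /* as asserted in zstd_compress.c above */
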
lib/compress/zstd_ldm.c

Lines changed: 2 additions & 2 deletions
@@ -157,8 +157,8 @@ size_t ZSTD_ldm_getTableSize(ldmParams_t params)
     size_t const ldmHSize = ((size_t)1) << params.hashLog;
     size_t const ldmBucketSizeLog = MIN(params.bucketSizeLog, params.hashLog);
     size_t const ldmBucketSize = ((size_t)1) << (params.hashLog - ldmBucketSizeLog);
-    size_t const totalSize = ZSTD_cwksp_aligned_alloc_size(ldmBucketSize)
-                           + ZSTD_cwksp_aligned_alloc_size(ldmHSize * sizeof(ldmEntry_t));
+    size_t const totalSize = ZSTD_cwksp_alloc_size(ldmBucketSize)
+                           + ZSTD_cwksp_alloc_size(ldmHSize * sizeof(ldmEntry_t));
     return params.enableLdm == ZSTD_ps_enable ? totalSize : 0;
 }

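The change matters because the two sizing helpers differ in rounding (an illustration assuming the usual definitions, ignoring ASAN redzone padding: ZSTD_cwksp_aligned_alloc_size rounds up to the 64-byte workspace alignment, ZSTD_cwksp_alloc_size does not):

    ZSTD_cwksp_aligned_alloc_size(100); /* -> 128: rounded up to a 64-byte multiple */
    ZSTD_cwksp_alloc_size(100);         /* -> 100: unaligned buffers need no padding */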