Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 12 additions & 3 deletions lib/common/huf.h
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,11 @@ size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
/* ****************************************
* HUF detailed API
* ****************************************/
#define HUF_OPTIMAL_DEPTH_THRESHOLD ZSTD_btlazy2
typedef enum {
HUF_depth_fast, /** Use heuristic to find the table depth**/
HUF_depth_optimal /** Test possible table depths to find the one that produces the smallest header + encoded size**/
} HUF_depth_mode;

/*! HUF_compress() does the following:
* 1. count symbol occurrence from source[] into table count[] using FSE_count() (exposed within "fse.h")
Expand All @@ -185,7 +190,8 @@ size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
* For example, it's possible to compress several blocks using the same 'CTable',
* or to save and regenerate 'CTable' using external methods.
*/
unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue);
unsigned HUF_minTableLog(size_t srcSize, unsigned maxSymbolValue);
unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, void* workSpace, size_t wkspSize, HUF_CElt* table, const unsigned* count, HUF_depth_mode depthMode);
size_t HUF_buildCTable (HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits); /* @return : maxNbBits; CTable and count can overlap. In which case, CTable will overwrite count content */
size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog);
size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog, void* workspace, size_t workspaceSize);
Expand All @@ -199,6 +205,7 @@ typedef enum {
HUF_repeat_check, /**< Can use the previous table but it must be checked. Note : The previous table must have been constructed by HUF_compress{1, 4}X_repeat */
HUF_repeat_valid /**< Can use the previous table and it is assumed to be valid */
} HUF_repeat;

/** HUF_compress4X_repeat() :
* Same as HUF_compress4X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none.
* If it uses hufTable it does not modify hufTable or repeat.
Expand All @@ -209,7 +216,8 @@ size_t HUF_compress4X_repeat(void* dst, size_t dstSize,
const void* src, size_t srcSize,
unsigned maxSymbolValue, unsigned tableLog,
void* workSpace, size_t wkspSize, /**< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */
HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2, unsigned suspectUncompressible);
HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2,
unsigned suspectUncompressible, HUF_depth_mode depthMode);

/** HUF_buildCTable_wksp() :
* Same as HUF_buildCTable(), but using externally allocated scratch buffer.
Expand Down Expand Up @@ -315,7 +323,8 @@ size_t HUF_compress1X_repeat(void* dst, size_t dstSize,
const void* src, size_t srcSize,
unsigned maxSymbolValue, unsigned tableLog,
void* workSpace, size_t wkspSize, /**< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */
HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2, unsigned suspectUncompressible);
HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2,
unsigned suspectUncompressible, HUF_depth_mode depthMode);

size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* single-symbol decoder */
#ifndef HUF_FORCE_DECOMPRESS_X1
Expand Down
71 changes: 54 additions & 17 deletions lib/compress/huf_compress.c
Original file line number Diff line number Diff line change
Expand Up @@ -1222,15 +1222,6 @@ static size_t HUF_compressCTable_internal(
return (size_t)(op-ostart);
}


unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue)
{
unsigned tableLog = FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1);
assert(tableLog <= HUF_TABLELOG_MAX);

return tableLog;
}

typedef struct {
unsigned count[HUF_SYMBOLVALUE_MAX + 1];
HUF_CElt CTable[HUF_CTABLE_SIZE_ST(HUF_SYMBOLVALUE_MAX)];
Expand All @@ -1244,6 +1235,51 @@ typedef struct {
#define SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE 4096
#define SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO 10 /* Must be >= 2 */

unsigned HUF_minTableLog(size_t srcSize, unsigned maxSymbolValue)
{
U32 minBitsSrc = ZSTD_highbit32((U32)(srcSize)) + 1;
Copy link
Contributor

@Cyan4973 Cyan4973 Oct 15, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would simplify this function.

The reason srcSize was previously part of the formula
is that maxSymbolValue used to be a generous upper bound of the real cardinality.
Using srcSize as a second estimator would impact cases where srcSize < maxSymbolValue, producing a tighter upper bound estimate for these cases.

But now that we have the real cardinality, there is no need for another approximative upper bound.
Instead, derive the result from symbolCardinality directly.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This makes sense thanks for explaining, I initially wasn't sure why we did the first check.

U32 minBitsSymbols = ZSTD_highbit32(maxSymbolValue) + 2;
Copy link
Contributor

@Cyan4973 Cyan4973 Oct 13, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The little detail that matters:

Note that, in this formula, + 2 is itself an heuristic.
That's because this code is taken from FSE_minTableLog(), where it's presumed to be employed in combination with a fast heuristic.
Consequently, one of the goals was to avoid providing a too low minimal, that would end up being, in general, a rather poor candidate.

But now, with this new brute-force strategy, as long as the candidate is sometimes a better one, it's worth investigating.

So it's possible to change this formula by + 1, which is the real minimal.
(Note : the real minimal should actually be based on the cardinality of the distribution, for which maxSymbolValue is merely a cheap upper bound)

I tried that, with a quick test on silesia.tar.
The new low limit allows finding a few more bytes here and there, achieving an additional 1-2 KB savings at higher compression modes. Not big in absolute value, but relative to existing savings, it improves this strategy by ~20%, so that's a non-negligible contributor.

Where it shines though is in combination with small blocks.
For example, when cutting silesia.tar into blocks of 1 KB, it achieves additional savings of almost 300 KB. Not a mistake. So it's a game changer for this scenario.

The downside of this strategy is that there is now one more distribution to test.
So it's even slower.
That could imply revisiting the algorithm's triggering threshold.

Alternatively, it could also provide an incentive to invest time into a more optimized method, that requires less cpu effort. Which, I believe, is possible, without loss of compression ratio.
Maybe this could be dealt with in a follow up...

U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols;
if (minBits < FSE_MIN_TABLELOG) minBits = FSE_MIN_TABLELOG;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This minimum restriction FSE_MIN_TABLELOG is only specific to FSE.
It's not necessary for Huffman.
Consequently, this line can be dropped.

assert(srcSize > 1); /* Not supported, RLE should be used instead */
return minBits;
}

unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, void* workSpace, size_t wkspSize, HUF_CElt* table, const unsigned* count, HUF_depth_mode depthMode)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Some interface implementation detail :
HUF_CElt* table : when expressed this way, it implies that table is an expected output of the function.

But it's not.
Effectively, table is only provided as a kind of temporary workspace, anything it may contain is just thrown away afterwards.

How to make the difference ? Well, to follow the established convention, table should not exist as a separate parameter, but be blended inside workSpace.

Now, I appreciate that it's probably easier to employ workSpace and HUF_CElt* separately because that's how they exist from the caller side, and trying to bundle them together in a single workSpace might end up being more messy for the caller.

So okay, implementation complexity is a criteria.

In which case, please document clearly that table is just a specialized "workspace", not an expected output of the function.

{
unsigned optLog = FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1);

if (depthMode == HUF_depth_optimal) { /** Test valid depths and return optimal **/
BYTE* dst = (BYTE*)workSpace + sizeof(HUF_WriteCTableWksp);
size_t dstSize = wkspSize - sizeof(HUF_WriteCTableWksp);
size_t optSize = ((size_t) ~0);
unsigned huffLog;
size_t maxBits, hSize, newSize;

if (wkspSize < sizeof(HUF_buildCTable_wksp_tables)) return optLog;

for (huffLog = HUF_minTableLog(srcSize, maxSymbolValue); huffLog <= maxTableLog; huffLog++) {
maxBits = HUF_buildCTable_wksp(table, count,
maxSymbolValue, huffLog,
workSpace, wkspSize);
if (ERR_isError(maxBits)) continue;

hSize = HUF_writeCTable_wksp(dst, dstSize, table, maxSymbolValue, (U32)maxBits,
workSpace, wkspSize);
if (ERR_isError(hSize)) continue;

newSize = HUF_estimateCompressedSize(table, count, maxSymbolValue) + hSize;

if (newSize < optSize) {
optSize = newSize;
optLog = huffLog;
}
}
}
assert(optLog <= HUF_TABLELOG_MAX);
return optLog;
}

/* HUF_compress_internal() :
* `workSpace_align4` must be aligned on 4-bytes boundaries,
* and occupies the same space as a table of HUF_WORKSPACE_SIZE_U64 unsigned */
Expand All @@ -1254,7 +1290,7 @@ HUF_compress_internal (void* dst, size_t dstSize,
HUF_nbStreams_e nbStreams,
void* workSpace, size_t wkspSize,
HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat,
const int bmi2, unsigned suspectUncompressible)
const int bmi2, unsigned suspectUncompressible, HUF_depth_mode depthMode)
{
HUF_compress_tables_t* const table = (HUF_compress_tables_t*)HUF_alignUpWorkspace(workSpace, &wkspSize, ZSTD_ALIGNOF(size_t));
BYTE* const ostart = (BYTE*)dst;
Expand Down Expand Up @@ -1318,7 +1354,7 @@ HUF_compress_internal (void* dst, size_t dstSize,
}

/* Build Huffman Tree */
huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue, &table->wksps, sizeof(table->wksps), table->CTable, table->count, depthMode);
{ size_t const maxBits = HUF_buildCTable_wksp(table->CTable, table->count,
maxSymbolValue, huffLog,
&table->wksps.buildCTable_wksp, sizeof(table->wksps.buildCTable_wksp));
Expand Down Expand Up @@ -1367,21 +1403,21 @@ size_t HUF_compress1X_wksp (void* dst, size_t dstSize,
return HUF_compress_internal(dst, dstSize, src, srcSize,
maxSymbolValue, huffLog, HUF_singleStream,
workSpace, wkspSize,
NULL, NULL, 0, 0 /*bmi2*/, 0);
NULL, NULL, 0, 0 /*bmi2*/, 0, HUF_depth_fast);
}

size_t HUF_compress1X_repeat (void* dst, size_t dstSize,
const void* src, size_t srcSize,
unsigned maxSymbolValue, unsigned huffLog,
void* workSpace, size_t wkspSize,
HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat,
int bmi2, unsigned suspectUncompressible)
int bmi2, unsigned suspectUncompressible, HUF_depth_mode depthMode)
{
DEBUGLOG(5, "HUF_compress1X_repeat (srcSize = %zu)", srcSize);
return HUF_compress_internal(dst, dstSize, src, srcSize,
maxSymbolValue, huffLog, HUF_singleStream,
workSpace, wkspSize, hufTable,
repeat, preferRepeat, bmi2, suspectUncompressible);
repeat, preferRepeat, bmi2, suspectUncompressible, depthMode);
}

/* HUF_compress4X_repeat():
Expand All @@ -1396,7 +1432,7 @@ size_t HUF_compress4X_wksp (void* dst, size_t dstSize,
return HUF_compress_internal(dst, dstSize, src, srcSize,
maxSymbolValue, huffLog, HUF_fourStreams,
workSpace, wkspSize,
NULL, NULL, 0, 0 /*bmi2*/, 0);
NULL, NULL, 0, 0 /*bmi2*/, 0, HUF_depth_fast);
}

/* HUF_compress4X_repeat():
Expand All @@ -1407,13 +1443,14 @@ size_t HUF_compress4X_repeat (void* dst, size_t dstSize,
const void* src, size_t srcSize,
unsigned maxSymbolValue, unsigned huffLog,
void* workSpace, size_t wkspSize,
HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2, unsigned suspectUncompressible)
HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2,
unsigned suspectUncompressible, HUF_depth_mode depthMode)
{
DEBUGLOG(5, "HUF_compress4X_repeat (srcSize = %zu)", srcSize);
return HUF_compress_internal(dst, dstSize, src, srcSize,
maxSymbolValue, huffLog, HUF_fourStreams,
workSpace, wkspSize,
hufTable, repeat, preferRepeat, bmi2, suspectUncompressible);
hufTable, repeat, preferRepeat, bmi2, suspectUncompressible, depthMode);
}

#ifndef ZSTD_NO_UNUSED_FUNCTIONS
Expand Down
13 changes: 9 additions & 4 deletions lib/compress/zstd_compress.c
Original file line number Diff line number Diff line change
Expand Up @@ -2653,14 +2653,16 @@ ZSTD_entropyCompressSeqStore_internal(seqStore_t* seqStorePtr,
/* Base suspicion of uncompressibility on ratio of literals to sequences */
unsigned const suspectUncompressible = (numSequences == 0) || (numLiterals / numSequences >= SUSPECT_UNCOMPRESSIBLE_LITERAL_RATIO);
size_t const litSize = (size_t)(seqStorePtr->lit - literals);

HUF_depth_mode depthMode = cctxParams->cParams.strategy >= HUF_OPTIMAL_DEPTH_THRESHOLD ? HUF_depth_optimal : HUF_depth_fast;
size_t const cSize = ZSTD_compressLiterals(
&prevEntropy->huf, &nextEntropy->huf,
cctxParams->cParams.strategy,
ZSTD_literalsCompressionIsDisabled(cctxParams),
op, dstCapacity,
literals, litSize,
entropyWorkspace, entropyWkspSize,
bmi2, suspectUncompressible);
bmi2, suspectUncompressible, depthMode);
FORWARD_IF_ERROR(cSize, "ZSTD_compressLiterals failed");
assert(cSize <= dstCapacity);
op += cSize;
Expand Down Expand Up @@ -3107,7 +3109,7 @@ static size_t ZSTD_buildBlockEntropyStats_literals(void* const src, size_t srcSi
ZSTD_hufCTables_t* nextHuf,
ZSTD_hufCTablesMetadata_t* hufMetadata,
const int literalsCompressionIsDisabled,
void* workspace, size_t wkspSize)
void* workspace, size_t wkspSize, HUF_depth_mode depthMode)
{
BYTE* const wkspStart = (BYTE*)workspace;
BYTE* const wkspEnd = wkspStart + wkspSize;
Expand Down Expand Up @@ -3164,7 +3166,7 @@ static size_t ZSTD_buildBlockEntropyStats_literals(void* const src, size_t srcSi

/* Build Huffman Tree */
ZSTD_memset(nextHuf->CTable, 0, sizeof(nextHuf->CTable));
huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue, nodeWksp, nodeWkspSize, nextHuf->CTable, countWksp, depthMode);
assert(huffLog <= LitHufLog);
{ size_t const maxBits = HUF_buildCTable_wksp((HUF_CElt*)nextHuf->CTable, countWksp,
maxSymbolValue, huffLog,
Expand Down Expand Up @@ -3268,12 +3270,15 @@ size_t ZSTD_buildBlockEntropyStats(seqStore_t* seqStorePtr,
void* workspace, size_t wkspSize)
{
size_t const litSize = seqStorePtr->lit - seqStorePtr->litStart;
HUF_depth_mode depthMode = cctxParams->cParams.strategy >= HUF_OPTIMAL_DEPTH_THRESHOLD ? HUF_depth_optimal : HUF_depth_fast;

entropyMetadata->hufMetadata.hufDesSize =
ZSTD_buildBlockEntropyStats_literals(seqStorePtr->litStart, litSize,
&prevEntropy->huf, &nextEntropy->huf,
&entropyMetadata->hufMetadata,
ZSTD_literalsCompressionIsDisabled(cctxParams),
workspace, wkspSize);
workspace, wkspSize, depthMode);

FORWARD_IF_ERROR(entropyMetadata->hufMetadata.hufDesSize, "ZSTD_buildBlockEntropyStats_literals failed");
entropyMetadata->fseMetadata.fseTablesSize =
ZSTD_buildBlockEntropyStats_sequences(seqStorePtr,
Expand Down
6 changes: 3 additions & 3 deletions lib/compress/zstd_compress_literals.c
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
const void* src, size_t srcSize,
void* entropyWorkspace, size_t entropyWorkspaceSize,
const int bmi2,
unsigned suspectUncompressible)
unsigned suspectUncompressible, HUF_depth_mode depthMode)
{
size_t const minGain = ZSTD_minGain(srcSize, strategy);
size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB);
Expand Down Expand Up @@ -128,7 +128,7 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
RETURN_ERROR_IF(dstCapacity < lhSize+1, dstSize_tooSmall, "not enough space for compression");
{ HUF_repeat repeat = prevHuf->repeatMode;
int const preferRepeat = (strategy < ZSTD_lazy) ? srcSize <= 1024 : 0;
typedef size_t (*huf_compress_f)(void*, size_t, const void*, size_t, unsigned, unsigned, void*, size_t, HUF_CElt*, HUF_repeat*, int, int, unsigned);
typedef size_t (*huf_compress_f)(void*, size_t, const void*, size_t, unsigned, unsigned, void*, size_t, HUF_CElt*, HUF_repeat*, int, int, unsigned, HUF_depth_mode);
huf_compress_f huf_compress;
if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1;
huf_compress = singleStream ? HUF_compress1X_repeat : HUF_compress4X_repeat;
Expand All @@ -138,7 +138,7 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
entropyWorkspace, entropyWorkspaceSize,
(HUF_CElt*)nextHuf->CTable,
&repeat, preferRepeat,
bmi2, suspectUncompressible);
bmi2, suspectUncompressible, depthMode);
if (repeat != HUF_repeat_none) {
/* reused the existing table */
DEBUGLOG(5, "Reusing previous huffman table");
Expand Down
2 changes: 1 addition & 1 deletion lib/compress/zstd_compress_literals.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,6 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
const void* src, size_t srcSize,
void* entropyWorkspace, size_t entropyWorkspaceSize,
const int bmi2,
unsigned suspectUncompressible);
unsigned suspectUncompressible, HUF_depth_mode depthMode);

#endif /* ZSTD_COMPRESS_LITERALS_H */
4 changes: 3 additions & 1 deletion tests/fuzz/huf_round_trip.c
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,9 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
HUF_DTable* dt = (HUF_DTable*)FUZZ_malloc(HUF_DTABLE_SIZE(tableLog) * sizeof(HUF_DTable));
dt[0] = tableLog * 0x01000001;

tableLog = HUF_optimalTableLog(tableLog, size, maxSymbol);
HUF_depth_mode depthMode = rand() & 1 ? HUF_depth_fast : HUF_depth_optimal;

tableLog = HUF_optimalTableLog(tableLog, size, maxSymbol, wksp, wkspSize, ct, count, depthMode);
FUZZ_ASSERT(tableLog <= 12);
tableLog = HUF_buildCTable_wksp(ct, count, maxSymbol, tableLog, wksp, wkspSize);
FUZZ_ZASSERT(tableLog);
Expand Down
Loading