Skip to content

Commit d8a0797

Browse files
committed
[fuzz] Add Huffman round trip fuzzer
* Add a Huffman round trip fuzzer.
* Fix two minor bugs in Huffman that aren't exposed in zstd:
  - Incorrect weight comparison (weights are allowed to be equal to the table log).
  - HUF_compress1X_usingCTable_internal() can return a compressed size >= the source size, so the assert that `cSize <= 65535` isn't correct; the condition needs to be checked at runtime instead.
1 parent 46f2710 commit d8a0797

File tree

7 files changed

+158
-11
lines changed

7 files changed

+158
-11
lines changed

lib/common/entropy_common.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -299,7 +299,7 @@ HUF_readStats_body(BYTE* huffWeight, size_t hwSize, U32* rankStats,
299299
ZSTD_memset(rankStats, 0, (HUF_TABLELOG_MAX + 1) * sizeof(U32));
300300
weightTotal = 0;
301301
{ U32 n; for (n=0; n<oSize; n++) {
302-
if (huffWeight[n] >= HUF_TABLELOG_MAX) return ERROR(corruption_detected);
302+
if (huffWeight[n] > HUF_TABLELOG_MAX) return ERROR(corruption_detected);
303303
rankStats[huffWeight[n]]++;
304304
weightTotal += (1 << huffWeight[n]) >> 1;
305305
} }

lib/common/huf.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -190,6 +190,7 @@ size_t HUF_buildCTable (HUF_CElt* CTable, const unsigned* count, unsigned maxSym
190190
size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog);
191191
size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog, void* workspace, size_t workspaceSize);
192192
size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);
193+
size_t HUF_compress4X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2);
193194
size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue);
194195
int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue);
195196

@@ -303,6 +304,7 @@ size_t HUF_decompress4X2_usingDTable(void* dst, size_t maxDstSize, const void* c
303304
size_t HUF_compress1X (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
304305
size_t HUF_compress1X_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); /**< `workSpace` must be a table of at least HUF_WORKSPACE_SIZE_U64 U64 */
305306
size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);
307+
size_t HUF_compress1X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2);
306308
/** HUF_compress1X_repeat() :
307309
* Same as HUF_compress1X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none.
308310
* If it uses hufTable it does not modify hufTable or repeat.

lib/compress/huf_compress.c

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -965,7 +965,12 @@ HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
965965

966966
size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
967967
{
968-
return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
968+
return HUF_compress1X_usingCTable_bmi2(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
969+
}
970+
971+
size_t HUF_compress1X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2)
972+
{
973+
return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, bmi2);
969974
}
970975

971976
static size_t
@@ -986,26 +991,23 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
986991

987992
assert(op <= oend);
988993
{ CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
989-
if (cSize==0) return 0;
990-
assert(cSize <= 65535);
994+
if (cSize == 0 || cSize > 65535) return 0;
991995
MEM_writeLE16(ostart, (U16)cSize);
992996
op += cSize;
993997
}
994998

995999
ip += segmentSize;
9961000
assert(op <= oend);
9971001
{ CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
998-
if (cSize==0) return 0;
999-
assert(cSize <= 65535);
1002+
if (cSize == 0 || cSize > 65535) return 0;
10001003
MEM_writeLE16(ostart+2, (U16)cSize);
10011004
op += cSize;
10021005
}
10031006

10041007
ip += segmentSize;
10051008
assert(op <= oend);
10061009
{ CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
1007-
if (cSize==0) return 0;
1008-
assert(cSize <= 65535);
1010+
if (cSize == 0 || cSize > 65535) return 0;
10091011
MEM_writeLE16(ostart+4, (U16)cSize);
10101012
op += cSize;
10111013
}
@@ -1014,7 +1016,7 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
10141016
assert(op <= oend);
10151017
assert(ip <= iend);
10161018
{ CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, (size_t)(iend-ip), CTable, bmi2) );
1017-
if (cSize==0) return 0;
1019+
if (cSize == 0 || cSize > 65535) return 0;
10181020
op += cSize;
10191021
}
10201022

@@ -1023,7 +1025,12 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
10231025

10241026
size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
10251027
{
1026-
return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
1028+
return HUF_compress4X_usingCTable_bmi2(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
1029+
}
1030+
1031+
size_t HUF_compress4X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2)
1032+
{
1033+
return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, bmi2);
10271034
}
10281035

10291036
typedef enum { HUF_singleStream, HUF_fourStreams } HUF_nbStreams_e;

tests/fuzz/.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ decompress_dstSize_tooSmall
1717
fse_read_ncount
1818
sequence_compression_api
1919
seekable_roundtrip
20+
huf_round_trip
2021
fuzz-*.log
2122
rt_lib_*
2223
d_lib_*

tests/fuzz/Makefile

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,8 @@ FUZZ_TARGETS := \
103103
decompress_dstSize_tooSmall \
104104
fse_read_ncount \
105105
sequence_compression_api \
106-
seekable_roundtrip
106+
seekable_roundtrip \
107+
huf_round_trip
107108

108109
all: libregression.a $(FUZZ_TARGETS)
109110

@@ -200,6 +201,9 @@ sequence_compression_api: $(FUZZ_HEADERS) $(FUZZ_ROUND_TRIP_OBJ) rt_fuzz_sequenc
200201
seekable_roundtrip: $(FUZZ_HEADERS) $(SEEKABLE_HEADERS) $(FUZZ_ROUND_TRIP_OBJ) $(SEEKABLE_OBJS) rt_fuzz_seekable_roundtrip.o
201202
$(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_ROUND_TRIP_OBJ) $(SEEKABLE_OBJS) rt_fuzz_seekable_roundtrip.o $(LIB_FUZZING_ENGINE) -o $@
202203

204+
huf_round_trip: $(FUZZ_HEADERS) $(FUZZ_ROUND_TRIP_OBJ) rt_fuzz_huf_round_trip.o
205+
$(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_ROUND_TRIP_OBJ) rt_fuzz_huf_round_trip.o $(LIB_FUZZING_ENGINE) -o $@
206+
203207
libregression.a: $(FUZZ_HEADERS) $(PRGDIR)/util.h $(PRGDIR)/util.c d_fuzz_regression_driver.o
204208
$(AR) $(FUZZ_ARFLAGS) $@ d_fuzz_regression_driver.o
205209

tests/fuzz/fuzz.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ def __init__(self, input_type, frame_type=FrameType.ZSTD):
6363
'fse_read_ncount': TargetInfo(InputType.RAW_DATA),
6464
'sequence_compression_api': TargetInfo(InputType.RAW_DATA),
6565
'seekable_roundtrip': TargetInfo(InputType.RAW_DATA),
66+
'huf_round_trip': TargetInfo(InputType.RAW_DATA),
6667
}
6768
TARGETS = list(TARGET_INFO.keys())
6869
ALL_TARGETS = TARGETS + ['all']

tests/fuzz/huf_round_trip.c

Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
/*
2+
* Copyright (c) Facebook, Inc.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under both the BSD-style license (found in the
6+
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
7+
* in the COPYING file in the root directory of this source tree).
8+
* You may select, at your option, one of the above-listed licenses.
9+
*/
10+
11+
/**
12+
* This fuzz target performs a Huffman round-trip test (compress & decompress),
13+
* compares the result with the original, and calls abort() on corruption.
14+
*/
15+
16+
#define HUF_STATIC_LINKING_ONLY
17+
18+
#include <stddef.h>
19+
#include <stdlib.h>
20+
#include <stdio.h>
21+
#include <string.h>
22+
#include "common/cpu.h"
23+
#include "compress/hist.h"
24+
#include "common/huf.h"
25+
#include "fuzz_helpers.h"
26+
#include "fuzz_data_producer.h"
27+
28+
/*
 * adjustTableLog() :
 * Returns `tableLog`, raised if necessary to a lower bound derived from the
 * alphabet size (maxSymbol + 1). The value is never lowered.
 * The bound is BIT_highbit32(alphabetSize) + 1, plus one more when
 * alphabetSize is not a power of two.
 * NOTE(review): BIT_highbit32() is defined outside this file; presumably it
 * returns the index of the highest set bit - confirm.
 */
static size_t adjustTableLog(size_t tableLog, size_t maxSymbol)
29+
{
30+
size_t const alphabetSize = maxSymbol + 1;
31+
size_t minTableLog = BIT_highbit32(alphabetSize) + 1;
32+
/* (x & (x - 1)) != 0 is true exactly when x is not a power of two */
if ((alphabetSize & (alphabetSize - 1)) != 0) {
33+
++minTableLog;
34+
}
35+
/* NOTE(review): the bound of 9 presumably relies on maxSymbol <= 255,
 * which the caller asserts before calling - confirm. */
assert(minTableLog <= 9);
36+
if (tableLog < minTableLog)
37+
return minTableLog;
38+
else
39+
return tableLog;
40+
}
41+
42+
/*
 * LLVMFuzzerTestOneInput() :
 * Fuzzer entry point for the Huffman round trip.
 * Steps, in order:
 *   1. draw the test parameters (streams, symbols, bmi2, cBufSize, tableLog)
 *      from the data producer;
 *   2. histogram the remaining input and build a compression table from it;
 *   3. serialize the table into cBuf and read it back into a decoding table;
 *   4. compress with the 1X or 4X API, decompress, and check that the
 *      regenerated bytes equal the input.
 * @param src   fuzzer-provided bytes
 * @param size  number of bytes in `src`; reassigned below to the bytes left
 *              after parameter selection, capped at kMaxSize
 * @return always 0; failures are reported through the FUZZ_* assertion macros.
 */
int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
43+
{
44+
FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(src, size);
45+
/* Select random parameters: #streams, X1 or X2 decoding, bmi2 */
46+
int const streams = FUZZ_dataProducer_int32Range(producer, 0, 1);
47+
int const symbols = FUZZ_dataProducer_int32Range(producer, 0, 1);
48+
/* Because of && short-circuiting, the producer is only consulted for bmi2
 * when ZSTD_cpuid_bmi2() reports support. */
int const bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid()) && FUZZ_dataProducer_int32Range(producer, 0, 1);
49+
/* Select a random cBufSize - it may be too small */
50+
size_t const cBufSize = FUZZ_dataProducer_uint32Range(producer, 0, 4 * size);
51+
/* Select a random tableLog - we'll adjust it up later */
52+
size_t tableLog = FUZZ_dataProducer_uint32Range(producer, 1, 12);
53+
size_t const kMaxSize = 256 * 1024;
54+
/* From here on, `size` is the number of bytes left in the producer.
 * NOTE(review): `src` is still used as the data pointer below, so this
 * presumably relies on the producer consuming parameter bytes from the end
 * of the buffer - confirm against fuzz_data_producer. */
size = FUZZ_dataProducer_remainingBytes(producer);
55+
if (size > kMaxSize)
56+
size = kMaxSize;
57+
58+
/* Early exit happens before any buffer is allocated, so only the producer
 * needs freeing. */
if (size <= 1) {
59+
FUZZ_dataProducer_free(producer);
60+
return 0;
61+
}
62+
63+
uint32_t maxSymbol = 255;
64+
65+
U32 count[256];
66+
size_t const mostFrequent = HIST_count(count, &maxSymbol, src, size);
67+
FUZZ_ZASSERT(mostFrequent);
68+
/* A single symbol covering the whole input is skipped; as above, nothing
 * but the producer has been allocated yet. */
if (mostFrequent == size) {
69+
/* RLE */
70+
FUZZ_dataProducer_free(producer);
71+
return 0;
72+
73+
}
74+
FUZZ_ASSERT(maxSymbol <= 255);
75+
tableLog = adjustTableLog(tableLog, maxSymbol);
76+
77+
size_t const wkspSize = HUF_WORKSPACE_SIZE;
78+
void* wksp = FUZZ_malloc(wkspSize);
79+
void* rBuf = FUZZ_malloc(size);
80+
void* cBuf = FUZZ_malloc(cBufSize);
81+
HUF_CElt* ct = (HUF_CElt*)FUZZ_malloc(HUF_CTABLE_SIZE(maxSymbol));
82+
/* The DTable is sized and seeded from the adjusted tableLog, before
 * HUF_optimalTableLog() reassigns tableLog below.
 * NOTE(review): the 0x01000001 multiplier presumably writes the table log
 * into the DTable header fields expected by HUF_readDTable* - confirm
 * against huf.h. */
HUF_DTable* dt = (HUF_DTable*)FUZZ_malloc(HUF_DTABLE_SIZE(tableLog) * sizeof(HUF_DTable));
83+
dt[0] = tableLog * 0x01000001;
84+
85+
tableLog = HUF_optimalTableLog(tableLog, size, maxSymbol);
86+
FUZZ_ASSERT(tableLog <= 12);
87+
tableLog = HUF_buildCTable_wksp(ct, count, maxSymbol, tableLog, wksp, wkspSize);
88+
FUZZ_ZASSERT(tableLog);
89+
/* The serialized table is written to cBuf only so it can be read back into
 * `dt`; cBuf is overwritten by the compressed payload further down. */
size_t const tableSize = HUF_writeCTable_wksp(cBuf, cBufSize, ct, maxSymbol, tableLog, wksp, wkspSize);
90+
if (ERR_isError(tableSize)) {
91+
/* Errors on uncompressible data or cBufSize too small */
92+
goto _out;
93+
}
94+
FUZZ_ZASSERT(tableSize);
95+
/* symbols == 0 selects the X1 decoding table. Otherwise X2 is tried first,
 * falling back to X1 only when X2 reports tableLog_tooLarge. */
if (symbols == 0) {
96+
FUZZ_ZASSERT(HUF_readDTableX1_wksp_bmi2(dt, cBuf, tableSize, wksp, wkspSize, bmi2));
97+
} else {
98+
size_t const ret = HUF_readDTableX2_wksp(dt, cBuf, tableSize, wksp, wkspSize);
99+
if (ERR_getErrorCode(ret) == ZSTD_error_tableLog_tooLarge) {
100+
FUZZ_ZASSERT(HUF_readDTableX1_wksp_bmi2(dt, cBuf, tableSize, wksp, wkspSize, bmi2));
101+
} else {
102+
FUZZ_ZASSERT(ret);
103+
}
104+
}
105+
106+
/* rSize is assigned only on paths where cSize != 0; every later read of
 * rSize is guarded by the same condition. */
size_t cSize;
107+
size_t rSize;
108+
/* streams == 0 uses the single-stream (1X) API, otherwise the 4X API.
 * A cSize of 0 is treated as a non-error outcome and skips decompression. */
if (streams == 0) {
109+
cSize = HUF_compress1X_usingCTable_bmi2(cBuf, cBufSize, src, size, ct, bmi2);
110+
FUZZ_ZASSERT(cSize);
111+
if (cSize != 0)
112+
rSize = HUF_decompress1X_usingDTable_bmi2(rBuf, size, cBuf, cSize, dt, bmi2);
113+
} else {
114+
cSize = HUF_compress4X_usingCTable_bmi2(cBuf, cBufSize, src, size, ct, bmi2);
115+
FUZZ_ZASSERT(cSize);
116+
if (cSize != 0)
117+
rSize = HUF_decompress4X_usingDTable_bmi2(rBuf, size, cBuf, cSize, dt, bmi2);
118+
}
119+
/* Round-trip check: decompression must succeed, regenerate exactly `size`
 * bytes, and match the input byte for byte. */
if (cSize != 0) {
120+
FUZZ_ZASSERT(rSize);
121+
FUZZ_ASSERT_MSG(rSize == size, "Incorrect regenerated size");
122+
FUZZ_ASSERT_MSG(!FUZZ_memcmp(src, rBuf, size), "Corruption!");
123+
}
124+
/* Common cleanup for every path that reached the allocations above. */
_out:
125+
free(rBuf);
126+
free(cBuf);
127+
free(ct);
128+
free(dt);
129+
free(wksp);
130+
FUZZ_dataProducer_free(producer);
131+
return 0;
132+
}

0 commit comments

Comments
 (0)