Skip to content

Commit 8c21c18

Browse files
committed
Improve Huffman sorting algorithm
1 parent d2b5e54 commit 8c21c18

File tree

3 files changed

+343
-235
lines changed

3 files changed

+343
-235
lines changed

lib/common/huf.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ HUF_PUBLIC_API size_t HUF_compress2 (void* dst, size_t dstCapacity,
9090
/** HUF_compress4X_wksp() :
9191
* Same as HUF_compress2(), but uses externally allocated `workSpace`.
9292
* `workspace` must have minimum alignment of 4, and be at least as large as HUF_WORKSPACE_SIZE */
93-
#define HUF_WORKSPACE_SIZE ((6 << 10) + 256)
93+
#define HUF_WORKSPACE_SIZE ((6 << 10) + 512 /* for sorting scratch space */ )
9494
#define HUF_WORKSPACE_SIZE_U32 (HUF_WORKSPACE_SIZE / sizeof(U32))
9595
HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity,
9696
const void* src, size_t srcSize,

lib/compress/huf_compress.c

Lines changed: 129 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -368,67 +368,175 @@ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
368368
}
369369

370370
/* Per-bucket bookkeeping used by HUF_sort():
 * `base` is the output index at which the bucket starts,
 * `curr` is the next free output index inside that bucket. */
typedef struct {
371-
U32 base;
372-
U32 curr;
371+
U16 base;
372+
U16 curr;
373373
} rankPos;
374374

375375
typedef nodeElt huffNodeTable[HUF_CTABLE_WORKSPACE_SIZE_U32];
376376

377-
#define RANK_POSITION_TABLE_SIZE 32
377+
/* Number of buckets available for HUF_sort() */
378+
#define RANK_POSITION_TABLE_SIZE 128
378379

379380
/* Groups one huffNodeTable with the RANK_POSITION_TABLE_SIZE rankPos entries
 * that HUF_sort() takes as its scratch `rankPosition` argument. */
typedef struct {
380381
huffNodeTable huffNodeTbl;
381382
rankPos rankPosition[RANK_POSITION_TABLE_SIZE];
382383
} HUF_buildCTable_wksp_tables;
383384

385+
/* RANK_POSITION_DISTINCT_COUNT_CUTOFF == smallest count that HUF_sort() places by log2 bucketing; every smaller count gets a bucket of its own.
386+
* Strategy is to use as many buckets as possible for representing distinct
387+
* counts while using the remainder to represent all counts up to HUF_BLOCKSIZE_MAX
388+
* using log2 bucketing.
389+
*
390+
* To satisfy this requirement for 128 buckets, we can do the following:
391+
* Let buckets 0-114 represent distinct counts of [0, 114]
392+
* Let buckets 115 to 126 represent counts of [115, HUF_BLOCKSIZE_MAX]. (the final bucket 127 must remain empty)
393+
*
394+
* Note that we don't actually need 17 buckets (assuming 2^17 maxcount) for log2 bucketing since
395+
* the first few buckets in the log2 bucketing representation are already covered by the distinct count bucketing.
396+
*/
397+
/* Offset added to BIT_highbit32(count) to obtain the bucket index of a log2-bucketed count (see HUF_getIndex()).
 * The replacement list is fully parenthesized so it expands as a single operand inside any larger expression. */
#define RANK_POSITION_LOG_BUCKETS_BEGIN ((RANK_POSITION_TABLE_SIZE - 1) - BIT_highbit32(HUF_BLOCKSIZE_MAX) - 1)
398+
/* Counts below this value index their own bucket; counts at or above it are log2-bucketed (see HUF_getIndex()).
 * The replacement list is fully parenthesized so it expands as a single operand inside any larger expression. */
#define RANK_POSITION_DISTINCT_COUNT_CUTOFF (RANK_POSITION_LOG_BUCKETS_BEGIN + BIT_highbit32(RANK_POSITION_LOG_BUCKETS_BEGIN))
399+
400+
/* Return the appropriate bucket index for a given count. See definition of
401+
* RANK_POSITION_DISTINCT_COUNT_CUTOFF for explanation of bucketing strategy.
402+
*/
403+
/* Maps a symbol count to its HUF_sort() bucket index.
 * Counts under RANK_POSITION_DISTINCT_COUNT_CUTOFF are their own index;
 * all other counts are indexed by BIT_highbit32(count), shifted by
 * RANK_POSITION_LOG_BUCKETS_BEGIN.
 */
static U32 HUF_getIndex(U32 const count) {
    if (count >= RANK_POSITION_DISTINCT_COUNT_CUTOFF) {
        return BIT_highbit32(count) + RANK_POSITION_LOG_BUCKETS_BEGIN;
    }
    return count;
}
408+
409+
/* Helper swap function for HUF_quickSortPartition() */
410+
/* Exchanges the contents of the two nodes referenced by `a` and `b`. */
static void HUF_swapNodes(nodeElt* a, nodeElt* b) {
    nodeElt const held = *b;
    *b = *a;
    *a = held;
}
415+
416+
/* Returns 0 if the huffNode array is not sorted by descending count */
417+
UNUSED_ATTR
418+
static int HUF_isSorted(nodeElt huffNode[], U32 const maxSymbolValue1) {
419+
U32 i;
420+
for (i = 1; i < maxSymbolValue1; ++i) {
421+
if (huffNode[i].count > huffNode[i-1].count) {
422+
return 0;
423+
}
424+
}
425+
return 1;
426+
}
427+
428+
/* Insertion sort by descending order */
429+
/* Sorts huffNode[low..high] (both bounds inclusive) by decreasing count.
 * Each element is lifted out, larger-index neighbours with a strictly smaller
 * count are shifted one slot right, and the element is dropped into the gap.
 */
HINT_INLINE void HUF_insertionSort(nodeElt huffNode[], int const low, int const high) {
    nodeElt* const first = huffNode + low;
    int const nbElts = high - low + 1;
    int cur;
    for (cur = 1; cur < nbElts; ++cur) {
        nodeElt const moving = first[cur];
        int slot = cur;
        for ( ; slot > 0 && first[slot-1].count < moving.count; --slot) {
            first[slot] = first[slot-1];
        }
        first[slot] = moving;
    }
}
443+
444+
/* Pivot helper function for quicksort. */
445+
/* Partitions arr[low..high] around the count of arr[high] and returns the
 * pivot's final index. Entries with a count strictly greater than the pivot
 * end up before it. The rightmost element is used as pivot; the original
 * author's note says that median-of-three style selectors showed no
 * experimental benefit.
 */
static int HUF_quickSortPartition(nodeElt arr[], int const low, int const high) {
    U32 const pivotCount = arr[high].count;
    int boundary = low;   /* first slot not yet holding a count > pivotCount */
    int scan;
    for (scan = low; scan < high; ++scan) {
        if (arr[scan].count > pivotCount) {
            HUF_swapNodes(&arr[boundary], &arr[scan]);
            boundary++;
        }
    }
    HUF_swapNodes(&arr[boundary], &arr[high]);
    return boundary;
}
461+
462+
/* Classic quicksort by descending with partially iterative calls
463+
* to reduce worst case callstack size.
464+
*/
465+
/* Sorts arr[low..high] (inclusive) by decreasing count.
 * Ranges spanning fewer than kInsertionSortThreshold positions on entry are
 * handed to HUF_insertionSort(). Otherwise the range is partitioned
 * repeatedly: the smaller side is sorted by a recursive call and the larger
 * side is handled by the next loop iteration.
 */
static void HUF_simpleQuickSort(nodeElt arr[], int low, int high) {
    int const kInsertionSortThreshold = 8;
    if (high - low >= kInsertionSortThreshold) {
        while (low < high) {
            int const pivotIdx = HUF_quickSortPartition(arr, low, high);
            int const leftSpan = pivotIdx - low;
            int const rightSpan = high - pivotIdx;
            if (leftSpan < rightSpan) {
                HUF_simpleQuickSort(arr, low, pivotIdx - 1);
                low = pivotIdx + 1;
            } else {
                HUF_simpleQuickSort(arr, pivotIdx + 1, high);
                high = pivotIdx - 1;
            }
        }
        return;
    }
    HUF_insertionSort(arr, low, high);
}
482+
384483
/**
385484
* HUF_sort():
386485
* Sorts the symbols [0, maxSymbolValue] by count[symbol] in decreasing order.
486+
* This is a typical bucket sorting strategy that uses either quicksort or insertion sort to sort each bucket.
387487
*
388488
* @param[out] huffNode Sorted symbols by decreasing count. Only members `.count` and `.byte` are filled.
389489
* Must have (maxSymbolValue + 1) entries.
390490
* @param[in] count Histogram of the symbols.
391491
* @param[in] maxSymbolValue Maximum symbol value.
392492
* @param rankPosition This is a scratch workspace. Must have RANK_POSITION_TABLE_SIZE entries.
393493
*/
394-
static void HUF_sort(nodeElt* huffNode, const unsigned* count, U32 maxSymbolValue, rankPos* rankPosition)
395-
{
396-
int n;
397-
int const maxSymbolValue1 = (int)maxSymbolValue + 1;
494+
static void HUF_sort(nodeElt huffNode[], const unsigned count[], U32 const maxSymbolValue, rankPos rankPosition[]) {
495+
U32 n;
496+
U32 const maxSymbolValue1 = maxSymbolValue+1;
398497

399498
/* Compute base and set curr to base.
400-
* For symbol s let lowerRank = BIT_highbit32(count[n]+1) and rank = lowerRank + 1.
401-
* Then 2^lowerRank <= count[n]+1 <= 2^rank.
499+
* For symbol n let lowerRank = HUF_getIndex(count[n]) and rank = lowerRank + 1.
500+
* See HUF_getIndex() for the bucketing strategy.
402501
* We attribute each symbol to lowerRank's base value, because we want to know where
403502
* each rank begins in the output, so for rank R we want to count ranks R+1 and above.
404503
*/
405504
ZSTD_memset(rankPosition, 0, sizeof(*rankPosition) * RANK_POSITION_TABLE_SIZE);
406505
/* First pass: tally how many symbols fall into each bucket index (kept in .base for now). */
for (n = 0; n < maxSymbolValue1; ++n) {
407-
U32 lowerRank = BIT_highbit32(count[n] + 1);
506+
U32 lowerRank = HUF_getIndex(count[n]);
507+
assert(lowerRank < RANK_POSITION_TABLE_SIZE - 1);
408508
rankPosition[lowerRank].base++;
409509
}
510+
410511
assert(rankPosition[RANK_POSITION_TABLE_SIZE - 1].base == 0);
512+
/* Set up the rankPosition table: turn the tallies into suffix sums, so that
 * entry n-1 ends up with .base == .curr == number of symbols whose bucket index is >= n-1. */
411513
for (n = RANK_POSITION_TABLE_SIZE - 1; n > 0; --n) {
412514
rankPosition[n-1].base += rankPosition[n].base;
413515
rankPosition[n-1].curr = rankPosition[n-1].base;
414516
}
415-
/* Sort */
517+
518+
/* Insert each symbol into its appropriate bucket, advancing that bucket's .curr.
 * A symbol whose bucket index is r-1 is written at rankPosition[r].curr, which starts at rankPosition[r].base. */
416519
for (n = 0; n < maxSymbolValue1; ++n) {
417520
U32 const c = count[n];
418-
U32 const r = BIT_highbit32(c+1) + 1;
419-
U32 pos = rankPosition[r].curr++;
420-
/* Insert into the correct position in the rank.
421-
* We have at most 256 symbols, so this insertion should be fine.
422-
*/
423-
while ((pos > rankPosition[r].base) && (c > huffNode[pos-1].count)) {
424-
huffNode[pos] = huffNode[pos-1];
425-
pos--;
426-
}
521+
U32 const r = HUF_getIndex(c) + 1;
522+
U32 const pos = rankPosition[r].curr++;
523+
assert(pos < maxSymbolValue1);
427524
huffNode[pos].count = c;
428525
huffNode[pos].byte = (BYTE)n;
429526
}
430-
}
431527

528+
/* Sort each bucket.
 * Entries of rankPosition below RANK_POSITION_DISTINCT_COUNT_CUTOFF only ever receive symbols
 * sharing one distinct count (see HUF_getIndex()), so the loop starts at the cutoff.
 * After the insertion pass, .curr - .base is the number of symbols stored in the bucket. */
529+
for (n = RANK_POSITION_DISTINCT_COUNT_CUTOFF; n < RANK_POSITION_TABLE_SIZE - 1; ++n) {
530+
U32 const bucketSize = rankPosition[n].curr-rankPosition[n].base;
531+
U32 const bucketStartIdx = rankPosition[n].base;
532+
if (bucketSize > 1) {
533+
assert(bucketStartIdx < maxSymbolValue1);
534+
HUF_simpleQuickSort(huffNode + bucketStartIdx, 0, bucketSize-1);
535+
}
536+
}
537+
538+
/* Debug-only verification that the whole output is in decreasing count order. */
assert(HUF_isSorted(huffNode, maxSymbolValue1));
539+
}
432540

433541
/** HUF_buildCTable_wksp() :
434542
* Same as HUF_buildCTable(), but using externally allocated scratch buffer.

0 commit comments

Comments
 (0)