Skip to content

Commit 1901a5f

Browse files
luyu6056 authored and klauspost committed
use quickSort from sort.Sort (#207)
About 5-15% speedup. More on smaller blocks.
1 parent a649712 commit 1901a5f

File tree

3 files changed

+381
-38
lines changed

3 files changed

+381
-38
lines changed

flate/huffman_code.go

Lines changed: 2 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@ package flate
77
import (
88
"math"
99
"math/bits"
10-
"sort"
1110
)
1211

1312
const (
@@ -25,8 +24,6 @@ type huffmanEncoder struct {
2524
codes []hcode
2625
freqcache []literalNode
2726
bitCount [17]int32
28-
lns byLiteral // stored to avoid repeated allocation in generate
29-
lfs byFreq // stored to avoid repeated allocation in generate
3027
}
3128

3229
type literalNode struct {
@@ -270,7 +267,7 @@ func (h *huffmanEncoder) assignEncodingAndSize(bitCount []int32, list []literalN
270267
// assigned in literal order (not frequency order).
271268
chunk := list[len(list)-int(bits):]
272269

273-
h.lns.sort(chunk)
270+
sortByLiteral(chunk)
274271
for _, node := range chunk {
275272
h.codes[node.literal] = hcode{code: reverseBits(code, uint8(n)), len: uint16(n)}
276273
code++
@@ -315,47 +312,14 @@ func (h *huffmanEncoder) generate(freq []uint16, maxBits int32) {
315312
}
316313
return
317314
}
318-
h.lfs.sort(list)
315+
sortByFreq(list)
319316

320317
// Get the number of literals for each bit count
321318
bitCount := h.bitCounts(list, maxBits)
322319
// And do the assignment
323320
h.assignEncodingAndSize(bitCount, list)
324321
}
325322

326-
type byLiteral []literalNode
327-
328-
func (s *byLiteral) sort(a []literalNode) {
329-
*s = byLiteral(a)
330-
sort.Sort(s)
331-
}
332-
333-
func (s byLiteral) Len() int { return len(s) }
334-
335-
func (s byLiteral) Less(i, j int) bool {
336-
return s[i].literal < s[j].literal
337-
}
338-
339-
func (s byLiteral) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
340-
341-
type byFreq []literalNode
342-
343-
func (s *byFreq) sort(a []literalNode) {
344-
*s = byFreq(a)
345-
sort.Sort(s)
346-
}
347-
348-
func (s byFreq) Len() int { return len(s) }
349-
350-
func (s byFreq) Less(i, j int) bool {
351-
if s[i].freq == s[j].freq {
352-
return s[i].literal < s[j].literal
353-
}
354-
return s[i].freq < s[j].freq
355-
}
356-
357-
func (s byFreq) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
358-
359323
// histogramSize accumulates a histogram of b in h.
360324
// An estimated size in bits is returned.
361325
// Unassigned values are assigned '1' in the histogram.

flate/huffman_sortByFreq.go

Lines changed: 178 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,178 @@
1+
// Copyright 2009 The Go Authors. All rights reserved.
2+
// Use of this source code is governed by a BSD-style
3+
// license that can be found in the LICENSE file.
4+
5+
package flate
6+
7+
// Sort sorts data.
8+
// It makes one call to data.Len to determine n, and O(n*log(n)) calls to
9+
// data.Less and data.Swap. The sort is not guaranteed to be stable.
10+
func sortByFreq(data []literalNode) {
11+
n := len(data)
12+
quickSortByFreq(data, 0, n, maxDepth(n))
13+
}
14+
15+
func quickSortByFreq(data []literalNode, a, b, maxDepth int) {
16+
for b-a > 12 { // Use ShellSort for slices <= 12 elements
17+
if maxDepth == 0 {
18+
heapSort(data, a, b)
19+
return
20+
}
21+
maxDepth--
22+
mlo, mhi := doPivotByFreq(data, a, b)
23+
// Avoiding recursion on the larger subproblem guarantees
24+
// a stack depth of at most lg(b-a).
25+
if mlo-a < b-mhi {
26+
quickSortByFreq(data, a, mlo, maxDepth)
27+
a = mhi // i.e., quickSortByFreq(data, mhi, b)
28+
} else {
29+
quickSortByFreq(data, mhi, b, maxDepth)
30+
b = mlo // i.e., quickSortByFreq(data, a, mlo)
31+
}
32+
}
33+
if b-a > 1 {
34+
// Do ShellSort pass with gap 6
35+
// It could be written in this simplified form cause b-a <= 12
36+
for i := a + 6; i < b; i++ {
37+
if data[i].freq == data[i-6].freq && data[i].literal < data[i-6].literal || data[i].freq < data[i-6].freq {
38+
data[i], data[i-6] = data[i-6], data[i]
39+
}
40+
}
41+
insertionSortByFreq(data, a, b)
42+
}
43+
}
44+
45+
// siftDownByFreq implements the heap property on data[lo, hi).
46+
// first is an offset into the array where the root of the heap lies.
47+
func siftDownByFreq(data []literalNode, lo, hi, first int) {
48+
root := lo
49+
for {
50+
child := 2*root + 1
51+
if child >= hi {
52+
break
53+
}
54+
if child+1 < hi && (data[first+child].freq == data[first+child+1].freq && data[first+child].literal < data[first+child+1].literal || data[first+child].freq < data[first+child+1].freq) {
55+
child++
56+
}
57+
if data[first+root].freq == data[first+child].freq && data[first+root].literal > data[first+child].literal || data[first+root].freq > data[first+child].freq {
58+
return
59+
}
60+
data[first+root], data[first+child] = data[first+child], data[first+root]
61+
root = child
62+
}
63+
}
64+
func doPivotByFreq(data []literalNode, lo, hi int) (midlo, midhi int) {
65+
m := int(uint(lo+hi) >> 1) // Written like this to avoid integer overflow.
66+
if hi-lo > 40 {
67+
// Tukey's ``Ninther,'' median of three medians of three.
68+
s := (hi - lo) / 8
69+
medianOfThreeSortByFreq(data, lo, lo+s, lo+2*s)
70+
medianOfThreeSortByFreq(data, m, m-s, m+s)
71+
medianOfThreeSortByFreq(data, hi-1, hi-1-s, hi-1-2*s)
72+
}
73+
medianOfThreeSortByFreq(data, lo, m, hi-1)
74+
75+
// Invariants are:
76+
// data[lo] = pivot (set up by ChoosePivot)
77+
// data[lo < i < a] < pivot
78+
// data[a <= i < b] <= pivot
79+
// data[b <= i < c] unexamined
80+
// data[c <= i < hi-1] > pivot
81+
// data[hi-1] >= pivot
82+
pivot := lo
83+
a, c := lo+1, hi-1
84+
85+
for ; a < c && (data[a].freq == data[pivot].freq && data[a].literal < data[pivot].literal || data[a].freq < data[pivot].freq); a++ {
86+
}
87+
b := a
88+
for {
89+
for ; b < c && (data[pivot].freq == data[b].freq && data[pivot].literal > data[b].literal || data[pivot].freq > data[b].freq); b++ { // data[b] <= pivot
90+
}
91+
for ; b < c && (data[pivot].freq == data[c-1].freq && data[pivot].literal < data[c-1].literal || data[pivot].freq < data[c-1].freq); c-- { // data[c-1] > pivot
92+
}
93+
if b >= c {
94+
break
95+
}
96+
// data[b] > pivot; data[c-1] <= pivot
97+
data[b], data[c-1] = data[c-1], data[b]
98+
b++
99+
c--
100+
}
101+
// If hi-c<3 then there are duplicates (by property of median of nine).
102+
// Let's be a bit more conservative, and set border to 5.
103+
protect := hi-c < 5
104+
if !protect && hi-c < (hi-lo)/4 {
105+
// Lets test some points for equality to pivot
106+
dups := 0
107+
if data[pivot].freq == data[hi-1].freq && data[pivot].literal > data[hi-1].literal || data[pivot].freq > data[hi-1].freq { // data[hi-1] = pivot
108+
data[c], data[hi-1] = data[hi-1], data[c]
109+
c++
110+
dups++
111+
}
112+
if data[b-1].freq == data[pivot].freq && data[b-1].literal > data[pivot].literal || data[b-1].freq > data[pivot].freq { // data[b-1] = pivot
113+
b--
114+
dups++
115+
}
116+
// m-lo = (hi-lo)/2 > 6
117+
// b-lo > (hi-lo)*3/4-1 > 8
118+
// ==> m < b ==> data[m] <= pivot
119+
if data[m].freq == data[pivot].freq && data[m].literal > data[pivot].literal || data[m].freq > data[pivot].freq { // data[m] = pivot
120+
data[m], data[b-1] = data[b-1], data[m]
121+
b--
122+
dups++
123+
}
124+
// if at least 2 points are equal to pivot, assume skewed distribution
125+
protect = dups > 1
126+
}
127+
if protect {
128+
// Protect against a lot of duplicates
129+
// Add invariant:
130+
// data[a <= i < b] unexamined
131+
// data[b <= i < c] = pivot
132+
for {
133+
for ; a < b && (data[b-1].freq == data[pivot].freq && data[b-1].literal > data[pivot].literal || data[b-1].freq > data[pivot].freq); b-- { // data[b] == pivot
134+
}
135+
for ; a < b && (data[a].freq == data[pivot].freq && data[a].literal < data[pivot].literal || data[a].freq < data[pivot].freq); a++ { // data[a] < pivot
136+
}
137+
if a >= b {
138+
break
139+
}
140+
// data[a] == pivot; data[b-1] < pivot
141+
data[a], data[b-1] = data[b-1], data[a]
142+
a++
143+
b--
144+
}
145+
}
146+
// Swap pivot into middle
147+
data[pivot], data[b-1] = data[b-1], data[pivot]
148+
return b - 1, c
149+
}
150+
151+
// Insertion sort
152+
func insertionSortByFreq(data []literalNode, a, b int) {
153+
for i := a + 1; i < b; i++ {
154+
for j := i; j > a && (data[j].freq == data[j-1].freq && data[j].literal < data[j-1].literal || data[j].freq < data[j-1].freq); j-- {
155+
data[j], data[j-1] = data[j-1], data[j]
156+
}
157+
}
158+
}
159+
160+
// quickSortByFreq, loosely following Bentley and McIlroy,
161+
// ``Engineering a Sort Function,'' SP&E November 1993.
162+
163+
// medianOfThreeSortByFreq moves the median of the three values data[m0], data[m1], data[m2] into data[m1].
164+
func medianOfThreeSortByFreq(data []literalNode, m1, m0, m2 int) {
165+
// sort 3 elements
166+
if data[m1].freq == data[m0].freq && data[m1].literal < data[m0].literal || data[m1].freq < data[m0].freq {
167+
data[m1], data[m0] = data[m0], data[m1]
168+
}
169+
// data[m0] <= data[m1]
170+
if data[m2].freq == data[m1].freq && data[m2].literal < data[m1].literal || data[m2].freq < data[m1].freq {
171+
data[m2], data[m1] = data[m1], data[m2]
172+
// data[m0] <= data[m2] && data[m1] < data[m2]
173+
if data[m1].freq == data[m0].freq && data[m1].literal < data[m0].literal || data[m1].freq < data[m0].freq {
174+
data[m1], data[m0] = data[m0], data[m1]
175+
}
176+
}
177+
// now data[m0] <= data[m1] <= data[m2]
178+
}

0 commit comments

Comments
 (0)