|
| 1 | +// Copyright 2009 The Go Authors. All rights reserved. |
| 2 | +// Use of this source code is governed by a BSD-style |
| 3 | +// license that can be found in the LICENSE file. |
| 4 | + |
| 5 | +package flate |
| 6 | + |
| 7 | +// Sort sorts data. |
| 8 | +// It makes one call to data.Len to determine n, and O(n*log(n)) calls to |
| 9 | +// data.Less and data.Swap. The sort is not guaranteed to be stable. |
| 10 | +func sortByFreq(data []literalNode) { |
| 11 | + n := len(data) |
| 12 | + quickSortByFreq(data, 0, n, maxDepth(n)) |
| 13 | +} |
| 14 | + |
| 15 | +func quickSortByFreq(data []literalNode, a, b, maxDepth int) { |
| 16 | + for b-a > 12 { // Use ShellSort for slices <= 12 elements |
| 17 | + if maxDepth == 0 { |
| 18 | + heapSort(data, a, b) |
| 19 | + return |
| 20 | + } |
| 21 | + maxDepth-- |
| 22 | + mlo, mhi := doPivotByFreq(data, a, b) |
| 23 | + // Avoiding recursion on the larger subproblem guarantees |
| 24 | + // a stack depth of at most lg(b-a). |
| 25 | + if mlo-a < b-mhi { |
| 26 | + quickSortByFreq(data, a, mlo, maxDepth) |
| 27 | + a = mhi // i.e., quickSortByFreq(data, mhi, b) |
| 28 | + } else { |
| 29 | + quickSortByFreq(data, mhi, b, maxDepth) |
| 30 | + b = mlo // i.e., quickSortByFreq(data, a, mlo) |
| 31 | + } |
| 32 | + } |
| 33 | + if b-a > 1 { |
| 34 | + // Do ShellSort pass with gap 6 |
| 35 | + // It could be written in this simplified form cause b-a <= 12 |
| 36 | + for i := a + 6; i < b; i++ { |
| 37 | + if data[i].freq == data[i-6].freq && data[i].literal < data[i-6].literal || data[i].freq < data[i-6].freq { |
| 38 | + data[i], data[i-6] = data[i-6], data[i] |
| 39 | + } |
| 40 | + } |
| 41 | + insertionSortByFreq(data, a, b) |
| 42 | + } |
| 43 | +} |
| 44 | + |
| 45 | +// siftDownByFreq implements the heap property on data[lo, hi). |
| 46 | +// first is an offset into the array where the root of the heap lies. |
| 47 | +func siftDownByFreq(data []literalNode, lo, hi, first int) { |
| 48 | + root := lo |
| 49 | + for { |
| 50 | + child := 2*root + 1 |
| 51 | + if child >= hi { |
| 52 | + break |
| 53 | + } |
| 54 | + if child+1 < hi && (data[first+child].freq == data[first+child+1].freq && data[first+child].literal < data[first+child+1].literal || data[first+child].freq < data[first+child+1].freq) { |
| 55 | + child++ |
| 56 | + } |
| 57 | + if data[first+root].freq == data[first+child].freq && data[first+root].literal > data[first+child].literal || data[first+root].freq > data[first+child].freq { |
| 58 | + return |
| 59 | + } |
| 60 | + data[first+root], data[first+child] = data[first+child], data[first+root] |
| 61 | + root = child |
| 62 | + } |
| 63 | +} |
| 64 | +func doPivotByFreq(data []literalNode, lo, hi int) (midlo, midhi int) { |
| 65 | + m := int(uint(lo+hi) >> 1) // Written like this to avoid integer overflow. |
| 66 | + if hi-lo > 40 { |
| 67 | + // Tukey's ``Ninther,'' median of three medians of three. |
| 68 | + s := (hi - lo) / 8 |
| 69 | + medianOfThreeSortByFreq(data, lo, lo+s, lo+2*s) |
| 70 | + medianOfThreeSortByFreq(data, m, m-s, m+s) |
| 71 | + medianOfThreeSortByFreq(data, hi-1, hi-1-s, hi-1-2*s) |
| 72 | + } |
| 73 | + medianOfThreeSortByFreq(data, lo, m, hi-1) |
| 74 | + |
| 75 | + // Invariants are: |
| 76 | + // data[lo] = pivot (set up by ChoosePivot) |
| 77 | + // data[lo < i < a] < pivot |
| 78 | + // data[a <= i < b] <= pivot |
| 79 | + // data[b <= i < c] unexamined |
| 80 | + // data[c <= i < hi-1] > pivot |
| 81 | + // data[hi-1] >= pivot |
| 82 | + pivot := lo |
| 83 | + a, c := lo+1, hi-1 |
| 84 | + |
| 85 | + for ; a < c && (data[a].freq == data[pivot].freq && data[a].literal < data[pivot].literal || data[a].freq < data[pivot].freq); a++ { |
| 86 | + } |
| 87 | + b := a |
| 88 | + for { |
| 89 | + for ; b < c && (data[pivot].freq == data[b].freq && data[pivot].literal > data[b].literal || data[pivot].freq > data[b].freq); b++ { // data[b] <= pivot |
| 90 | + } |
| 91 | + for ; b < c && (data[pivot].freq == data[c-1].freq && data[pivot].literal < data[c-1].literal || data[pivot].freq < data[c-1].freq); c-- { // data[c-1] > pivot |
| 92 | + } |
| 93 | + if b >= c { |
| 94 | + break |
| 95 | + } |
| 96 | + // data[b] > pivot; data[c-1] <= pivot |
| 97 | + data[b], data[c-1] = data[c-1], data[b] |
| 98 | + b++ |
| 99 | + c-- |
| 100 | + } |
| 101 | + // If hi-c<3 then there are duplicates (by property of median of nine). |
| 102 | + // Let's be a bit more conservative, and set border to 5. |
| 103 | + protect := hi-c < 5 |
| 104 | + if !protect && hi-c < (hi-lo)/4 { |
| 105 | + // Lets test some points for equality to pivot |
| 106 | + dups := 0 |
| 107 | + if data[pivot].freq == data[hi-1].freq && data[pivot].literal > data[hi-1].literal || data[pivot].freq > data[hi-1].freq { // data[hi-1] = pivot |
| 108 | + data[c], data[hi-1] = data[hi-1], data[c] |
| 109 | + c++ |
| 110 | + dups++ |
| 111 | + } |
| 112 | + if data[b-1].freq == data[pivot].freq && data[b-1].literal > data[pivot].literal || data[b-1].freq > data[pivot].freq { // data[b-1] = pivot |
| 113 | + b-- |
| 114 | + dups++ |
| 115 | + } |
| 116 | + // m-lo = (hi-lo)/2 > 6 |
| 117 | + // b-lo > (hi-lo)*3/4-1 > 8 |
| 118 | + // ==> m < b ==> data[m] <= pivot |
| 119 | + if data[m].freq == data[pivot].freq && data[m].literal > data[pivot].literal || data[m].freq > data[pivot].freq { // data[m] = pivot |
| 120 | + data[m], data[b-1] = data[b-1], data[m] |
| 121 | + b-- |
| 122 | + dups++ |
| 123 | + } |
| 124 | + // if at least 2 points are equal to pivot, assume skewed distribution |
| 125 | + protect = dups > 1 |
| 126 | + } |
| 127 | + if protect { |
| 128 | + // Protect against a lot of duplicates |
| 129 | + // Add invariant: |
| 130 | + // data[a <= i < b] unexamined |
| 131 | + // data[b <= i < c] = pivot |
| 132 | + for { |
| 133 | + for ; a < b && (data[b-1].freq == data[pivot].freq && data[b-1].literal > data[pivot].literal || data[b-1].freq > data[pivot].freq); b-- { // data[b] == pivot |
| 134 | + } |
| 135 | + for ; a < b && (data[a].freq == data[pivot].freq && data[a].literal < data[pivot].literal || data[a].freq < data[pivot].freq); a++ { // data[a] < pivot |
| 136 | + } |
| 137 | + if a >= b { |
| 138 | + break |
| 139 | + } |
| 140 | + // data[a] == pivot; data[b-1] < pivot |
| 141 | + data[a], data[b-1] = data[b-1], data[a] |
| 142 | + a++ |
| 143 | + b-- |
| 144 | + } |
| 145 | + } |
| 146 | + // Swap pivot into middle |
| 147 | + data[pivot], data[b-1] = data[b-1], data[pivot] |
| 148 | + return b - 1, c |
| 149 | +} |
| 150 | + |
| 151 | +// Insertion sort |
| 152 | +func insertionSortByFreq(data []literalNode, a, b int) { |
| 153 | + for i := a + 1; i < b; i++ { |
| 154 | + for j := i; j > a && (data[j].freq == data[j-1].freq && data[j].literal < data[j-1].literal || data[j].freq < data[j-1].freq); j-- { |
| 155 | + data[j], data[j-1] = data[j-1], data[j] |
| 156 | + } |
| 157 | + } |
| 158 | +} |
| 159 | + |
| 160 | +// quickSortByFreq, loosely following Bentley and McIlroy, |
| 161 | +// ``Engineering a Sort Function,'' SP&E November 1993. |
| 162 | + |
| 163 | +// medianOfThreeSortByFreq moves the median of the three values data[m0], data[m1], data[m2] into data[m1]. |
| 164 | +func medianOfThreeSortByFreq(data []literalNode, m1, m0, m2 int) { |
| 165 | + // sort 3 elements |
| 166 | + if data[m1].freq == data[m0].freq && data[m1].literal < data[m0].literal || data[m1].freq < data[m0].freq { |
| 167 | + data[m1], data[m0] = data[m0], data[m1] |
| 168 | + } |
| 169 | + // data[m0] <= data[m1] |
| 170 | + if data[m2].freq == data[m1].freq && data[m2].literal < data[m1].literal || data[m2].freq < data[m1].freq { |
| 171 | + data[m2], data[m1] = data[m1], data[m2] |
| 172 | + // data[m0] <= data[m2] && data[m1] < data[m2] |
| 173 | + if data[m1].freq == data[m0].freq && data[m1].literal < data[m0].literal || data[m1].freq < data[m0].freq { |
| 174 | + data[m1], data[m0] = data[m0], data[m1] |
| 175 | + } |
| 176 | + } |
| 177 | + // now data[m0] <= data[m1] <= data[m2] |
| 178 | +} |
0 commit comments