Skip to content

Commit 37e81bd

Browse files
authored
s2: Add pure Go snappy block compressor (#245)
* s2: Add pure Go snappy block compressor
1 parent 7728e3a commit 37e81bd

File tree

6 files changed

+7610
-7309
lines changed

6 files changed

+7610
-7309
lines changed

s2/encode.go

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,50 @@ func EncodeBetter(dst, src []byte) []byte {
100100
return dst[:d]
101101
}
102102

103+
// EncodeSnappy returns the encoded form of src. The returned slice may be a sub-
104+
// slice of dst if dst was large enough to hold the entire encoded block.
105+
// Otherwise, a newly allocated slice will be returned.
106+
//
107+
// The output is Snappy compatible and will likely decompress faster.
108+
//
109+
// The dst and src must not overlap. It is valid to pass a nil dst.
110+
//
111+
// The blocks will require the same amount of memory to decode as encoding,
112+
// and does not make for concurrent decoding.
113+
// Also note that blocks do not contain CRC information, so corruption may be undetected.
114+
//
115+
// If you need to encode larger amounts of data, consider using
116+
// the streaming interface which gives all of these features.
117+
func EncodeSnappy(dst, src []byte) []byte {
118+
if n := MaxEncodedLen(len(src)); n < 0 {
119+
panic(ErrTooLarge)
120+
} else if cap(dst) < n {
121+
dst = make([]byte, n)
122+
} else {
123+
dst = dst[:n]
124+
}
125+
126+
// The block starts with the varint-encoded length of the decompressed bytes.
127+
d := binary.PutUvarint(dst, uint64(len(src)))
128+
129+
if len(src) == 0 {
130+
return dst[:d]
131+
}
132+
if len(src) < minNonLiteralBlockSize {
133+
d += emitLiteral(dst[d:], src)
134+
return dst[:d]
135+
}
136+
137+
n := encodeBlockSnappy(dst[d:], src)
138+
if n > 0 {
139+
d += n
140+
return dst[:d]
141+
}
142+
// Not compressible
143+
d += emitLiteral(dst[d:], src)
144+
return dst[:d]
145+
}
146+
103147
// ConcatBlocks will concatenate the supplied blocks and append them to the supplied destination.
104148
// If the destination is nil or too small, a new one will be allocated.
105149
// The blocks are not validated, so garbage in = garbage out.

s2/encode_amd64.go

Lines changed: 1 addition & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,6 @@
44

55
package s2
66

7-
import "encoding/binary"
8-
97
// init stores the result of cpu.avx() in avxAvailable when the package is
// initialized.
// NOTE(review): presumably this flags whether AVX instructions can be used;
// confirm against the cpu helper and the readers of avxAvailable.
func init() {
108
avxAvailable = cpu.avx()
119
}
@@ -57,51 +55,7 @@ func encodeBlock(dst, src []byte) (d int) {
5755
return encodeBlockAsm8B(dst, src)
5856
}
5957

60-
// EncodeSnappy returns the encoded form of src. The returned slice may be a sub-
61-
// slice of dst if dst was large enough to hold the entire encoded block.
62-
// Otherwise, a newly allocated slice will be returned.
63-
//
64-
// The output is Snappy compatible and will likely decompress faster.
65-
//
66-
// The dst and src must not overlap. It is valid to pass a nil dst.
67-
//
68-
// The blocks will require the same amount of memory to decode as encoding,
69-
// and does not make for concurrent decoding.
70-
// Also note that blocks do not contain CRC information, so corruption may be undetected.
71-
//
72-
// If you need to encode larger amounts of data, consider using
73-
// the streaming interface which gives all of these features.
74-
func EncodeSnappy(dst, src []byte) []byte {
75-
if n := MaxEncodedLen(len(src)); n < 0 {
76-
panic(ErrTooLarge)
77-
} else if cap(dst) < n {
78-
dst = make([]byte, n)
79-
} else {
80-
dst = dst[:n]
81-
}
82-
83-
// The block starts with the varint-encoded length of the decompressed bytes.
84-
d := binary.PutUvarint(dst, uint64(len(src)))
85-
86-
if len(src) == 0 {
87-
return dst[:d]
88-
}
89-
if len(src) < minNonLiteralBlockSize {
90-
d += emitLiteral(dst[d:], src)
91-
return dst[:d]
92-
}
93-
94-
n := encodeBlockSnappy(dst[d:], src)
95-
if n > 0 {
96-
d += n
97-
return dst[:d]
98-
}
99-
// Not compressible
100-
d += emitLiteral(dst[d:], src)
101-
return dst[:d]
102-
}
103-
104-
// encodeBlock encodes a non-empty src to a guaranteed-large-enough dst. It
58+
// encodeBlockSnappy encodes a non-empty src to a guaranteed-large-enough dst. It
10559
// assumes that the varint-encoded length of the decompressed bytes has already
10660
// been written.
10761
//

s2/encode_go.go

Lines changed: 245 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -3,29 +3,10 @@
33
package s2
44

55
import (
6+
"bytes"
67
"math/bits"
7-
8-
"github.com/klauspost/compress/snappy"
98
)
109

11-
// EncodeSnappy returns the encoded form of src. The returned slice may be a sub-
12-
// slice of dst if dst was large enough to hold the entire encoded block.
13-
// Otherwise, a newly allocated slice will be returned.
14-
//
15-
// The output is Snappy compatible and will likely decompress faster.
16-
//
17-
// The dst and src must not overlap. It is valid to pass a nil dst.
18-
//
19-
// The blocks will require the same amount of memory to decode as encoding,
20-
// and does not make for concurrent decoding.
21-
// Also note that blocks do not contain CRC information, so corruption may be undetected.
22-
//
23-
// If you need to encode larger amounts of data, consider using
24-
// the streaming interface which gives all of these features.
25-
func EncodeSnappy(dst, src []byte) []byte {
26-
return snappy.Encode(dst, src)
27-
}
28-
2910
// encodeBlock encodes a non-empty src to a guaranteed-large-enough dst. It
3011
// assumes that the varint-encoded length of the decompressed bytes has already
3112
// been written.
@@ -188,6 +169,65 @@ func emitCopy(dst []byte, offset, length int) int {
188169
return 2
189170
}
190171

172+
// emitCopyNoRepeat writes a copy chunk and returns the number of bytes written.
173+
//
174+
// It assumes that:
175+
// dst is long enough to hold the encoded bytes
176+
// 1 <= offset && offset <= math.MaxUint32
177+
// 4 <= length && length <= 1 << 24
178+
func emitCopyNoRepeat(dst []byte, offset, length int) int {
179+
if offset >= 65536 {
180+
i := 0
181+
if length > 64 {
182+
// Emit a length 64 copy, encoded as 5 bytes.
183+
dst[4] = uint8(offset >> 24)
184+
dst[3] = uint8(offset >> 16)
185+
dst[2] = uint8(offset >> 8)
186+
dst[1] = uint8(offset)
187+
dst[0] = 63<<2 | tagCopy4
188+
length -= 64
189+
if length >= 4 {
190+
// Emit remaining as repeats
191+
return 5 + emitCopyNoRepeat(dst[5:], offset, length)
192+
}
193+
i = 5
194+
}
195+
if length == 0 {
196+
return i
197+
}
198+
// Emit a copy, offset encoded as 4 bytes.
199+
dst[i+0] = uint8(length-1)<<2 | tagCopy4
200+
dst[i+1] = uint8(offset)
201+
dst[i+2] = uint8(offset >> 8)
202+
dst[i+3] = uint8(offset >> 16)
203+
dst[i+4] = uint8(offset >> 24)
204+
return i + 5
205+
}
206+
207+
// Offset no more than 2 bytes.
208+
if length > 64 {
209+
// Emit a length 60 copy, encoded as 3 bytes.
210+
// Emit remaining as repeat value (minimum 4 bytes).
211+
dst[2] = uint8(offset >> 8)
212+
dst[1] = uint8(offset)
213+
dst[0] = 59<<2 | tagCopy2
214+
length -= 60
215+
// Emit remaining as repeats, at least 4 bytes remain.
216+
return 3 + emitCopyNoRepeat(dst[3:], offset, length)
217+
}
218+
if length >= 12 || offset >= 2048 {
219+
// Emit the remaining copy, encoded as 3 bytes.
220+
dst[2] = uint8(offset >> 8)
221+
dst[1] = uint8(offset)
222+
dst[0] = uint8(length-1)<<2 | tagCopy2
223+
return 3
224+
}
225+
// Emit the remaining copy, encoded as 2 bytes.
226+
dst[1] = uint8(offset)
227+
dst[0] = uint8(offset>>8)<<5 | uint8(length-4)<<2 | tagCopy1
228+
return 2
229+
}
230+
191231
// matchLen returns how many bytes match in a and b
192232
//
193233
// It assumes that:
@@ -223,3 +263,188 @@ func matchLen(a []byte, b []byte) int {
223263
}
224264
return len(a) + checked
225265
}
266+
267+
func encodeBlockSnappy(dst, src []byte) (d int) {
268+
// Initialize the hash table.
269+
const (
270+
tableBits = 14
271+
maxTableSize = 1 << tableBits
272+
)
273+
274+
var table [maxTableSize]uint32
275+
276+
// sLimit is when to stop looking for offset/length copies. The inputMargin
277+
// lets us use a fast path for emitLiteral in the main loop, while we are
278+
// looking for copies.
279+
sLimit := len(src) - inputMargin
280+
281+
// Bail if we can't compress to at least this.
282+
dstLimit := len(src) - len(src)>>5 - 5
283+
284+
// nextEmit is where in src the next emitLiteral should start from.
285+
nextEmit := 0
286+
287+
// The encoded form must start with a literal, as there are no previous
288+
// bytes to copy, so we start looking for hash matches at s == 1.
289+
s := 1
290+
cv := load64(src, s)
291+
292+
// We search for a repeat at -1, but don't output repeats when nextEmit == 0
293+
repeat := 1
294+
295+
for {
296+
candidate := 0
297+
for {
298+
// Next src position to check
299+
nextS := s + (s-nextEmit)>>6 + 4
300+
if nextS > sLimit {
301+
goto emitRemainder
302+
}
303+
hash0 := hash6(cv, tableBits)
304+
hash1 := hash6(cv>>8, tableBits)
305+
candidate = int(table[hash0])
306+
candidate2 := int(table[hash1])
307+
table[hash0] = uint32(s)
308+
table[hash1] = uint32(s + 1)
309+
hash2 := hash6(cv>>16, tableBits)
310+
311+
// Check repeat at offset checkRep.
312+
const checkRep = 1
313+
if uint32(cv>>(checkRep*8)) == load32(src, s-repeat+checkRep) {
314+
base := s + checkRep
315+
// Extend back
316+
for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; {
317+
i--
318+
base--
319+
}
320+
d += emitLiteral(dst[d:], src[nextEmit:base])
321+
322+
// Extend forward
323+
candidate := s - repeat + 4 + checkRep
324+
s += 4 + checkRep
325+
for s <= sLimit {
326+
if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
327+
s += bits.TrailingZeros64(diff) >> 3
328+
break
329+
}
330+
s += 8
331+
candidate += 8
332+
}
333+
334+
d += emitCopyNoRepeat(dst[d:], repeat, s-base)
335+
nextEmit = s
336+
if s >= sLimit {
337+
goto emitRemainder
338+
}
339+
340+
cv = load64(src, s)
341+
continue
342+
}
343+
344+
if uint32(cv) == load32(src, candidate) {
345+
break
346+
}
347+
candidate = int(table[hash2])
348+
if uint32(cv>>8) == load32(src, candidate2) {
349+
table[hash2] = uint32(s + 2)
350+
candidate = candidate2
351+
s++
352+
break
353+
}
354+
table[hash2] = uint32(s + 2)
355+
if uint32(cv>>16) == load32(src, candidate) {
356+
s += 2
357+
break
358+
}
359+
360+
cv = load64(src, nextS)
361+
s = nextS
362+
}
363+
364+
// Extend backwards
365+
for candidate > 0 && s > nextEmit && src[candidate-1] == src[s-1] {
366+
candidate--
367+
s--
368+
}
369+
370+
// Bail if we exceed the maximum size.
371+
if d+(s-nextEmit) > dstLimit {
372+
return 0
373+
}
374+
375+
// A 4-byte match has been found. We'll later see if more than 4 bytes
376+
// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
377+
// them as literal bytes.
378+
379+
d += emitLiteral(dst[d:], src[nextEmit:s])
380+
381+
// Call emitCopy, and then see if another emitCopy could be our next
382+
// move. Repeat until we find no match for the input immediately after
383+
// what was consumed by the last emitCopy call.
384+
//
385+
// If we exit this loop normally then we need to call emitLiteral next,
386+
// though we don't yet know how big the literal will be. We handle that
387+
// by proceeding to the next iteration of the main loop. We also can
388+
// exit this loop via goto if we get close to exhausting the input.
389+
for {
390+
// Invariant: we have a 4-byte match at s, and no need to emit any
391+
// literal bytes prior to s.
392+
base := s
393+
repeat = base - candidate
394+
395+
// Extend the 4-byte match as long as possible.
396+
s += 4
397+
candidate += 4
398+
for s <= len(src)-8 {
399+
if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
400+
s += bits.TrailingZeros64(diff) >> 3
401+
break
402+
}
403+
s += 8
404+
candidate += 8
405+
}
406+
407+
d += emitCopyNoRepeat(dst[d:], repeat, s-base)
408+
if false {
409+
// Validate match.
410+
a := src[base:s]
411+
b := src[base-repeat : base-repeat+(s-base)]
412+
if !bytes.Equal(a, b) {
413+
panic("mismatch")
414+
}
415+
}
416+
417+
nextEmit = s
418+
if s >= sLimit {
419+
goto emitRemainder
420+
}
421+
422+
if d > dstLimit {
423+
// Do we have space for more, if not bail.
424+
return 0
425+
}
426+
// Check for an immediate match, otherwise start search at s+1
427+
x := load64(src, s-2)
428+
m2Hash := hash6(x, tableBits)
429+
currHash := hash6(x>>16, tableBits)
430+
candidate = int(table[currHash])
431+
table[m2Hash] = uint32(s - 2)
432+
table[currHash] = uint32(s)
433+
if uint32(x>>16) != load32(src, candidate) {
434+
cv = load64(src, s+1)
435+
s++
436+
break
437+
}
438+
}
439+
}
440+
441+
emitRemainder:
442+
if nextEmit < len(src) {
443+
// Bail if we exceed the maximum size.
444+
if d+len(src)-nextEmit > dstLimit {
445+
return 0
446+
}
447+
d += emitLiteral(dst[d:], src[nextEmit:])
448+
}
449+
return d
450+
}

0 commit comments

Comments
 (0)