Skip to content

Commit 6234e33

Browse files
authored
s2: Slightly faster non-assembly decompression (#646)
* s2: Improve Go speed. Remove bounds checks and load faster.
1 parent a3bc126 commit 6234e33

File tree

1 file changed

+23
-11
lines changed

1 file changed

+23
-11
lines changed

s2/decode_other.go

Lines changed: 23 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,9 @@ func s2Decode(dst, src []byte) int {
2828

2929
// As long as we can read at least 5 bytes...
3030
for s < len(src)-5 {
31+
// Removing bounds checks is SLOWER when doing
32+
// in := src[s:s+5]
33+
// Checked on Go 1.18
3134
switch src[s] & 0x03 {
3235
case tagLiteral:
3336
x := uint32(src[s] >> 2)
@@ -38,14 +41,19 @@ func s2Decode(dst, src []byte) int {
3841
s += 2
3942
x = uint32(src[s-1])
4043
case x == 61:
44+
in := src[s : s+3]
45+
x = uint32(in[1]) | uint32(in[2])<<8
4146
s += 3
42-
x = uint32(src[s-2]) | uint32(src[s-1])<<8
4347
case x == 62:
48+
in := src[s : s+4]
49+
// Load as 32 bit and shift down.
50+
x = uint32(in[0]) | uint32(in[1])<<8 | uint32(in[2])<<16 | uint32(in[3])<<24
51+
x >>= 8
4452
s += 4
45-
x = uint32(src[s-3]) | uint32(src[s-2])<<8 | uint32(src[s-1])<<16
4653
case x == 63:
54+
in := src[s : s+5]
55+
x = uint32(in[1]) | uint32(in[2])<<8 | uint32(in[3])<<16 | uint32(in[4])<<24
4756
s += 5
48-
x = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24
4957
}
5058
length = int(x) + 1
5159
if length > len(dst)-d || length > len(src)-s || (strconv.IntSize == 32 && length <= 0) {
@@ -62,38 +70,42 @@ func s2Decode(dst, src []byte) int {
6270

6371
case tagCopy1:
6472
s += 2
65-
length = int(src[s-2]) >> 2 & 0x7
6673
toffset := int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1]))
74+
length = int(src[s-2]) >> 2 & 0x7
6775
if toffset == 0 {
6876
if debug {
6977
fmt.Print("(repeat) ")
7078
}
7179
// keep last offset
7280
switch length {
7381
case 5:
82+
length = int(src[s]) + 4
7483
s += 1
75-
length = int(uint32(src[s-1])) + 4
7684
case 6:
85+
in := src[s : s+2]
86+
length = int(uint32(in[0])|(uint32(in[1])<<8)) + (1 << 8)
7787
s += 2
78-
length = int(uint32(src[s-2])|(uint32(src[s-1])<<8)) + (1 << 8)
7988
case 7:
89+
in := src[s : s+3]
90+
length = int((uint32(in[2])<<16)|(uint32(in[1])<<8)|uint32(in[0])) + (1 << 16)
8091
s += 3
81-
length = int(uint32(src[s-3])|(uint32(src[s-2])<<8)|(uint32(src[s-1])<<16)) + (1 << 16)
8292
default: // 0-> 4
8393
}
8494
} else {
8595
offset = toffset
8696
}
8797
length += 4
8898
case tagCopy2:
99+
in := src[s : s+3]
100+
offset = int(uint32(in[1]) | uint32(in[2])<<8)
101+
length = 1 + int(in[0])>>2
89102
s += 3
90-
length = 1 + int(src[s-3])>>2
91-
offset = int(uint32(src[s-2]) | uint32(src[s-1])<<8)
92103

93104
case tagCopy4:
105+
in := src[s : s+5]
106+
offset = int(uint32(in[1]) | uint32(in[2])<<8 | uint32(in[3])<<16 | uint32(in[4])<<24)
107+
length = 1 + int(in[0])>>2
94108
s += 5
95-
length = 1 + int(src[s-5])>>2
96-
offset = int(uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24)
97109
}
98110

99111
if offset <= 0 || d < offset || length > len(dst)-d {

0 commit comments

Comments
 (0)