Skip to content

Commit 60e376b

Browse files
authored
huff0: Assembler improvements (#736)
Main changes: * Compute out[id * dstEvery + i] statically. This shaves four instructions off the main loops. (It also frees up a register.) * Track "exhausted" by addition instead of OR. This gets rid of an additional instruction. The variable is now also zeroed inside the loop as a dependency hint. Benchmark results show small speedups on some datasets: ``` name old speed new speed delta Decompress1XTable/digits-8 350MB/s ± 0% 350MB/s ± 1% ~ (p=0.764 n=10+9) Decompress1XTable/gettysburg-8 270MB/s ± 1% 268MB/s ± 1% -0.72% (p=0.001 n=10+10) Decompress1XTable/twain-8 329MB/s ± 1% 328MB/s ± 0% ~ (p=0.035 n=10+9) Decompress1XTable/low-ent.10k-8 387MB/s ± 1% 386MB/s ± 0% ~ (p=0.027 n=10+8) Decompress1XTable/superlow-ent-10k-8 377MB/s ± 0% 375MB/s ± 0% -0.48% (p=0.000 n=10+10) Decompress1XTable/crash2-8 17.0MB/s ± 0% 16.9MB/s ± 0% -0.36% (p=0.004 n=9+10) Decompress1XTable/endzerobits-8 53.3MB/s ± 0% 53.0MB/s ± 0% -0.55% (p=0.000 n=10+9) Decompress1XTable/endnonzero-8 11.3MB/s ± 0% 11.3MB/s ± 1% ~ (p=0.060 n=10+10) Decompress1XTable/case1-8 22.0MB/s ± 0% 21.9MB/s ± 1% ~ (p=0.015 n=9+9) Decompress1XTable/case2-8 18.1MB/s ± 1% 18.1MB/s ± 1% ~ (p=0.202 n=10+9) Decompress1XTable/case3-8 19.1MB/s ± 1% 19.2MB/s ± 1% ~ (p=0.056 n=9+10) Decompress1XTable/pngdata.001-8 374MB/s ± 0% 374MB/s ± 0% ~ (p=0.148 n=10+10) Decompress1XTable/normcount2-8 54.4MB/s ± 1% 54.4MB/s ± 1% ~ (p=0.617 n=10+10) Decompress1XNoTable/digits/100-8 280MB/s ± 0% 280MB/s ± 1% ~ (p=0.951 n=9+10) Decompress1XNoTable/digits/10000-8 366MB/s ± 1% 367MB/s ± 0% ~ (p=0.090 n=10+9) Decompress1XNoTable/digits/262143-8 348MB/s ± 1% 349MB/s ± 0% ~ (p=0.043 n=10+10) Decompress1XNoTable/gettysburg/100-8 276MB/s ± 0% 277MB/s ± 1% +0.44% (p=0.009 n=10+10) Decompress1XNoTable/gettysburg/10000-8 363MB/s ± 1% 363MB/s ± 0% ~ (p=0.041 n=10+7) Decompress1XNoTable/gettysburg/262143-8 349MB/s ± 1% 350MB/s ± 0% ~ (p=0.123 n=10+10) Decompress1XNoTable/twain/100-8 267MB/s ± 0% 268MB/s ± 0% ~ (p=0.052 n=10+10) 
Decompress1XNoTable/twain/10000-8 357MB/s ± 3% 363MB/s ± 0% +1.74% (p=0.000 n=10+10) Decompress1XNoTable/twain/262143-8 320MB/s ± 2% 329MB/s ± 0% +3.09% (p=0.000 n=10+10) Decompress1XNoTable/low-ent.10k/100-8 183MB/s ± 1% 184MB/s ± 0% ~ (p=0.211 n=9+10) Decompress1XNoTable/low-ent.10k/10000-8 377MB/s ± 3% 385MB/s ± 1% +2.14% (p=0.000 n=10+10) Decompress1XNoTable/low-ent.10k/262143-8 386MB/s ± 1% 389MB/s ± 1% +0.84% (p=0.005 n=10+10) Decompress1XNoTable/superlow-ent-10k/262143-8 382MB/s ± 2% 389MB/s ± 1% +1.89% (p=0.001 n=10+10) Decompress1XNoTable/crash2/100-8 276MB/s ± 2% 278MB/s ± 0% ~ (p=0.180 n=10+8) Decompress1XNoTable/crash2/10000-8 373MB/s ± 1% 374MB/s ± 1% ~ (p=0.315 n=10+10) Decompress1XNoTable/crash2/262143-8 373MB/s ± 1% 375MB/s ± 0% ~ (p=0.165 n=10+8) Decompress1XNoTable/endzerobits/100-8 184MB/s ± 0% 184MB/s ± 1% ~ (p=0.845 n=9+9) Decompress1XNoTable/endzerobits/10000-8 384MB/s ± 1% 386MB/s ± 0% +0.61% (p=0.007 n=10+10) Decompress1XNoTable/endzerobits/262143-8 387MB/s ± 2% 389MB/s ± 0% ~ (p=0.963 n=9+8) Decompress1XNoTable/endnonzero/100-8 181MB/s ± 2% 183MB/s ± 0% ~ (p=0.017 n=9+10) Decompress1XNoTable/endnonzero/10000-8 385MB/s ± 0% 382MB/s ± 1% -0.88% (p=0.001 n=8+10) Decompress1XNoTable/endnonzero/262143-8 387MB/s ± 1% 385MB/s ± 2% ~ (p=0.143 n=10+10) Decompress1XNoTable/case1/100-8 278MB/s ± 2% 282MB/s ± 1% ~ (p=0.013 n=10+9) Decompress1XNoTable/case1/10000-8 373MB/s ± 1% 373MB/s ± 0% ~ (p=0.274 n=10+8) Decompress1XNoTable/case1/262143-8 374MB/s ± 1% 374MB/s ± 0% ~ (p=0.589 n=10+9) Decompress1XNoTable/case2/100-8 274MB/s ± 0% 274MB/s ± 0% -0.26% (p=0.002 n=10+9) Decompress1XNoTable/case2/10000-8 378MB/s ± 0% 377MB/s ± 0% ~ (p=0.093 n=10+10) Decompress1XNoTable/case2/262143-8 377MB/s ± 1% 376MB/s ± 1% ~ (p=0.225 n=10+10) Decompress1XNoTable/case3/100-8 266MB/s ± 0% 265MB/s ± 0% -0.20% (p=0.007 n=10+9) Decompress1XNoTable/case3/10000-8 371MB/s ± 0% 372MB/s ± 0% ~ (p=0.211 n=10+9) Decompress1XNoTable/case3/262143-8 373MB/s ± 0% 374MB/s ± 0% ~ 
(p=0.073 n=10+10) Decompress1XNoTable/pngdata.001/100-8 239MB/s ± 0% 239MB/s ± 0% ~ (p=0.889 n=9+10) Decompress1XNoTable/pngdata.001/10000-8 384MB/s ± 0% 384MB/s ± 0% ~ (p=0.228 n=10+8) Decompress1XNoTable/pngdata.001/262143-8 377MB/s ± 0% 379MB/s ± 0% +0.56% (p=0.000 n=10+10) Decompress1XNoTable/normcount2/100-8 281MB/s ± 1% 282MB/s ± 1% ~ (p=0.015 n=10+10) Decompress1XNoTable/normcount2/10000-8 368MB/s ± 0% 370MB/s ± 0% +0.37% (p=0.004 n=10+10) Decompress1XNoTable/normcount2/262143-8 371MB/s ± 0% 371MB/s ± 0% ~ (p=0.034 n=8+10) Decompress4XNoTable/digits/100-8 200MB/s ± 1% 201MB/s ± 0% ~ (p=0.274 n=8+10) Decompress4XNoTable/digits/10000-8 603MB/s ± 0% 622MB/s ± 1% +3.20% (p=0.000 n=8+10) Decompress4XNoTable/digits/262143-8 578MB/s ± 0% 595MB/s ± 1% +2.87% (p=0.000 n=8+10) Decompress4XNoTable/gettysburg/100-8 260MB/s ± 0% 260MB/s ± 1% ~ (p=0.011 n=8+10) Decompress4XNoTable/gettysburg/10000-8 643MB/s ± 0% 657MB/s ± 1% +2.19% (p=0.000 n=10+9) Decompress4XNoTable/gettysburg/262143-8 572MB/s ± 0% 589MB/s ± 0% +2.93% (p=0.000 n=8+10) Decompress4XNoTable/twain/100-8 206MB/s ± 1% 206MB/s ± 1% ~ (p=0.436 n=10+10) Decompress4XNoTable/twain/10000-8 639MB/s ± 1% 653MB/s ± 1% +2.25% (p=0.000 n=10+10) Decompress4XNoTable/twain/262143-8 516MB/s ± 0% 522MB/s ± 1% +1.09% (p=0.004 n=10+10) Decompress4XNoTable/low-ent.10k/100-8 207MB/s ± 1% 207MB/s ± 0% ~ (p=1.000 n=10+9) Decompress4XNoTable/low-ent.10k/10000-8 631MB/s ± 0% 653MB/s ± 0% +3.42% (p=0.000 n=10+9) Decompress4XNoTable/low-ent.10k/262143-8 685MB/s ± 1% 696MB/s ± 0% +1.61% (p=0.000 n=10+10) Decompress4XNoTable/superlow-ent-10k/262143-8 684MB/s ± 1% 695MB/s ± 1% +1.51% (p=0.000 n=9+10) Decompress4XNoTable/case1/100-8 208MB/s ± 1% 207MB/s ± 0% ~ (p=0.353 n=10+10) Decompress4XNoTable/case1/10000-8 601MB/s ± 0% 621MB/s ± 1% +3.22% (p=0.000 n=10+10) Decompress4XNoTable/case1/262143-8 613MB/s ± 1% 632MB/s ± 0% +3.14% (p=0.000 n=10+10) Decompress4XNoTable/case2/100-8 210MB/s ± 2% 208MB/s ± 2% ~ (p=0.315 n=10+9) 
Decompress4XNoTable/case2/10000-8 618MB/s ± 0% 636MB/s ± 0% +2.95% (p=0.000 n=10+10) Decompress4XNoTable/case2/262143-8 635MB/s ± 0% 651MB/s ± 0% +2.56% (p=0.000 n=7+10) Decompress4XNoTable/case3/100-8 199MB/s ± 1% 200MB/s ± 1% ~ (p=0.055 n=10+10) Decompress4XNoTable/case3/10000-8 615MB/s ± 0% 633MB/s ± 1% +2.94% (p=0.000 n=10+10) Decompress4XNoTable/case3/262143-8 620MB/s ± 0% 639MB/s ± 1% +3.00% (p=0.000 n=10+10) Decompress4XNoTable/pngdata.001/100-8 212MB/s ± 0% 211MB/s ± 1% ~ (p=0.211 n=10+9) Decompress4XNoTable/pngdata.001/10000-8 649MB/s ± 0% 667MB/s ± 1% +2.76% (p=0.000 n=10+10) Decompress4XNoTable/pngdata.001/262143-8 646MB/s ± 0% 660MB/s ± 0% +2.28% (p=0.000 n=9+10) Decompress4XNoTable/normcount2/100-8 261MB/s ± 1% 262MB/s ± 1% ~ (p=0.031 n=9+9) Decompress4XNoTable/normcount2/10000-8 589MB/s ± 1% 613MB/s ± 0% +3.99% (p=0.000 n=10+9) Decompress4XNoTable/normcount2/262143-8 585MB/s ± 3% 617MB/s ± 1% +5.57% (p=0.000 n=10+10) Decompress4XNoTableTableLog8/digits-8 579MB/s ± 2% 610MB/s ± 0% +5.33% (p=0.000 n=10+10) Decompress4XTable/digits-8 584MB/s ± 1% 607MB/s ± 1% +3.89% (p=0.000 n=10+10) Decompress4XTable/gettysburg-8 370MB/s ± 0% 373MB/s ± 1% +0.67% (p=0.009 n=10+10) Decompress4XTable/twain-8 512MB/s ± 2% 523MB/s ± 1% +2.08% (p=0.000 n=9+10) Decompress4XTable/low-ent.10k-8 656MB/s ± 1% 677MB/s ± 1% +3.21% (p=0.000 n=10+10) Decompress4XTable/superlow-ent-10k-8 603MB/s ± 4% 626MB/s ± 1% +3.91% (p=0.000 n=9+10) Decompress4XTable/case1-8 21.1MB/s ± 0% 21.0MB/s ± 0% -0.55% (p=0.000 n=9+9) Decompress4XTable/case2-8 17.6MB/s ± 0% 17.6MB/s ± 1% ~ (p=0.736 n=9+10) Decompress4XTable/case3-8 18.7MB/s ± 1% 18.7MB/s ± 1% ~ (p=0.642 n=10+10) Decompress4XTable/pngdata.001-8 648MB/s ± 0% 657MB/s ± 0% +1.50% (p=0.000 n=10+8) Decompress4XTable/normcount2-8 49.7MB/s ± 1% 49.7MB/s ± 1% ~ (p=0.839 n=10+10) [Geo mean] 271MB/s 274MB/s +0.96% ```
1 parent 272358c commit 60e376b

File tree

2 files changed

+340
-350
lines changed

2 files changed

+340
-350
lines changed

huff0/_generate/gen.go

Lines changed: 56 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -47,14 +47,11 @@ func (d decompress4x) generateProcedure(name string) {
4747
Doc(name+" is an x86 assembler implementation of Decompress4X when tablelog > 8.decodes a sequence", "")
4848
Pragma("noescape")
4949

50-
exhausted := GP64()
51-
XORQ(exhausted.As64(), exhausted.As64()) // exhausted = false
52-
50+
exhausted := GP8()
51+
buffer := GP64()
5352
limit := GP64()
5453

55-
bufferOrigin := GP64()
5654
peekBits := GP64()
57-
buffer := GP64()
5855
dstEvery := GP64()
5956
table := GP64()
6057

@@ -64,7 +61,7 @@ func (d decompress4x) generateProcedure(name string) {
6461
{
6562
ctx := Dereference(Param("ctx"))
6663
Load(ctx.Field("peekBits"), peekBits)
67-
Load(ctx.Field("out"), bufferOrigin)
64+
Load(ctx.Field("out"), buffer)
6865
Load(ctx.Field("limit"), limit)
6966
Load(ctx.Field("dstEvery"), dstEvery)
7067
Load(ctx.Field("tbl"), table)
@@ -74,27 +71,26 @@ func (d decompress4x) generateProcedure(name string) {
7471
Comment("Main loop")
7572
Label("main_loop")
7673

77-
MOVQ(bufferOrigin, buffer)
78-
// Check if we have space
74+
// Check if we have space. We could zero exhausted outside the loop,
75+
// but doing it here is a hint to the CPU that there's no dependency
76+
// on the previous iteration's value.
77+
XORL(exhausted.As32(), exhausted.As32())
7978
CMPQ(buffer, limit)
8079
SETGE(exhausted.As8())
81-
d.decodeTwoValues(0, br, peekBits, table, buffer, exhausted)
82-
ADDQ(dstEvery, buffer)
83-
d.decodeTwoValues(1, br, peekBits, table, buffer, exhausted)
84-
ADDQ(dstEvery, buffer)
85-
d.decodeTwoValues(2, br, peekBits, table, buffer, exhausted)
86-
ADDQ(dstEvery, buffer)
87-
d.decodeTwoValues(3, br, peekBits, table, buffer, exhausted)
80+
d.decodeTwoValues(0, br, peekBits, table, buffer, dstEvery, exhausted)
81+
d.decodeTwoValues(1, br, peekBits, table, buffer, dstEvery, exhausted)
82+
d.decodeTwoValues(2, br, peekBits, table, buffer, dstEvery, exhausted)
83+
d.decodeTwoValues(3, br, peekBits, table, buffer, dstEvery, exhausted)
8884

89-
ADDQ(U8(2), bufferOrigin) // off += 2
85+
ADDQ(U8(2), buffer) // off += 2
9086

91-
TESTB(exhausted.As8(), exhausted.As8()) // any br[i].ofs < 4?
87+
TESTB(exhausted, exhausted) // any br[i].ofs < 4?
9288
JZ(LabelRef("main_loop"))
9389

9490
{
9591
ctx := Dereference(Param("ctx"))
9692
ctxout, _ := ctx.Field("out").Resolve()
97-
decoded := bufferOrigin
93+
decoded := buffer
9894
SUBQ(ctxout.Addr, decoded)
9995
SHLQ(U8(2), decoded) // decoded *= 4
10096

@@ -105,15 +101,14 @@ func (d decompress4x) generateProcedure(name string) {
105101
}
106102

107103
// TODO [wmu]: I believe it's doable in avo, but can't figure out how to deal
108-
//
109-
// with arbitrary pointers to a given type
104+
// with arbitrary pointers to a given type
110105
const bitReader_in = 0
111106
const bitReader_off = bitReader_in + 3*8 // {ptr, len, cap}
112107
const bitReader_value = bitReader_off + 8
113108
const bitReader_bitsRead = bitReader_value + 8
114109
const bitReader__size = bitReader_bitsRead + 8
115110

116-
func (d decompress4x) decodeTwoValues(id int, br, peekBits, table, buffer, exhausted reg.GPVirtual) {
111+
func (d decompress4x) decodeTwoValues(id int, br, peekBits, table, buffer, dstEvery, exhausted reg.GPVirtual) {
117112
brValue, brBitsRead := d.fillFast32(id, 32, br, exhausted)
118113

119114
val := GP64()
@@ -149,7 +144,7 @@ func (d decompress4x) decodeTwoValues(id int, br, peekBits, table, buffer, exhau
149144
Comment("these two writes get coalesced")
150145
Comment("out[id * dstEvery + 0] = uint8(v0.entry >> 8)")
151146
Comment("out[id * dstEvery + 1] = uint8(v1.entry >> 8)")
152-
MOVW(out.As16(), Mem{Base: buffer})
147+
MOVW(out.As16(), bufferIndex(id, buffer, dstEvery))
153148

154149
Comment("update the bitreader structure")
155150
offset := id * bitReader__size
@@ -163,14 +158,11 @@ func (d decompress4x) generateProcedure4x8bit(name string) {
163158
Doc(name+" is an x86 assembler implementation of Decompress4X when tablelog > 8.decodes a sequence", "")
164159
Pragma("noescape")
165160

166-
exhausted := GP64() // Fixed since we need 8H
167-
XORQ(exhausted.As64(), exhausted.As64()) // exhausted = false
168-
169-
bufferOrigin := GP64()
161+
exhausted := GP8()
162+
buffer := GP64()
170163
limit := GP64()
171164

172165
peekBits := GP64()
173-
buffer := GP64()
174166
dstEvery := GP64()
175167
table := GP64()
176168

@@ -180,7 +172,7 @@ func (d decompress4x) generateProcedure4x8bit(name string) {
180172
{
181173
ctx := Dereference(Param("ctx"))
182174
Load(ctx.Field("peekBits"), peekBits)
183-
Load(ctx.Field("out"), bufferOrigin)
175+
Load(ctx.Field("out"), buffer)
184176
Load(ctx.Field("limit"), limit)
185177
Load(ctx.Field("dstEvery"), dstEvery)
186178
Load(ctx.Field("tbl"), table)
@@ -190,27 +182,26 @@ func (d decompress4x) generateProcedure4x8bit(name string) {
190182
Comment("Main loop")
191183
Label("main_loop")
192184

193-
MOVQ(bufferOrigin, buffer)
194-
// Check if we have space
185+
// Check if we have space. We could zero exhausted outside the loop,
186+
// but doing it here is a hint to the CPU that there's no dependency
187+
// on the previous iteration's value.
188+
XORL(exhausted.As32(), exhausted.As32())
195189
CMPQ(buffer, limit)
196-
SETGE(exhausted.As8())
197-
d.decodeFourValues(0, br, peekBits, table, buffer, exhausted)
198-
ADDQ(dstEvery, buffer)
199-
d.decodeFourValues(1, br, peekBits, table, buffer, exhausted)
200-
ADDQ(dstEvery, buffer)
201-
d.decodeFourValues(2, br, peekBits, table, buffer, exhausted)
202-
ADDQ(dstEvery, buffer)
203-
d.decodeFourValues(3, br, peekBits, table, buffer, exhausted)
190+
SETGE(exhausted)
191+
d.decodeFourValues(0, br, peekBits, table, buffer, dstEvery, exhausted)
192+
d.decodeFourValues(1, br, peekBits, table, buffer, dstEvery, exhausted)
193+
d.decodeFourValues(2, br, peekBits, table, buffer, dstEvery, exhausted)
194+
d.decodeFourValues(3, br, peekBits, table, buffer, dstEvery, exhausted)
204195

205-
ADDQ(U8(4), bufferOrigin) // off += 4
196+
ADDQ(U8(4), buffer) // off += 4
206197

207-
TESTB(exhausted.As8(), exhausted.As8()) // any br[i].ofs < 4?
198+
TESTB(exhausted, exhausted) // any br[i].ofs < 4?
208199
JZ(LabelRef("main_loop"))
209200

210201
{
211202
ctx := Dereference(Param("ctx"))
212203
ctxout, _ := ctx.Field("out").Resolve()
213-
decoded := bufferOrigin
204+
decoded := buffer
214205
SUBQ(ctxout.Addr, decoded)
215206
SHLQ(U8(2), decoded) // decoded *= 4
216207

@@ -219,7 +210,7 @@ func (d decompress4x) generateProcedure4x8bit(name string) {
219210
RET()
220211
}
221212

222-
func (d decompress4x) decodeFourValues(id int, br, peekBits, table, buffer, exhausted reg.GPVirtual) {
213+
func (d decompress4x) decodeFourValues(id int, br, peekBits, table, buffer, dstEvery, exhausted reg.GPVirtual) {
223214
brValue, brBitsRead := d.fillFast32(id, 32, br, exhausted)
224215

225216
decompress := func(valID int, outByte reg.Register) {
@@ -253,14 +244,29 @@ func (d decompress4x) decodeFourValues(id int, br, peekBits, table, buffer, exha
253244
Comment("out[id * dstEvery + 1] = uint8(v1.entry >> 8)")
254245
Comment("out[id * dstEvery + 3] = uint8(v2.entry >> 8)")
255246
Comment("out[id * dstEvery + 4] = uint8(v3.entry >> 8)")
256-
MOVL(out.As32(), Mem{Base: buffer})
247+
MOVL(out.As32(), bufferIndex(id, buffer, dstEvery))
257248

258249
Comment("update the bitreader structure")
259250
offset := id * bitReader__size
260251
MOVQ(brValue, Mem{Base: br, Disp: offset + bitReader_value})
261252
MOVB(brBitsRead.As8(), Mem{Base: br, Disp: offset + bitReader_bitsRead})
262253
}
263254

255+
func bufferIndex(id int, buffer, dstEvery reg.GPVirtual) Mem {
256+
switch id {
257+
case 0:
258+
return Mem{Base: buffer}
259+
case 1, 2:
260+
return Mem{Base: buffer, Index: dstEvery, Scale: byte(id)}
261+
case 3:
262+
stride3 := GP64() // stride3 := 3*dstEvery
263+
LEAQ(Mem{Base: dstEvery, Index: dstEvery, Scale: 2}, stride3)
264+
return Mem{Base: buffer, Index: stride3, Scale: 1}
265+
default:
266+
panic("id must be >=0, <4")
267+
}
268+
}
269+
264270
func (d decompress4x) fillFast32(id, atLeast int, br, exhausted reg.GPVirtual) (brValue, brBitsRead reg.GPVirtual) {
265271
if atLeast > 32 {
266272
panic(fmt.Sprintf("at least (%d) cannot be >32", atLeast))
@@ -297,11 +303,11 @@ func (d decompress4x) fillFast32(id, atLeast int, br, exhausted reg.GPVirtual) (
297303
MOVQ(brOffset, Mem{Base: br, Disp: offset + bitReader_off})
298304
ORQ(tmp.As64(), brValue)
299305
{
300-
Commentf("exhausted = exhausted || (br%d.off < 4)", id)
306+
Commentf("exhausted += (br%d.off < 4)", id)
301307
CMPQ(brOffset, U8(4))
302-
tmp = GP64()
303-
SETLT(tmp.As8())
304-
ORB(tmp.As8(), exhausted.As8())
308+
// Add carry from brOffset-4. We do this at most four times per iteration,
309+
// and every iteration resets exhausted's lower byte, so it doesn't overflow.
310+
ADCB(I8(0), exhausted)
305311
}
306312

307313
Label("skip_fill" + strconv.Itoa(id))
@@ -409,7 +415,7 @@ func (d decompress1x) generateProcedure(name string) {
409415
outCap := GP64()
410416
Load(ctx.Field("outCap"), outCap)
411417
CMPQ(outCap, U8(4))
412-
JB(LabelRef("error_max_decoded_size_exeeded"))
418+
JB(LabelRef("error_max_decoded_size_exceeded"))
413419

414420
LEAQ(Mem{Base: buffer, Index: outCap, Scale: 1}, bufferEnd)
415421

@@ -432,7 +438,7 @@ func (d decompress1x) generateProcedure(name string) {
432438
tmp := GP64()
433439
LEAQ(Mem{Base: buffer, Disp: 4}, tmp)
434440
CMPQ(tmp, bufferEnd)
435-
JGE(LabelRef("error_max_decoded_size_exeeded"))
441+
JGE(LabelRef("error_max_decoded_size_exceeded"))
436442
}
437443

438444
decompress := func(id int, out reg.Register) {
@@ -474,7 +480,7 @@ func (d decompress1x) generateProcedure(name string) {
474480
RET()
475481

476482
Comment("Report error")
477-
Label("error_max_decoded_size_exeeded")
483+
Label("error_max_decoded_size_exceeded")
478484
{
479485
ctx := Dereference(Param("ctx"))
480486
tmp := GP64()

0 commit comments

Comments
 (0)