Skip to content

Commit 07a5f92

Browse files
authored
s2: Fix absolute forward seeks (#633)
* s2: Fix absolute forward seeks. Un-indexed forward seeks do not preserve the correct absolute offset when fully skipping blocks. If the stream is seekable, but no index is provided, allow non-random seeking. Fixes #632
1 parent 51e1025 commit 07a5f92

File tree

2 files changed

+134
-0
lines changed

2 files changed

+134
-0
lines changed

s2/decode.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -791,6 +791,7 @@ func (r *Reader) Skip(n int64) error {
791791
} else {
792792
// Skip block completely
793793
n -= int64(dLen)
794+
r.blockStart += int64(dLen)
794795
dLen = 0
795796
}
796797
r.i, r.j = 0, dLen
@@ -921,6 +922,15 @@ func (r *Reader) ReadSeeker(random bool, index []byte) (*ReadSeeker, error) {
921922
err = r.index.LoadStream(rs)
922923
if err != nil {
923924
if err == ErrUnsupported {
925+
// If we don't require random seeking, reset input and return.
926+
if !random {
927+
_, err = rs.Seek(pos, io.SeekStart)
928+
if err != nil {
929+
return nil, ErrCantSeek{Reason: "resetting stream returned: " + err.Error()}
930+
}
931+
r.index = nil
932+
return &ReadSeeker{Reader: r}, nil
933+
}
924934
return nil, ErrCantSeek{Reason: "input stream does not contain an index"}
925935
}
926936
return nil, ErrCantSeek{Reason: "reading index returned: " + err.Error()}

s2/index_test.go

Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import (
77
"io/ioutil"
88
"math/rand"
99
"sync"
10+
"testing"
1011

1112
"github.com/klauspost/compress/s2"
1213
)
@@ -99,3 +100,126 @@ func ExampleIndex_Load() {
99100
//Successfully skipped forward to 4444440
100101
//Successfully skipped forward to 4999995
101102
}
103+
104+
func TestSeeking(t *testing.T) {
105+
compressed := bytes.Buffer{}
106+
107+
// Use small blocks so there are plenty of them.
108+
enc := s2.NewWriter(&compressed, s2.WriterBlockSize(16<<10))
109+
var nElems = 1_000_000
110+
var testSizes = []int{100, 1_000, 10_000, 20_000, 100_000, 200_000, 400_000}
111+
if testing.Short() {
112+
nElems = 100_000
113+
testSizes = []int{100, 1_000, 10_000, 20_000}
114+
}
115+
testSizes = append(testSizes, nElems-1)
116+
//24 bytes per item plus \n = 25 bytes per record
117+
for i := 0; i < nElems; i++ {
118+
fmt.Fprintf(enc, "Item %019d\n", i)
119+
}
120+
121+
index, err := enc.CloseIndex()
122+
if err != nil {
123+
t.Fatal(err)
124+
}
125+
126+
for _, skip := range testSizes {
127+
t.Run(fmt.Sprintf("noSeekSkip=%d", skip), func(t *testing.T) {
128+
dec := s2.NewReader(io.NopCloser(bytes.NewReader(compressed.Bytes())))
129+
seeker, err := dec.ReadSeeker(false, nil)
130+
if err != nil {
131+
t.Fatal(err)
132+
}
133+
buf := make([]byte, 25)
134+
for rec := 0; rec < nElems; rec += skip {
135+
offset := int64(rec * 25)
136+
//t.Logf("Reading record %d", rec)
137+
_, err := seeker.Seek(offset, io.SeekStart)
138+
if err != nil {
139+
t.Fatalf("Failed to seek: %v", err)
140+
}
141+
_, err = io.ReadFull(dec, buf)
142+
if err != nil {
143+
t.Fatalf("Failed to seek: %v", err)
144+
}
145+
expected := fmt.Sprintf("Item %019d\n", rec)
146+
if string(buf) != expected {
147+
t.Fatalf("Expected %q, got %q", expected, buf)
148+
}
149+
}
150+
})
151+
t.Run(fmt.Sprintf("seekSkip=%d", skip), func(t *testing.T) {
152+
dec := s2.NewReader(io.ReadSeeker(bytes.NewReader(compressed.Bytes())))
153+
seeker, err := dec.ReadSeeker(false, nil)
154+
if err != nil {
155+
t.Fatal(err)
156+
}
157+
buf := make([]byte, 25)
158+
for rec := 0; rec < nElems; rec += skip {
159+
offset := int64(rec * 25)
160+
//t.Logf("Reading record %d", rec)
161+
_, err := seeker.Seek(offset, io.SeekStart)
162+
if err != nil {
163+
t.Fatalf("Failed to seek: %v", err)
164+
}
165+
_, err = io.ReadFull(dec, buf)
166+
if err != nil {
167+
t.Fatalf("Failed to seek: %v", err)
168+
}
169+
expected := fmt.Sprintf("Item %019d\n", rec)
170+
if string(buf) != expected {
171+
t.Fatalf("Expected %q, got %q", expected, buf)
172+
}
173+
}
174+
})
175+
t.Run(fmt.Sprintf("noSeekIndexSkip=%d", skip), func(t *testing.T) {
176+
dec := s2.NewReader(io.NopCloser(bytes.NewReader(compressed.Bytes())))
177+
seeker, err := dec.ReadSeeker(false, index)
178+
if err != nil {
179+
t.Fatal(err)
180+
}
181+
buf := make([]byte, 25)
182+
for rec := 0; rec < nElems; rec += skip {
183+
offset := int64(rec * 25)
184+
//t.Logf("Reading record %d", rec)
185+
_, err := seeker.Seek(offset, io.SeekStart)
186+
if err != nil {
187+
t.Fatalf("Failed to seek: %v", err)
188+
}
189+
_, err = io.ReadFull(dec, buf)
190+
if err != nil {
191+
t.Fatalf("Failed to seek: %v", err)
192+
}
193+
expected := fmt.Sprintf("Item %019d\n", rec)
194+
if string(buf) != expected {
195+
t.Fatalf("Expected %q, got %q", expected, buf)
196+
}
197+
}
198+
})
199+
t.Run(fmt.Sprintf("seekIndexSkip=%d", skip), func(t *testing.T) {
200+
dec := s2.NewReader(io.ReadSeeker(bytes.NewReader(compressed.Bytes())))
201+
202+
seeker, err := dec.ReadSeeker(false, index)
203+
if err != nil {
204+
t.Fatal(err)
205+
}
206+
buf := make([]byte, 25)
207+
for rec := 0; rec < nElems; rec += skip {
208+
offset := int64(rec * 25)
209+
//t.Logf("Reading record %d", rec)
210+
_, err := seeker.Seek(offset, io.SeekStart)
211+
if err != nil {
212+
t.Fatalf("Failed to seek: %v", err)
213+
}
214+
_, err = io.ReadFull(dec, buf)
215+
if err != nil {
216+
t.Fatalf("Failed to seek: %v", err)
217+
}
218+
expected := fmt.Sprintf("Item %019d\n", rec)
219+
if string(buf) != expected {
220+
t.Fatalf("Expected %q, got %q", expected, buf)
221+
}
222+
}
223+
})
224+
}
225+
}

0 commit comments

Comments
 (0)