Skip to content

Commit c847bde

Browse files
authored
s2: Support ReadAt in ReadSeeker (#747)
Also simplifies seeking.
1 parent 69922df commit c847bde

File tree

3 files changed

+183
-29
lines changed

3 files changed

+183
-29
lines changed

s2/decode.go

Lines changed: 92 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -880,15 +880,20 @@ func (r *Reader) Skip(n int64) error {
880880
// See Reader.ReadSeeker
881881
type ReadSeeker struct {
882882
*Reader
883+
readAtMu sync.Mutex
883884
}
884885

885-
// ReadSeeker will return an io.ReadSeeker compatible version of the reader.
886+
// ReadSeeker will return an io.ReadSeeker and io.ReaderAt
887+
// compatible version of the reader.
886888
// If 'random' is specified the returned io.Seeker can be used for
887889
// random seeking, otherwise only forward seeking is supported.
888890
// Enabling random seeking requires the original input to support
889891
// the io.Seeker interface.
890892
// A custom index can be specified which will be used if supplied.
891893
// When using a custom index, it will not be read from the input stream.
894+
// The ReadAt position will affect regular reads and the current position of Seek.
895+
// So using Read after ReadAt will continue from where the ReadAt stopped.
896+
// No functions should be used concurrently.
892897
// The returned ReadSeeker contains a shallow reference to the existing Reader,
893898
// meaning changes performed to one are reflected in the other.
894899
func (r *Reader) ReadSeeker(random bool, index []byte) (*ReadSeeker, error) {
@@ -958,42 +963,55 @@ func (r *ReadSeeker) Seek(offset int64, whence int) (int64, error) {
958963
// Reset on EOF
959964
r.err = nil
960965
}
961-
if offset == 0 && whence == io.SeekCurrent {
962-
return r.blockStart + int64(r.i), nil
966+
967+
// Calculate absolute offset.
968+
absOffset := offset
969+
970+
switch whence {
971+
case io.SeekStart:
972+
case io.SeekCurrent:
973+
absOffset = r.blockStart + int64(r.i) + offset
974+
case io.SeekEnd:
975+
if r.index == nil {
976+
return 0, ErrUnsupported
977+
}
978+
absOffset = r.index.TotalUncompressed + offset
979+
default:
980+
r.err = ErrUnsupported
981+
return 0, r.err
982+
}
983+
984+
if absOffset < 0 {
985+
return 0, errors.New("seek before start of file")
963986
}
987+
964988
if !r.readHeader {
965989
// Make sure we read the header.
966990
_, r.err = r.Read([]byte{})
991+
if r.err != nil {
992+
return 0, r.err
993+
}
967994
}
995+
996+
// If we are inside the current block there is no need to seek.
997+
// This includes no offset changes.
998+
if absOffset >= r.blockStart && absOffset < r.blockStart+int64(r.j) {
999+
r.i = int(absOffset - r.blockStart)
1000+
return r.blockStart + int64(r.i), nil
1001+
}
1002+
9681003
rs, ok := r.r.(io.ReadSeeker)
9691004
if r.index == nil || !ok {
970-
if whence == io.SeekCurrent && offset >= 0 {
971-
err := r.Skip(offset)
972-
return r.blockStart + int64(r.i), err
973-
}
974-
if whence == io.SeekStart && offset >= r.blockStart+int64(r.i) {
975-
err := r.Skip(offset - r.blockStart - int64(r.i))
1005+
currOffset := r.blockStart + int64(r.i)
1006+
if absOffset >= currOffset {
1007+
err := r.Skip(absOffset - currOffset)
9761008
return r.blockStart + int64(r.i), err
9771009
}
9781010
return 0, ErrUnsupported
979-
9801011
}
9811012

982-
switch whence {
983-
case io.SeekCurrent:
984-
offset += r.blockStart + int64(r.i)
985-
case io.SeekEnd:
986-
if offset > 0 {
987-
return 0, errors.New("seek after end of file")
988-
}
989-
offset = r.index.TotalUncompressed + offset
990-
}
991-
992-
if offset < 0 {
993-
return 0, errors.New("seek before start of file")
994-
}
995-
996-
c, u, err := r.index.Find(offset)
1013+
// We can seek and we have an index.
1014+
c, u, err := r.index.Find(absOffset)
9971015
if err != nil {
9981016
return r.blockStart + int64(r.i), err
9991017
}
@@ -1004,12 +1022,57 @@ func (r *ReadSeeker) Seek(offset int64, whence int) (int64, error) {
10041022
return 0, err
10051023
}
10061024

1007-
r.i = r.j // Remove rest of current block.
1008-
if u < offset {
1025+
r.i = r.j // Remove rest of current block.
1026+
r.blockStart = u - int64(r.j) // Adjust current block start for accounting.
1027+
if u < absOffset {
10091028
// Forward inside block
1010-
return offset, r.Skip(offset - u)
1029+
return absOffset, r.Skip(absOffset - u)
1030+
}
1031+
if u > absOffset {
1032+
return 0, fmt.Errorf("s2 seek: (internal error) u (%d) > absOffset (%d)", u, absOffset)
1033+
}
1034+
return absOffset, nil
1035+
}
1036+
1037+
// ReadAt reads len(p) bytes into p starting at offset off in the
1038+
// underlying input source. It returns the number of bytes
1039+
// read (0 <= n <= len(p)) and any error encountered.
1040+
//
1041+
// When ReadAt returns n < len(p), it returns a non-nil error
1042+
// explaining why more bytes were not returned. In this respect,
1043+
// ReadAt is stricter than Read.
1044+
//
1045+
// Even if ReadAt returns n < len(p), it may use all of p as scratch
1046+
// space during the call. If some data is available but not len(p) bytes,
1047+
// ReadAt blocks until either all the data is available or an error occurs.
1048+
// In this respect ReadAt is different from Read.
1049+
//
1050+
// If the n = len(p) bytes returned by ReadAt are at the end of the
1051+
// input source, ReadAt may return either err == EOF or err == nil.
1052+
//
1053+
// If ReadAt is reading from an input source with a seek offset,
1054+
// ReadAt should not affect nor be affected by the underlying
1055+
// seek offset.
1056+
//
1057+
// Clients of ReadAt can execute parallel ReadAt calls on the
1058+
// same input source. This is, however, not recommended, as the calls are serialized by a mutex.
1059+
func (r *ReadSeeker) ReadAt(p []byte, offset int64) (int, error) {
1060+
r.readAtMu.Lock()
1061+
defer r.readAtMu.Unlock()
1062+
_, err := r.Seek(offset, io.SeekStart)
1063+
if err != nil {
1064+
return 0, err
1065+
}
1066+
n := 0
1067+
for n < len(p) {
1068+
n2, err := r.Read(p[n:])
1069+
if err != nil {
1070+
// This will include io.EOF
1071+
return n + n2, err
1072+
}
1073+
n += n2
10111074
}
1012-
return offset, nil
1075+
return n, nil
10131076
}
10141077

10151078
// ReadByte satisfies the io.ByteReader interface.

s2/encode_test.go

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -366,6 +366,37 @@ func TestIndex(t *testing.T) {
366366
}
367367
})
368368
}
369+
t.Run(fmt.Sprintf("ReadAt"), func(t *testing.T) {
370+
// Read it from a seekable stream
371+
dec = NewReader(bytes.NewReader(compressed))
372+
373+
rs, err := dec.ReadSeeker(true, nil)
374+
fatalErr(t, err)
375+
376+
// Read a little...
377+
var tmp = make([]byte, len(input)/2)
378+
_, err = io.ReadFull(rs, tmp[:])
379+
fatalErr(t, err)
380+
wantLen := len(tmp)
381+
if wantLen+int(wantOffset) > len(input) {
382+
wantLen = len(input) - int(wantOffset)
383+
}
384+
// Read from wantOffset
385+
n, err := rs.ReadAt(tmp, wantOffset)
386+
if n != wantLen {
387+
t.Errorf("got length %d, want %d", n, wantLen)
388+
}
389+
if err != io.EOF {
390+
fatalErr(t, err)
391+
}
392+
want := want[:n]
393+
got := tmp[:n]
394+
395+
// Compare the bytes returned by ReadAt with the expected data.
396+
if !bytes.Equal(got, want) {
397+
t.Error("Result mismatch", wantOffset)
398+
}
399+
})
369400
})
370401
}
371402
}

s2/index_test.go

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -234,6 +234,66 @@ func TestSeeking(t *testing.T) {
234234
}
235235
})
236236
}
237+
// Test seek current
238+
t.Run(fmt.Sprintf("seekCurrent"), func(t *testing.T) {
239+
dec := s2.NewReader(io.ReadSeeker(bytes.NewReader(compressed.Bytes())))
240+
241+
seeker, err := dec.ReadSeeker(true, index)
242+
if err != nil {
243+
t.Fatal(err)
244+
}
245+
buf := make([]byte, 25)
246+
rng := rand.New(rand.NewSource(0))
247+
var currentOff int64
248+
for i := 0; i < nElems/10; i++ {
249+
rec := rng.Intn(nElems)
250+
offset := int64(rec * 25)
251+
//t.Logf("Reading record %d", rec)
252+
absOff, err := seeker.Seek(offset-currentOff, io.SeekCurrent)
253+
if err != nil {
254+
t.Fatalf("Failed to seek: %v", err)
255+
}
256+
if absOff != offset {
257+
t.Fatalf("Unexpected seek offset: want %v, got %v", offset, absOff)
258+
}
259+
_, err = io.ReadFull(dec, buf)
260+
if err != nil {
261+
t.Fatalf("Failed to seek: %v", err)
262+
}
263+
expected := fmt.Sprintf("Item %019d\n", rec)
264+
if string(buf) != expected {
265+
t.Fatalf("Expected %q, got %q", expected, buf)
266+
}
267+
// Adjust offset
268+
currentOff = offset + int64(len(buf))
269+
}
270+
})
271+
// Test ReadAt
272+
t.Run(fmt.Sprintf("ReadAt"), func(t *testing.T) {
273+
dec := s2.NewReader(io.ReadSeeker(bytes.NewReader(compressed.Bytes())))
274+
275+
seeker, err := dec.ReadSeeker(true, index)
276+
if err != nil {
277+
t.Fatal(err)
278+
}
279+
buf := make([]byte, 25)
280+
rng := rand.New(rand.NewSource(0))
281+
for i := 0; i < nElems/10; i++ {
282+
rec := rng.Intn(nElems)
283+
offset := int64(rec * 25)
284+
n, err := seeker.ReadAt(buf, offset)
285+
if err != nil {
286+
t.Fatalf("Failed to seek: %v", err)
287+
}
288+
if n != len(buf) {
289+
t.Fatalf("Unexpected read length: want %v, got %v", len(buf), n)
290+
}
291+
expected := fmt.Sprintf("Item %019d\n", rec)
292+
if string(buf) != expected {
293+
t.Fatalf("Expected %q, got %q", expected, buf)
294+
}
295+
}
296+
})
237297
}
238298

239299
// ExampleIndexStream shows an example of indexing a stream

0 commit comments

Comments
 (0)