@@ -20,6 +20,7 @@ import (
2020 "math"
2121 "sort"
2222 "sync"
23+ "sync/atomic"
2324
2425 "github.com/RoaringBitmap/roaring"
2526 index "github.com/blevesearch/bleve_index_api"
@@ -32,6 +33,10 @@ var NewSegmentBufferNumResultsBump int = 100
3233var NewSegmentBufferNumResultsFactor float64 = 1.0
3334var NewSegmentBufferAvgBytesPerDocFactor float64 = 1.0
3435
// CollectDiskStats controls whether disk stats are collected from the
// segment files during indexing and querying.
38+ var CollectDiskStats bool
39+
3540// ValidateDocFields can be set by applications to perform additional checks
3641// on fields in a document being added to a new segment, by default it does
3742// nothing.
@@ -80,6 +85,7 @@ func (*ZapPlugin) newWithChunkMode(results []index.Document,
8085 if err == nil && s .reset () == nil {
8186 s .lastNumDocs = len (results )
8287 s .lastOutSize = len (br .Bytes ())
88+ sb .setBytesWritten (s .getBytesWritten ())
8389 interimPool .Put (s )
8490 }
8591
@@ -141,6 +147,9 @@ type interim struct {
141147
142148 lastNumDocs int
143149 lastOutSize int
150+
151+ // atomic access to this variable
152+ bytesWritten uint64
144153}
145154
146155func (s * interim ) reset () (err error ) {
@@ -484,6 +493,16 @@ func (s *interim) processDocument(docNum uint64,
484493 }
485494}
486495
496+ func (s * interim ) getBytesWritten () uint64 {
497+ return atomic .LoadUint64 (& s .bytesWritten )
498+ }
499+
500+ func (s * interim ) incrementBytesWritten (val uint64 ) {
501+ if CollectDiskStats {
502+ atomic .AddUint64 (& s .bytesWritten , val )
503+ }
504+ }
505+
487506func (s * interim ) writeStoredFields () (
488507 storedIndexOffset uint64 , err error ) {
489508 varBuf := make ([]byte , binary .MaxVarintLen64 )
@@ -559,7 +578,7 @@ func (s *interim) writeStoredFields() (
559578 metaBytes := s .metaBuf .Bytes ()
560579
561580 compressed = snappy .Encode (compressed [:cap (compressed )], data )
562-
581+ s . incrementBytesWritten ( uint64 ( len ( compressed )))
563582 docStoredOffsets [docNum ] = uint64 (s .w .Count ())
564583
565584 _ , err := writeUvarints (s .w ,
@@ -597,6 +616,10 @@ func (s *interim) writeStoredFields() (
597616 return storedIndexOffset , nil
598617}
599618
619+ func (s * interim ) setBytesWritten (val uint64 ) {
620+ atomic .StoreUint64 (& s .bytesWritten , val )
621+ }
622+
600623func (s * interim ) writeDicts () (fdvIndexOffset uint64 , dictOffsets []uint64 , err error ) {
601624 dictOffsets = make ([]uint64 , len (s .FieldsInv ))
602625
@@ -682,7 +705,7 @@ func (s *interim) writeDicts() (fdvIndexOffset uint64, dictOffsets []uint64, err
682705 if err != nil {
683706 return 0 , nil , err
684707 }
685-
708+ prevBytesWritten := locEncoder . getBytesWritten ()
686709 for _ , loc := range locs [locOffset : locOffset + freqNorm .numLocs ] {
687710 err = locEncoder .Add (docNum ,
688711 uint64 (loc .fieldID ), loc .pos , loc .start , loc .end ,
@@ -696,7 +719,9 @@ func (s *interim) writeDicts() (fdvIndexOffset uint64, dictOffsets []uint64, err
696719 return 0 , nil , err
697720 }
698721 }
699-
722+ if locEncoder .getBytesWritten ()- prevBytesWritten > 0 {
723+ s .incrementBytesWritten (locEncoder .getBytesWritten () - prevBytesWritten )
724+ }
700725 locOffset += freqNorm .numLocs
701726 }
702727
@@ -750,6 +775,8 @@ func (s *interim) writeDicts() (fdvIndexOffset uint64, dictOffsets []uint64, err
750775 return 0 , nil , err
751776 }
752777
778+ s .incrementBytesWritten (uint64 (len (vellumData )))
779+
753780 // reset vellum for reuse
754781 s .builderBuf .Reset ()
755782
@@ -764,6 +791,7 @@ func (s *interim) writeDicts() (fdvIndexOffset uint64, dictOffsets []uint64, err
764791 if err != nil {
765792 return 0 , nil , err
766793 }
794+
767795 fdvEncoder := newChunkedContentCoder (chunkSize , uint64 (len (s .results )- 1 ), s .w , false )
768796 if s .IncludeDocValues [fieldID ] {
769797 for docNum , docTerms := range docTermMap {
@@ -772,13 +800,16 @@ func (s *interim) writeDicts() (fdvIndexOffset uint64, dictOffsets []uint64, err
772800 if err != nil {
773801 return 0 , nil , err
774802 }
803+ // s.incrementBytesWritten(uint64(len(docTerms)))
775804 }
776805 }
777806 err = fdvEncoder .Close ()
778807 if err != nil {
779808 return 0 , nil , err
780809 }
781810
811+ s .setBytesWritten (s .getBytesWritten ())
812+
782813 fdvOffsetsStart [fieldID ] = uint64 (s .w .Count ())
783814
784815 _ , err = fdvEncoder .Write ()
0 commit comments