Skip to content

Commit 0978757

Browse files
authored
[pkg/stanza] log when files are rotated/moved/truncated (#33237)
**Description:** <Describe what has changed.> <!--Ex. Fixing a bug - Describe the bug and how this fixes the issue. Ex. Adding a feature - Explain what this achieves.--> This PR adds the logging part from #31256. With this addition every time that is identified that file is rotated either by move/create or copy/truncate, proper logging takes place. **Link to tracking Issue:** <Issue number if applicable> #31256 **Testing:** <Describe what testing was performed and which tests were added.> Updated existing unit tests ### How to test this manually Using the following config: ```yaml receivers: filelog: start_at: beginning poll_interval: 5s include: - /var/log/busybox/monitoring/stable*.log exporters: debug: verbosity: detailed service: telemetry: logs: level: info pipelines: logs: receivers: [filelog] exporters: [debug] processors: [] ``` #### Testing truncate (out of pattern) ```console echo "$(date '+%FT%H:%M:%S.%NZ') some line1" >> /var/log/busybox/monitoring/stable_trunc.log && echo "$(date '+%FT%H:%M:%S.%NZ') some line2" >> /var/log/busybox/monitoring/stable_trunc.log && echo "$(date '+%FT%H:%M:%S.%NZ') some line3" >> /var/log/busybox/monitoring/stable_trunc.log && sleep 6 && cp /var/log/busybox/monitoring/stable_trunc.log /var/log/busybox/monitoring/stable_trunc.log.1 && : > /var/log/busybox/monitoring/stable_trunc.log && echo "$(date '+%FT%H:%M:%S.%NZ') some line new0" >> /var/log/busybox/monitoring/stable_trunc.log ``` #### Testing truncate (in pattern) ```console echo "$(date '+%FT%H:%M:%S.%NZ') some line1" >> /var/log/busybox/monitoring/stable_trunc.log && echo "$(date '+%FT%H:%M:%S.%NZ') some line2" >> /var/log/busybox/monitoring/stable_trunc.log && echo "$(date '+%FT%H:%M:%S.%NZ') some line3" >> /var/log/busybox/monitoring/stable_trunc.log && sleep 6 && cp /var/log/busybox/monitoring/stable_trunc.log /var/log/busybox/monitoring/stable_trunc_1.log && : > /var/log/busybox/monitoring/stable_trunc.log && echo "$(date '+%FT%H:%M:%S.%NZ') some line new1" >> /var/log/busybox/monitoring/stable_trunc.log ``` #### Testing move/create (out of pattern) ```console echo "$(date '+%FT%H:%M:%S.%NZ') some line1" >> /var/log/busybox/monitoring/stable_trunc.log && echo "$(date '+%FT%H:%M:%S.%NZ') some line2" >> /var/log/busybox/monitoring/stable_trunc.log && echo "$(date '+%FT%H:%M:%S.%NZ') some line3" >> /var/log/busybox/monitoring/stable_trunc.log && sleep 6 && mv /var/log/busybox/monitoring/stable_trunc.log /var/log/busybox/monitoring/stable_trunc.log.1 && echo "$(date '+%FT%H:%M:%S.%NZ') some line new0" >> /var/log/busybox/monitoring/stable_trunc.log ``` #### Testing move/create (in pattern) ```console echo "$(date '+%FT%H:%M:%S.%NZ') some line1" >> /var/log/busybox/monitoring/stable_trunc.log && echo "$(date '+%FT%H:%M:%S.%NZ') some line2" >> /var/log/busybox/monitoring/stable_trunc.log && echo "$(date '+%FT%H:%M:%S.%NZ') some line3" >> /var/log/busybox/monitoring/stable_trunc.log && sleep 6 && mv /var/log/busybox/monitoring/stable_trunc.log /var/log/busybox/monitoring/stable_trunc_1.log && echo "$(date '+%FT%H:%M:%S.%NZ') some line new0" >> /var/log/busybox/monitoring/stable_trunc.log ``` **Documentation:** <Describe the documentation added.> Add some extra notes in the `design.md` --------- Signed-off-by: ChrsMark <[email protected]>
1 parent 8a91da7 commit 0978757

File tree

6 files changed

+148
-3
lines changed

6 files changed

+148
-3
lines changed

.chloggen/add_filelog_logging.yaml

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# Use this changelog template to create an entry for release notes.
2+
3+
# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
4+
change_type: enhancement
5+
6+
# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver)
7+
component: filelogreceiver
8+
9+
# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
10+
note: Log when files are rotated/moved/truncated
11+
12+
# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists.
13+
issues: [33237]
14+
15+
# (Optional) One or more lines of additional information to render under the primary note.
16+
# These lines will be padded with 2 spaces and then inserted directly into the document.
17+
# Use pipe (|) for multiline entries.
18+
subtext:
19+
20+
# If your change doesn't affect end users or the exported elements of any package,
21+
# you should instead start your pull request title with [chore] or use the "Skip Changelog" label.
22+
# Optional: The change log or logs in which this entry should be included.
23+
# e.g. '[user]' or '[user, api]'
24+
# Include 'user' if the change is relevant to end users.
25+
# Include 'api' if there is a change to a library API.
26+
# Default: '[user]'
27+
change_logs: [user]

pkg/stanza/fileconsumer/design.md

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,40 @@ When the operator shuts down, the following occurs:
206206
The net effect of the shut down routine is that all files are checkpointed in a normal manner
207207
(i.e. not in the middle of a log entry), and all checkpoints are persisted.
208208

209+
### Log rotation
210+
211+
#### Supported cases
212+
213+
A) When a file is moved within the pattern with unread logs on the end, then the original is created again,
214+
we get the unread logs on the moved as well as any new logs written to the newly created file.
215+
216+
B) When a file is copied within the pattern with unread logs on the end, then the original is truncated,
217+
we get the unread logs on the copy as well as any new logs written to the truncated file.
218+
219+
C) When a file it rotated out of pattern via move/create, we detect that
220+
our old handle is still valid and we attempt to read from it.
221+
222+
D) When a file it rotated out of pattern via copy/truncate, we detect that
223+
our old handle is invalid and we do not attempt to read from it.
224+
225+
226+
#### Rotated files that end up within the matching pattern
227+
228+
In both cases of copy/truncate and move/create, if the rotated files match the pattern
229+
then the old readers that point to the original path will be closed and we will create new
230+
ones which will be pointing to the rotated file but using the existing metadata's offset.
231+
The receiver will continue consuming the rotated paths in any case so there will be
232+
no data loss during the transition.
233+
The original files will have a fresh fingerprint so they will be consumed by a completely
234+
new reader.
235+
236+
#### Rotated files that end up out of the matching pattern
237+
238+
In case of a file has been rotated with move/create, the old handle will be pointing
239+
to the moved file so we can still consume from it even if it's out of the pattern.
240+
In case of the file has been rotated with copy/truncate, the old handle will be pointing
241+
to the original file which is truncated. So we don't have a handle in order to consume any remaining
242+
logs from the moved file. This can cause data loss.
209243

210244
# Known Limitations
211245

pkg/stanza/fileconsumer/file.go

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,7 @@ func (m *Manager) makeReaders(ctx context.Context, paths []string) {
208208
// Exclude duplicate paths with the same content. This can happen when files are
209209
// being rotated with copy/truncate strategy. (After copy, prior to truncate.)
210210
if r := m.tracker.GetCurrentFile(fp); r != nil {
211+
m.set.Logger.Debug("Skipping duplicate file", zap.String("path", file.Name()))
211212
// re-add the reader as Match() removes duplicates
212213
m.tracker.Add(r)
213214
if err := file.Close(); err != nil {
@@ -229,6 +230,19 @@ func (m *Manager) makeReaders(ctx context.Context, paths []string) {
229230
func (m *Manager) newReader(ctx context.Context, file *os.File, fp *fingerprint.Fingerprint) (*reader.Reader, error) {
230231
// Check previous poll cycle for match
231232
if oldReader := m.tracker.GetOpenFile(fp); oldReader != nil {
233+
if oldReader.GetFileName() != file.Name() {
234+
if !oldReader.Validate() {
235+
m.set.Logger.Debug(
236+
"File has been rotated(truncated)",
237+
zap.String("original_path", oldReader.GetFileName()),
238+
zap.String("rotated_path", file.Name()))
239+
} else {
240+
m.set.Logger.Debug(
241+
"File has been rotated(moved)",
242+
zap.String("original_path", oldReader.GetFileName()),
243+
zap.String("rotated_path", file.Name()))
244+
}
245+
}
232246
return m.readerFactory.NewReaderFromMetadata(file, oldReader.Close())
233247
}
234248

pkg/stanza/fileconsumer/file_other.go

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@ import (
99
"context"
1010
"sync"
1111

12+
"go.uber.org/zap"
13+
1214
"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/fileconsumer/internal/reader"
1315
)
1416

@@ -34,20 +36,25 @@ OUTER:
3436
continue
3537
}
3638

37-
// At this point, we know that the file has been rotated. However, we do not know
38-
// if it was moved or truncated. If truncated, then both handles point to the same
39-
// file, in which case we should only read from it using the new reader. We can use
39+
// At this point, we know that the file has been rotated out of the matching pattern.
40+
// However, we do not know if it was moved or truncated.
41+
// If truncated, then both handles point to the same file, in which case
42+
// we should only read from it using the new reader. We can use
4043
// the Validate method to ensure that the file has not been truncated.
4144
if !oldReader.Validate() {
45+
m.set.Logger.Debug("File has been rotated(truncated)", zap.String("path", oldReader.GetFileName()))
4246
continue OUTER
4347
}
48+
// oldreader points to the rotated file after the move/rename. We can still read from it.
49+
m.set.Logger.Debug("File has been rotated(moved)", zap.String("path", oldReader.GetFileName()))
4450
}
4551
lostReaders = append(lostReaders, oldReader)
4652
}
4753

4854
var lostWG sync.WaitGroup
4955
for _, lostReader := range lostReaders {
5056
lostWG.Add(1)
57+
m.set.Logger.Debug("Reading lost file", zap.String("path", lostReader.GetFileName()))
5158
go func(r *reader.Reader) {
5259
defer lostWG.Done()
5360
m.readingFiles.Add(ctx, 1)

pkg/stanza/fileconsumer/internal/reader/reader.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,10 @@ func (r *Reader) Validate() bool {
184184
return false
185185
}
186186

187+
func (r *Reader) GetFileName() string {
188+
return r.fileName
189+
}
190+
187191
func (m Metadata) GetFingerprint() *fingerprint.Fingerprint {
188192
return m.Fingerprint
189193
}

pkg/stanza/fileconsumer/rotation_test.go

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,10 @@ import (
1414
"testing"
1515
"time"
1616

17+
"github.com/stretchr/testify/assert"
1718
"github.com/stretchr/testify/require"
19+
"go.uber.org/zap"
20+
"go.uber.org/zap/zaptest/observer"
1821

1922
"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/fileconsumer/internal/filetest"
2023
"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/testutil"
@@ -215,6 +218,9 @@ func TestTrackRotatedFilesLogOrder(t *testing.T) {
215218
cfg := NewConfig().includeDir(tempDir)
216219
cfg.StartAt = "beginning"
217220
operator, sink := testManager(t, cfg)
221+
core, observedLogs := observer.New(zap.DebugLevel)
222+
logger := zap.New(core)
223+
operator.set.Logger = logger
218224

219225
originalFile := filetest.OpenTemp(t, tempDir)
220226
orginalName := originalFile.Name()
@@ -240,6 +246,16 @@ func TestTrackRotatedFilesLogOrder(t *testing.T) {
240246
filetest.WriteString(t, newFile, "testlog3\n")
241247

242248
sink.ExpectTokens(t, []byte("testlog2"), []byte("testlog3"))
249+
250+
// verify that proper logging has taken place
251+
allLogs := observedLogs.All()
252+
foundLog := false
253+
for _, actualLog := range allLogs {
254+
if actualLog.Message == "File has been rotated(moved)" {
255+
foundLog = true
256+
}
257+
}
258+
assert.True(t, foundLog)
243259
}
244260

245261
// When a file it rotated out of pattern via move/create, we should
@@ -256,6 +272,9 @@ func TestRotatedOutOfPatternMoveCreate(t *testing.T) {
256272
cfg.StartAt = "beginning"
257273
operator, sink := testManager(t, cfg)
258274
operator.persister = testutil.NewUnscopedMockPersister()
275+
core, observedLogs := observer.New(zap.DebugLevel)
276+
logger := zap.New(core)
277+
operator.set.Logger = logger
259278

260279
originalFile := filetest.OpenTempWithPattern(t, tempDir, "*.log1")
261280
originalFileName := originalFile.Name()
@@ -280,6 +299,20 @@ func TestRotatedOutOfPatternMoveCreate(t *testing.T) {
280299

281300
// expect remaining log from old file as well as all from new file
282301
sink.ExpectTokens(t, []byte("testlog2"), []byte("testlog4"), []byte("testlog5"))
302+
303+
// verify that proper logging has taken place
304+
allLogs := observedLogs.All()
305+
expectedLogs := map[string]string{
306+
"File has been rotated(moved)": "",
307+
"Reading lost file": "",
308+
}
309+
foundLogs := 0
310+
for _, actualLog := range allLogs {
311+
if _, ok := expectedLogs[actualLog.Message]; ok {
312+
foundLogs++
313+
}
314+
}
315+
assert.Equal(t, 2, foundLogs)
283316
}
284317

285318
// When a file it rotated out of pattern via copy/truncate, we should
@@ -293,6 +326,9 @@ func TestRotatedOutOfPatternCopyTruncate(t *testing.T) {
293326
cfg.StartAt = "beginning"
294327
operator, sink := testManager(t, cfg)
295328
operator.persister = testutil.NewUnscopedMockPersister()
329+
core, observedLogs := observer.New(zap.DebugLevel)
330+
logger := zap.New(core)
331+
operator.set.Logger = logger
296332

297333
originalFile := filetest.OpenTempWithPattern(t, tempDir, "*.log1")
298334
filetest.WriteString(t, originalFile, "testlog1\n")
@@ -318,6 +354,16 @@ func TestRotatedOutOfPatternCopyTruncate(t *testing.T) {
318354
operator.poll(context.Background())
319355

320356
sink.ExpectTokens(t, []byte("testlog4"), []byte("testlog5"))
357+
358+
// verify that proper logging has taken place
359+
allLogs := observedLogs.All()
360+
foundLog := false
361+
for _, actualLog := range allLogs {
362+
if actualLog.Message == "File has been rotated(truncated)" {
363+
foundLog = true
364+
}
365+
}
366+
assert.True(t, foundLog)
321367
}
322368

323369
// TruncateThenWrite tests that, after a file has been truncated,
@@ -333,6 +379,9 @@ func TestTruncateThenWrite(t *testing.T) {
333379
cfg.StartAt = "beginning"
334380
operator, sink := testManager(t, cfg)
335381
operator.persister = testutil.NewUnscopedMockPersister()
382+
core, observedLogs := observer.New(zap.DebugLevel)
383+
logger := zap.New(core)
384+
operator.set.Logger = logger
336385

337386
temp1 := filetest.OpenTemp(t, tempDir)
338387
filetest.WriteString(t, temp1, "testlog1\ntestlog2\n")
@@ -348,6 +397,16 @@ func TestTruncateThenWrite(t *testing.T) {
348397
operator.poll(context.Background())
349398
sink.ExpectToken(t, []byte("testlog3"))
350399
sink.ExpectNoCalls(t)
400+
401+
// verify that proper logging has taken place
402+
allLogs := observedLogs.All()
403+
foundLog := false
404+
for _, actualLog := range allLogs {
405+
if actualLog.Message == "File has been rotated(truncated)" {
406+
foundLog = true
407+
}
408+
}
409+
assert.True(t, foundLog)
351410
}
352411

353412
// CopyTruncateWriteBoth tests that when a file is copied

0 commit comments

Comments
 (0)