Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
121 changes: 78 additions & 43 deletions prog.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,12 @@ const minVerifierLogSize = 64 * 1024
// future, but avoid the unnecessary EINVAL for now.
const maxVerifierLogSize = math.MaxUint32 >> 2

// maxVerifierAttempts is the maximum number of times the verifier will retry
// loading a program with a growing log buffer before giving up. Since we double
// the log size on every attempt, this is the absolute maximum number of
// attempts before the buffer reaches [maxVerifierLogSize].
const maxVerifierAttempts = 30

// ProgramOptions control loading a program into the kernel.
type ProgramOptions struct {
// Bitmap controlling the detail emitted by the kernel's eBPF verifier log.
Expand Down Expand Up @@ -418,59 +424,44 @@ func newProgramWithOptions(spec *ProgramSpec, opts ProgramOptions) (*Program, er
return nil, fmt.Errorf("log level: %w", internal.ErrNotSupportedOnOS)
}

// The caller requested a specific verifier log level. Set up the log buffer
// so that there is a chance of loading the program in a single shot.
logSize := internal.Between(opts.LogSizeStart, minVerifierLogSize, maxVerifierLogSize)
var logBuf []byte
if !opts.LogDisabled && opts.LogLevel != 0 {
logBuf = make([]byte, logSize)
attr.LogLevel = opts.LogLevel
attr.LogSize = uint32(len(logBuf))
attr.LogBuf = sys.SlicePointer(logBuf)
}

for {
var fd *sys.FD
var fd *sys.FD
if opts.LogDisabled {
// Loading with logging disabled should never retry.
fd, err = sys.ProgLoad(attr)
if err == nil {
return &Program{unix.ByteSliceToString(logBuf), fd, spec.Name, "", spec.Type}, nil
return &Program{"", fd, spec.Name, "", spec.Type}, nil
}

if opts.LogDisabled {
break
}

if attr.LogTrueSize != 0 && attr.LogSize >= attr.LogTrueSize {
// The log buffer already has the correct size.
break
} else {
// Only specify log size if log level is also specified. Setting size
// without level results in EINVAL. Level will be bumped to LogLevelBranch
// if the first load fails.
if opts.LogLevel != 0 {
attr.LogLevel = opts.LogLevel
attr.LogSize = internal.Between(opts.LogSizeStart, minVerifierLogSize, maxVerifierLogSize)
}

if attr.LogSize != 0 && !errors.Is(err, unix.ENOSPC) {
// Logging is enabled and the error is not ENOSPC, so we can infer
// that the log buffer is large enough.
break
}
attempts := 1
for {
if attr.LogLevel != 0 {
logBuf = make([]byte, attr.LogSize)
attr.LogBuf = sys.SlicePointer(logBuf)
}

if attr.LogLevel == 0 {
// Logging is not enabled but loading the program failed. Enable
// basic logging.
attr.LogLevel = LogLevelBranch
}
fd, err = sys.ProgLoad(attr)
if err == nil {
return &Program{unix.ByteSliceToString(logBuf), fd, spec.Name, "", spec.Type}, nil
}

// Make an educated guess how large the buffer should be by multiplying.
// Ensure the size doesn't overflow.
const factor = 2
logSize = internal.Between(logSize, minVerifierLogSize, maxVerifierLogSize/factor)
logSize *= factor
if !retryLogAttrs(attr, opts.LogSizeStart, err) {
break
}

if attr.LogTrueSize != 0 {
// The kernel has given us a hint how large the log buffer has to be.
logSize = attr.LogTrueSize
if attempts >= maxVerifierAttempts {
return nil, fmt.Errorf("load program: %w (bug: hit %d verifier attempts)", err, maxVerifierAttempts)
}
attempts++
}

logBuf = make([]byte, logSize)
attr.LogSize = logSize
attr.LogBuf = sys.SlicePointer(logBuf)
}

end := bytes.IndexByte(logBuf, 0)
Expand Down Expand Up @@ -520,6 +511,50 @@ func newProgramWithOptions(spec *ProgramSpec, opts ProgramOptions) (*Program, er
return nil, internal.ErrorWithLog("load program", err, logBuf)
}

func retryLogAttrs(attr *sys.ProgLoadAttr, startSize uint32, err error) bool {
if attr.LogSize == maxVerifierLogSize {
// Maximum buffer size reached, don't grow or retry.
return false
}

// ENOSPC means the log was enabled on the previous iteration, so we only
// need to grow the buffer.
if errors.Is(err, unix.ENOSPC) {
if attr.LogTrueSize != 0 {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This doesn't preserve the behaviour that we don't retry if LogSize >= LogTrueSize. I'm also not sure why this only happens on ENOSPC? LogTrueSize should always be populated?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LogTrueSize is not available on older kernels

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If the kernel returns ENOSPC, LogSize is smaller than LogTrueSize by definition, and we just need to allocate whatever LogTrueSize specified and retry. I don't think what I wrote here will result in a retry if LogSize > LogTrueSize. What am I missing?

// Kernel supports LogTrueSize and previous iteration undershot the buffer
// size. Try again with the given true size.
attr.LogSize = attr.LogTrueSize
return true
}

// Ensure the size doesn't overflow.
const factor = 2
if attr.LogSize >= maxVerifierLogSize/factor {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I find this whole factor thing not very intuitive. This is to avoid overflow?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you have an alternative suggestion? The intention is for this to keep working if/when maxVerifierLogSize gets bumped to MaxUint32. Currently, we can't rely on a u32 overflow happening, detecting it, and clamping the value to maxVerifierLogSize. Either way, that's more code and more obtuse than this solution. I'll go ahead with the current version for now.

attr.LogSize = maxVerifierLogSize
return true
}

// Make an educated guess how large the buffer should be by multiplying. Due
// to int division, this rounds down odd sizes.
attr.LogSize = internal.Between(attr.LogSize, minVerifierLogSize, maxVerifierLogSize/factor)
attr.LogSize *= factor

return true
}

if attr.LogLevel == 0 {
// Loading the program failed, it wasn't a buffer-related error, and the log
// was disabled the previous iteration. Enable basic logging and retry.
attr.LogLevel = LogLevelBranch
attr.LogSize = internal.Between(startSize, minVerifierLogSize, maxVerifierLogSize)
return true
}

// Loading the program failed for a reason other than buffer size and the log
// was already enabled the previous iteration. Don't retry.
return false
}

// NewProgramFromFD creates a program from a raw fd.
//
// You should not use fd after calling this function.
Expand Down
89 changes: 89 additions & 0 deletions prog_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -372,6 +372,95 @@ func TestProgramVerifierLog(t *testing.T) {
qt.Assert(t, qt.Not(qt.HasLen(prog.VerifierLog, 0)))
}

func TestProgramVerifierLogRetry(t *testing.T) {
someError := errors.New("not a buffer error")

t.Run("retry with oversized buffer, no log_true_size", func(t *testing.T) {
// First load failure, without logging enabled. Retry with logging enabled.
attr := &sys.ProgLoadAttr{LogLevel: 0, LogSize: 0}
qt.Assert(t, qt.IsTrue(retryLogAttrs(attr, 0, someError)))
qt.Assert(t, qt.Equals(attr.LogLevel, LogLevelBranch))
qt.Assert(t, qt.Equals(attr.LogSize, minVerifierLogSize))

// Second failure with logging enabled. No buffer error, don't retry.
qt.Assert(t, qt.IsFalse(retryLogAttrs(attr, 0, someError)))
qt.Assert(t, qt.Equals(attr.LogLevel, LogLevelBranch))
qt.Assert(t, qt.Equals(attr.LogSize, minVerifierLogSize))
})

t.Run("retry with oversized buffer, with log_true_size", func(t *testing.T) {
// First load failure, without logging enabled. Retry with larger buffer.
attr := &sys.ProgLoadAttr{LogLevel: 0, LogSize: 0}
qt.Assert(t, qt.IsTrue(retryLogAttrs(attr, 0, someError)))

// Buffer was sufficiently large and log_true_size was set. Don't retry and
// don't modify LogSize to LogTrueSize.
attr.LogTrueSize = 123
qt.Assert(t, qt.IsFalse(retryLogAttrs(attr, 0, someError)))
qt.Assert(t, qt.Equals(attr.LogSize, minVerifierLogSize))
})

t.Run("retry with undersized buffer, no log_true_size", func(t *testing.T) {
// First load failure, without logging enabled. Retry with larger buffer.
attr := &sys.ProgLoadAttr{LogLevel: 0, LogSize: 0}
qt.Assert(t, qt.IsTrue(retryLogAttrs(attr, 0, someError)))

// Second failure, this time the kernel signals an undersized buffer. Retry
// with double the size.
qt.Assert(t, qt.IsTrue(retryLogAttrs(attr, 0, unix.ENOSPC)))
qt.Assert(t, qt.Equals(attr.LogSize, minVerifierLogSize*2))
})

t.Run("retry with undersized buffer, with log_true_size", func(t *testing.T) {
// First load failure, without logging enabled. Retry with larger buffer.
attr := &sys.ProgLoadAttr{LogLevel: 0, LogSize: 0}
qt.Assert(t, qt.IsTrue(retryLogAttrs(attr, 0, someError)))

// Second failure, the kernel signals undersized buffer and also sets
// log_true_size. Retry with the exact size required.
attr.LogTrueSize = 123
qt.Assert(t, qt.IsTrue(retryLogAttrs(attr, 0, unix.ENOSPC)))
qt.Assert(t, qt.Equals(attr.LogSize, 123))
})

t.Run("grow to maximum buffer size", func(t *testing.T) {
// Previous loads pushed the log size to (or above) half of the maximum,
// which would make it overflow on the next retry. Make sure the log size
// actually hits the maximum so we can bail out.
attr := &sys.ProgLoadAttr{LogLevel: LogLevelBranch, LogSize: maxVerifierLogSize / 2}
qt.Assert(t, qt.IsTrue(retryLogAttrs(attr, 0, unix.ENOSPC)))
qt.Assert(t, qt.Equals(attr.LogSize, maxVerifierLogSize))

// Don't retry if the buffer is already at the maximum size, no matter
// the return code.
qt.Assert(t, qt.IsFalse(retryLogAttrs(attr, 0, someError)))
qt.Assert(t, qt.IsFalse(retryLogAttrs(attr, 0, unix.ENOSPC)))
})

t.Run("start at maximum buffer size", func(t *testing.T) {
// The user requested a log buffer exceeding the maximum size, but no log
// level. Retry with the maximum size and default log level.
attr := &sys.ProgLoadAttr{LogLevel: 0, LogSize: 0}
qt.Assert(t, qt.IsTrue(retryLogAttrs(attr, math.MaxUint32, unix.EINVAL)))
qt.Assert(t, qt.Equals(attr.LogLevel, LogLevelBranch))
qt.Assert(t, qt.Equals(attr.LogSize, maxVerifierLogSize))

// Log still doesn't fit maximum-size buffer. Don't retry.
qt.Assert(t, qt.IsFalse(retryLogAttrs(attr, 0, unix.ENOSPC)))
})

t.Run("ensure growth terminates within max attempts", func(t *testing.T) {
attr := &sys.ProgLoadAttr{LogLevel: 0, LogSize: 0}
var terminated bool
for i := 1; i <= maxVerifierAttempts; i++ {
if !retryLogAttrs(attr, 0, syscall.ENOSPC) {
terminated = true
}
}
qt.Assert(t, qt.IsTrue(terminated))
})
}

func TestProgramWithUnsatisfiedMap(t *testing.T) {
coll, err := LoadCollectionSpec("testdata/loader-el.elf")
if err != nil {
Expand Down
Loading