Skip to content

Commit 0e937a6

Browse files
authored
Merge pull request #123501 from cockroachdb/blathers/backport-release-24.1.0-rc-123277
release-24.1.0-rc: colexecerror: avoid debug.Stack in CatchVectorizedRuntimeError
2 parents 010b7b3 + 879751e commit 0e937a6

File tree

7 files changed

+373
-37
lines changed

7 files changed

+373
-37
lines changed

pkg/sql/colexecerror/BUILD.bazel

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,24 @@ go_library(
1515

1616
go_test(
1717
name = "colexecerror_test",
18-
srcs = ["error_test.go"],
18+
srcs = [
19+
"error_test.go",
20+
"main_test.go",
21+
],
1922
deps = [
2023
":colexecerror",
24+
"//pkg/base",
25+
"//pkg/security/securityassets",
26+
"//pkg/security/securitytest",
27+
"//pkg/server",
28+
"//pkg/sql/pgwire/pgcode",
29+
"//pkg/sql/pgwire/pgerror",
30+
"//pkg/testutils/serverutils",
31+
"//pkg/testutils/sqlutils",
2132
"//pkg/util/leaktest",
2233
"//pkg/util/log",
34+
"//pkg/util/randutil",
35+
"@com_github_cockroachdb_errors//:errors",
2336
"@com_github_stretchr_testify//require",
2437
],
2538
)

pkg/sql/colexecerror/error.go

Lines changed: 106 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,8 @@
1111
package colexecerror
1212

1313
import (
14-
"bufio"
1514
"context"
16-
"runtime/debug"
15+
"runtime"
1716
"strings"
1817

1918
"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgcode"
@@ -36,25 +35,78 @@ func CatchVectorizedRuntimeError(operation func()) (retErr error) {
3635
return
3736
}
3837

39-
// Find where the panic came from and only proceed if it is related to the
40-
// vectorized engine.
41-
stackTrace := string(debug.Stack())
42-
scanner := bufio.NewScanner(strings.NewReader(stackTrace))
43-
panicLineFound := false
44-
for scanner.Scan() {
45-
if strings.Contains(scanner.Text(), panicLineSubstring) {
46-
panicLineFound = true
47-
break
38+
// First check for error types that should only come from the vectorized
39+
// engine.
40+
if err, ok := panicObj.(error); ok {
41+
// StorageError was caused by something below SQL, and represents an error
42+
// that we'd simply like to propagate along.
43+
var se *StorageError
44+
// notInternalError is an error from the vectorized engine that we'd
45+
// simply like to propagate along.
46+
var nie *notInternalError
47+
// internalError is an error from the vectorized engine that might need to
48+
// be returned to the client with a stacktrace, sentry report, and
49+
// "internal error" designation.
50+
var ie *internalError
51+
passthrough := errors.As(err, &se) || errors.As(err, &nie)
52+
if errors.As(err, &ie) {
53+
// Unwrap so that internalError doesn't show up in sentry reports.
54+
retErr = ie.Unwrap()
55+
// If the internal error doesn't already have an error code, mark it as
56+
// an assertion error so that we generate a sentry report. (We don't do
57+
// this for StorageError, notInternalError, or context.Canceled to avoid
58+
// creating unnecessary sentry reports.)
59+
if !passthrough && !errors.Is(retErr, context.Canceled) {
60+
if code := pgerror.GetPGCode(retErr); code == pgcode.Uncategorized {
61+
retErr = errors.NewAssertionErrorWithWrappedErrf(
62+
retErr, "unexpected error from the vectorized engine",
63+
)
64+
}
65+
}
66+
return
67+
}
68+
if passthrough {
69+
retErr = err
70+
return
4871
}
4972
}
50-
if !panicLineFound {
51-
panic(errors.AssertionFailedf("panic line %q not found in the stack trace\n%s", panicLineSubstring, stackTrace))
52-
}
53-
if !scanner.Scan() {
54-
panic(errors.AssertionFailedf("unexpectedly there is no line below the panic line in the stack trace\n%s", stackTrace))
73+
74+
// For other types of errors, we need to check whence the panic originated
75+
// to know what to do. If the panic originated in the vectorized engine, we
76+
// can safely return it as a normal error knowing that any illegal state
77+
// will be cleaned up when the statement finishes. If the panic originated
78+
// lower in the stack, however, we must treat it as unrecoverable because it
79+
// could indicate an illegal state that might persist even after this
80+
// statement finishes.
81+
//
82+
// To check whence the panic originated, we find the frame just before the
83+
// panic frame.
84+
var panicLineFound bool
85+
var panicEmittedFrom string
86+
// We should be able to find it within 3 program counters, starting with the
87+
// caller of this deferred function (2 above the runtime.Callers frame).
88+
pc := make([]uintptr, 3)
89+
n := runtime.Callers(2, pc)
90+
if n >= 1 {
91+
frames := runtime.CallersFrames(pc[:n])
92+
// A fixed number of program counters can expand to any number of frames.
93+
for {
94+
frame, more := frames.Next()
95+
if strings.Contains(frame.File, panicLineSubstring) {
96+
panicLineFound = true
97+
} else if panicLineFound {
98+
panicEmittedFrom = frame.Function
99+
break
100+
}
101+
if !more {
102+
break
103+
}
104+
}
55105
}
56-
panicEmittedFrom := strings.TrimSpace(scanner.Text())
57106
if !shouldCatchPanic(panicEmittedFrom) {
107+
// The panic is from outside the vectorized engine (or we didn't find it
108+
// in the stack). We treat it as unrecoverable because it could indicate
109+
// an illegal state that might persist even after this statement finishes.
58110
panic(panicObj)
59111
}
60112

@@ -66,20 +118,11 @@ func CatchVectorizedRuntimeError(operation func()) (retErr error) {
66118
}
67119
retErr = err
68120

69-
if _, ok := panicObj.(*StorageError); ok {
70-
// A StorageError was caused by something below SQL, and represents
71-
// an error that we'd simply like to propagate along.
72-
// Do nothing.
73-
return
74-
}
75-
76121
annotateErrorWithoutCode := true
77-
var nie *notInternalError
78-
if errors.Is(err, context.Canceled) || errors.As(err, &nie) {
79-
// We don't want to annotate the context cancellation and
80-
// notInternalError errors in case they don't have a valid PG code
81-
// so that the sentry report is not sent (errors with failed
82-
// assertions get sentry reports).
122+
if errors.Is(err, context.Canceled) {
123+
// We don't want to annotate the context cancellation errors in case they
124+
// don't have a valid PG code so that the sentry report is not sent
125+
// (errors with failed assertions get sentry reports).
83126
annotateErrorWithoutCode = false
84127
}
85128
if code := pgerror.GetPGCode(err); annotateErrorWithoutCode && code == pgcode.Uncategorized {
@@ -106,6 +149,9 @@ const (
106149
sqlColPackagesPrefix = "github.com/cockroachdb/cockroach/pkg/sql/col"
107150
sqlRowPackagesPrefix = "github.com/cockroachdb/cockroach/pkg/sql/row"
108151
sqlSemPackagesPrefix = "github.com/cockroachdb/cockroach/pkg/sql/sem"
152+
// When running BenchmarkCatchVectorizedRuntimeError under bazel, the
153+
// repository prefix is missing.
154+
testSqlColPackagesPrefix = "pkg/sql/col"
109155
)
110156

111157
// shouldCatchPanic checks whether the panic that was emitted from
@@ -135,7 +181,8 @@ func shouldCatchPanic(panicEmittedFrom string) bool {
135181
strings.HasPrefix(panicEmittedFrom, execinfraPackagePrefix) ||
136182
strings.HasPrefix(panicEmittedFrom, sqlColPackagesPrefix) ||
137183
strings.HasPrefix(panicEmittedFrom, sqlRowPackagesPrefix) ||
138-
strings.HasPrefix(panicEmittedFrom, sqlSemPackagesPrefix)
184+
strings.HasPrefix(panicEmittedFrom, sqlSemPackagesPrefix) ||
185+
strings.HasPrefix(panicEmittedFrom, testSqlColPackagesPrefix)
139186
}
140187

141188
// StorageError is an error that was created by a component below the sql
@@ -183,16 +230,41 @@ func decodeNotInternalError(
183230
return newNotInternalError(cause)
184231
}
185232

233+
// internalError is an error that occurs because the vectorized engine is in an
234+
// unexpected state. Usually it wraps an assertion error.
235+
type internalError struct {
236+
cause error
237+
}
238+
239+
func newInternalError(err error) *internalError {
240+
return &internalError{cause: err}
241+
}
242+
243+
var (
244+
_ errors.Wrapper = &internalError{}
245+
)
246+
247+
func (e *internalError) Error() string { return e.cause.Error() }
248+
func (e *internalError) Cause() error { return e.cause }
249+
func (e *internalError) Unwrap() error { return e.Cause() }
250+
251+
func decodeInternalError(
252+
_ context.Context, cause error, _ string, _ []string, _ proto.Message,
253+
) error {
254+
return newInternalError(cause)
255+
}
256+
186257
func init() {
187258
errors.RegisterWrapperDecoder(errors.GetTypeKey((*notInternalError)(nil)), decodeNotInternalError)
259+
errors.RegisterWrapperDecoder(errors.GetTypeKey((*internalError)(nil)), decodeInternalError)
188260
}
189261

190-
// InternalError simply panics with the provided object. It will always be
191-
// caught and returned as internal error to the client with the corresponding
262+
// InternalError panics with the error wrapped by internalError. It will always
263+
// be caught and returned as internal error to the client with the corresponding
192264
// stack trace. This method should be called to propagate errors that resulted
193265
// in the vectorized engine being in an *unexpected* state.
194266
func InternalError(err error) {
195-
panic(err)
267+
panic(newInternalError(err))
196268
}
197269

198270
// ExpectedError panics with the error that is wrapped by

0 commit comments

Comments
 (0)