Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 23 additions & 6 deletions pkg/fanal/secret/scanner.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"sort"
"strings"
"sync"
"unicode/utf8"

"github.com/samber/lo"
"golang.org/x/xerrors"
Expand All @@ -19,7 +20,12 @@ import (
"github.com/aquasecurity/trivy/pkg/log"
)

var lineSep = []byte{'\n'}
var (
// lineSep is the newline separator; findLocation counts its occurrences
// inside a match (via bytes.Count) to work out the match's end line number.
lineSep = []byte{'\n'}
// warnUTF8Once logs the invalid-UTF-8 warning under the secret log prefix.
// It is wrapped in sync.OnceFunc, so the warning is emitted at most once
// however many times sanitizeUTF8String encounters invalid content.
warnUTF8Once = sync.OnceFunc(func() {
log.WithPrefix(log.PrefixSecret).Warn("Invalid UTF-8 sequences detected in file content, replacing with empty string")
})
)

type Scanner struct {
logger *log.Logger
Expand Down Expand Up @@ -280,7 +286,7 @@ func ParseConfig(configPath string) (*Config, error) {
return nil, nil
}

logger := log.WithPrefix("secret").With("config_path", configPath)
logger := log.WithPrefix(log.PrefixSecret).With("config_path", configPath)
f, err := os.Open(configPath)
if errors.Is(err, os.ErrNotExist) {
// If the specified file doesn't exist, it just uses built-in rules and allow rules.
Expand Down Expand Up @@ -318,7 +324,7 @@ func convertSeverity(logger *log.Logger, severity string) string {
}

func NewScanner(config *Config) Scanner {
logger := log.WithPrefix("secret")
logger := log.WithPrefix(log.PrefixSecret)

// Use the default rules
if config == nil {
Expand Down Expand Up @@ -512,7 +518,7 @@ func findLocation(start, end int, content []byte) (int, int, types.Code, string)
lineStart = lo.Ternary(start-lineStart-30 < 0, lineStart, start-30)
lineEnd = lo.Ternary(end+20 > lineEnd, lineEnd, end+20)
}
matchLine := string(content[lineStart:lineEnd])
matchLine := sanitizeUTF8String(content[lineStart:lineEnd])
endLineNum := startLineNum + bytes.Count(content[start:end], lineSep)

var code types.Code
Expand All @@ -529,9 +535,9 @@ func findLocation(start, end int, content []byte) (int, int, types.Code, string)

var strRawLine string
if len(rawLine) > maxLineLength {
strRawLine = lo.Ternary(inCause, matchLine, string(rawLine[:maxLineLength]))
strRawLine = lo.Ternary(inCause, matchLine, sanitizeUTF8String(rawLine[:maxLineLength]))
} else {
strRawLine = string(rawLine)
strRawLine = sanitizeUTF8String(rawLine)
}

code.Lines = append(code.Lines, types.Line{
Expand All @@ -555,3 +561,14 @@ func findLocation(start, end int, content []byte) (int, int, types.Code, string)

return startLineNum + 1, endLineNum + 1, code, matchLine
}

// sanitizeUTF8String returns data as a string that is guaranteed to be valid UTF-8.
//
// Content that is already valid is returned unchanged. Otherwise every invalid
// byte sequence is dropped (replaced with the empty string) and warnUTF8Once is
// invoked, so the warning is logged only the first time this happens.
func sanitizeUTF8String(data []byte) string {
	text := string(data)
	if !utf8.ValidString(text) {
		// Report the problem (once), then strip the offending bytes.
		warnUTF8Once()
		text = strings.ToValidUTF8(text, "")
	}
	return text
}
36 changes: 36 additions & 0 deletions pkg/fanal/secret/scanner_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1406,6 +1406,42 @@ func TestSecretScanner(t *testing.T) {
Findings: []types.SecretFinding{wantFindingTokenInsideJs},
},
},
{
name: "invalid UTF-8 sequences in secrets",
configPath: filepath.Join("testdata", "skip-test.yaml"),
inputFilePath: filepath.Join("testdata", "invalid-utf8.txt"),
want: types.Secret{
FilePath: filepath.Join("testdata", "invalid-utf8.txt"),
Findings: []types.SecretFinding{
{
RuleID: "github-pat",
Category: secret.CategoryGitHub,
Title: "GitHub Personal Access Token",
Severity: "CRITICAL",
StartLine: 1,
EndLine: 1,
Match: "token=****************************************",
Code: types.Code{
Lines: []types.Line{
{
Number: 1,
Content: "token=****************************************",
Highlighted: "token=****************************************",
IsCause: true,
FirstCause: true,
LastCause: true,
},
{
Number: 2,
Content: "# Comment with invalid UTF-8: ",
Highlighted: "# Comment with invalid UTF-8: ",
},
},
},
},
},
},
},
}

for _, tt := range tests {
Expand Down
3 changes: 3 additions & 0 deletions pkg/fanal/secret/testdata/invalid-utf8.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
token=ghp_abcdef1234567890ABCDEF1234567890abcd
# Comment with invalid UTF-8: €‚ƒ
token2=ghp_1234567890abcdef1234567890ABCDEFþÿabcd
Loading