diff --git a/pkg/x/json/jsonc.go b/pkg/x/json/jsonc.go new file mode 100644 index 000000000000..764b546538ca --- /dev/null +++ b/pkg/x/json/jsonc.go @@ -0,0 +1,248 @@ +package json + +import ( + "bytes" + "errors" + "io" +) + +// TokenType represents the type of token being processed +type TokenType int + +const ( + TokenNormal TokenType = iota + TokenString + TokenSingleLineComment + TokenMultiLineComment +) + +// jsoncParser manages the state and processing of JSONC content +type jsoncParser struct { + reader *bytes.Reader // Source reader + dst []byte // Destination buffer + pos int // Current position in destination + tokenType TokenType // Current token type being processed + escaped bool // Whether the previous character was an escape character + lastChar byte // Last processed character +} + +// ToRFC8259 converts JSONC (JSON with Comments) to valid JSON following RFC8259. +// It strips out comments and trailing commas while maintaining the exact character +// offsets as the input. This ensures that any JSON parser locations will map +// directly back to the original source file positions. +// +// Both line numbers and character positions are preserved in the output. +// Comments and trailing commas are replaced with spaces without changing line counts. +// +// Comments can be either: +// - Single-line: starting with // and continuing to the end of the line +// - Multi-line: starting with /* and ending with */ +// +// Trailing commas are allowed in JSONC but not in standard JSON, so they are replaced +// with spaces to maintain character offsets. +func ToRFC8259(src []byte) []byte { + dst := make([]byte, len(src)) + copy(dst, src) // Copy input to maintain same length and offsets + + parser := newJSONCParser(src, dst) + parser.process() + + return dst +} + +// UnmarshalJSONC parses JSONC (JSON with Comments) data into the specified value. +// It first converts JSONC to standard JSON following RFC8259 and then unmarshals it. +// This is a convenience function that combines ToRFC8259 and Unmarshal. +// +// The parser preserves line number information, which is essential for reporting +// errors at their correct locations in the original file. +// +// Usage example: +// +// type Config struct { +// Name string `json:"name"` +// Version string `json:"version"` +// xjson.Location // Embed Location to get line number info +// } +// +// var config Config +// if err := xjson.UnmarshalJSONC(data, &config); err != nil { +// return err +// } +func UnmarshalJSONC(data []byte, v any) error { + jsonData := ToRFC8259(data) + return Unmarshal(jsonData, v) +} + +// newJSONCParser creates a new JSONC parser +func newJSONCParser(src, dst []byte) *jsoncParser { + return &jsoncParser{ + reader: bytes.NewReader(src), + dst: dst, + pos: 0, + tokenType: TokenNormal, + } +} + +// process processes the input JSONC content +func (p *jsoncParser) process() { + for { + b, err := p.reader.ReadByte() + if errors.Is(err, io.EOF) { + break + } else if err != nil { + // Ignore other errors (not expected to occur) + break + } + p.processChar(b) + } +} + +// processChar processes a single character based on current state +func (p *jsoncParser) processChar(b byte) { + switch p.tokenType { + case TokenString: + p.processStringToken(b) + case TokenSingleLineComment: + p.processSingleLineComment(b) + case TokenMultiLineComment: + p.processMultiLineComment(b) + default: + p.processNormalToken(b) + } +} + +// processStringToken processes a character within a string literal +func (p *jsoncParser) processStringToken(b byte) { + switch { + case p.escaped: + p.escaped = false + case b == '\\': + p.escaped = true + case b == '"': + p.tokenType = TokenNormal + } + + p.lastChar = b + p.pos++ +} + +// processSingleLineComment processes a character within a single-line comment +func (p *jsoncParser) processSingleLineComment(b byte) { + if b == '\n' { + // End of single-line comment at newline + p.tokenType = TokenNormal + } else if !isPreservedWhitespace(b) { + // Replace non-whitespace characters with spaces + if p.pos < len(p.dst) { + p.dst[p.pos] = ' ' + } + } + + p.lastChar = b + p.pos++ +} + +// processMultiLineComment processes a character within a multi-line comment +func (p *jsoncParser) processMultiLineComment(b byte) { + if p.lastChar == '*' && b == '/' { + // End of multi-line comment + p.tokenType = TokenNormal + if p.pos < len(p.dst) { + p.dst[p.pos] = ' ' // Replace '/' with space + } + } else if !isPreservedWhitespace(b) { + // Replace non-whitespace with space + if p.pos < len(p.dst) { + p.dst[p.pos] = ' ' + } + } + + p.lastChar = b + p.pos++ +} + +// processNormalToken processes a character outside of string literals and comments +func (p *jsoncParser) processNormalToken(b byte) { + switch b { + case '"': + // Start of string literal + p.tokenType = TokenString + case '/': + // Potential start of comment - look ahead + nextByte, err := p.reader.ReadByte() + if err != nil { + // End of file after '/' character + return + } + + switch nextByte { + case '/': + // Start of single-line comment + p.tokenType = TokenSingleLineComment + if p.pos < len(p.dst) { + p.dst[p.pos] = ' ' // Replace '/' with space + } + if p.pos+1 < len(p.dst) { + p.dst[p.pos+1] = ' ' // Replace second '/' with space + } + p.lastChar = nextByte + p.pos += 2 + return + case '*': + // Start of multi-line comment + p.tokenType = TokenMultiLineComment + if p.pos < len(p.dst) { + p.dst[p.pos] = ' ' // Replace '/' with space + } + if p.pos+1 < len(p.dst) { + p.dst[p.pos+1] = ' ' // Replace '*' with space + } + p.lastChar = nextByte + p.pos += 2 + return + } + + // Not a comment, put the byte back + p.reader.UnreadByte() + case ']', '}': + // Handle trailing comma - look backward + p.handleTrailingComma() + } + p.lastChar = b + p.pos++ +} + +// handleTrailingComma handles the trailing comma by looking backward from the current position +func (p *jsoncParser) handleTrailingComma() { + // Start from one position before the current bracket + startPos := p.pos - 1 + if startPos < 0 { + return + } + + // Find the previous significant (non-whitespace) character + for i := startPos; i >= 0; i-- { + if i >= len(p.dst) { + continue + } + + c := p.dst[i] + switch c { + case ' ', '\t', '\n', '\r': + // Skip whitespace + continue + case ',': + // If it's a comma, replace it with a space + p.dst[i] = ' ' + default: + // Stop after finding the first non-whitespace character + return + } + } +} + +// isPreservedWhitespace returns true for whitespace that should be preserved +func isPreservedWhitespace(c byte) bool { + return c == '\n' || c == '\t' || c == '\r' +} diff --git a/pkg/x/json/jsonc_test.go b/pkg/x/json/jsonc_test.go new file mode 100644 index 000000000000..3a1422ca614d --- /dev/null +++ b/pkg/x/json/jsonc_test.go @@ -0,0 +1,190 @@ +package json_test + +import ( + "bytes" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + xjson "github.com/aquasecurity/trivy/pkg/x/json" +) + +func TestToRFC8259(t *testing.T) { + tests := []struct { + name string + input string + want string + }{ + { + name: "no comments", + input: `{"a": 1, "b": 2}`, + want: `{"a": 1, "b": 2}`, + }, + { + name: "single-line comment", + input: "{\n \"a\": 1, // This is a comment\n \"b\": 2\n}", + want: "{\n \"a\": 1, \n \"b\": 2\n}", + }, + { + name: "multi-line comment", + input: "{\n \"a\": 1, /* This is\n a multi-line\n comment */ \"b\": 2\n}", + want: "{\n \"a\": 1, \n \n \"b\": 2\n}", + }, + { + name: "comment with forward slash in string", + input: "{\n \"url\": \"http://example.com\", // Comment\n \"value\": 123\n}", + want: "{\n \"url\": \"http://example.com\", \n \"value\": 123\n}", + }, + { + name: "trailing comma in object", + input: `{"a": 1, "b": 2,}`, + want: `{"a": 1, "b": 2 }`, + }, + { + name: "trailing comma in array", + input: `[1, 2, 3,]`, + want: `[1, 2, 3 ]`, + }, + { + name: "nested trailing commas", + input: `{"a": [1, 2,], "b": {"x": 1, "y": 2,},}`, + want: `{"a": [1, 2 ], "b": {"x": 1, "y": 2 } }`, + }, + { + name: "single-line comment at end of file without newline", + input: `{"a": 1} // Comment`, + want: `{"a": 1} `, + }, + { + name: "multi-line comment at end of file", + input: `{"a": 1} /* Comment */`, + want: `{"a": 1} `, + }, + { + name: "comment within string", + input: `{"text": "This string has // comment syntax"}`, + want: `{"text": "This string has // comment syntax"}`, + }, + { + name: "quoted comment markers", + input: `{"a": "//", "b": "/*", "c": "*/"}`, + want: `{"a": "//", "b": "/*", "c": "*/"}`, + }, + { + name: "escaped quotes in string", + input: `{"text": "String with \"escaped quotes\" // not a comment"}`, + want: `{"text": "String with \"escaped quotes\" // not a comment"}`, + }, + { + name: "complex escaped quotes", + input: `{"text": "String with \\\"double escaped\\\" quotes"}`, + want: `{"text": "String with \\\"double escaped\\\" quotes"}`, + }, + { + name: "real world example", + input: `{ + "name": "my-package", // Package name + "version": "1.0.0", /* Version number */ + "dependencies": { + "lodash": "^4.17.21", + "express": "^4.17.1", // Latest express + }, + "scripts": { + "start": "node index.js", + "test": "jest", + } +}`, + want: `{ + "name": "my-package", + "version": "1.0.0", + "dependencies": { + "lodash": "^4.17.21", + "express": "^4.17.1" + }, + "scripts": { + "start": "node index.js", + "test": "jest" + } +}`, + }, + { + name: "preserves newlines in multiline comments", + input: `{ + "name": "test", // Comment + /* + * Multi-line + * comment + */ + "value": 42 +}`, + want: `{ + "name": "test", + + + + + "value": 42 +}`, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Test ToRFC8259 (allocates new buffer) + got := xjson.ToRFC8259([]byte(tt.input)) + + // Check length preservation + require.Len(t, got, len(tt.input), "output length should match input length") + + // Check content + assert.Equal(t, tt.want, string(got)) + + // Verify newline count is preserved + inputNewlines := bytes.Count([]byte(tt.input), []byte{'\n'}) + outputNewlines := bytes.Count(got, []byte{'\n'}) + assert.Equal(t, inputNewlines, outputNewlines, "number of newlines should be preserved") + + // Make sure the output is valid JSON + var jsonMap any + err := xjson.Unmarshal(got, &jsonMap) + require.NoError(t, err, "result should be valid JSON") + }) + } +} + +func TestUnmarshalJSONC(t *testing.T) { + jsonc := `{ + "name": "test", // This is a comment + "dependencies": { + "lodash": "^4.17.21", /* Another comment */ + "express": "^4.17.1", // Comment + }, // Trailing comment + /* Multi-line + comment */ + "version": "1.0.0" +}` + + type Config struct { + Name string `json:"name"` + Dependencies map[string]string `json:"dependencies"` + Version string `json:"version"` + xjson.Location + } + + var config Config + err := xjson.UnmarshalJSONC([]byte(jsonc), &config) + require.NoError(t, err) + + // Verify the parsed content + assert.Equal(t, "test", config.Name) + assert.Equal(t, "1.0.0", config.Version) + assert.Equal(t, map[string]string{ + "lodash": "^4.17.21", + "express": "^4.17.1", + }, config.Dependencies) + + // Verify location information + assert.Equal(t, 1, config.StartLine) + assert.Equal(t, 10, config.EndLine) +}