diff --git a/sast-engine/graph/docker/parser.go b/sast-engine/graph/docker/parser.go
new file mode 100644
index 00000000..02af401a
--- /dev/null
+++ b/sast-engine/graph/docker/parser.go
@@ -0,0 +1,158 @@
+package docker
+
+import (
+	"context"
+	"fmt"
+	"os"
+
+	sitter "github.com/smacker/go-tree-sitter"
+	"github.com/smacker/go-tree-sitter/dockerfile"
+)
+
+// DockerfileParser handles parsing of Dockerfile content using tree-sitter.
+//
+// NOTE(review): every call on one DockerfileParser goes through the same
+// underlying tree-sitter parser; confirm it is safe for concurrent use before
+// sharing a single value across goroutines.
+type DockerfileParser struct {
+	parser *sitter.Parser
+}
+
+// NewDockerfileParser creates a new Dockerfile parser.
+// The wrapped tree-sitter parser is configured with the Dockerfile grammar.
+func NewDockerfileParser() *DockerfileParser {
+	tsParser := sitter.NewParser()
+	tsParser.SetLanguage(dockerfile.GetLanguage())
+	return &DockerfileParser{parser: tsParser}
+}
+
+// ParseFile parses a Dockerfile from a file path.
+// It reads the whole file into memory and hands the bytes to Parse; a read
+// failure is returned wrapped and no graph is produced.
+func (dp *DockerfileParser) ParseFile(filePath string) (*DockerfileGraph, error) {
+	content, err := os.ReadFile(filePath)
+	if err != nil {
+		return nil, fmt.Errorf("failed to read Dockerfile: %w", err)
+	}
+	return dp.Parse(filePath, content)
+}
+
+// Parse parses Dockerfile content and returns a DockerfileGraph.
+//
+// filePath is passed to NewDockerfileGraph; content is the raw Dockerfile
+// text. An error is returned only when the tree-sitter parse call itself fails.
+func (dp *DockerfileParser) Parse(filePath string, content []byte) (*DockerfileGraph, error) {
+	// Parse into tree-sitter AST.
+	tree, err := dp.parser.ParseCtx(context.Background(), nil, content)
+	if err != nil {
+		return nil, fmt.Errorf("failed to parse Dockerfile: %w", err)
+	}
+	defer tree.Close()
+
+	// Syntax errors inside the tree are deliberately not treated as failures:
+	// a partially parsed Dockerfile is still useful, so conversion proceeds
+	// with whatever instruction nodes were recognised.
+	graph := NewDockerfileGraph(filePath)
+	dp.convertASTToGraph(tree.RootNode(), content, graph)
+
+	return graph, nil
+}
+
+// convertASTToGraph traverses the tree-sitter AST and populates DockerfileGraph.
+func (dp *DockerfileParser) convertASTToGraph(
+	rootNode *sitter.Node,
+	source []byte,
+	graph *DockerfileGraph,
+) {
+	// Walk the root's direct children; the count is read once up front.
+	childCount := int(rootNode.ChildCount())
+	for i := 0; i < childCount; i++ {
+		child := rootNode.Child(i)
+
+		// Skip non-instruction nodes (comments, blank lines).
+		if !isInstructionNode(child) {
+			continue
+		}
+
+		// Convert to DockerfileNode (implemented in PR #3).
+		graph.AddInstruction(dp.convertInstruction(child, source))
+	}
+
+	// Analyze build stages after all instructions parsed.
+	graph.AnalyzeBuildStages()
+}
+
+// instructionNames maps each tree-sitter instruction node type to its
+// Dockerfile keyword. It is the single table behind both isInstructionNode
+// and extractInstructionType, so the two cannot drift apart. Read-only.
+var instructionNames = map[string]string{
+	"from_instruction":        "FROM",
+	"run_instruction":         "RUN",
+	"copy_instruction":        "COPY",
+	"add_instruction":         "ADD",
+	"env_instruction":         "ENV",
+	"arg_instruction":         "ARG",
+	"user_instruction":        "USER",
+	"expose_instruction":      "EXPOSE",
+	"workdir_instruction":     "WORKDIR",
+	"cmd_instruction":         "CMD",
+	"entrypoint_instruction":  "ENTRYPOINT",
+	"volume_instruction":      "VOLUME",
+	"shell_instruction":       "SHELL",
+	"healthcheck_instruction": "HEALTHCHECK",
+	"label_instruction":       "LABEL",
+	"onbuild_instruction":     "ONBUILD",
+	"stopsignal_instruction":  "STOPSIGNAL",
+	"maintainer_instruction":  "MAINTAINER",
+}
+
+// isInstructionNode checks if a tree-sitter node represents a Dockerfile instruction.
+// A nil node is reported as not being an instruction.
+func isInstructionNode(node *sitter.Node) bool {
+	if node == nil {
+		return false
+	}
+	_, ok := instructionNames[node.Type()]
+	return ok
+}
+
+// convertInstruction is a placeholder for PR #3.
+// It will be replaced with actual conversion logic.
+//
+// For now it builds a DockerfileNode carrying only the instruction keyword,
+// the 1-indexed start line, and the raw source text of the node.
+func (dp *DockerfileParser) convertInstruction(
+	node *sitter.Node,
+	source []byte,
+) *DockerfileNode {
+	dockerNode := NewDockerfileNode(
+		extractInstructionType(node.Type()),
+		int(node.StartPoint().Row)+1, // 1-indexed line number
+	)
+	dockerNode.RawInstruction = node.Content(source)
+
+	return dockerNode
+}
+
+// extractInstructionType converts tree-sitter node type to instruction name.
+// For example, "from_instruction" becomes "FROM".
+// A node type that is not a known instruction is returned unchanged.
+func extractInstructionType(nodeType string) string {
+	if name, ok := instructionNames[nodeType]; ok {
+		return name
+	}
+	return nodeType
+}
diff --git a/sast-engine/graph/docker/parser_test.go b/sast-engine/graph/docker/parser_test.go
new file mode 100644
index 00000000..f11af85c
--- /dev/null
+++ b/sast-engine/graph/docker/parser_test.go
@@ -0,0 +1,196 @@
+package docker
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+)
+
+func TestNewDockerfileParser(t *testing.T) {
+	parser := NewDockerfileParser()
+	assert.NotNil(t, parser)
+	assert.NotNil(t, parser.parser)
+}
+
+func TestDockerfileParser_Parse_Simple(t *testing.T) {
+	parser := NewDockerfileParser()
+
+	dockerfile := []byte(`FROM ubuntu:20.04
+RUN apt-get update
+USER appuser
+CMD ["/bin/bash"]
+`)
+
+	graph, err := parser.Parse("Dockerfile", dockerfile)
+
+	assert.NoError(t, err)
+	assert.NotNil(t, graph)
+	assert.Equal(t, 4, graph.TotalInstructions)
+	assert.True(t, graph.HasInstruction("FROM"))
+	assert.True(t, graph.HasInstruction("RUN"))
+	assert.True(t, graph.HasInstruction("USER"))
+	assert.True(t, graph.HasInstruction("CMD"))
+}
+
+func TestDockerfileParser_Parse_MultiStage(t *testing.T) {
+	parser := NewDockerfileParser()
+
+	dockerfile := []byte(`FROM golang:1.21 AS builder
+RUN go build -o app
+
+FROM alpine:3.18
+COPY --from=builder /app /app
+CMD ["/app"]
+`)
+
+	graph, err := parser.Parse("Dockerfile", dockerfile)
+
+	assert.NoError(t, err)
+	assert.True(t, graph.IsMultiStage())
+	assert.Equal(t, 5, graph.TotalInstructions)
+	assert.Len(t, graph.GetInstructions("FROM"), 2)
+}
+
+func TestDockerfileParser_Parse_AllInstructions(t *testing.T) {
+	parser := NewDockerfileParser()
+
+	// Dockerfile with all 18 instruction types
+	dockerfile := []byte(`FROM ubuntu:20.04 AS base
+MAINTAINER test@example.com
+RUN apt-get update
+COPY src /app/src
+ADD archive.tar.gz /app/
+ENV APP_ENV=production
+ARG VERSION=1.0
+USER appuser
+EXPOSE 8080/tcp
+WORKDIR /app
+VOLUME ["/data"]
+SHELL ["/bin/bash", "-c"]
+HEALTHCHECK --interval=30s CMD curl -f http://localhost/
+LABEL version="1.0"
+STOPSIGNAL SIGTERM
+ONBUILD RUN echo "building"
+CMD ["./app"]
+ENTRYPOINT ["/entrypoint.sh"]
+`)
+
+	graph, err := parser.Parse("Dockerfile", dockerfile)
+
+	assert.NoError(t, err)
+	assert.Equal(t, 18, graph.TotalInstructions)
+
+	// Verify each instruction type is present
+	instructionTypes := []string{
+		"FROM", "MAINTAINER", "RUN", "COPY", "ADD", "ENV", "ARG",
+		"USER", "EXPOSE", "WORKDIR", "VOLUME", "SHELL", "HEALTHCHECK",
+		"LABEL", "STOPSIGNAL", "ONBUILD", "CMD", "ENTRYPOINT",
+	}
+
+	for _, instType := range instructionTypes {
+		assert.True(t, graph.HasInstruction(instType),
+			"Missing instruction: %s", instType)
+	}
+}
+
+func TestDockerfileParser_Parse_EmptyDockerfile(t *testing.T) {
+	parser := NewDockerfileParser()
+
+	dockerfile := []byte(`# Just a comment
+`)
+
+	graph, err := parser.Parse("Dockerfile", dockerfile)
+
+	assert.NoError(t, err)
+	assert.Equal(t, 0, graph.TotalInstructions)
+}
+
+func TestDockerfileParser_Parse_LineNumbers(t *testing.T) {
+	parser := NewDockerfileParser()
+
+	dockerfile := []byte(`# Comment
+FROM ubuntu:20.04
+
+RUN apt-get update
+`)
+
+	graph, err := parser.Parse("Dockerfile", dockerfile)
+
+	assert.NoError(t, err)
+
+	// Only index into a result once its length is confirmed, so a regression
+	// shows up as a failed assertion rather than an index-out-of-range panic.
+	fromNodes := graph.GetInstructions("FROM")
+	if assert.Len(t, fromNodes, 1) {
+		assert.Equal(t, 2, fromNodes[0].LineNumber)
+	}
+
+	runNodes := graph.GetInstructions("RUN")
+	if assert.Len(t, runNodes, 1) {
+		assert.Equal(t, 4, runNodes[0].LineNumber)
+	}
+}
+
+func TestIsInstructionNode(t *testing.T) {
+	tests := []struct {
+		nodeType string
+		expected bool
+	}{
+		{"from_instruction", true},
+		{"run_instruction", true},
+		{"copy_instruction", true},
+		{"comment", false},
+		{"blank_line", false},
+		{"source_file", false},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.nodeType, func(t *testing.T) {
+			// No tree-sitter node is built here; the string-keyed helper is
+			// exercised directly with the node type name.
+			result := isInstructionNodeType(tt.nodeType)
+			assert.Equal(t, tt.expected, result)
+		})
+	}
+}
+
+// Helper for testing without actual tree-sitter node.
+// It reports whether nodeType is one of the 18 instruction node type names;
+// the list mirrors the table used by isInstructionNode and must be kept in
+// sync with it by hand.
+func isInstructionNodeType(nodeType string) bool {
+	switch nodeType {
+	case "from_instruction",
+		"run_instruction",
+		"copy_instruction",
+		"add_instruction",
+		"env_instruction",
+		"arg_instruction",
+		"user_instruction",
+		"expose_instruction",
+		"workdir_instruction",
+		"cmd_instruction",
+		"entrypoint_instruction",
+		"volume_instruction",
+		"shell_instruction",
+		"healthcheck_instruction",
+		"label_instruction",
+		"onbuild_instruction",
+		"stopsignal_instruction",
+		"maintainer_instruction":
+		return true
+	}
+	return false
+}
+
+// TestExtractInstructionType checks that known node types map to their
+// upper-case keyword and that an unknown type is passed through unchanged.
+func TestExtractInstructionType(t *testing.T) {
+	cases := []struct {
+		nodeType string
+		expected string
+	}{
+		{"from_instruction", "FROM"},
+		{"run_instruction", "RUN"},
+		{"copy_instruction", "COPY"},
+		{"user_instruction", "USER"},
+		{"healthcheck_instruction", "HEALTHCHECK"},
+		{"unknown_type", "unknown_type"},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.nodeType, func(t *testing.T) {
+			assert.Equal(t, tc.expected, extractInstructionType(tc.nodeType))
+		})
+	}
+}