Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
158 changes: 158 additions & 0 deletions sast-engine/graph/docker/parser.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
package docker

import (
"context"
"fmt"
"os"

sitter "github.com/smacker/go-tree-sitter"
"github.com/smacker/go-tree-sitter/dockerfile"
)

// DockerfileParser handles parsing of Dockerfile content using tree-sitter.
//
// Create instances with NewDockerfileParser, which installs the Dockerfile
// grammar on the underlying parser; the zero value holds a nil parser.
type DockerfileParser struct {
// parser is the tree-sitter parser that Parse feeds Dockerfile bytes into.
parser *sitter.Parser
}

// NewDockerfileParser returns a DockerfileParser whose tree-sitter parser has
// already been configured with the Dockerfile grammar.
func NewDockerfileParser() *DockerfileParser {
	dp := &DockerfileParser{parser: sitter.NewParser()}
	dp.parser.SetLanguage(dockerfile.GetLanguage())
	return dp
}

// ParseFile reads the file at filePath and hands its bytes to Parse.
//
// A read failure is returned wrapped; otherwise the result and error are
// exactly what Parse produces for the file's content.
func (dp *DockerfileParser) ParseFile(filePath string) (*DockerfileGraph, error) {
	data, readErr := os.ReadFile(filePath)
	if readErr != nil {
		return nil, fmt.Errorf("failed to read Dockerfile: %w", readErr)
	}

	return dp.Parse(filePath, data)
}

// Parse parses Dockerfile content and returns a DockerfileGraph.
//
// filePath is not read from disk here; it is only passed to
// NewDockerfileGraph to label the resulting graph. content holds the raw
// Dockerfile bytes that are parsed.
//
// An error is returned only when the tree-sitter parse call itself fails.
// Syntax errors inside the Dockerfile are tolerated: the graph is built from
// whatever instruction nodes the resulting AST contains.
func (dp *DockerfileParser) Parse(filePath string, content []byte) (*DockerfileGraph, error) {
	// Parse into a tree-sitter AST.
	tree, err := dp.parser.ParseCtx(context.Background(), nil, content)
	if err != nil {
		return nil, fmt.Errorf("failed to parse Dockerfile: %w", err)
	}
	// The tree must stay open until the conversion below has finished
	// reading nodes from it.
	defer tree.Close()

	// The root node's error flag is intentionally not inspected: partial
	// parsing is still useful, so a Dockerfile with syntax errors yields a
	// graph rather than a failure.
	graph := NewDockerfileGraph(filePath)
	dp.convertASTToGraph(tree.RootNode(), content, graph)

	return graph, nil
}

// convertASTToGraph fills graph from the direct children of rootNode.
//
// Each child accepted by isInstructionNode is converted with
// convertInstruction and appended via graph.AddInstruction, in child order;
// every other child (comments, blank lines, ...) is ignored. Once all
// children have been visited, graph.AnalyzeBuildStages is invoked.
func (dp *DockerfileParser) convertASTToGraph(
	rootNode *sitter.Node,
	source []byte,
	graph *DockerfileGraph,
) {
	childCount := int(rootNode.ChildCount())
	for idx := 0; idx < childCount; idx++ {
		candidate := rootNode.Child(idx)
		if isInstructionNode(candidate) {
			graph.AddInstruction(dp.convertInstruction(candidate, source))
		}
	}

	// Stage analysis needs the complete instruction list, so it runs last.
	graph.AnalyzeBuildStages()
}

// isInstructionNode reports whether node is one of the Dockerfile instruction
// node types handled by this package (from_instruction, run_instruction, ...).
//
// A nil node is treated as a non-instruction. The lookup is a switch rather
// than a map literal so that no map is built on each call; this function
// runs once per child of the AST root.
func isInstructionNode(node *sitter.Node) bool {
	if node == nil {
		return false
	}

	switch node.Type() {
	case "from_instruction",
		"run_instruction",
		"copy_instruction",
		"add_instruction",
		"env_instruction",
		"arg_instruction",
		"user_instruction",
		"expose_instruction",
		"workdir_instruction",
		"cmd_instruction",
		"entrypoint_instruction",
		"volume_instruction",
		"shell_instruction",
		"healthcheck_instruction",
		"label_instruction",
		"onbuild_instruction",
		"stopsignal_instruction",
		"maintainer_instruction":
		return true
	default:
		return false
	}
}

// convertInstruction builds a minimal DockerfileNode for an instruction node.
//
// This is the placeholder conversion (the full one is planned for PR #3): it
// records only the instruction name derived from the node type, the 1-indexed
// line on which the node starts, and the node's raw text taken from source.
func (dp *DockerfileParser) convertInstruction(
	node *sitter.Node,
	source []byte,
) *DockerfileNode {
	kind := extractInstructionType(node.Type())

	// tree-sitter rows are 0-based; DockerfileNode lines are 1-based.
	line := int(node.StartPoint().Row) + 1

	result := NewDockerfileNode(kind, line)
	result.RawInstruction = node.Content(source)
	return result
}

// extractInstructionType converts a tree-sitter node type to the Dockerfile
// instruction name. For example, "from_instruction" becomes "FROM".
//
// Node types that are not one of the known instruction types are returned
// unchanged. The mapping is a switch rather than a map literal so that no
// map is allocated on each call.
func extractInstructionType(nodeType string) string {
	switch nodeType {
	case "from_instruction":
		return "FROM"
	case "run_instruction":
		return "RUN"
	case "copy_instruction":
		return "COPY"
	case "add_instruction":
		return "ADD"
	case "env_instruction":
		return "ENV"
	case "arg_instruction":
		return "ARG"
	case "user_instruction":
		return "USER"
	case "expose_instruction":
		return "EXPOSE"
	case "workdir_instruction":
		return "WORKDIR"
	case "cmd_instruction":
		return "CMD"
	case "entrypoint_instruction":
		return "ENTRYPOINT"
	case "volume_instruction":
		return "VOLUME"
	case "shell_instruction":
		return "SHELL"
	case "healthcheck_instruction":
		return "HEALTHCHECK"
	case "label_instruction":
		return "LABEL"
	case "onbuild_instruction":
		return "ONBUILD"
	case "stopsignal_instruction":
		return "STOPSIGNAL"
	case "maintainer_instruction":
		return "MAINTAINER"
	default:
		// Unknown node types pass through untouched.
		return nodeType
	}
}
196 changes: 196 additions & 0 deletions sast-engine/graph/docker/parser_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,196 @@
package docker

import (
"testing"

"github.com/stretchr/testify/assert"
)

// TestNewDockerfileParser checks that the constructor returns a non-nil
// parser whose internal tree-sitter parser field is populated.
func TestNewDockerfileParser(t *testing.T) {
	dp := NewDockerfileParser()

	assert.NotNil(t, dp)
	assert.NotNil(t, dp.parser)
}

// TestDockerfileParser_Parse_Simple parses a four-instruction, single-stage
// Dockerfile and checks the instruction count and the presence of each
// instruction kind.
func TestDockerfileParser_Parse_Simple(t *testing.T) {
	src := []byte(`FROM ubuntu:20.04
RUN apt-get update
USER appuser
CMD ["/bin/bash"]
`)

	graph, err := NewDockerfileParser().Parse("Dockerfile", src)

	assert.NoError(t, err)
	assert.NotNil(t, graph)
	assert.Equal(t, 4, graph.TotalInstructions)
	for _, name := range []string{"FROM", "RUN", "USER", "CMD"} {
		assert.True(t, graph.HasInstruction(name))
	}
}

// TestDockerfileParser_Parse_MultiStage parses a two-stage build and checks
// that the graph reports multi-stage, counts five instructions, and holds
// two FROM instructions.
func TestDockerfileParser_Parse_MultiStage(t *testing.T) {
	src := []byte(`FROM golang:1.21 AS builder
RUN go build -o app

FROM alpine:3.18
COPY --from=builder /app /app
CMD ["/app"]
`)

	graph, err := NewDockerfileParser().Parse("Dockerfile", src)

	assert.NoError(t, err)
	assert.True(t, graph.IsMultiStage())
	assert.Equal(t, 5, graph.TotalInstructions)
	assert.Len(t, graph.GetInstructions("FROM"), 2)
}

// TestDockerfileParser_Parse_AllInstructions parses a Dockerfile that uses
// each of the 18 supported instruction types exactly once and checks that
// every one of them ends up in the graph.
func TestDockerfileParser_Parse_AllInstructions(t *testing.T) {
	parser := NewDockerfileParser()

	// Dockerfile with all 18 instruction types. The MAINTAINER value is a
	// plain placeholder address; only the instruction keyword matters here.
	dockerfile := []byte(`FROM ubuntu:20.04 AS base
MAINTAINER maintainer@example.com
RUN apt-get update
COPY src /app/src
ADD archive.tar.gz /app/
ENV APP_ENV=production
ARG VERSION=1.0
USER appuser
EXPOSE 8080/tcp
WORKDIR /app
VOLUME ["/data"]
SHELL ["/bin/bash", "-c"]
HEALTHCHECK --interval=30s CMD curl -f http://localhost/
LABEL version="1.0"
STOPSIGNAL SIGTERM
ONBUILD RUN echo "building"
CMD ["./app"]
ENTRYPOINT ["/entrypoint.sh"]
`)

	graph, err := parser.Parse("Dockerfile", dockerfile)

	assert.NoError(t, err)
	assert.Equal(t, 18, graph.TotalInstructions)

	// Verify each instruction type is present.
	instructionTypes := []string{
		"FROM", "MAINTAINER", "RUN", "COPY", "ADD", "ENV", "ARG",
		"USER", "EXPOSE", "WORKDIR", "VOLUME", "SHELL", "HEALTHCHECK",
		"LABEL", "STOPSIGNAL", "ONBUILD", "CMD", "ENTRYPOINT",
	}

	for _, instType := range instructionTypes {
		assert.True(t, graph.HasInstruction(instType),
			"Missing instruction: %s", instType)
	}
}

// TestDockerfileParser_Parse_EmptyDockerfile checks that a Dockerfile made
// up of a single comment parses without error and yields zero instructions.
func TestDockerfileParser_Parse_EmptyDockerfile(t *testing.T) {
	commentOnly := []byte(`# Just a comment
`)

	graph, err := NewDockerfileParser().Parse("Dockerfile", commentOnly)

	assert.NoError(t, err)
	assert.Equal(t, 0, graph.TotalInstructions)
}

// TestDockerfileParser_Parse_LineNumbers checks that instructions carry
// 1-indexed line numbers that account for preceding comment and blank lines:
// FROM is expected on line 2 and RUN on line 4.
func TestDockerfileParser_Parse_LineNumbers(t *testing.T) {
	src := []byte(`# Comment
FROM ubuntu:20.04

RUN apt-get update
`)

	graph, err := NewDockerfileParser().Parse("Dockerfile", src)
	assert.NoError(t, err)

	froms := graph.GetInstructions("FROM")
	assert.Len(t, froms, 1)
	assert.Equal(t, 2, froms[0].LineNumber)

	runs := graph.GetInstructions("RUN")
	assert.Len(t, runs, 1)
	assert.Equal(t, 4, runs[0].LineNumber)
}

// TestIsInstructionNode table-tests the classification of node-type names as
// instruction or non-instruction.
//
// NOTE(review): the assertions call the test-local isInstructionNodeType
// helper, which works on plain strings, so isInstructionNode itself is not
// invoked by this test — confirm that is intended.
func TestIsInstructionNode(t *testing.T) {
	cases := []struct {
		in   string
		want bool
	}{
		{"from_instruction", true},
		{"run_instruction", true},
		{"copy_instruction", true},
		{"comment", false},
		{"blank_line", false},
		{"source_file", false},
	}

	for _, tc := range cases {
		t.Run(tc.in, func(t *testing.T) {
			got := isInstructionNodeType(tc.in)
			assert.Equal(t, tc.want, got)
		})
	}
}

// isInstructionNodeType is a test helper that classifies a node-type name
// without needing an actual tree-sitter node. It reports true for the 18
// "*_instruction" names listed below and false for anything else.
func isInstructionNodeType(nodeType string) bool {
	switch nodeType {
	case "from_instruction",
		"run_instruction",
		"copy_instruction",
		"add_instruction",
		"env_instruction",
		"arg_instruction",
		"user_instruction",
		"expose_instruction",
		"workdir_instruction",
		"cmd_instruction",
		"entrypoint_instruction",
		"volume_instruction",
		"shell_instruction",
		"healthcheck_instruction",
		"label_instruction",
		"onbuild_instruction",
		"stopsignal_instruction",
		"maintainer_instruction":
		return true
	}
	return false
}

// TestExtractInstructionType table-tests the mapping from tree-sitter node
// types to instruction names, including the pass-through of an unknown type.
func TestExtractInstructionType(t *testing.T) {
	cases := []struct {
		in   string
		want string
	}{
		{"from_instruction", "FROM"},
		{"run_instruction", "RUN"},
		{"copy_instruction", "COPY"},
		{"user_instruction", "USER"},
		{"healthcheck_instruction", "HEALTHCHECK"},
		{"unknown_type", "unknown_type"},
	}

	for _, tc := range cases {
		t.Run(tc.in, func(t *testing.T) {
			got := extractInstructionType(tc.in)
			assert.Equal(t, tc.want, got)
		})
	}
}
Loading