diff --git a/.buildkite/pipeline.yaml b/.buildkite/pipeline.yaml index 6b106c639..a42a55baa 100644 --- a/.buildkite/pipeline.yaml +++ b/.buildkite/pipeline.yaml @@ -1,5 +1,88 @@ dag: true +x-if-changed-patterns: + - &if_changed_base_linux + include: + - "packer/linux/base/**" + - "packer/linux/shared/**" + - ".buildkite/steps/packer.sh" + - "Makefile" + + - &if_changed_base_windows + include: + - "packer/windows/base/**" + - "packer/windows/shared/**" + - ".buildkite/steps/packer.sh" + - "Makefile" + + - &if_changed_stack_linux + include: + - "packer/linux/**" + - "plugins/**" + - ".buildkite/steps/packer.sh" + - "Makefile" + - "internal/**" + - "goss.yaml" + exclude: + - "**/*.md" + - "**/README*" + + - &if_changed_stack_windows + include: + - "packer/windows/**" + - "plugins/**" + - ".buildkite/steps/packer.sh" + - "Makefile" + - "internal/**" + exclude: + - "**/*.md" + - "**/README*" + + - &if_changed_launch_test_delete_linux + include: + - "packer/linux/**" + - "plugins/**" + - ".buildkite/steps/packer.sh" + - ".buildkite/steps/launch.sh" + - "templates/**" + - "Makefile" + - "internal/**" + - "goss.yaml" + exclude: + - "**/*.md" + - "**/README*" + + - &if_changed_launch_test_delete_windows + include: + - "packer/windows/**" + - "plugins/**" + - ".buildkite/steps/packer.sh" + - ".buildkite/steps/launch.sh" + - "templates/**" + - "Makefile" + - "internal/**" + exclude: + - "**/*.md" + - "**/README*" + + - &if_changed_deploy_service_role + include: + - "packer/**" + - "plugins/**" + - ".buildkite/steps/packer.sh" + - ".buildkite/steps/launch.sh" + - ".buildkite/steps/delete.sh" + - ".buildkite/steps/deploy-service-role-stack.sh" + - "templates/**" + - "Makefile" + - "internal/**" + - "goss.yaml" + exclude: + - "**/*.md" + - "**/README*" + + + steps: - group: ":lint-roller: Linting" key: linting @@ -46,6 +129,7 @@ steps: queue: "${BUILDKITE_AGENT_META_DATA_QUEUE}" command: .buildkite/steps/deploy-service-role-stack.sh if: build.source != "schedule" + if_changed: 
*if_changed_deploy_service_role depends_on: - "linting" - "fixperms-tests" @@ -66,7 +150,7 @@ steps: AMI_PUBLIC: false agents: queue: "${BUILDKITE_AGENT_META_DATA_QUEUE}" - if_changed: "packer/windows/base/**" + if_changed: *if_changed_base_windows depends_on: - "linting" - "fixperms-tests" @@ -83,6 +167,7 @@ steps: agents: queue: "${BUILDKITE_AGENT_META_DATA_QUEUE}" if: build.source != "schedule" + if_changed: *if_changed_stack_windows depends_on: - "packer-base-windows-amd64" - "linting" @@ -92,11 +177,14 @@ steps: - id: "launch-windows-amd64" name: ":cloudformation: :windows: AMD64 Launch" - command: .buildkite/steps/launch.sh windows amd64 + command: + - .buildkite/steps/ensure_ami_metadata.py windows amd64 + - .buildkite/steps/launch.sh windows amd64 agents: queue: "${BUILDKITE_AGENT_META_DATA_QUEUE}" artifact_paths: "build/aws-stack.yml" if: build.source != "schedule" + if_changed: *if_changed_launch_test_delete_windows depends_on: - "packer-windows-amd64" - "deploy-service-role-stack" @@ -116,6 +204,7 @@ steps: stack: "buildkite-aws-stack-test-windows-amd64-${BUILDKITE_BUILD_NUMBER}" queue: "testqueue-windows-amd64-${BUILDKITE_BUILD_NUMBER}" if: build.source != "schedule" + if_changed: *if_changed_launch_test_delete_windows depends_on: - "launch-windows-amd64" @@ -126,6 +215,7 @@ steps: agents: queue: "${BUILDKITE_AGENT_META_DATA_QUEUE}" if: build.source != "schedule" + if_changed: *if_changed_launch_test_delete_windows depends_on: - "test-windows-amd64" plugins: @@ -139,7 +229,7 @@ steps: AMI_PUBLIC: false agents: queue: "${BUILDKITE_AGENT_META_DATA_QUEUE}" - if_changed: "packer/linux/base/**" + if_changed: *if_changed_base_linux depends_on: - "linting" - "fixperms-tests" @@ -156,6 +246,7 @@ steps: agents: queue: "${BUILDKITE_AGENT_META_DATA_QUEUE}" if: build.source != "schedule" + if_changed: *if_changed_stack_linux depends_on: - "packer-base-linux-amd64" - "linting" @@ -165,12 +256,15 @@ steps: - id: "launch-linux-amd64" name: ":cloudformation: :linux: 
AMD64 Launch" - command: .buildkite/steps/launch.sh linux + command: + - .buildkite/steps/ensure_ami_metadata.py linux amd64 + - .buildkite/steps/launch.sh linux retry: { automatic: { limit: 3 } } agents: queue: "${BUILDKITE_AGENT_META_DATA_QUEUE}" artifact_paths: "build/aws-stack.yml" if: build.source != "schedule" + if_changed: *if_changed_launch_test_delete_linux depends_on: - "packer-linux-amd64" - "deploy-service-role-stack" @@ -190,6 +284,7 @@ steps: stack: "buildkite-aws-stack-test-linux-amd64-${BUILDKITE_BUILD_NUMBER}" queue: "testqueue-linux-amd64-${BUILDKITE_BUILD_NUMBER}" if: build.source != "schedule" + if_changed: *if_changed_launch_test_delete_linux depends_on: - "launch-linux-amd64" @@ -200,6 +295,7 @@ steps: agents: queue: "${BUILDKITE_AGENT_META_DATA_QUEUE}" if: build.source != "schedule" + if_changed: *if_changed_launch_test_delete_linux depends_on: - "test-linux-amd64" plugins: @@ -213,7 +309,7 @@ steps: AMI_PUBLIC: false agents: queue: "${BUILDKITE_AGENT_META_DATA_QUEUE}" - if_changed: "packer/linux/base/**" + if_changed: *if_changed_base_linux depends_on: - "linting" - "fixperms-tests" @@ -230,6 +326,7 @@ steps: agents: queue: "${BUILDKITE_AGENT_META_DATA_QUEUE}" if: build.source != "schedule" + if_changed: *if_changed_stack_linux depends_on: - "packer-base-linux-arm64" - "linting" @@ -239,11 +336,14 @@ steps: - id: "launch-linux-arm64" name: ":cloudformation: :linux: ARM64 Launch" - command: .buildkite/steps/launch.sh linux arm64 + command: + - .buildkite/steps/ensure_ami_metadata.py linux arm64 + - .buildkite/steps/launch.sh linux arm64 agents: queue: "${BUILDKITE_AGENT_META_DATA_QUEUE}" artifact_paths: "build/aws-stack.yml" if: build.source != "schedule" + if_changed: *if_changed_launch_test_delete_linux depends_on: - "packer-linux-arm64" - "deploy-service-role-stack" @@ -263,6 +363,7 @@ steps: stack: "buildkite-aws-stack-test-linux-arm64-${BUILDKITE_BUILD_NUMBER}" queue: "testqueue-linux-arm64-${BUILDKITE_BUILD_NUMBER}" if: build.source != 
"schedule" + if_changed: *if_changed_launch_test_delete_linux depends_on: - "launch-linux-arm64" @@ -273,6 +374,7 @@ steps: agents: queue: "${BUILDKITE_AGENT_META_DATA_QUEUE}" if: build.source != "schedule" + if_changed: *if_changed_launch_test_delete_linux depends_on: - "test-linux-arm64" plugins: @@ -284,6 +386,7 @@ steps: agents: queue: "${BUILDKITE_AGENT_META_DATA_QUEUE}" if: build.source != "schedule" + if_changed: *if_changed_deploy_service_role depends_on: - "delete-windows-amd64" - "delete-linux-amd64" @@ -298,6 +401,7 @@ steps: queue: "${BUILDKITE_AGENT_META_DATA_QUEUE}" artifact_paths: "build/mappings.yml" if: build.source != "schedule" + if_changed: *if_changed_deploy_service_role depends_on: - "test-linux-amd64" - "test-linux-arm64" @@ -315,6 +419,7 @@ steps: concurrency_method: eager artifact_paths: "build/*.yml" if: build.source != "schedule" + if_changed: *if_changed_deploy_service_role depends_on: "copy-ami" plugins: - *aws_role_plugin @@ -325,6 +430,7 @@ steps: agents: queue: "${BUILDKITE_AGENT_META_DATA_QUEUE}" if: build.source != "schedule" + if_changed: *if_changed_deploy_service_role depends_on: "publish" plugins: - *aws_role_plugin diff --git a/.buildkite/steps/ensure_ami_metadata.py b/.buildkite/steps/ensure_ami_metadata.py new file mode 100755 index 000000000..c06e81c34 --- /dev/null +++ b/.buildkite/steps/ensure_ami_metadata.py @@ -0,0 +1,159 @@ +#!/usr/bin/env python3 +""" +Ensure AMI metadata is set for Stack AMI builds. + +This script checks if the packer build step set AMI metadata. If not, +it fetches the AMI ID from the main branch CloudFormation template, +which happens when the build was skipped due to if_changed conditions. 
import os
import re
import subprocess
import sys
import urllib.request
from typing import Optional

# CloudFormation template published for the main branch. Its per-region AMI
# mapping is the fallback source when this build has no AMI meta-data.
MAIN_TEMPLATE_URL = "https://s3.amazonaws.com/buildkite-aws-stack/main/aws-stack.yml"

# Upper bound on the template download so a stalled connection fails the step
# instead of hanging it.
TEMPLATE_FETCH_TIMEOUT_SECONDS = 30


def get_metadata(key: str) -> Optional[str]:
    """Return the Buildkite meta-data value stored under *key*, if any.

    Args:
        key: Meta-data key to look up.

    Returns:
        The stripped value, or None when ``buildkite-agent`` is not on PATH,
        when the lookup command exits non-zero, or when the value is empty.
    """
    try:
        result = subprocess.run(
            ["buildkite-agent", "meta-data", "get", key],
            capture_output=True,
            text=True,
            check=False,
        )
    except FileNotFoundError:
        print("Warning: buildkite-agent not found", file=sys.stderr)
        return None

    if result.returncode != 0:
        # The exit status is not inspected further here, so echo whatever the
        # agent reported: the caller falls back to another AMI on None and
        # that decision should be traceable in the job log.
        detail = result.stderr.strip()
        if detail:
            print(
                f"Warning: could not read metadata '{key}': {detail}",
                file=sys.stderr,
            )
        return None

    return result.stdout.strip() or None


def set_metadata(key: str, value: str) -> None:
    """Store *value* under *key* in the Buildkite build meta-data.

    Args:
        key: Meta-data key to write.
        value: Value to store.

    Raises:
        subprocess.CalledProcessError: If ``buildkite-agent`` exits non-zero.
        FileNotFoundError: If ``buildkite-agent`` is not on PATH.
    """
    try:
        subprocess.run(
            ["buildkite-agent", "meta-data", "set", key, value],
            check=True,
            capture_output=True,
            text=True,
        )
    except subprocess.CalledProcessError as e:
        print(f"ERROR: Failed to set metadata: {e.stderr}", file=sys.stderr)
        raise
    except FileNotFoundError:
        print("ERROR: buildkite-agent not found", file=sys.stderr)
        raise


def fetch_ami_from_template(
    os_type: str,
    arch: str,
    region: str,
    template_url: str = MAIN_TEMPLATE_URL,
) -> str:
    """Fetch an AMI ID from the main branch CloudFormation template.

    Args:
        os_type: Operating system (linux or windows).
        arch: Architecture (amd64 or arm64). Ignored for windows, which has a
            single mapping key.
        region: AWS region whose mapping line is searched.
        template_url: Location of the template; defaults to the published
            main branch template.

    Returns:
        AMI ID string.

    Raises:
        RuntimeError: If the template cannot be downloaded or holds no AMI
            for the requested region / OS / architecture.
    """
    print(f"--- Fetching AMI ID from main branch template for {os_type}/{arch}")

    try:
        with urllib.request.urlopen(
            template_url, timeout=TEMPLATE_FETCH_TIMEOUT_SECONDS
        ) as response:
            template_content = response.read().decode("utf-8")
    except Exception as e:
        raise RuntimeError(
            f"Failed to download main branch template from {template_url}: {e}"
        ) from e

    if os_type == "windows":
        key_name = "windows"
    elif arch == "arm64":
        key_name = "linuxarm64"
    else:
        key_name = "linuxamd64"

    # Template format:
    #   "  us-east-1: { linuxamd64: ami-xxx, linuxarm64: ami-yyy, windows: ami-zzz }"
    # \b stops the key from matching as the tail of a longer key name.
    pattern = re.compile(
        rf"^\s+{re.escape(region)}\s*:.*\b{re.escape(key_name)}\s*:\s*(ami-[a-z0-9]+)"
    )

    for line in template_content.splitlines():
        match = pattern.search(line)
        if match:
            ami_id = match.group(1)
            print(f"Found AMI ID: {ami_id}")
            return ami_id

    raise RuntimeError(
        f"Could not find AMI ID for region {region}, os {os_type}, arch {arch} in main template"
    )


def ensure_ami_metadata(os_type: str, arch: str) -> None:
    """Ensure AMI meta-data is set, fetching it from the template if necessary.

    The key checked and written is ``{os_type}_{arch}_image_id``. An existing
    value is left untouched.

    Args:
        os_type: Operating system (linux or windows).
        arch: Architecture (amd64 or arm64).

    Raises:
        RuntimeError: If AWS_REGION is unset or no AMI can be resolved.
    """
    metadata_key = f"{os_type}_{arch}_image_id"

    existing_ami = get_metadata(metadata_key)
    if existing_ami:
        print(f"AMI metadata already set: {existing_ami}")
        return

    region = os.environ.get("AWS_REGION")
    if not region:
        raise RuntimeError("AWS_REGION environment variable not set")

    print("AMI metadata not found, fetching from main branch template...")
    ami_id = fetch_ami_from_template(os_type, arch, region)

    set_metadata(metadata_key, ami_id)
    print(f"Set AMI metadata: {metadata_key}={ami_id}")


def main() -> int:
    """Validate the command-line arguments and run the meta-data check.

    Returns:
        0 on success, 1 on bad usage or any failure.
    """
    if len(sys.argv) != 3:
        print(f"Usage: {sys.argv[0]} <os> <arch>", file=sys.stderr)
        print("  os: linux or windows", file=sys.stderr)
        print("  arch: amd64 or arm64", file=sys.stderr)
        return 1

    os_type = sys.argv[1]
    arch = sys.argv[2]

    if os_type not in ("linux", "windows"):
        print(
            f"ERROR: Invalid OS '{os_type}', must be 'linux' or 'windows'",
            file=sys.stderr,
        )
        return 1

    if arch not in ("amd64", "arm64"):
        print(
            f"ERROR: Invalid arch '{arch}', must be 'amd64' or 'arm64'", file=sys.stderr
        )
        return 1

    try:
        ensure_ami_metadata(os_type, arch)
        return 0
    except Exception as e:
        # Top-level boundary: report the failure and turn it into an exit code.
        print(f"ERROR: {e}", file=sys.stderr)
        return 1


if __name__ == "__main__":
    sys.exit(main())