Azure · awesomenix · Mar 25, 2026 · Mar 16, 2026 · cameronmeissner · Mar 17, 2026
@@ -218,6 +218,15 @@ var (
 		OSDiskSizeGB: 60,
 	}
 
+	VHDAzureLinuxV3Gen2Arm64 = &Image{
+		Name:         "azurelinuxv3gen2arm64",
+		OS:           OSAzureLinux,
+		Arch:         "arm64",
+		Distro:       datamodel.AKSAzureLinuxV3Arm64Gen2,
+		Gallery:      imageGalleryLinux,
+		OSDiskSizeGB: 60,
+	}
+
 	VHDACLGen2TL = &Image{
 		Name:         "aclgen2TL",
 		OS:           OSACL,

@@ -190,6 +190,9 @@ func Test_DCGM_Exporter_Compatibility(t *testing.T) {
 		t.Run(tc.name, func(t *testing.T) {
 			RunScenario(t, &Scenario{
 				Description: tc.description,
+				Tags: Tags{
+					GPU: true,
+				},
 				Config: Config{
 					Cluster:                ClusterKubenet,
 					VHD:                    tc.vhd,

@@ -106,6 +106,25 @@ func Test_Flatcar_ARM64(t *testing.T) {
 	})
 }
 
+func Test_AzureLinuxV3_ARM64(t *testing.T) {
+	RunScenario(t, &Scenario{
+		Description: "Tests that a node using a AzureLinuxV3 VHD on ARM64 architecture can be properly bootstrapped",
+		Config: Config{
+			Cluster: ClusterKubenet,
+			VHD:     config.VHDAzureLinuxV3Gen2Arm64,
+			BootstrapConfigMutator: func(nbc *datamodel.NodeBootstrappingConfiguration) {
+				nbc.AgentPoolProfile.VMSize = "Standard_D2pds_V5"
+				nbc.IsARM64 = true
+			},
+			Validator: func(ctx context.Context, s *Scenario) {
+			},
+			VMConfigMutator: func(vmss *armcompute.VirtualMachineScaleSet) {
+				vmss.SKU.Name = to.Ptr("Standard_D2pds_V5")
+			},
+		},
+	})
+}
+
 func Test_Flatcar_AzureCNI(t *testing.T) {
 	RunScenario(t, &Scenario{
 		Description: "Flatcar scenario on a cluster configured with Azure CNI",

@@ -164,20 +164,16 @@ configureCustomCaCertificate() {
     done
     # blocks until svc is considered active, which will happen when ExecStart command terminates with code 0
     systemctl restart update_certs.service || exit $ERR_UPDATE_CA_CERTS
-    # containerd has to be restarted after new certs are added to the trust store, otherwise they will not be used until restart happens
-    systemctl restart containerd
 }
 
 configureContainerdUlimits() {
   CONTAINERD_ULIMIT_DROP_IN_FILE_PATH="/etc/systemd/system/containerd.service.d/set_ulimits.conf"
+  mkdir -p "$(dirname "${CONTAINERD_ULIMIT_DROP_IN_FILE_PATH}")"
   touch "${CONTAINERD_ULIMIT_DROP_IN_FILE_PATH}"
   chmod 0600 "${CONTAINERD_ULIMIT_DROP_IN_FILE_PATH}"
   tee "${CONTAINERD_ULIMIT_DROP_IN_FILE_PATH}" > /dev/null <<EOF
 $(echo "$CONTAINERD_ULIMITS" | tr ' ' '\n')
 EOF
-
-  systemctl daemon-reload
-  systemctl restart containerd
 }
 
 # file paths defined outside so configureAzureJson can be unit tested
@@ -301,9 +297,10 @@ EOF
 }
 
 configureCNI() {
-    # needed for the iptables rules to work on bridges
-    retrycmd_if_failure 120 5 25 modprobe br_netfilter || exit $ERR_MODPROBE_FAIL
+    # needed for bridge iptables rules and customer-configured conntrack sysctls
+    retrycmd_if_failure 120 5 25 modprobe -a br_netfilter nf_conntrack || exit $ERR_MODPROBE_FAIL
     echo -n "br_netfilter" > /etc/modules-load.d/br_netfilter.conf
+    echo -n "nf_conntrack" > /etc/modules-load.d/nf_conntrack.conf
     configureCNIIPTables
 }
 
@@ -381,7 +378,7 @@ net.ipv6.conf.all.forwarding = 1
 net.bridge.bridge-nf-call-iptables = 1
 EOF
   retrycmd_if_failure 120 5 25 sysctl --system || exit $ERR_SYSCTL_RELOAD
-  systemctlEnableAndStart containerd 30 || exit $ERR_SYSTEMCTL_START_FAIL
+  systemctlEnableAndStartNoBlock containerd 30 || exit $ERR_SYSTEMCTL_START_FAIL
 }
 
 configureContainerdRegistryHost() {
@@ -424,6 +421,7 @@ ensureNoDupOnPromiscuBridge() {
 }
 
 ensureArtifactStreaming() {
+  waitForContainerdReady || exit $ERR_ARTIFACT_STREAMING_INSTALL
   retrycmd_if_failure 120 5 25 time systemctl --quiet enable --now  acr-mirror overlaybd-tcmu overlaybd-snapshotter
   time /opt/acr/bin/acr-config --enable-containerd 'azurecr.io'
 }
@@ -566,6 +564,8 @@ ensurePodInfraContainerImage() {
     POD_INFRA_CONTAINER_IMAGE_DOWNLOAD_DIR="/opt/pod-infra-container-image/downloads"
     POD_INFRA_CONTAINER_IMAGE_TAR="/opt/pod-infra-container-image/pod-infra-container-image.tar"
 
+    waitForContainerdReady || exit $ERR_PULL_POD_INFRA_CONTAINER_IMAGE
+
     pod_infra_container_image=$(get_sandbox_image)
 
     if [ -z "${pod_infra_container_image}" ]; then
@@ -776,17 +776,22 @@ EOF
         logs_to_events "AKS.CSE.ensureKubelet.ensurePodInfraContainerImage" ensurePodInfraContainerImage
     fi
 
-    # start measure-tls-bootstrapping-latency.service without waiting for the main process to start, while ignoring any failures
-    if ! systemctlEnableAndStartNoBlock measure-tls-bootstrapping-latency 30; then
-        echo "failed to start measure-tls-bootstrapping-latency.service"
-    fi
+    local tls_bootstrapping_start_time_filepath="/opt/azure/containers/tls-bootstrap-start-time"
+    date +"%F %T.%3N" > "${tls_bootstrapping_start_time_filepath}"
 
     # start kubelet.service without waiting for the main process to start, though check whether it has entered a failed state after enablement
     if ! systemctlEnableAndStartNoBlock kubelet 240; then
         # append kubelet status to CSE output to ensure we can see it
+        rm -f "${tls_bootstrapping_start_time_filepath}"
         journalctl -u kubelet.service --no-pager || true
         exit $ERR_KUBELET_START_FAIL
     fi
+
+    # start measure-tls-bootstrapping-latency.service without waiting for the main process to start, while ignoring any failures
+    if ! systemctlEnableAndStartNoBlock measure-tls-bootstrapping-latency 30; then
+        rm -f "${tls_bootstrapping_start_time_filepath}"
+        echo "failed to start measure-tls-bootstrapping-latency.service"
+    fi
 }
 
 ensureSnapshotUpdate() {
@@ -923,6 +928,7 @@ configAzurePolicyAddon() {
 
 configGPUDrivers() {
     if [ "$OS" = "$UBUNTU_OS_NAME" ]; then
+        waitForContainerdReady || exit $ERR_GPU_DRIVERS_START_FAIL
         mkdir -p /opt/{actions,gpu}
         ctr -n k8s.io image pull $NVIDIA_DRIVER_IMAGE:$NVIDIA_DRIVER_IMAGE_TAG
         retrycmd_if_failure 5 10 600 bash -c "$CTR_GPU_INSTALL_CMD $NVIDIA_DRIVER_IMAGE:$NVIDIA_DRIVER_IMAGE_TAG gpuinstall /entrypoint.sh install"

@@ -507,19 +507,20 @@ systemctlEnableAndStart() {
     service=$1; timeout=$2
     systemctl_restart 100 5 $timeout $service
     RESTART_STATUS=$?
-    systemctl status $service --no-pager -l > /var/log/azure/$service-status.log
     if [ $RESTART_STATUS -ne 0 ]; then
         echo "$service could not be started"
+        systemctl status $service --no-pager -l > /var/log/azure/$service-status.log || true
         return 1
     fi
     if ! retrycmd_if_failure 120 5 25 systemctl enable $service; then
         echo "$service could not be enabled by systemctl"
+        systemctl status $service --no-pager -l > /var/log/azure/$service-status.log || true
         return 1
     fi
 }
 
 systemctlEnableAndStartNoBlock() {
-    service=$1; timeout=$2; status_check_delay_seconds=${3:-"0"}
+    service=$1; timeout=$2
 
     systemctl_restart_no_block 100 5 $timeout $service
     RESTART_STATUS=$?
@@ -534,21 +535,36 @@ systemctlEnableAndStartNoBlock() {
         systemctl status $service --no-pager -l > /var/log/azure/$service-status.log || true
         return 1
     fi
+}
+
+checkServiceHealth() {
+    local service=$1
+    local state=$(systemctl show -p ActiveState --value "$service")
+
+    if [ "$state" = "active" ]; then
+       return 0
+    fi
 
-    # wait for the specified delay seconds before checking the service status to make sure
-    # it hasn't gone into a failed state
-    sleep $status_check_delay_seconds
+    systemctl status "$service" --no-pager -l > "/var/log/azure/$service-status.log" || true
 
-    if systemctl is-failed $service; then
+    if [ "$state" = "failed" ]; then
         echo "$service is in a failed state"
-        systemctl status $service --no-pager -l > /var/log/azure/$service-status.log || true
         return 1
+    elif [ "$state" = "activating" ]; then
+        echo "$service is still activating, continuing anyway..."
     fi
+}
 
-    # systemctl status only exits with code 0 iff the service is "active",
-    # thus we handle the "activating" case by checking for a non-zero exit code
-    if ! systemctl status $service --no-pager -l > /var/log/azure/$service-status.log; then
-        echo "$service is still activating, continuing anyway..."
+waitForContainerdReady() {
+    local ret=0
+
+    echo "Waiting for containerd to become ready..."
+    retrycmd_if_failure 60 0.1 1 bash -c 'ctr version >/dev/null 2>&1'
+    ret=$?
+    if [ "$ret" -ne 0 ]; then
+        echo "containerd did not become ready"
+        systemctl status containerd --no-pager -l > /var/log/azure/containerd-status.log || true
+        return 1
     fi
 }
 

@@ -618,8 +618,12 @@ installKubeletKubectlFromURL() {
             fi
         fi
     fi
-    install -m0755 "/opt/bin/kubelet-${KUBERNETES_VERSION}" /opt/bin/kubelet
-    install -m0755 "/opt/bin/kubectl-${KUBERNETES_VERSION}" /opt/bin/kubectl
+
+    mv "/opt/bin/kubelet-${KUBERNETES_VERSION}" /opt/bin/kubelet
+    mv "/opt/bin/kubectl-${KUBERNETES_VERSION}" /opt/bin/kubectl
+
+    chown root:root /opt/bin/kubelet /opt/bin/kubectl
+    chmod 0755 /opt/bin/kubelet /opt/bin/kubectl
 
     rm -rf /opt/bin/kubelet-* /opt/bin/kubectl-* /home/hyperkube-downloads &
 }
@@ -686,8 +690,9 @@ labelContainerImage() {
 }
 
 retagMCRImagesForChina() {
+    waitForContainerdReady || exit $ERR_CTR_OPERATION_ERROR
     # shellcheck disable=SC2016
-        allMCRImages=($(ctr --namespace k8s.io images list | grep '^mcr.microsoft.com/' | awk '{print $1}'))
+    allMCRImages=($(ctr --namespace k8s.io images list | grep '^mcr.microsoft.com/' | awk '{print $1}'))
     if [ -z "${allMCRImages}" ]; then
         echo "failed to find mcr images for retag"
         return

@@ -152,9 +152,13 @@ function basePrep {
         echo "Golden image; skipping dependencies installation"
     fi
 
-    # Container runtime already installed on Azure Linux OS Guard
-    if ! isAzureLinuxOSGuard "$OS" "$OS_VARIANT"; then
+    # Container runtime already installed on Azure Linux OS Guard; an explicit containerd override can bypass FULL_INSTALL_REQUIRED for other Linux distros
+    if isAzureLinuxOSGuard "$OS" "$OS_VARIANT"; then
+        echo "Skipping installContainerRuntime because containerd is already available"
+    elif [ "$FULL_INSTALL_REQUIRED" = "true" ] || [ -n "${CONTAINERD_PACKAGE_URL}" ]; then
         logs_to_events "AKS.CSE.installContainerRuntime" installContainerRuntime
+    else
+        echo "Skipping installContainerRuntime because containerd is already available"
     fi
     setupCNIDirs
 
@@ -170,6 +174,9 @@ function basePrep {
     SHOULD_ENFORCE_KUBE_PMC_INSTALL=$(should_enforce_kube_pmc_install)
     logs_to_events "AKS.CSE.configureKubeletAndKubectl" configureKubeletAndKubectl
 
+    # pre-warm kubelet by checking its version.
+    nohup /bin/sh -c '/opt/bin/kubelet --version >/dev/null 2>&1' >/dev/null 2>&1 &
+
     createKubeManifestDir
 
     if [ "${HAS_CUSTOM_SEARCH_DOMAIN}" = "true" ]; then
@@ -194,6 +201,10 @@ function basePrep {
         logs_to_events "AKS.CSE.configureSystemdUseDomains" configureSystemdUseDomains
     fi
 
+    if [ "${SHOULD_CONFIG_CONTAINERD_ULIMITS}" = "true" ]; then
+      logs_to_events "AKS.CSE.setContainerdUlimits" configureContainerdUlimits
+    fi
+
     # containerd should not be configured until cni has been configured first
     logs_to_events "AKS.CSE.ensureContainerd" ensureContainerd
 
@@ -268,14 +279,6 @@ EOF
 
     logs_to_events "AKS.CSE.ensureSysctl" ensureSysctl || exit $ERR_SYSCTL_RELOAD
 
-    if [ "${SHOULD_CONFIG_CONTAINERD_ULIMITS}" = "true" ]; then
-      logs_to_events "AKS.CSE.setContainerdUlimits" configureContainerdUlimits
-    fi
-
-    if [ "${ENSURE_NO_DUPE_PROMISCUOUS_BRIDGE}" = "true" ]; then
-        logs_to_events "AKS.CSE.ensureNoDupOnPromiscuBridge" ensureNoDupOnPromiscuBridge
-    fi
-
     if ! isAzureLinuxOSGuard "$OS" "$OS_VARIANT"; then
         if [ "$OS" = "$UBUNTU_OS_NAME" ] || isMarinerOrAzureLinux "$OS"; then
             logs_to_events "AKS.CSE.ubuntuSnapshotUpdate" ensureSnapshotUpdate
@@ -351,11 +354,6 @@ function nodePrep {
     # By default, never reboot new nodes.
     REBOOTREQUIRED=false
 
-    # Clean up GPU drivers if not a GPU node or if skipping driver install
-    if [ "${GPU_NODE}" != "true" ] || [ "${skip_nvidia_driver_install}" = "true" ]; then
-        logs_to_events "AKS.CSE.cleanUpGPUDrivers" cleanUpGPUDrivers
-    fi
-
     # Install and configure GPU drivers if this is a GPU node
     if [ "${GPU_NODE}" = "true" ] && [ "${skip_nvidia_driver_install}" != "true" ]; then
         echo $(date),$(hostname), "Start configuring GPU drivers"
@@ -484,10 +482,27 @@ function nodePrep {
         exit $VALIDATION_ERR
     fi
 
+    checkServiceHealth containerd || exit $ERR_SYSTEMCTL_START_FAIL
+    if [ "${ENABLE_SECURE_TLS_BOOTSTRAPPING}" = "true" ]; then
+        checkServiceHealth secure-tls-bootstrap || exit $ERR_SYSTEMCTL_START_FAIL
+    fi
+
     logs_to_events "AKS.CSE.ensureKubelet" ensureKubelet
 
+    if [ "${ENSURE_NO_DUPE_PROMISCUOUS_BRIDGE}" = "true" ]; then
+        logs_to_events "AKS.CSE.ensureNoDupOnPromiscuBridge" ensureNoDupOnPromiscuBridge
+    fi
+
     logs_to_events "AKS.CSE.configureNodeExporter" configureNodeExporter
 
+
+    # Clean up GPU drivers if not a GPU node or if skipping driver install
+    if [ "${GPU_NODE}" != "true" ] || [ "${skip_nvidia_driver_install}" = "true" ]; then
+        logs_to_events "AKS.CSE.cleanUpGPUDrivers" cleanUpGPUDrivers
+    fi
+
+    checkServiceHealth kubelet || exit $ERR_KUBELET_FAIL
+
     if $REBOOTREQUIRED; then
         echo 'reboot required, rebooting node in 1 minute'
         /bin/bash -c "shutdown -r 1 &"

@@ -21,6 +21,7 @@ ExecStartPre=-/sbin/ebtables -t nat --list
 ExecStartPre=-/sbin/iptables -t nat --numeric --list
 
 ExecStartPre=/bin/bash /opt/azure/containers/validate-kubelet-credentials.sh
+ExecStartPre=/bin/sh -c 'until [ -S /run/containerd/containerd.sock ]; do sleep 0.1; done'
 
 ExecStart=/opt/bin/kubelet \
         --enable-server \