Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 8 additions & 21 deletions e2e/validators.go
Original file line number Diff line number Diff line change
Expand Up @@ -1509,30 +1509,17 @@ func ValidateNodeExporter(ctx context.Context, s *Scenario) {
ValidateFileExists(ctx, s, skipFile)
ValidateFileExists(ctx, s, "/etc/node-exporter.d/web-config.yml")

// Validate that node-exporter is listening on port 19100
// We verify the port is open using ss/netstat rather than making a full mTLS request,
// since the e2e test environment may not have the correct client certs set up.
// The mTLS configuration is validated by checking that the web-config.yml exists
// and contains the expected TLS settings.
s.T.Logf("Validating node-exporter is listening on port 19100")
// Validate that node-exporter is listening on port 19100 and serving metrics.
// TLS is disabled by default (opt-in via NODE_EXPORTER_TLS_ENABLED=true in /etc/default/node-exporter),
// so we validate by making a plain HTTP request to the metrics endpoint.
s.T.Logf("Validating node-exporter is listening on port 19100 and serving metrics")
command := []string{
"set -ex",
"NODE_IP=$(hostname -I | awk '{print $1}')",
// Verify node-exporter is listening on port 19100
"ss -tlnp | grep -q ':19100' || netstat -tlnp | grep -q ':19100'",
// Extract the listen address from ss, replacing wildcard '*' or '0.0.0.0' with localhost.
"LISTEN_ADDR=$(ss -tlnp | grep ':19100' | awk '{print $4}' | head -1 | sed 's/^\\*/127.0.0.1/; s/^0\\.0\\.0\\.0/127.0.0.1/')",
"curl -sf http://${LISTEN_ADDR}/metrics | grep -q 'node_'",
}
Comment thread
chmill-zz marked this conversation as resolved.
execScriptOnVMForScenarioValidateExitCode(ctx, s, strings.Join(command, "\n"), 0, "node-exporter should be listening on port 19100")

// Verify the web-config.yml has proper TLS configuration
s.T.Logf("Validating node-exporter TLS configuration")
tlsCommand := []string{
"set -ex",
// Verify web-config.yml contains TLS settings
"grep -q 'tls_server_config' /etc/node-exporter.d/web-config.yml",
"grep -q 'client_auth_type' /etc/node-exporter.d/web-config.yml",
"grep -q 'client_ca_file' /etc/node-exporter.d/web-config.yml",
}
execScriptOnVMForScenarioValidateExitCode(ctx, s, strings.Join(tlsCommand, "\n"), 0, "node-exporter TLS config should be properly configured")
execScriptOnVMForScenarioValidateExitCode(ctx, s, strings.Join(command, "\n"), 0, "node-exporter should be listening on port 19100 and serving metrics over HTTP")

s.T.Logf("node-exporter validation passed")
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
tls_server_config:
cert_file: "/etc/kubernetes/certs/kubeletserver.crt"
key_file: "/etc/kubernetes/certs/kubeletserver.key"
client_auth_type: "RequireAndVerifyClientCert"
client_ca_file: "/etc/kubernetes/certs/ca.crt"
# TLS is disabled by default. To enable, set NODE_EXPORTER_TLS_ENABLED=true
# in /etc/default/node-exporter. This file will be overwritten by the startup
# script when TLS is enabled and valid serving certificates are available.
Original file line number Diff line number Diff line change
Expand Up @@ -9,85 +9,82 @@ fi
TLS_CONFIG_PATH="/etc/node-exporter.d/web-config.yml"
TLS_CONFIG_ARG=""

# Ensure TLS config directory exists
mkdir -p "$(dirname "$TLS_CONFIG_PATH")"

# Check IMDS tag to determine cert rotation setting (same logic as CSE)
# If aks-disable-kubelet-serving-certificate-rotation=true, use static certs
# Otherwise, use rotation cert
ROTATION_DISABLED="false"
IMDS_CACHE_FILE="/opt/azure/containers/imds_instance_metadata_cache.json"

# Use CSE's cached IMDS response if available, otherwise fetch directly
if [ -f "$IMDS_CACHE_FILE" ]; then
IMDS_RESPONSE=$(cat "$IMDS_CACHE_FILE")
else
IMDS_RESPONSE=$(curl -fsSL -H "Metadata: true" --noproxy "*" --retry 20 --retry-delay 2 --retry-connrefused --connect-timeout 5 --max-time 60 "http://169.254.169.254/metadata/instance?api-version=2021-02-01" 2>/dev/null)
fi

if [ -z "$IMDS_RESPONSE" ]; then
echo "WARNING: Failed to fetch IMDS metadata, assuming cert rotation is enabled"
fi

if [ -n "$IMDS_RESPONSE" ]; then
ROTATION_DISABLED=$(echo "$IMDS_RESPONSE" | jq -r '.compute.tagsList | map(select(.name | test("aks-disable-kubelet-serving-certificate-rotation"; "i")))[0].value // "false" | test("true"; "i")' 2>/dev/null || echo "false")
fi

# Wait for the appropriate cert to exist (max 5 minutes)
# Certs are created by kubelet during its bootstrap process when it connects
# to the API server, so they may not exist immediately at boot time
WAIT_TIMEOUT=300
WAIT_INTERVAL=5
WAIT_ELAPSED=0

while [ $WAIT_ELAPSED -lt $WAIT_TIMEOUT ]; do
if [ "$ROTATION_DISABLED" = "true" ]; then
# Rotation disabled - wait for static certs
if [ -f "/etc/kubernetes/certs/kubeletserver.crt" ] && [ -f "/etc/kubernetes/certs/kubeletserver.key" ]; then
# TLS is disabled by default for backward compatibility:
# - AKS control plane Prometheus scrapes node-exporter via the API server proxy,
# which connects to backends over plain HTTP. Enabling TLS breaks this path.
# - The old node-exporter VM extension also defaulted to no TLS.
#
# To enable TLS, set NODE_EXPORTER_TLS_ENABLED=true in /etc/default/node-exporter.
# Optionally set NODE_EXPORTER_TLS_CLIENT_AUTH to control client cert requirements
# (default: NoClientCert). Valid values: NoClientCert, RequestClientCert,
# RequireAnyClientCert, VerifyClientCertIfGiven, RequireAndVerifyClientCert.
if [ "${NODE_EXPORTER_TLS_ENABLED:-false}" = "true" ]; then
mkdir -p "$(dirname "$TLS_CONFIG_PATH")"

TLS_CLIENT_AUTH="${NODE_EXPORTER_TLS_CLIENT_AUTH:-NoClientCert}"

Comment thread
chmill-zz marked this conversation as resolved.
# Validate client auth type against supported values
case "$TLS_CLIENT_AUTH" in
NoClientCert|RequestClientCert|RequireAnyClientCert|VerifyClientCertIfGiven|RequireAndVerifyClientCert) ;;
*)
echo "WARNING: unsupported NODE_EXPORTER_TLS_CLIENT_AUTH='$TLS_CLIENT_AUTH', defaulting to NoClientCert"
TLS_CLIENT_AUTH="NoClientCert"
;;
esac

# Wait for kubelet serving certs to exist (max 5 minutes).
# Certs are created by kubelet during bootstrap and may not exist at boot time.
WAIT_TIMEOUT=300
WAIT_INTERVAL=5
WAIT_ELAPSED=0

while [ $WAIT_ELAPSED -lt $WAIT_TIMEOUT ]; do
if [ -f "/var/lib/kubelet/pki/kubelet-server-current.pem" ] || \
{ [ -f "/etc/kubernetes/certs/kubeletserver.crt" ] && [ -f "/etc/kubernetes/certs/kubeletserver.key" ]; }; then
break
fi
echo "Waiting for kubelet serving certs... (${WAIT_ELAPSED}s/${WAIT_TIMEOUT}s)"
sleep $WAIT_INTERVAL
WAIT_ELAPSED=$((WAIT_ELAPSED + WAIT_INTERVAL))
done

# Detect TLS cert paths
# Priority: rotation cert > static certs
CERT_FILE=""
KEY_FILE=""

if [ -f "/var/lib/kubelet/pki/kubelet-server-current.pem" ]; then
Comment thread
chmill-zz marked this conversation as resolved.
CERT_FILE="/var/lib/kubelet/pki/kubelet-server-current.pem"
KEY_FILE="/var/lib/kubelet/pki/kubelet-server-current.pem"
echo "Using kubelet serving certificate rotation cert: $CERT_FILE"
elif [ -f "/etc/kubernetes/certs/kubeletserver.crt" ] && [ -f "/etc/kubernetes/certs/kubeletserver.key" ]; then
CERT_FILE="/etc/kubernetes/certs/kubeletserver.crt"
KEY_FILE="/etc/kubernetes/certs/kubeletserver.key"
echo "Using static kubelet serving certs: $CERT_FILE, $KEY_FILE"
else
# Rotation enabled - wait for rotation cert
if [ -f "/var/lib/kubelet/pki/kubelet-server-current.pem" ]; then
break
fi
echo "WARNING: TLS enabled but no kubelet serving certs found after ${WAIT_TIMEOUT}s. node-exporter will run without TLS."
Comment thread
chmill-zz marked this conversation as resolved.
fi
Comment thread
chmill-zz marked this conversation as resolved.
# Also check the other cert type in case IMDS tag detection was wrong
if [ -f "/var/lib/kubelet/pki/kubelet-server-current.pem" ] || \
{ [ -f "/etc/kubernetes/certs/kubeletserver.crt" ] && [ -f "/etc/kubernetes/certs/kubeletserver.key" ]; }; then
break
fi
sleep $WAIT_INTERVAL
WAIT_ELAPSED=$((WAIT_ELAPSED + WAIT_INTERVAL))
done

# Detect TLS cert paths
# Priority: rotation cert > static certs > skip TLS
CERT_FILE=""
KEY_FILE=""

if [ -f "/var/lib/kubelet/pki/kubelet-server-current.pem" ]; then
CERT_FILE="/var/lib/kubelet/pki/kubelet-server-current.pem"
KEY_FILE="/var/lib/kubelet/pki/kubelet-server-current.pem"
echo "Using kubelet serving certificate rotation cert: $CERT_FILE"
elif [ -f "/etc/kubernetes/certs/kubeletserver.crt" ] && [ -f "/etc/kubernetes/certs/kubeletserver.key" ]; then
CERT_FILE="/etc/kubernetes/certs/kubeletserver.crt"
KEY_FILE="/etc/kubernetes/certs/kubeletserver.key"
echo "Using static kubelet serving certs: $CERT_FILE, $KEY_FILE"
else
echo "WARNING: No kubelet serving certs found after ${WAIT_TIMEOUT}s, node-exporter will run without TLS. Restart the service after certs are available to enable TLS."
fi

# Configure TLS if we found valid cert paths
if [ -n "$CERT_FILE" ] && [ -n "$KEY_FILE" ]; then
cat > "$TLS_CONFIG_PATH" <<EOF
if [ -n "$CERT_FILE" ] && [ -n "$KEY_FILE" ]; then
if [ "$TLS_CLIENT_AUTH" != "NoClientCert" ]; then
cat > "$TLS_CONFIG_PATH" <<EOF
tls_server_config:
cert_file: "$CERT_FILE"
key_file: "$KEY_FILE"
client_auth_type: "RequireAndVerifyClientCert"
client_auth_type: "$TLS_CLIENT_AUTH"
client_ca_file: "/etc/kubernetes/certs/ca.crt"
EOF
TLS_CONFIG_ARG="--web.config.file=${TLS_CONFIG_PATH}"
else
cat > "$TLS_CONFIG_PATH" <<EOF
tls_server_config:
cert_file: "$CERT_FILE"
key_file: "$KEY_FILE"
client_auth_type: "NoClientCert"
EOF
fi
echo "TLS configured: client_auth_type=$TLS_CLIENT_AUTH, cert=$CERT_FILE"
TLS_CONFIG_ARG="--web.config.file=${TLS_CONFIG_PATH}"
Comment thread
chmill-zz marked this conversation as resolved.
Comment thread
chmill-zz marked this conversation as resolved.
fi
fi

ARGS=(
Expand Down
Loading