Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
a4a8fb7
Add sonic-telemetry-sidecar container
FengPan-Frank Sep 9, 2025
3c10626
Add sonic-telemetry-sidecar container
FengPan-Frank Sep 9, 2025
ba96cad
Merge branch 'sidecar_container' of https://github.com/FengPan-Frank/…
FengPan-Frank Sep 20, 2025
35ba06e
Merge branch 'sidecar_container' of https://github.com/FengPan-Frank/…
FengPan-Frank Sep 20, 2025
e644733
Merge branch 'sidecar_container' of https://github.com/FengPan-Frank/…
FengPan-Frank Sep 20, 2025
f6e5958
Merge branch 'sidecar_container' of https://github.com/FengPan-Frank/…
FengPan-Frank Sep 20, 2025
924edd1
Merge branch 'sidecar_container' of https://github.com/FengPan-Frank/…
FengPan-Frank Sep 20, 2025
6d86391
Merge branch 'sidecar_container' of https://github.com/FengPan-Frank/…
FengPan-Frank Sep 20, 2025
4155bc2
Merge branch 'sidecar_container' of https://github.com/FengPan-Frank/…
FengPan-Frank Sep 20, 2025
d7dede1
Update dockers/docker-telemetry-sidecar/systemd_stub.py
qiluo-msft Sep 21, 2025
96877b0
Merge branch 'sidecar_container' of https://github.com/FengPan-Frank/…
FengPan-Frank Sep 20, 2025
941c8f3
Merge branch 'sidecar_container' of https://github.com/FengPan-Frank/…
FengPan-Frank Sep 21, 2025
5ad59e2
Merge branch 'sidecar_container' of https://github.com/FengPan-Frank/…
FengPan-Frank Sep 21, 2025
d105d4f
Merge branch 'sidecar_container' of https://github.com/FengPan-Frank/…
FengPan-Frank Sep 23, 2025
866a6b8
Merge branch 'sidecar_container' of https://github.com/FengPan-Frank/…
FengPan-Frank Sep 23, 2025
8daf769
Merge branch 'sidecar_container' of https://github.com/FengPan-Frank/…
FengPan-Frank Sep 23, 2025
8435dff
Merge branch 'sidecar_container' of https://github.com/FengPan-Frank/…
FengPan-Frank Sep 23, 2025
f32b359
Merge branch 'sidecar_container' of https://github.com/FengPan-Frank/…
FengPan-Frank Sep 23, 2025
104aa15
Merge branch 'sidecar_container' of https://github.com/FengPan-Frank/…
FengPan-Frank Sep 23, 2025
4b17c9e
Merge branch 'sidecar_container' of https://github.com/FengPan-Frank/…
FengPan-Frank Sep 23, 2025
e386b82
Merge branch 'sidecar_container' of https://github.com/FengPan-Frank/…
FengPan-Frank Sep 23, 2025
580277b
Merge branch 'sidecar_container' of https://github.com/FengPan-Frank/…
FengPan-Frank Sep 23, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 37 additions & 0 deletions dockers/docker-telemetry-sidecar/Dockerfile.j2
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
{% from "dockers/dockerfile-macros.j2" import install_debian_packages, install_python_wheels, copy_files %}
ARG BASE=docker-config-engine-bookworm-{{DOCKER_USERNAME}}:{{DOCKER_USERTAG}}

# Stage "base": apply every file-system change on top of the base image.
# Only files are carried over to the final image (see the rsync below);
# ARG and ENV values set in this stage are not.
FROM $BASE AS base

ARG docker_container_name
ARG image_version
# Prefix each syslog tag with the container name.
RUN [ -f /etc/rsyslog.conf ] && sed -ri "s/%syslogtag%/$docker_container_name#%syslogtag%/;" /etc/rsyslog.conf

# Make apt-get non-interactive
ENV DEBIAN_FRONTEND=noninteractive

# Pass the image_version to container
ENV IMAGE_VERSION=$image_version

COPY ["systemd_stub.py", "/usr/bin/"]
COPY ["systemd_scripts/", "/usr/share/sonic/systemd_scripts/"]
COPY ["files/container_checker", "/usr/share/sonic/systemd_scripts/container_checker"]
COPY ["files/telemetry.sh", "/usr/share/sonic/systemd_scripts/telemetry_v1.sh"]
COPY ["supervisord.conf", "/etc/supervisor/conf.d/"]

RUN chmod +x /usr/bin/systemd_stub.py

# Final stage: start again from the pristine base image and copy in the
# file-system changes made by stage "base" as a single layer.
FROM $BASE

# An ARG goes out of scope at the end of the stage that declared it, so it
# must be redeclared here; otherwise $image_version below expands to "".
ARG image_version

RUN --mount=type=bind,from=base,target=/changes-to-image rsync -axAX --no-D --exclude=/sys --exclude=/proc --exclude=/dev --exclude=resolv.conf /changes-to-image/ /

# Make apt-get non-interactive
ENV DEBIAN_FRONTEND=noninteractive

# Pass the image_version to container
ENV IMAGE_VERSION=$image_version

# Default for the final image; K8s will override this.
# It has to be set in this stage: ENV values from stage "base" are not part of
# the rsync'ed files, and supervisord.conf expands %(ENV_IS_V1_ENABLED)s.
ENV IS_V1_ENABLED=false

ENTRYPOINT ["/usr/local/bin/supervisord"]
38 changes: 38 additions & 0 deletions dockers/docker-telemetry-sidecar/supervisord.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
; Supervisor configuration for the telemetry sidecar container.
; Two programs are defined: rsyslogd and systemd_stub. An event listener
; (supervisord_dependent_startup) is configured to observe PROCESS_STATE events.
[supervisord]
logfile_maxbytes=1MB
logfile_backups=2
; Run in the foreground instead of daemonizing.
nodaemon=true

; Event listener that receives PROCESS_STATE events from supervisord.
; Presumably it is what honours the dependent_startup* keys used below
; (behaviour is defined by the supervisord_dependent_startup module, which is
; not part of this file) -- TODO confirm.
[eventlistener:dependent-startup]
command=python3 -m supervisord_dependent_startup
autostart=true
autorestart=unexpected
startretries=0
; Exit codes 0 and 3 count as expected, so they do not trigger a restart.
exitcodes=0,3
events=PROCESS_STATE
buffer_size=1024

; Syslog daemon, kept in the foreground (-n) with no pid file (-iNONE).
; Not auto-started by supervisord itself (autostart=false).
[program:rsyslogd]
command=/usr/sbin/rsyslogd -n -iNONE
priority=1
autostart=false
autorestart=unexpected
stdout_logfile=NONE
stdout_syslog=true
stderr_logfile=NONE
stderr_syslog=true
dependent_startup=true

; The sidecar's main process.
; NOTE(review): autostart=true is combined with dependent_startup=true here,
; while rsyslogd uses autostart=false. With autostart=true supervisord starts
; this program on its own, so the wait-for-rsyslogd ordering below may not be
; enforced -- confirm against the dependent-startup plugin documentation.
[program:systemd_stub]
command=python3 /usr/bin/systemd_stub.py
priority=3
autostart=true
autorestart=true
startsecs=0
stdout_logfile=NONE
stdout_syslog=true
stderr_logfile=NONE
stderr_syslog=true
dependent_startup=true
dependent_startup_wait_for=rsyslogd:running
; %(ENV_IS_V1_ENABLED)s expands to the value of IS_V1_ENABLED in supervisord's
; own environment; the trailing "s" is part of the %(name)s expansion syntax.
; NOTE(review): this expansion requires IS_V1_ENABLED to be set when
; supervisord starts -- verify that the image or the pod spec always sets it.
environment=IS_V1_ENABLED=%(ENV_IS_V1_ENABLED)s
Copy link
Copy Markdown

@make1980 make1980 Sep 23, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do you have an extra 's' here?
#Closed

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

also is this really needed? I don't remember seeing this for the env var used by telemetry

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Without the trailing 's', Supervisor will not substitute the value and will treat it as a literal string instead; see https://supervisord.org/configuration.html#environment-variables

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do you mean IS_V1_ENABLED? it's used in systemd_stub.py

100 changes: 100 additions & 0 deletions dockers/docker-telemetry-sidecar/systemd_scripts/telemetry.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
#!/bin/bash
# Control script for the "telemetry" service when it runs as a Kubernetes pod.
# Usage: telemetry.sh {start|stop|restart|wait|status} [asic-id]
#
# Fail fast: abort on errors (-e), on unset variables (-u) and on failures
# anywhere in a pipeline (pipefail).
set -euo pipefail

# Service name; used in the pod label selector, the syslog tag and messages.
SERVICE="telemetry"
# The following settings can be overridden from the environment.
NS="${NS:-sonic}" # k8s namespace
LABEL="raw_container_name=${SERVICE}" # selector used by DaemonSet
KUBECTL_BIN="${KUBECTL_BIN:-kubectl}" # kubectl executable (name or path)
NODE_NAME="${NODE_NAME:-$(hostname)}" # node whose pods are managed; defaults to the hostname
DEV="${2:-}" # accepted for compatibility; unused (single-ASIC)

# log MESSAGE...: send the message to syslog with the tag "<SERVICE>#system".
log() { /usr/bin/logger -t "${SERVICE}#system" "$*"; }

# Ensure the kubectl binary named by KUBECTL_BIN is available, exiting with
# status 127 when it is not. When KUBECONFIG is unset or empty and the kubelet
# config is readable, export it as the default KUBECONFIG.
require_kubectl() {
  command -v "${KUBECTL_BIN}" >/dev/null 2>&1 || {
    echo "ERROR: kubectl not found (KUBECTL_BIN=${KUBECTL_BIN})." >&2
    exit 127
  }

  # An explicit KUBECONFIG always wins; nothing more to do.
  if [[ -n "${KUBECONFIG:-}" ]]; then
    return 0
  fi

  # Otherwise fall back to the kubelet's config when it can be read.
  if [[ -r /etc/kubernetes/kubelet.conf ]]; then
    export KUBECONFIG=/etc/kubernetes/kubelet.conf
  fi
}

# Print one "<name> <phase>" line for every pod that matches LABEL in
# namespace NS and is scheduled on NODE_NAME. kubectl's stderr is discarded
# and a kubectl failure is ignored, so the function always returns 0.
pods_on_node() {
  local -a query=(
    -n "${NS}" get pods
    -l "${LABEL}"
    --field-selector "spec.nodeName=${NODE_NAME}"
    -o jsonpath='{range .items[*]}{.metadata.name}{" "}{.status.phase}{"\n"}{end}'
  )
  "${KUBECTL_BIN}" "${query[@]}" 2>/dev/null || true
}

# Force-delete every ${SERVICE} pod found on this node (see pods_on_node).
# Deletion is best-effort: a failed delete is logged and the loop moves on to
# the next pod. Logs a message when no pod was found at all.
kill_pods() {
  require_kubectl
  # Keep the loop variables local so they do not leak into the global scope.
  local found=0 name phase
  while read -r name phase; do
    [[ -z "${name}" ]] && continue
    found=1
    log "Deleting ${SERVICE} pod ${name} (phase=${phase}) on node ${NODE_NAME}"
    # Force/instant delete to emulate "kill"; DaemonSet will recreate.
    # stdin is redirected so kubectl can never consume the pod list that the
    # surrounding loop is still reading.
    if ! "${KUBECTL_BIN}" -n "${NS}" delete pod "${name}" --grace-period=0 --force </dev/null >/dev/null 2>&1; then
      log "Failed to delete ${SERVICE} pod ${name} on node ${NODE_NAME}; continuing"
    fi
  done < <(pods_on_node)
  if [[ "${found}" -eq 0 ]]; then
    log "No ${SERVICE} pods found on node ${NODE_NAME} (namespace=${NS}, label=${LABEL})."
  fi
}

# start, stop and restart are all implemented by killing the pod(s) on this
# node: start == kill (DS restarts).
cmd_start() {
  kill_pods
}

cmd_stop() {
  kill_pods
}

cmd_restart() {
  kill_pods
}

# Print the phase of each ${SERVICE} pod on this node and exit with:
#   0  at least one pod is in phase "Running"
#   1  pods exist but none is "Running"
#   3  no pod was found on this node
cmd_status() {
  require_kubectl

  local listing
  listing="$(pods_on_node)"
  if [[ -z "${listing}" ]]; then
    echo "${SERVICE}: NOT RUNNING (no pod on node ${NODE_NAME})"
    exit 3
  fi

  local pod state
  while read -r pod state; do
    [[ -n "${pod}" ]] || continue
    echo "${SERVICE} pod ${pod}: ${state}"
  done <<< "${listing}"

  # Second column is the pod phase; succeed as soon as any pod is Running.
  local rc=1
  if awk '$2=="Running"{found=1} END{exit found?0:1}' <<< "${listing}"; then
    rc=0
  fi
  exit "${rc}"
}

cmd_wait() {
require_kubectl
log "Waiting on ${SERVICE} pods (ns=${NS}, label=${LABEL}) on node ${NODE_NAME}..."
# Keep the systemd service 'active' as long as at least one pod exists for this node.
while true; do
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: it's probably OK to do it this way, since I don't think anyone is using "systemctl wait telemetry", but wouldn't it be simpler to just use "docker wait"?

local out; out="$(pods_on_node)"
if [[ -z "${out}" ]]; then
# no pod presently; keep waiting (DaemonSet may bring it up)
sleep 5
continue
fi
# If at least one is Running, sleep longer; otherwise poll faster
if echo "${out}" | awk '$2=="Running"{found=1} END{exit found?0:1}'; then
sleep 60
else
sleep 5
fi
done
}

# Entry point: the first argument selects the sub-command; each valid name is
# routed to the matching cmd_<name> function. Anything else (including no
# argument) prints the usage text to stderr and exits with status 2.
action="${1:-}"
case "${action}" in
  start|stop|restart|wait|status)
    "cmd_${action}"
    ;;
  *)
    echo "Usage: $0 {start|stop|restart|wait|status} [asic-id(optional, ignored)]" >&2
    exit 2
    ;;
esac
Original file line number Diff line number Diff line change
@@ -0,0 +1,215 @@
# tests/test_systemd_stub.py
import sys
import types
import importlib
from pathlib import Path

import pytest


@pytest.fixture(scope="session", autouse=True)
def fake_logger_module():
    """Register stand-in ``sonic_py_common`` / ``sonic_py_common.logger`` modules.

    The stand-in ``Logger`` records every call as a ``(level, message)`` tuple
    in ``self.messages`` instead of emitting anything.
    """

    class _Logger:
        """Logger double that keeps messages in memory."""

        def __init__(self):
            self.messages = []

        def _log(self, level, msg):
            self.messages.append((level, msg))

        def log_debug(self, msg):
            self._log("DEBUG", msg)

        def log_info(self, msg):
            self._log("INFO", msg)

        def log_error(self, msg):
            self._log("ERROR", msg)

        def log_notice(self, msg):
            self._log("NOTICE", msg)

        def log_warning(self, msg):
            self._log("WARNING", msg)

        def log_critical(self, msg):
            self._log("CRITICAL", msg)

    logger_mod = types.ModuleType("sonic_py_common.logger")
    logger_mod.Logger = _Logger

    pkg = types.ModuleType("sonic_py_common")
    pkg.logger = logger_mod

    # Register the package first, then its submodule.
    for mod_name, module in (
        ("sonic_py_common", pkg),
        ("sonic_py_common.logger", logger_mod),
    ):
        sys.modules[mod_name] = module
    yield


@pytest.fixture
def ss(tmp_path, monkeypatch):
    """
    Import systemd_stub fresh for every test, and provide fakes:
    - run_nsenter: simulates a host FS + systemctl/docker calls
    - container_fs: dict for "container" files
    - host_fs: dict for "host" files

    Returns the tuple ``(module, container_fs, host_fs, commands)`` where
    ``commands`` records every fake nsenter call as ``("nsenter", args_tuple)``.
    """
    sys.modules.pop("systemd_stub", None)
    ss = importlib.import_module("systemd_stub")

    # Fake host filesystem and command recorder
    host_fs = {}
    commands = []

    def fake_run_nsenter(args, *, text=True, input_bytes=None):
        """Emulate a handful of host commands; return ``(rc, stdout, stderr)``.

        stdout and stderr are ``str`` when *text* is true and ``bytes``
        otherwise, consistently for every branch.
        """

        def result(rc, out=b"", err=b""):
            if text:
                return rc, out.decode("utf-8", "ignore"), err.decode("utf-8", "ignore")
            return rc, out, err

        commands.append(("nsenter", tuple(args)))

        # /bin/cat <path>
        if args[:1] == ["/bin/cat"] and len(args) == 2:
            path = args[1]
            if path in host_fs:
                return result(0, host_fs[path])
            return result(1, err=b"No such file")

        # /bin/sh -lc "cat > /tmp/xxx"
        if args[:2] == ["/bin/sh", "-lc"] and len(args) == 3 and args[2].startswith("cat > "):
            # Named "dest" so it does not shadow the tmp_path fixture argument.
            dest = args[2].split("cat > ", 1)[1].strip()
            host_fs[dest] = input_bytes or b""
            return result(0)

        # chmod / mkdir / mv / rm
        if args[:1] == ["/bin/chmod"] or args[:1] == ["/bin/mkdir"]:
            return result(0)
        if args[:1] == ["/bin/mv"] and len(args) == 4:
            src, dst = args[2], args[3]
            host_fs[dst] = host_fs.get(src, b"")
            host_fs.pop(src, None)
            return result(0)
        if args[:1] == ["/bin/rm"]:
            host_fs.pop(args[-1], None)
            return result(0)

        # sudo ...
        if args[:1] == ["sudo"]:
            return result(0)

        return result(1, err=b"unsupported")

    monkeypatch.setattr(ss, "run_nsenter", fake_run_nsenter, raising=True)

    # Fake container FS
    container_fs = {}

    def fake_read_file_bytes_local(path: str):
        return container_fs.get(path, None)

    monkeypatch.setattr(ss, "read_file_bytes_local", fake_read_file_bytes_local, raising=True)

    # Isolate POST_COPY_ACTIONS
    monkeypatch.setattr(ss, "POST_COPY_ACTIONS", {}, raising=True)

    return ss, container_fs, host_fs, commands


def test_sha256_bytes_basic():
    """sha256_bytes gives the hex digest for bytes input and "" for None."""
    sys.modules.pop("systemd_stub", None)
    stub = importlib.import_module("systemd_stub")

    cases = [
        (b"", "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"),
        (None, ""),
        (b"abc", "ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad"),
    ]
    for payload, digest in cases:
        assert stub.sha256_bytes(payload) == digest


def test_host_write_atomic_and_read(ss):
    """A host write can be read back and goes through sh, chmod, mkdir and mv."""
    stub, _container_fs, _host_fs, commands = ss

    assert stub.host_write_atomic("/etc/testfile", b"hello", 0o755)
    assert stub.host_read_bytes("/etc/testfile") == b"hello"

    invoked = [args[0] for _, args in commands]
    for binary in ("/bin/sh", "/bin/chmod", "/bin/mkdir", "/bin/mv"):
        assert binary in invoked


def test_sync_no_change_fast_path(ss):
    """Identical source and destination content must not trigger a host write."""
    stub, container_fs, host_fs, commands = ss

    entry = stub.SyncItem("/container/telemetry.sh", "/host/telemetry.sh", 0o755)
    container_fs[entry.src_in_container] = b"same"
    host_fs[entry.dst_on_host] = b"same"
    stub.SYNC_ITEMS[:] = [entry]

    assert stub.ensure_sync() is True

    # A write would show up as a recorded `/bin/sh -lc ...` call.
    shell_writes = [args for _, args in commands if "/bin/sh" == args[0] and "-lc" in args]
    assert not shell_writes


def test_sync_updates_and_post_actions(ss):
    """Changed content is copied to the host and the post-copy actions are run."""
    stub, container_fs, host_fs, commands = ss

    entry = stub.SyncItem("/container/container_checker", "/bin/container_checker", 0o755)
    container_fs[entry.src_in_container] = b"NEW"
    host_fs[entry.dst_on_host] = b"OLD"
    stub.SYNC_ITEMS[:] = [entry]

    reload_cmd = ["sudo", "systemctl", "daemon-reload"]
    restart_cmd = ["sudo", "systemctl", "restart", "monit"]
    stub.POST_COPY_ACTIONS[entry.dst_on_host] = [reload_cmd, restart_cmd]

    assert stub.ensure_sync() is True
    assert host_fs[entry.dst_on_host] == b"NEW"

    sudo_calls = [args for _, args in commands if args and args[0] == "sudo"]
    for expected in (reload_cmd, restart_cmd):
        assert tuple(expected) in sudo_calls


def test_sync_missing_src_returns_false(ss):
    """ensure_sync reports False when the container-side source is absent."""
    stub, *_ = ss
    stub.SYNC_ITEMS[:] = [
        stub.SyncItem("/container/missing.sh", "/usr/local/bin/telemetry.sh", 0o755),
    ]
    assert stub.ensure_sync() is False


def test_main_once_exits_zero_and_disables_post_actions(monkeypatch):
    """`--once --no-post-actions` returns 0 and leaves POST_COPY_ACTIONS empty."""
    sys.modules.pop("systemd_stub", None)
    stub = importlib.import_module("systemd_stub")

    stub.POST_COPY_ACTIONS["/bin/container_checker"] = [["sudo", "echo", "hi"]]

    def _sync_ok():
        return True

    monkeypatch.setattr(stub, "ensure_sync", _sync_ok, raising=True)
    monkeypatch.setattr(sys, "argv", ["systemd_stub.py", "--once", "--no-post-actions"])

    assert stub.main() == 0
    assert stub.POST_COPY_ACTIONS == {}


def test_main_once_exits_nonzero_when_sync_fails(monkeypatch):
    """`--once` returns 1 when ensure_sync reports failure."""
    sys.modules.pop("systemd_stub", None)
    stub = importlib.import_module("systemd_stub")

    def _sync_failed():
        return False

    monkeypatch.setattr(stub, "ensure_sync", _sync_failed, raising=True)
    monkeypatch.setattr(sys, "argv", ["systemd_stub.py", "--once"])

    assert stub.main() == 1


def test_env_controls_telemetry_src_true(monkeypatch):
    """IS_V1_ENABLED=true at import time selects the telemetry_v1.sh source."""
    sys.modules.pop("systemd_stub", None)
    monkeypatch.setenv("IS_V1_ENABLED", "true")

    stub = importlib.import_module("systemd_stub")

    assert stub.IS_V1_ENABLED is True
    assert stub._TELEMETRY_SRC.endswith("telemetry_v1.sh")


def test_env_controls_telemetry_src_false(monkeypatch):
    """IS_V1_ENABLED=false at import time selects the telemetry.sh source."""
    sys.modules.pop("systemd_stub", None)
    monkeypatch.setenv("IS_V1_ENABLED", "false")

    stub = importlib.import_module("systemd_stub")

    assert stub.IS_V1_ENABLED is False
    assert stub._TELEMETRY_SRC.endswith("telemetry.sh")


def test_env_controls_telemetry_src_default(monkeypatch):
    """With IS_V1_ENABLED unset, the module falls back to the telemetry.sh source."""
    sys.modules.pop("systemd_stub", None)
    monkeypatch.delenv("IS_V1_ENABLED", raising=False)

    stub = importlib.import_module("systemd_stub")

    assert stub.IS_V1_ENABLED is False
    assert stub._TELEMETRY_SRC.endswith("telemetry.sh")
Loading
Loading