Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 40 additions & 0 deletions dockers/docker-telemetry-watchdog/Dockerfile.j2
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
{% from "dockers/dockerfile-macros.j2" import install_debian_packages, install_python_wheels, copy_files %}
ARG BASE=docker-config-engine-bookworm-{{DOCKER_USERNAME}}:{{DOCKER_USERTAG}}

# ---- Stage 1: builder ----
# Compiles the watchdog binary. Only the resulting executable is copied out
# of this stage, so nothing installed here ends up in the runtime image.
FROM $BASE AS builder

# Refresh apt's package index and install the C build toolchain
RUN apt-get update && apt-get install -y \
    build-essential

# Install Rust/Cargo via rustup, pinned to toolchain 1.79.0.
# RUST_ROOT is used as both RUSTUP_HOME and CARGO_HOME during installation,
# so the toolchain binaries end up in $RUST_ROOT/bin.
#
# `set -o pipefail` makes the RUN step fail when the download fails; without
# it the pipeline's status is that of `sh`, which exits 0 on empty input and
# would leave a layer with no Rust installed. `--tlsv1.2` follows the rustup
# installation instructions.
ARG RUST_ROOT=/usr/.cargo
RUN RUSTUP_HOME=$RUST_ROOT CARGO_HOME=$RUST_ROOT bash -c \
    'set -o pipefail; curl --proto "=https" --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- --default-toolchain 1.79.0 -y'
ENV RUSTUP_HOME=$RUST_ROOT
ENV PATH=$PATH:$RUST_ROOT/bin

# Copy watchdog source into /watchdog
WORKDIR /watchdog
COPY watchdog/ ./

# Build from within /watchdog; the binary lands in target/release/
RUN cargo build --release

# ---- Final stage: runtime image ----
# Starts again from $BASE; the only artifact taken from the builder stage is
# the compiled telemetry_watchdog binary copied in below.
FROM $BASE AS base

# Build arguments supplied by the caller: the container name is used to tag
# syslog lines, the image version is exported as IMAGE_VERSION.
ARG docker_container_name
ARG image_version
# Prefix %syslogtag% in rsyslog.conf with "<container name>#".
# NOTE(review): when /etc/rsyslog.conf is absent the `[ -f ... ]` test is the
# last command run, so this RUN step exits non-zero and the build fails --
# confirm that is intended.
RUN [ -f /etc/rsyslog.conf ] && sed -ri "s/%syslogtag%/$docker_container_name#%syslogtag%/;" /etc/rsyslog.conf

ENV DEBIAN_FRONTEND=noninteractive
ENV IMAGE_VERSION=$image_version

# Copy supervisord.conf into final stage
COPY ["supervisord.conf", "/etc/supervisor/conf.d/"]

# Copy the compiled Rust binary from the builder stage
COPY --from=builder /watchdog/target/release/telemetry_watchdog /usr/bin/telemetry_watchdog
# Make sure the copied binary carries the executable bit.
RUN chmod +x /usr/bin/telemetry_watchdog

# supervisord is the container's main process; it reads the config copied above.
ENTRYPOINT ["/usr/local/bin/supervisord"]
37 changes: 37 additions & 0 deletions dockers/docker-telemetry-watchdog/supervisord.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
; supervisord configuration for the telemetry watchdog container.
; Two programs are defined: rsyslogd and the telemetry_watchdog binary.

[supervisord]
; Cap supervisord's own log at 1MB and keep two rotated backups.
logfile_maxbytes=1MB
logfile_backups=2
; Stay in the foreground instead of daemonizing.
nodaemon=true

; Event listener that runs the supervisord_dependent_startup module and is
; subscribed to PROCESS_STATE events. Both programs below set
; autostart=false and dependent_startup=true, so presumably this listener is
; what starts them -- confirm against the module's documentation.
[eventlistener:dependent-startup]
command=python3 -m supervisord_dependent_startup
autostart=true
autorestart=unexpected
startretries=0
; Exit codes 0 and 3 count as expected for autorestart=unexpected.
exitcodes=0,3
events=PROCESS_STATE
buffer_size=1024

; rsyslogd in the foreground (-n) without a pid file (-iNONE).
[program:rsyslogd]
command=/usr/sbin/rsyslogd -n -iNONE
priority=1
autostart=false
autorestart=unexpected
; No per-process log files; stdout/stderr are sent to syslog.
stdout_logfile=NONE
stdout_syslog=true
stderr_logfile=NONE
stderr_syslog=true
dependent_startup=true

; The watchdog binary installed by the Dockerfile at /usr/bin/telemetry_watchdog.
[program:telemetry_watchdog]
command=/usr/bin/telemetry_watchdog
priority=3
autostart=false
; NOTE(review): with autorestart=false supervisord will not restart the
; watchdog if it exits -- confirm that is the intended policy.
autorestart=false
; startsecs=0: the program does not need to stay up for any minimum time to
; be considered successfully started.
startsecs=0
stdout_logfile=NONE
stdout_syslog=true
stderr_logfile=NONE
stderr_syslog=true
dependent_startup=true
; Declared dependency: rsyslogd must be in the running state first.
dependent_startup_wait_for=rsyslogd:running
7 changes: 7 additions & 0 deletions dockers/docker-telemetry-watchdog/watchdog/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

19 changes: 19 additions & 0 deletions dockers/docker-telemetry-watchdog/watchdog/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Cargo manifest for the telemetry watchdog binary.
[package]
name = "telemetry_watchdog"
version = "0.1.0"
edition = "2021"
description = "watchdog for telemetry container"
license = "MIT"
authors = ["Feng Pan"]

[dependencies]
# NOTE(review): src/main.rs as shown in this change has no reference to
# `daemonize` or `chrono` -- confirm whether these two are still needed.
daemonize = "0.5"
chrono = "0.4"
# serde (with derive) and serde_json serialize the health-status JSON body.
serde = { version = "1", features = ["derive"] }
serde_json = "1"
# Redis client used for the two hash-field lookups in src/main.rs.
redis = "0.23.3"
# `=` is an exact-version requirement: only url 2.4.1 is accepted.
# NOTE(review): src/main.rs does not use `url` directly; presumably this pins
# a transitive dependency -- confirm and record the reason here.
url = "=2.4.1"

# Single binary target built from src/main.rs.
[[bin]]
name = "telemetry_watchdog"
path = "src/main.rs"
27 changes: 27 additions & 0 deletions dockers/docker-telemetry-watchdog/watchdog/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Convenience wrapper around cargo for the telemetry watchdog crate.
#
# .ONESHELL runs each recipe in a single bash invocation; -e is appended to
# the shell flags so that invocation stops at the first failing command.
.ONESHELL:
SHELL = /bin/bash
.SHELLFLAGS += -e

# Every target below is a command, not a file it creates. Declaring them
# phony keeps them working even if a file or directory named e.g. `build`,
# `test` or `clean` appears in this directory.
.PHONY: build test clean build-release test-release clean-release

#
# Debug build targets
#
build:
	cargo build --all

test:
	cargo test --all

clean:
	cargo clean

#
# Release build targets
#
build-release:
	cargo build --release --all

test-release:
	cargo test --release --all

clean-release:
	cargo clean --release
5 changes: 5 additions & 0 deletions dockers/docker-telemetry-watchdog/watchdog/debian/changelog
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
sonic (1.0.0) stable; urgency=medium

* Initial release

-- Feng Pan <fenpan@microsoft.com> Tue, 15 Apr 2025 03:13:12 +0000
1 change: 1 addition & 0 deletions dockers/docker-telemetry-watchdog/watchdog/debian/compat
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
11
9 changes: 9 additions & 0 deletions dockers/docker-telemetry-watchdog/watchdog/debian/control
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
Source: sonic
Maintainer: Feng Pan <fenpan@microsoft.com>
Section: net
Priority: optional
Standards-Version: 1.0.0

Package: sonic-telemetry-watchdog
Architecture: any
Description: telemetry watchdog for KubeSONiC project
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
target/release/telemetry_watchdog /usr/bin
17 changes: 17 additions & 0 deletions dockers/docker-telemetry-watchdog/watchdog/debian/rules
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#!/usr/bin/make -f
# Debian packaging rules for the telemetry watchdog; see debhelper(7).
# Uncomment the export below to print every command that modifies files on
# the build system.
#export DH_VERBOSE = 1

# Catch-all rule: every target is handed to the dh sequencer.
%:
	dh $@

# Build step: compile the release binary with cargo.
override_dh_auto_build:
	cargo build --release --all

# Test step: intentionally a shell no-op.
override_dh_auto_test:
	# do nothing
	:

# Clean step: remove cargo's release artifacts.
override_dh_auto_clean:
	cargo clean --release
143 changes: 143 additions & 0 deletions dockers/docker-telemetry-watchdog/watchdog/src/main.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
use std::io::{BufRead, BufReader, Write};
use std::net::{TcpListener, TcpStream};

use serde::Serialize;
use redis::Commands;

// Port probed when the gNMI port cannot be obtained from Redis.
// Fail-open: if Redis is down or field is missing/invalid, default to 50051
const DEFAULT_TELEMETRY_SERVICE_PORT: u16 = 50051;

// JSON body returned by the health endpoint; serialized with serde_json.
#[derive(Serialize)]
struct HealthStatus {
    // Outcome of the telemetry port probe: "OK", "ERROR: <cause>", or
    // "SKIPPED: feature disabled" when the telemetry feature is turned off.
    check_telemetry_port: String,
}

// Redis URL shared by every lookup below: 127.0.0.1:6379, database index 4.
const CONFIG_DB_URL: &str = "redis://127.0.0.1:6379/4";

// Reads hash field `field` of `key` from the Redis database at CONFIG_DB_URL.
//
// `context` is the short tag embedded in error messages so the log shows
// which lookup failed (e.g. "port", "feature").
//
// Returns:
//   Ok(Some(value)) - the field exists
//   Ok(None)        - the field (or the whole key) is absent
//   Err(message)    - creating the client, connecting, or the HGET itself
//                     failed; `message` is ready to be logged as-is
fn config_db_hget(key: &str, field: &str, context: &str) -> Result<Option<String>, String> {
    let client = redis::Client::open(CONFIG_DB_URL)
        .map_err(|e| format!("Redis client error ({context}): {e}"))?;
    let mut conn = client
        .get_connection()
        .map_err(|e| format!("Redis connection error ({context}): {e}"))?;

    let value: redis::RedisResult<Option<String>> = conn.hget(key, field);
    value.map_err(|e| format!("Redis HGET error ({context}): {e}"))
}

// Returns the gNMI port stored in field "port" of "TELEMETRY|gnmi".
//
// Fail-open: any Redis failure, a missing field, or a value that does not
// parse as u16 is logged to stderr and DEFAULT_TELEMETRY_SERVICE_PORT is
// returned instead.
fn get_gnmi_port() -> u16 {
    match config_db_hget("TELEMETRY|gnmi", "port", "port") {
        Ok(Some(p)) => p.parse::<u16>().unwrap_or_else(|_| {
            eprintln!("Redis: TELEMETRY|gnmi.port not a valid u16: {p}");
            DEFAULT_TELEMETRY_SERVICE_PORT
        }),
        Ok(None) => {
            eprintln!("Redis: TELEMETRY|gnmi.port missing; defaulting to {}", DEFAULT_TELEMETRY_SERVICE_PORT);
            DEFAULT_TELEMETRY_SERVICE_PORT
        }
        Err(msg) => {
            eprintln!("{msg}");
            DEFAULT_TELEMETRY_SERVICE_PORT
        }
    }
}

// Connects to Redis DB 4 and returns true if the telemetry feature is enabled.
// The feature counts as enabled unless field "state" of "FEATURE|telemetry"
// equals "disabled" (ASCII case-insensitive).
// If Redis is unavailable or the field is missing, default to enabled (fail-open).
fn is_telemetry_enabled() -> bool {
    match config_db_hget("FEATURE|telemetry", "state", "feature") {
        Ok(Some(state)) => !state.eq_ignore_ascii_case("disabled"),
        Ok(None) => {
            eprintln!("Redis: FEATURE|telemetry.state missing; defaulting to enabled");
            true
        }
        Err(msg) => {
            eprintln!("{msg}");
            true
        }
    }
}

// Probes the gNMI port on the loopback address.
//
// The port comes from get_gnmi_port(). Returns "OK" when a TCP connection to
// 127.0.0.1:<port> can be opened (the connection is dropped right away) and
// "ERROR: <cause>" when the connect call fails.
fn check_telemetry_port() -> String {
    let target = format!("127.0.0.1:{}", get_gnmi_port());
    TcpStream::connect(&target)
        .map(|_| "OK".to_string())
        .unwrap_or_else(|err| format!("ERROR: {}", err))
}

fn main() {
let listener = TcpListener::bind("127.0.0.1:50080")
.expect("Failed to bind to 127.0.0.1:50080");
println!("Watchdog HTTP server running on http://127.0.0.1:50080");

for stream_result in listener.incoming() {
match stream_result {
Ok(mut stream) => {
let mut reader = BufReader::new(&stream);
let mut request_line = String::new();

if let Ok(_) = reader.read_line(&mut request_line) {
println!("Received request: {}", request_line.trim_end());

if !request_line.starts_with("GET /") {
let response = "HTTP/1.1 405 Method Not Allowed\r\n\r\n";
let _ = stream.write_all(response.as_bytes());
continue;
}

let telemetry_enabled = is_telemetry_enabled();

let (result_string, http_status) = if !telemetry_enabled {
("SKIPPED: feature disabled".to_string(), "HTTP/1.1 200 OK")
} else {
let port_result = check_telemetry_port();
let ok = port_result.starts_with("OK");
let status_line = if ok {
"HTTP/1.1 200 OK"
} else {
"HTTP/1.1 500 Internal Server Error"
};
(port_result, status_line)
};

let status = HealthStatus {
check_telemetry_port: result_string,
};

let json_body = serde_json::to_string(&status).unwrap();
let response = format!(
"{http_status}\r\nContent-Type: application/json\r\nContent-Length: {}\r\n\r\n{}",
json_body.len(),
json_body
);

if let Err(e) = stream.write_all(response.as_bytes()) {
eprintln!("Failed to write response: {}", e);
}
}
}
Err(e) => eprintln!("Error accepting connection: {}", e),
}
}
}
10 changes: 10 additions & 0 deletions rules/docker-telemetry-watchdog.dep
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Dependency description for the telemetry watchdog docker image.

# Source directory of the image, as set by rules/docker-telemetry-watchdog.mk.
DPATH := $($(DOCKER_TELEMETRY_WATCHDOG)_PATH)

# Files recorded as this image's dependencies: the common SONiC file lists,
# this image's two rule files, and every git-tracked file under DPATH.
DEP_FILES := $(SONIC_COMMON_FILES_LIST) \
             rules/docker-telemetry-watchdog.mk \
             rules/docker-telemetry-watchdog.dep \
             $(SONIC_COMMON_BASE_FILES_LIST) \
             $(shell git ls-files $(DPATH))

$(DOCKER_TELEMETRY_WATCHDOG)_DEP_FILES := $(DEP_FILES)
$(DOCKER_TELEMETRY_WATCHDOG)_DEP_FLAGS := $(SONIC_COMMON_FLAGS_LIST)
$(DOCKER_TELEMETRY_WATCHDOG)_CACHE_MODE := GIT_CONTENT_SHA

# Apply the add_dbg_docker template to the image and its debug variant.
$(eval $(call add_dbg_docker,$(DOCKER_TELEMETRY_WATCHDOG),$(DOCKER_TELEMETRY_WATCHDOG_DBG)))
27 changes: 27 additions & 0 deletions rules/docker-telemetry-watchdog.mk
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Build rules for the telemetry watchdog docker image.

# Image file names: the regular image and its debug variant share one stem.
DOCKER_TELEMETRY_WATCHDOG_STEM = docker-telemetry-watchdog
DOCKER_TELEMETRY_WATCHDOG = $(DOCKER_TELEMETRY_WATCHDOG_STEM).gz
DOCKER_TELEMETRY_WATCHDOG_DBG = $(DOCKER_TELEMETRY_WATCHDOG_STEM)-$(DBG_IMAGE_MARK).gz

# Per-image attributes: source directory, base image to load, version,
# package name and container name.
$(DOCKER_TELEMETRY_WATCHDOG)_PATH = $(DOCKERS_PATH)/$(DOCKER_TELEMETRY_WATCHDOG_STEM)
$(DOCKER_TELEMETRY_WATCHDOG)_LOAD_DOCKERS = $(DOCKER_CONFIG_ENGINE_BOOKWORM)
$(DOCKER_TELEMETRY_WATCHDOG)_VERSION = 1.0.0
$(DOCKER_TELEMETRY_WATCHDOG)_PACKAGE_NAME = telemetry_watchdog
$(DOCKER_TELEMETRY_WATCHDOG)_CONTAINER_NAME = telemetry_watchdog

# Add the image to the docker image, bookworm docker and install lists.
SONIC_DOCKER_IMAGES += $(DOCKER_TELEMETRY_WATCHDOG)
SONIC_BOOKWORM_DOCKERS += $(DOCKER_TELEMETRY_WATCHDOG)
SONIC_INSTALL_DOCKER_IMAGES += $(DOCKER_TELEMETRY_WATCHDOG)

# Same three lists for the debug variant.
SONIC_DOCKER_DBG_IMAGES += $(DOCKER_TELEMETRY_WATCHDOG_DBG)
SONIC_BOOKWORM_DBG_DOCKERS += $(DOCKER_TELEMETRY_WATCHDOG_DBG)
SONIC_INSTALL_DOCKER_DBG_IMAGES += $(DOCKER_TELEMETRY_WATCHDOG_DBG)

# Container run options: tty, privileged mode, host PID namespace, plus bind
# mounts for the systemd unit directory (rw), /etc/sonic (ro) and
# /etc/localtime (ro).
# NOTE(review): --privileged, --pid=host and a writable systemd mount are
# broad grants -- confirm the watchdog needs all of them.
$(DOCKER_TELEMETRY_WATCHDOG)_RUN_OPT += -t --privileged --pid=host \
    -v /lib/systemd/system:/lib/systemd/system:rw \
    -v /etc/sonic:/etc/sonic:ro \
    -v /etc/localtime:/etc/localtime:ro