diff --git a/dockers/docker-sonic-telemetry/Dockerfile.j2 b/dockers/docker-sonic-telemetry/Dockerfile.j2 index 88ff9431820..8cd79203a70 100644 --- a/dockers/docker-sonic-telemetry/Dockerfile.j2 +++ b/dockers/docker-sonic-telemetry/Dockerfile.j2 @@ -26,7 +26,11 @@ RUN apt-get clean -y && \ apt-get autoremove -y && \ rm -rf /debs +# Install the python `inotify` package +RUN pip3 install inotify + COPY ["start.sh", "telemetry.sh", "dialout.sh", "/usr/bin/"] +COPY ["certificate_rotation_checker", "/usr/bin/"] COPY ["telemetry_vars.j2", "/usr/share/sonic/templates/"] COPY ["supervisord.conf", "/etc/supervisor/conf.d/"] COPY ["files/supervisor-proc-exit-listener", "/usr/bin"] diff --git a/dockers/docker-sonic-telemetry/certificate_rotation_checker b/dockers/docker-sonic-telemetry/certificate_rotation_checker new file mode 100755 index 00000000000..4ece188d867 --- /dev/null +++ b/dockers/docker-sonic-telemetry/certificate_rotation_checker @@ -0,0 +1,214 @@ +#!/usr/bin/env python3 + +""" +certificate_rotation_checker + +This script will be leveraged to periodically check whether the certificate file +of streaming telemetry was rotated or not. The configuration of streaming telemetry +server process will be reloaded if the certificate file was rotated. +""" + +import os +import signal +import subprocess +import sys +import syslog +import time + +import inotify.adapters + +from swsscommon import swsscommon + +MAX_RETRY_TIMES = 10 +CERTIFICATE_CHECKING_INTERVAL_SECS = 3600 + +CREDENTIALS_DIR_PATH = "/etc/sonic/credentials/" + + +def get_command_result(command): + """Executes the command and returns the exiting code and resulting output. + + Args: + command: A string contains the command to be executed. + + Returns: + An integer indicates the exiting code. + A string which contains the output of command. + """ + command_stdout = "" + command_stderr = "" + + try: + proc_instance = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, + shell=True, universal_newlines=True) + command_stdout, command_stderr = proc_instance.communicate() + except (OSError, ValueError) as err: + syslog.syslog(syslog.LOG_ERR, "Failed to execute the command '{}'. Error: '{}'" + .format(command, err)) + return 2, command_stderr + + return proc_instance.returncode, command_stdout.strip() + + +def get_telemetry_server_info(): + """Gets telemetry server process information. + + Args: + None. + + Returns: + If telemetry server process is running, returns True and process id; + Otherwise returns False and -1. + """ + processes_status_cmd = "supervisorctl status" + retry_times = 0 + + while retry_times <= MAX_RETRY_TIMES: + retry_times += 1 + exit_code, command_stdout = get_command_result(processes_status_cmd) + if exit_code != 3: + syslog.syslog(syslog.LOG_INFO, + "Failed to get the telemetry server process information and retry after 60 seconds ...") + time.sleep(60) + else: + for line in command_stdout.splitlines(): + if "telemetry" in line and "RUNNING" in line: + return True, line.split()[3].strip(",") + + return False, -1 + + +def reload_telemetry_server_configuration(): + """Reloads the telemetry server configuration by sending signal 'SIGHUP' + to telemetry server process and checks it is actually running after doing the reload. + + Args: + None + + Returns: + Returns True if the configuration was reloaded successfully; Otherwise, return False. + """ + telemetry_server_pid = -1 + is_running = False + + is_running, telemetry_server_pid = get_telemetry_server_info() + if not is_running: + syslog.syslog(syslog.LOG_ERR, + "Telemetry server process is not running before reloading configuration!") + return False + + syslog.syslog(syslog.LOG_INFO, + "Telemetry server process is running with PID: {}".format(telemetry_server_pid)) + syslog.syslog(syslog.LOG_INFO, "Sending 'SIGHUP' signal to telemetry server process ...") + + os.kill(int(telemetry_server_pid), signal.SIGHUP) + + syslog.syslog(syslog.LOG_INFO, "'SIGHUP' signal was sent out.") + + # Wait for 120 seconds to check whether telemetry server process comes back + time.sleep(120) + + is_running, telemetry_server_pid = get_telemetry_server_info() + if not is_running: + syslog.syslog(syslog.LOG_ERR, + "Telemetry server process is not running after reloading configuration!") + return False + + syslog.syslog(syslog.LOG_INFO, "Telemetry server process is running after reloading configuration!") + return True + + +def check_certificate_rotated(certificate_file_name): + """Leverages the 'inotify' module to monitor the file system events under the + directory which stores the SONiC credentials and reloads telemetry server + configuration if its certificate was rotated. + + + Args: + certificate_file_name: A string indicates the telemetry certificate file name. + + Returns: + None. + """ + certificate_file_rotated = False + + inotify_instance = inotify.adapters.Inotify() + inotify_instance.add_watch(CREDENTIALS_DIR_PATH) + for event in inotify_instance.event_gen(yield_nones=False): + header, event_type, monitoring_path, file_name = event + if (file_name == certificate_file_name + and ("IN_CREATE" in event_type or "IN_MOVED_TO" in event_type)): + certificate_file_rotated = True + + if certificate_file_rotated: + certificate_file_rotated = False + syslog.syslog(syslog.LOG_INFO, + "Certificate was rotated and reloading telemetry server configuration ...") + + if not reload_telemetry_server_configuration(): + syslog.syslog(syslog.LOG_ERR, + "Failed to reload the telemetry server configuration!") + + syslog.syslog(syslog.LOG_INFO, "Telemetry server configuration was reloaded successfully!") + + +def certificate_rotated_checker(): + """Checks rotation of certificate file and then reloads streaming telemetry server configuration. + + Leverages 'inotify' module to check whether the certificate file of streaming telemetry was + rotated or not. The configuration of telemetry server process will be reloaded if it was rotated. + + Args: + None + + Returns: + None + """ + certificate_file_path = "" + private_key_file_path = "" + certificate_file_name = "" + + config_db = swsscommon.DBConnector("CONFIG_DB", 0) + telemetry_table = swsscommon.Table(config_db, "TELEMETRY") + telemetry_table_keys = telemetry_table.getKeys() + if "certs" in telemetry_table_keys: + certs_info = dict(telemetry_table.get("certs")[1]) + if "server_crt" in certs_info and "server_key" in certs_info: + certificate_file_path = certs_info["server_crt"] + private_key_file_path = certs_info["server_key"] + syslog.syslog(syslog.LOG_INFO, "Path of certificate file is '{}'".format(certificate_file_path)) + syslog.syslog(syslog.LOG_INFO, "Path of private key file is '{}'".format(private_key_file_path)) + else: + syslog.syslog(syslog.LOG_ERR, + "Failed to retrieve the path of certificate or private key file from 'TELEMETRY' table!") + sys.exit(1) + else: + syslog.syslog(syslog.LOG_ERR, + "Failed to retrieve the certificate information from 'TELEMETRY' table!") + sys.exit(2) + + while True: + if not os.path.exists(certificate_file_path) or not os.path.exists(private_key_file_path): + syslog.syslog(syslog.LOG_ERR, + "Certificate or private key file did not exist on device and checks again after '{}' seconds ..." + .format(CERTIFICATE_CHECKING_INTERVAL_SECS)) + time.sleep(CERTIFICATE_CHECKING_INTERVAL_SECS) + else: + break + + certificate_file_name = certificate_file_path.strip().split("/")[-1] + if not certificate_file_name: + syslog.syslog(syslog.LOG_ERR, + "Failed to retrieve the file name of certificate!") + sys.exit(3) + + check_certificate_rotated(certificate_file_name) + + +def main(): + certificate_rotated_checker() + + +if __name__ == "__main__": + main() + sys.exit(0) diff --git a/dockers/docker-sonic-telemetry/critical_processes b/dockers/docker-sonic-telemetry/critical_processes index 612a94d9eda..810415d53fc 100644 --- a/dockers/docker-sonic-telemetry/critical_processes +++ b/dockers/docker-sonic-telemetry/critical_processes @@ -1,2 +1,3 @@ program:telemetry program:dialout +program:certificate_rollover_checker diff --git a/dockers/docker-sonic-telemetry/supervisord.conf b/dockers/docker-sonic-telemetry/supervisord.conf index 5ab0e3ca59d..aeffc201280 100644 --- a/dockers/docker-sonic-telemetry/supervisord.conf +++ b/dockers/docker-sonic-telemetry/supervisord.conf @@ -58,3 +58,13 @@ stdout_logfile=syslog stderr_logfile=syslog dependent_startup=true dependent_startup_wait_for=telemetry:running + +[program:certificate_rollover_checker] +command=/usr/bin/certificate_rollover_checker +priority=5 +autostart=false +autorestart=false +stdout_logfile=syslog +stderr_logfile=syslog +dependent_startup=true +dependent_startup_wait_for=start:exited diff --git a/dockers/docker-sonic-telemetry/telemetry.sh b/dockers/docker-sonic-telemetry/telemetry.sh index 1f92657e3b8..4fb2a2773fd 100755 --- a/dockers/docker-sonic-telemetry/telemetry.sh +++ b/dockers/docker-sonic-telemetry/telemetry.sh @@ -19,54 +19,70 @@ CERTS=$(echo $TELEMETRY_VARS | jq -r '.certs') TELEMETRY_ARGS=" -logtostderr" export CVL_SCHEMA_PATH=/usr/sbin/schema -if [ -n "$CERTS" ]; then - SERVER_CRT=$(echo $CERTS | jq -r '.server_crt') - SERVER_KEY=$(echo $CERTS | jq -r '.server_key') - if [ -z $SERVER_CRT ] || [ -z $SERVER_KEY ]; then - TELEMETRY_ARGS+=" --insecure" +while true +do + if [ -n "$CERTS" ]; then + SERVER_CRT=$(echo $CERTS | jq -r '.server_crt') + SERVER_KEY=$(echo $CERTS | jq -r '.server_key') + CA_CRT=$(echo $CERTS | jq -r '.ca_crt') + + logger "Trying to retrieve server certificate, key and Root CA certificate ..." + logger "The file path of server certificate in CONFIG_DB is: $SERVER_CRT" + logger "The file path of server provate key in CONFIG_DB is: $SERVER_KEY" + logger "The file path of Root CA certificate in CONFIG_DB is: $CA_CRT" + + if [[ -f $SERVER_CRT && -f $SERVER_KEY && -f $CA_CRT ]]; then + logger "Succeeded in retrieving server certificate, key and Root CA certificate." + TELEMETRY_ARGS+=" --server_crt $SERVER_CRT --server_key $SERVER_KEY --ca_crt $CA_CRT" + break + else + logger "Failed to retrieve server certificate, key or Root CA certificate!" + fi + elif [ -n "$X509" ]; then + SERVER_CRT=$(echo $X509 | jq -r '.server_crt') + SERVER_KEY=$(echo $X509 | jq -r '.server_key') + CA_CRT=$(echo $X509 | jq -r '.ca_crt') + + logger "Trying to retrieve server certificate, key and Root CA certificate ..." + logger "The file path of server certificate in CONFIG_DB is: $SERVER_CRT" + logger "The file path of server provate key in CONFIG_DB is: $SERVER_KEY" + logger "The file path of Root CA certificate in CONFIG_DB is: $CA_CRT" + + if [[ -f $SERVER_CRT && -f $SERVER_KEY && -f $CA_CRT ]]; then + logger "Succeeded in retrieving server certificate, key and Root CA certificate." + TELEMETRY_ARGS+=" --server_crt $SERVER_CRT --server_key $SERVER_KEY --ca_crt $CA_CRT" + break + else + logger "Failed to retrieve server certificate, key or Root CA certificate!" + fi else - TELEMETRY_ARGS+=" --server_crt $SERVER_CRT --server_key $SERVER_KEY " + TELEMETRY_ARGS+=" --noTLS" + break fi - CA_CRT=$(echo $CERTS | jq -r '.ca_crt') - if [ ! -z $CA_CRT ]; then - TELEMETRY_ARGS+=" --ca_crt $CA_CRT" - fi -elif [ -n "$X509" ]; then - SERVER_CRT=$(echo $X509 | jq -r '.server_crt') - SERVER_KEY=$(echo $X509 | jq -r '.server_key') - if [ -z $SERVER_CRT ] || [ -z $SERVER_KEY ]; then - TELEMETRY_ARGS+=" --insecure" + logger "Sleeping 3600 seconds and checks the existence of secrets again ..." + sleep 3600 +done + +if [ -n "$GNMI" ]; then + PORT=$(echo $GNMI | jq -r '.port') + if [ ! -z $PORT ] || [ $PORT != "null" ]; then + TELEMETRY_ARGS+=" --port $PORT" else - TELEMETRY_ARGS+=" --server_crt $SERVER_CRT --server_key $SERVER_KEY " + TELEMETRY_ARGS+=" --port 8080" fi - CA_CRT=$(echo $X509 | jq -r '.ca_crt') - if [ ! -z $CA_CRT ]; then - TELEMETRY_ARGS+=" --ca_crt $CA_CRT" + LOG_LEVEL=$(echo $GNMI | jq -r '.log_level') + if [ ! -z $LOG_LEVEL ] || [ $LOG_LEVEL != "null" ]; then + TELEMETRY_ARGS+=" -v=$LOG_LEVEL" + else + TELEMETRY_ARGS+=" -v=2" fi -else - TELEMETRY_ARGS+=" --noTLS" -fi - -# If no configuration entry exists for TELEMETRY, create one default port -if [ -z "$GNMI" ]; then - PORT=8080 -else - PORT=$(echo $GNMI | jq -r '.port') -fi -TELEMETRY_ARGS+=" --port $PORT" -CLIENT_AUTH=$(echo $GNMI | jq -r '.client_auth') -if [ -z $CLIENT_AUTH ] || [ $CLIENT_AUTH == "false" ]; then - TELEMETRY_ARGS+=" --allow_no_client_auth" -fi - -LOG_LEVEL=$(echo $GNMI | jq -r '.log_level') -if [ ! -z $LOG_LEVEL ]; then - TELEMETRY_ARGS+=" -v=$LOG_LEVEL" -else - TELEMETRY_ARGS+=" -v=2" + CLIENT_AUTH=$(echo $GNMI | jq -r '.client_auth') + if [ -z $CLIENT_AUTH ] || [ $CLIENT_AUTH == "null" ] || [ $CLIENT_AUTH == "false" ]; then + TELEMETRY_ARGS+=" --allow_no_client_auth" + fi fi exec /usr/sbin/telemetry ${TELEMETRY_ARGS} diff --git a/src/sonic-config-engine/minigraph.py b/src/sonic-config-engine/minigraph.py index 8f8c90fab58..97c83b06616 100644 --- a/src/sonic-config-engine/minigraph.py +++ b/src/sonic-config-engine/minigraph.py @@ -1633,9 +1633,9 @@ def parse_xml(filename, platform=None, port_config_file=None, asic_name=None, hw 'log_level': '2' }, 'certs': { - 'server_crt': '/etc/sonic/telemetry/streamingtelemetryserver.cer', - 'server_key': '/etc/sonic/telemetry/streamingtelemetryserver.key', - 'ca_crt': '/etc/sonic/telemetry/dsmsroot.cer' + 'server_crt': '/etc/sonic/credentials/streamingtelemetryserver.cer', + 'server_key': '/etc/sonic/credentials/streamingtelemetryserver.key', + 'ca_crt': '/etc/sonic/credentials/dsmsroot.cer' } } results['RESTAPI'] = {