-
Notifications
You must be signed in to change notification settings - Fork 0
Dev sysready extension #13
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
7440088
452fb0c
3f1e0c4
dee1dc2
937623e
06e977a
37b97c8
9508de9
44c4bf0
e7a5d3a
f4006dd
cac3ed8
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -108,6 +108,37 @@ function preStartAction() | |
| fi | ||
| {%- elif docker_container_name == "snmp" %} | ||
| $SONIC_DB_CLI STATE_DB HSET 'DEVICE_METADATA|localhost' chassis_serial_number $(decode-syseeprom -s) | ||
|
|
||
| # Wait for system ready. Continue after the timeout if we don't hear anything | ||
| # from the DB. Read the timeout from the config file and add two extra minutes on top of | ||
| # it. | ||
| CONFFILE=system_health_monitoring_config.json | ||
| PLATFORM=$(sonic-cfggen -d -v "DEVICE_METADATA['localhost']['platform']") | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Try to use the cache rather than calling sonic-cfggen, as sonic-cfggen is a costly operation. Please refer to |
||
| TIMEOUT=$(cat /usr/share/sonic/device/$PLATFORM/$CONFFILE | jq ".timeout") | ||
| SYSTEM_READY_EVENT="SYSTEM_READY|SYSTEM_STATE" | ||
| CHECK_SR_EVENT="sonic-db-dump -n STATE_DB -k $SYSTEM_READY_EVENT" | ||
|
|
||
| # Set default if not found | ||
| if [[ -z $TIMEOUT ]]; then | ||
| MESSAGE="Failed to read timeout from config file, assuming default is \ | ||
| 10 minutes" | ||
| echo $MESSAGE | ||
| TIMEOUT=10 | ||
| fi | ||
|
|
||
| # Add two extra minutes and convert to seconds | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. What is the rationale behind adding two extra minutes? |
||
| TIMEOUT=$(((TIMEOUT + 2) * 60)) | ||
|
|
||
| # Waiting for event | ||
| echo "Waiting for $SYSTEM_READY_EVENT event" | ||
| while [[ -z "$($CHECK_SR_EVENT | jq '.[]["value"]["Status"]')" ]]; do | ||
| UPTIME=$(awk -F. '{print $1}' /proc/uptime) | ||
| if [[ $UPTIME -gt $TIMEOUT ]]; then | ||
| echo "Got uptime timeout - starting snmp..." | ||
| break | ||
| fi | ||
| sleep 15 | ||
| done | ||
| {%- else %} | ||
| : # nothing | ||
| {%- endif %} | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -79,11 +79,17 @@ | |
| "auto_restart": "{{autorestart}}", | ||
| "support_syslog_rate_limit" : "true", | ||
| {# Set check_up_status to true here when app readiness will be marked in state db #} | ||
| {%- if feature in ["swss", "syncd", "pmon"] %} | ||
| "check_up_status" : "true", | ||
| {%- endif %} | ||
| {# For now, to support the infrastructure, setting the check_up_status to false for bgp,swss,pmon #} | ||
| {# Once apps like bgp,syncd support app readiness, then bgp,syncd can set check_up_status to true #} | ||
| {%- if feature in ["bgp", "swss", "pmon"] %} | ||
| {%- if feature in ["bgp"] %} | ||
| "check_up_status" : "false", | ||
| {%- endif %} | ||
| {%- if feature in ["ib-utils", "snmp"] %} | ||
|
Owner
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Remove ib-utils. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Please remove ib-utils, as you commented. |
||
| "irrel_for_sysready" : "true", | ||
fastiuk marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| {%- endif %} | ||
| {%- if include_kubernetes == "y" %} | ||
| {%- if feature in ["lldp", "pmon", "radv", "eventd", "snmp", "telemetry", "gnmi"] %} | ||
| "set_owner": "kube", {% else %} | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -44,6 +44,8 @@ function startplatform() { | |
| /usr/bin/mlnx-fw-upgrade.sh -v | ||
| if [[ "$?" -ne "${EXIT_SUCCESS}" ]]; then | ||
| debug "Failed to upgrade fw. " "$?" "Restart syncd" | ||
| sonic-db-cli STATE_DB HSET "FEATURE|$DEV_SRV" fail_reason \ | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why are we considering just the ASIC firmware update as the sysready indication for syncd? Shouldn't it be the create-switch success? |
||
| "ASIC FW update failed" up_status false | ||
| exit 1 | ||
| fi | ||
| /etc/init.d/sxdkernel restart | ||
|
|
@@ -86,6 +88,9 @@ function startplatform() { | |
| /etc/init.d/xpnet.sh start | ||
| fi | ||
| fi | ||
|
|
||
| sonic-db-cli STATE_DB HDEL "FEATURE|$DEV_SRV" fail_reason | ||
| sonic-db-cli STATE_DB HSET "FEATURE|$DEV_SRV" up_status true | ||
| } | ||
|
|
||
| function waitplatform() { | ||
|
|
@@ -169,9 +174,11 @@ LOCKFILE="/tmp/swss-syncd-lock$DEV" | |
| NAMESPACE_PREFIX="asic" | ||
| if [ "$DEV" ]; then | ||
| NET_NS="$NAMESPACE_PREFIX$DEV" #name of the network namespace | ||
| DEV_SRV="$SERVICE@$DEV" | ||
| SONIC_DB_CLI="sonic-db-cli -n $NET_NS" | ||
| else | ||
| NET_NS="" | ||
| DEV_SRV="$SERVICE" | ||
| SONIC_DB_CLI="sonic-db-cli" | ||
| fi | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Are you enabling this feature by default for snmp? Shouldn't it be based on configuration?