1111How:
1212 NOTE: The flow from APPL-DB to ASIC-DB takes a non-zero number of milliseconds.
1313 1) Initiate subscribe for ASIC-DB updates.
14- 2) Read APPL-DB & ASIC-DB
14+ 2) Read APPL-DB & ASIC-DB
1515 3) Get the diff.
16- 4) If any diff,
16+ 4) If any diff,
1717 4.1) Collect subscribe messages for a second
18- 4.2) check diff against the subscribe messages
18+ 4.2) check diff against the subscribe messages
1919 5) Rule out local interfaces & default routes
2020 6) If still outstanding diffs, report failure.
2121
2929 down to ensure failure.
3030 Analyze the reported failures to match expected.
3131 You may use the exit code to verify the result as success or not.
32-
32+
3333
3434
3535"""
4545import time
4646import signal
4747import traceback
48- import subprocess
4948
50- from ipaddress import ip_network
5149from swsscommon import swsscommon
5250from utilities_common import chassis
5351
7371
7472PRINT_MSG_LEN_MAX = 1000
7573
76- FRR_CHECK_RETRIES = 3
77- FRR_WAIT_TIME = 15
78-
7974class Level (Enum ):
8075 ERR = 'ERR'
8176 INFO = 'INFO'
@@ -146,7 +141,7 @@ def add_prefix(ip):
146141 ip = ip + PREFIX_SEPARATOR + "32"
147142 else :
148143 ip = ip + PREFIX_SEPARATOR + "128"
149- return str ( ip_network ( ip ))
144+ return ip
150145
151146
152147def add_prefix_ifnot (ip ):
@@ -155,7 +150,7 @@ def add_prefix_ifnot(ip):
155150 :param ip: IP to add prefix as string.
156151 :return ip with prefix
157152 """
158- return str ( ip_network ( ip )) if ip .find (PREFIX_SEPARATOR ) != - 1 else add_prefix (ip )
153+ return ip if ip .find (PREFIX_SEPARATOR ) != - 1 else add_prefix (ip )
159154
160155
161156def is_local (ip ):
@@ -298,7 +293,7 @@ def get_routes():
298293
299294def get_route_entries ():
300295 """
301- helper to read present route entries from ASIC-DB and
296+ helper to read present route entries from ASIC-DB and
302297 also initiate a selector for ASIC-DB:ASIC-state updates.
303298 :return (selector, subscriber, <list of sorted routes>)
304299 """
@@ -314,39 +309,14 @@ def get_route_entries():
314309 res , e = checkout_rt_entry (k )
315310 if res :
316311 rt .append (e )
317-
312+
318313 print_message (syslog .LOG_DEBUG , json .dumps ({"ASIC_ROUTE_ENTRY" : sorted (rt )}, indent = 4 ))
319314
320315 selector = swsscommon .Select ()
321316 selector .addSelectable (subs )
322317 return (selector , subs , sorted (rt ))
323318
324319
325- def is_suppress_fib_pending_enabled ():
326- """
327- Returns True if FIB suppression is enabled, False otherwise
328- """
329- cfg_db = swsscommon .ConfigDBConnector ()
330- cfg_db .connect ()
331-
332- state = cfg_db .get_entry ('DEVICE_METADATA' , 'localhost' ).get ('suppress-fib-pending' )
333-
334- return state == 'enabled'
335-
336-
337- def get_frr_routes ():
338- """
339- Read routes from zebra through CLI command
340- :return frr routes dictionary
341- """
342-
343- output = subprocess .check_output ('show ip route json' , shell = True )
344- routes = json .loads (output )
345- output = subprocess .check_output ('show ipv6 route json' , shell = True )
346- routes .update (json .loads (output ))
347- return routes
348-
349-
350320def get_interfaces ():
351321 """
352322 helper to read interface table from APPL-DB.
@@ -384,7 +354,7 @@ def filter_out_local_interfaces(keys):
384354
385355 chassis_local_intfs = chassis .get_chassis_local_interfaces ()
386356 local_if_lst .update (set (chassis_local_intfs ))
387-
357+
388358 db = swsscommon .DBConnector (APPL_DB_NAME , 0 )
389359 tbl = swsscommon .Table (db , 'ROUTE_TABLE' )
390360
@@ -523,61 +493,6 @@ def filter_out_standalone_tunnel_routes(routes):
523493 return updated_routes
524494
525495
526- def check_frr_pending_routes ():
527- """
528- Check FRR routes for offload flag presence by executing "show ip route json"
529- Returns a list of routes that have no offload flag.
530- """
531-
532- missed_rt = []
533-
534- retries = FRR_CHECK_RETRIES
535- for i in range (retries ):
536- missed_rt = []
537- frr_routes = get_frr_routes ()
538-
539- for _ , entries in frr_routes .items ():
540- for entry in entries :
541- if entry ['protocol' ] != 'bgp' :
542- continue
543-
544- # TODO: Also handle VRF routes. Currently this script does not check for VRF routes so it would be incorrect for us
545- # to assume they are installed in ASIC_DB, so we don't handle them.
546- if entry ['vrfName' ] != 'default' :
547- continue
548-
549- if not entry .get ('offloaded' , False ):
550- missed_rt .append (entry )
551-
552- if not missed_rt :
553- break
554-
555- time .sleep (FRR_WAIT_TIME )
556-
557- return missed_rt
558-
559-
560- def mitigate_installed_not_offloaded_frr_routes (missed_frr_rt , rt_appl ):
561- """
562- Mitigate installed but not offloaded FRR routes.
563-
564- In case route exists in APPL_DB, this function will manually send a notification to fpmsyncd
565- to trigger the flow that sends offload flag to zebra.
566-
567- It is designed to mitigate a problem when orchagent fails to send notification about installed route to fpmsyncd
568- or fpmsyncd not being able to read the notification or in case zebra fails to receive offload update due to variety of reasons.
569- All of the above mentioned cases must be considered as a bug, but even in that case we will report an error in the log but
570- given that this script ensures the route is installed in the hardware it will automitigate such a bug.
571- """
572- db = swsscommon .DBConnector ('APPL_STATE_DB' , 0 )
573- response_producer = swsscommon .NotificationProducer (db , f'{ APPL_DB_NAME } _{ swsscommon .APP_ROUTE_TABLE_NAME } _RESPONSE_CHANNEL' )
574- for entry in [entry for entry in missed_frr_rt if entry ['prefix' ] in rt_appl ]:
575- fvs = swsscommon .FieldValuePairs ([('err_str' , 'SWSS_RC_SUCCESS' ), ('protocol' , entry ['protocol' ])])
576- response_producer .send ('SWSS_RC_SUCCESS' , entry ['prefix' ], fvs )
577-
578- print_message (syslog .LOG_ERR , f'Mitigated route { entry ["prefix" ]} ' )
579-
580-
581496def get_soc_ips (config_db ):
582497 mux_table = config_db .get_table ('MUX_CABLE' )
583498 soc_ips = []
@@ -621,7 +536,7 @@ def check_routes():
621536 """
622537 The heart of this script which runs the checks.
623538 Read APPL-DB & ASIC-DB, the relevant tables for route checking.
624- Checkout routes in ASIC-DB to match APPL-DB, discounting local &
539+ Checkout routes in ASIC-DB to match APPL-DB, discounting local &
625540 default routes. In case of missed / unexpected entries in ASIC,
626541 it might be due to update latency between APPL & ASIC DBs. So collect
627542 ASIC-DB subscribe updates for a second, and checkout if you see SET
@@ -630,16 +545,12 @@ def check_routes():
630545 If there are still some unjustifiable diffs, between APPL & ASIC DB,
631546 related to routes report failure, else all good.
632547
633- If there are FRR routes that aren't marked offloaded but all APPL & ASIC DB
634- routes are in sync report failure and perform a mitigation action.
635-
636548 :return (0, None) on success, else (-1, results) where results holds
637549 the unjustifiable entries.
638550 """
639551 intf_appl_miss = []
640552 rt_appl_miss = []
641553 rt_asic_miss = []
642- rt_frr_miss = []
643554
644555 results = {}
645556 adds = []
@@ -688,22 +599,11 @@ def check_routes():
688599 if rt_asic_miss :
689600 results ["Unaccounted_ROUTE_ENTRY_TABLE_entries" ] = rt_asic_miss
690601
691- rt_frr_miss = check_frr_pending_routes ()
692-
693- if rt_frr_miss :
694- results ["missed_FRR_routes" ] = rt_frr_miss
695-
696602 if results :
697603 print_message (syslog .LOG_WARNING , "Failure results: {" , json .dumps (results , indent = 4 ), "}" )
698604 print_message (syslog .LOG_WARNING , "Failed. Look at reported mismatches above" )
699605 print_message (syslog .LOG_WARNING , "add: " , json .dumps (adds , indent = 4 ))
700606 print_message (syslog .LOG_WARNING , "del: " , json .dumps (deletes , indent = 4 ))
701-
702- if rt_frr_miss and not rt_appl_miss and not rt_asic_miss :
703- print_message (syslog .LOG_ERR , "Some routes are not set offloaded in FRR but all routes in APPL_DB and ASIC_DB are in sync" )
704- if is_suppress_fib_pending_enabled ():
705- mitigate_installed_not_offloaded_frr_routes (rt_frr_miss , rt_appl )
706-
707607 return - 1 , results
708608 else :
709609 print_message (syslog .LOG_INFO , "All good!" )
@@ -749,7 +649,7 @@ def main():
749649 return ret , res
750650 else :
751651 return ret , res
752-
652+
753653
754654
755655if __name__ == "__main__" :
0 commit comments