From fe3f168037b3c4b50afcde5cb56f71ec22e4e220 Mon Sep 17 00:00:00 2001 From: Andriy Moroz Date: Tue, 13 Aug 2019 20:51:05 +0300 Subject: [PATCH 01/16] Increase delay before RIF remove (#1061) * [vlan_trunk] stabilize vlan trunk test - use 'config' commands to bring down/up interfaces - generate 'Vlan': {} in config to specify this interface belongs to global vrf as it is required (TODO: use config CLI instead); - wait longer time after LAG shutdown; Test tries to configure LAG as part of VLAN interface which will fail if LAG is a RIF. Since RIF in SONiC is removed only when the ref_count becomes 0, we'll have to wait for orchagent to remove all routes, next hop groups, neighbors that reference those LAG RIFs. The time required is not very deterministic and can differ between platforms but it is assumed 90 sec is enough. One other way to do this test is to create separate PortChannels. Also, it looks that master has significantly slower performance to remove routes, next hops than 201811 branch. 
Signed-off-by: Stepan Blyschak * Revert delay before RIF remove back to 10s * [vlan_trunk] Increase delay before RIF remove --- ansible/roles/test/tasks/vlan_configure.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/roles/test/tasks/vlan_configure.yml b/ansible/roles/test/tasks/vlan_configure.yml index 8f27c5f482a..c9ac5506d1f 100644 --- a/ansible/roles/test/tasks/vlan_configure.yml +++ b/ansible/roles/test/tasks/vlan_configure.yml @@ -38,7 +38,7 @@ # otherwise PortChannel RIFs are still referenced and won't be removed # cause below vlan_configuration.json fail to apply - name: sleep for some time - pause: seconds=10 + pause: seconds=90 # TODO: convert VLAN configuration into CLI commands - name: Generate nessesary configuration for test From 3221e4eec1ec7c4ebb5416384b906ad0dd3da74d Mon Sep 17 00:00:00 2001 From: Manju V Date: Thu, 15 Aug 2019 22:18:51 +0530 Subject: [PATCH 02/16] Enhance vnet_vxlan for scaling of vnets and routes (#937) * Enhance vnet_vxlan for scaling of vnets and routes * Enhance vnet_vxlan for scaling of vnets and routes * incorported changes for ipv6 * Modified to select fewer routes in case of scaling * Corrected num_routes_batch variable * Modified the logic in choosing one route from the list for each vnet * Corrected the IP in local routes * Changes to limit the maximum RIFs to 128 --- .../roles/test/files/ptftests/vnet_vxlan.py | 91 ++++-- ansible/roles/test/templates/vnet_config.j2 | 292 +++++++++++++----- ansible/roles/test/templates/vnet_routes.j2 | 5 +- ansible/roles/test/templates/vnet_vxlan.j2 | 2 +- 4 files changed, 291 insertions(+), 99 deletions(-) diff --git a/ansible/roles/test/files/ptftests/vnet_vxlan.py b/ansible/roles/test/files/ptftests/vnet_vxlan.py index 1173cce07bb..e59638e5e70 100644 --- a/ansible/roles/test/files/ptftests/vnet_vxlan.py +++ b/ansible/roles/test/files/ptftests/vnet_vxlan.py @@ -36,6 +36,8 @@ def __init__(self): self.vxlan_router_mac = '00:aa:bb:cc:78:9a' self.vxlan_port = 
13330 self.DEFAULT_PKT_LEN = 100 + self.max_routes_wo_scaling = 1000 + self.vnet_batch = 8 def cmd(self, cmds): process = subprocess.Popen(cmds, @@ -82,8 +84,8 @@ def getSrvInfo(self, vnet, ifname=''): def checkPeer(self, test): for peers in self.peering: for key, peer in peers.items(): - ptest = dict(test) - if ptest['name'] == key: + if test['name'] == key: + ptest = dict(test) ptest['name'] = peer ptest['src'], ptest['port'], ptest['vlan'], ptest['vni'] = self.getSrvInfo(ptest['name']) if 'dst_vni' in test: @@ -93,13 +95,15 @@ def checkPeer(self, test): def checklocal(self, graph, test): for routes in graph['vnet_local_routes']: for name, rt_list in routes.items(): - for entry in rt_list: - nhtest = dict(test) - if nhtest['name'] == name.split('_')[0]: - nhtest['src'], nhtest['port'], nhtest['vlan'], nhtest['vni'] = self.getSrvInfo(nhtest['name'], entry['ifname']) - prefix = ip_network(unicode(entry['pfx'])) - nhtest['src'] = str(list(prefix.hosts())[0]) - self.tests.append(nhtest) + if test['name'] == name.split('_')[0]: + if self.total_routes <= self.max_routes_wo_scaling: + for entry in rt_list: + self.addLocalTest(test, entry) + else: + vnet_id = int(name.split('_')[0][4:]) + rt_idx = ((vnet_id-1)//4)%len(rt_list) + entry = rt_list[rt_idx] + self.addLocalTest(test, entry) def getPeerTest(self, test): peer_vnets = [] @@ -116,6 +120,43 @@ def getPeerTest(self, test): return peer_tests + def addTest(self, graph, name, entry): + test = {} + test['name'] = name.split('_')[0] + test['dst'] = entry['pfx'].split('/')[0] + test['host'] = entry['end'] + if 'mac' in entry: + test['mac'] = entry['mac'] + else: + test['mac'] = self.vxlan_router_mac + test['src'], test['port'], test['vlan'], test['vni'] = self.getSrvInfo(test['name']) + if 'vni' in entry: + test['dst_vni'] = entry['vni'] + self.tests.append(test) + self.checkPeer(test) + self.checklocal(graph, test) + + def addLocalTest(self, test, entry): + nhtest = dict(test) + nhtest['src'], nhtest['port'], 
nhtest['vlan'], nhtest['vni'] = self.getSrvInfo(nhtest['name'], entry['ifname']) + prefix = ip_network(unicode(entry['pfx'])) + nhtest['src'] = str(list(prefix.hosts())[0]) + self.tests.append(nhtest) + + def calculateTotalRoutes(self, graph): + self.total_routes = 0 + for routes in graph['vnet_routes']: + for name, rt_list in routes.items(): + self.total_routes += len(rt_list) + for peers in graph['vnet_peers']: + for key, peer in peers.items(): + if name.split('_')[0] == key: + self.total_routes += len(rt_list) + for l_routes in graph['vnet_local_routes']: + for l_name, l_rt_list in l_routes.items(): + if name == l_name: + self.total_routes += len(l_rt_list) + def setUp(self): self.dataplane = ptf.dataplane_instance @@ -162,11 +203,12 @@ def setUp(self): vni_base = 10000 self.serv_info = {} self.nbr_info = [] + acc_ports_size = len(self.acc_ports) for idx, data in enumerate(graph['vnet_interfaces']): if data['vnet'] not in self.serv_info: self.serv_info[data['vnet']] = [] serv_info = {} - ports = self.acc_ports[idx] + ports = self.acc_ports[idx % acc_ports_size] for nbr in graph['vnet_neighbors']: if nbr['ifname'] == data['ifname']: if 'Vlan' in data['ifname']: @@ -183,25 +225,24 @@ def setUp(self): self.serv_info[data['vnet']].extend([serv_info]) self.peering = graph['vnet_peers'] + self.calculateTotalRoutes(graph) self.tests = [] for routes in graph['vnet_routes']: for name, rt_list in routes.items(): - for entry in rt_list: - test = {} - test['name'] = name.split('_')[0] - test['dst'] = entry['pfx'].split('/')[0] - test['host'] = entry['end'] - if 'mac' in entry: - test['mac'] = entry['mac'] - else: - test['mac'] = self.vxlan_router_mac - test['src'], test['port'], test['vlan'], test['vni'] = self.getSrvInfo(test['name']) - if 'vni' in entry: - test['dst_vni'] = entry['vni'] - self.tests.append(test) - self.checkPeer(test) - self.checklocal(graph, test) + if self.total_routes <= self.max_routes_wo_scaling: + for entry in rt_list: + self.addTest(graph, name, 
entry) + else: + vnet_id = int(name.split('_')[0][4:]) + len_rt = len(rt_list) + group_8 = (vnet_id-1)//self.vnet_batch + rt_idx = (group_8//2)%len_rt + if group_8%2: + rt_idx = (len_rt-1)-rt_idx + + entry = rt_list[rt_idx] + self.addTest(graph, name, entry) self.dut_mac = graph['dut_mac'] diff --git a/ansible/roles/test/templates/vnet_config.j2 b/ansible/roles/test/templates/vnet_config.j2 index c1c2c37d4ce..7282fb42329 100644 --- a/ansible/roles/test/templates/vnet_config.j2 +++ b/ansible/roles/test/templates/vnet_config.j2 @@ -1,21 +1,101 @@ +#jinja2: trim_blocks: True,lstrip_blocks: True +{# + For normal functional test, no need to pass the below configurations. + num_vnet - Default 8. + num_routes - (including peer & local routes) + value 24 - if ipv6_vxlan_test == true + value 12 - if ipv6_vxlan_test == false + num_endpoints - Default 8 -{% set num_vnet = 8 %} -{% set vnet_v6_base = 5 %} + For Scale test, below configurations are to be passed. + num_vnet - Mandatory for scale test. + num_routes - Optional. Default 16000 for scale test. + num_endpoints - Optional. Default 4000 for scale test. + Ex: ansible playbook extra vars: -e "num_vnet=51 num_routes=550 num_endpoints=200" + + Ethernet IP - 10.10.10.1/24 + Vlan IP - A.B.10.1/24; starts 30.1.10.1/24 (A,B derived from vlan id starting 3001) + Route Pfx - A.B.C.D/32; starts 100.1.1.1/32 + (A,B derived from 100+vnet_id; C,D derived from number of routes per vnet) + Route endpoint - A.B.C.D; starts from 200.1.1.1 (C,D derived from number of endpoints) + +#} +{% set vnet_v6_base = 4 %} {% set vlan_id_base = 3001 %} + +{# A single vnet batch contains 8 Vnets #} +{% set num_vnet_batch = 8 %} +num_vnet_batch: {{ num_vnet_batch }} + +{# A single vnet batch has total 18 routes including peers if ipv6_vxlan_test == true. 
+ There will be only 9 routes if ipv6_vxlan_test == false #} +{% if ipv6_vxlan_test == true %} + {% set num_routes_batch = 18 %} +{% else %} + {% set num_routes_batch = 9 %} +{% endif %} + +{# Normal testing - 8 Vnets #} +{% if num_vnet is undefined or not num_vnet %} + {% set num_vnet = 8 %} + {% set num_routes_iterations = 1 %} + {% set num_endpoints = 4000 %} +{% endif %} +{% set num_vnet = num_vnet|int %} + +{# Convert num_vnet to a divisible of 8 since one batch has 8 vnets #} +{% set num_vnet = ((num_vnet//num_vnet_batch)|int)*num_vnet_batch %} + +{% if num_vnet <= 8 %} + {% set num_vnet = 8 %} + {% set num_routes_iterations = 1 %} + {% set num_endpoints = 4000 %} +{% else %} + {# Scale testing - Determine the number of routes per each Vnet batch (8 Vnets) #} + {% if num_routes is undefined or not num_routes %} + {% set num_routes = 16000 %} + {% endif %} + {% if num_endpoints is undefined or not num_endpoints %} + {% set num_endpoints = 4000 %} + {% endif %} + {% set num_routes = num_routes|int %} + {% set num_endpoints = num_endpoints|int %} + {% set num_routes_iterations = ((num_routes/num_routes_batch)/(num_vnet/num_vnet_batch))|round|int %} + {% if num_routes_iterations == 0 %} {% set num_routes_iterations = 1 %} {% endif %} +{% endif %} {% set topo_vlan = minigraph_vlans.keys()[0] %} -{% set index = 10 %} + +{# Max RIFs support currently is 128 #} +{% if num_vnet > 128 %} + {% set max_rif = 128 %} +{% else %} + {% set max_rif = num_vnet %} +{% endif %} + +{# Vlan Configurations + Vlan IP - A.B.10.1/24; starts 30.1.10.1/24 (A,B derived from vlan id) +#} vlan_intf_list: +{% set ports = minigraph_vlans[topo_vlan].members[1:] %} {% for vlan in range (vlan_id_base, vlan_id_base + num_vnet) %} + {% set vlan_str = vlan|string %} + {% set ip_a, ip_b = vlan_str[:2]|int, vlan_str[2:]|int %} + {% if ip_b == 0 %} + {% set ip_a, ip_b = ip_a-1, 100 %} + {% endif %} - vlan_id: '{{ (vlan|int) }}' ifname: 'Vlan{{ vlan }}' - ip: '{{ loop.index }}.{{ loop.index 
}}.10.1/24' - port: '{{ minigraph_vlans[topo_vlan].members[loop.index] }}' + ip: '{{ ip_a }}.{{ ip_b }}.10.1/24' + port: '{{ ports[loop.index0%(ports|length)] }}' {% endfor %} +{# Interface Configuration #} intf_list: +{% set index = 10 %} - ifname: {{ minigraph_vlans[topo_vlan].members[0] }} ip: '{{ index }}.{{ index }}.10.1/24' +{# Vnet Configurations #} vnet_id_list: {% for vnet in range (1, 1 + num_vnet) %} - Vnet{{ vnet }} @@ -25,100 +105,170 @@ vnet_id_list: vnet_v6_base: {{ vnet_v6_base }} {% endif %} +{# Vnet - Peer Configurations #} vnet_peer_list: - - Vnet3: Vnet4 - - Vnet4: Vnet3 - - Vnet7: Vnet8 - - Vnet8: Vnet7 +{% for vnet_batch in range (1, 1 + max_rif)|batch(4) %} + - Vnet{{ vnet_batch[2] }}: Vnet{{ vnet_batch[3] }} + - Vnet{{ vnet_batch[3] }}: Vnet{{ vnet_batch[2] }} +{% endfor %} +{% if num_vnet > max_rif %} + {% set peering_vnets = num_vnet - max_rif %} + {% for vnet_id in range (max_rif + 1, num_vnet + 1) %} + {% set peer_vnet = (loop.index0 % max_rif) + 1 %} + - Vnet{{ vnet_id }}: Vnet{{ peer_vnet }} + - Vnet{{ peer_vnet }}: Vnet{{ vnet_id }} + {% endfor %} +{% endif %} +{# Vnet - Interface Configurations #} vnet_intf_list: -{% for vlan in range (vlan_id_base, vlan_id_base + num_vnet) %} +{% for vlan in range (vlan_id_base, vlan_id_base + max_rif) %} - ifname: Vlan{{ vlan }} vnet: Vnet{{ loop.index }} {% endfor %} +{# Vnet - Neighbor Configurations #} vnet_nbr_list: -{% for vlan in range (vlan_id_base, vlan_id_base + num_vnet) %} +{% for vlan_batch in range (vlan_id_base, vlan_id_base + num_vnet)|batch(4) %} + {% for vlan in vlan_batch %} + {% set vlan_str = vlan|string %} + {% set ip_a, ip_b = vlan_str[:2]|int, vlan_str[2:]|int %} + {% if ip_b == 0 %} + {% set ip_a, ip_b = ip_a-1, 100 %} + {% endif %} - ifname: Vlan{{ vlan }} - ip: '{{ loop.index }}.{{ loop.index }}.10.101' + ip: '{{ ip_a }}.{{ ip_b }}.10.101' + {% if (loop.index0 == 0) or (loop.index0 == 1) %} + - ifname: Vlan{{ vlan }} + ip: '{{ ip_a }}.{{ ip_b }}.10.102' + {% endif %} + {% 
endfor %} {% endfor %} - - ifname: Vlan3001 - ip: '1.1.10.102' - - ifname: Vlan3002 - ip: '2.2.10.102' - - ifname: Vlan3005 - ip: '5.5.10.102' - - ifname: Vlan3006 - ip: '6.6.10.102' - ifname: {{ minigraph_vlans[topo_vlan].members[0] }} ip: '10.10.10.102' +{# Vnet - Local Routes #} vnet_local_routes: - - Vnet1_route_list: - - pfx: '50.1.1.0/24' - nh: '1.1.10.101' - ifname: 'Vlan3001' - - pfx: '50.2.2.0/24' - nh: '1.1.10.102' - ifname: 'Vlan3001' - - Vnet2_route_list: - - pfx: '60.1.1.0/24' - nh: '2.2.10.101' - ifname: 'Vlan3002' - - Vnet5_route_list: +{% for vlan_batch in range (vlan_id_base, vlan_id_base + max_rif)|batch(4) %} + - Vnet{{ vlan_batch[0]-vlan_id_base+1 }}_route_list: + {% set vlan_str = vlan_batch[0]|string %} + {% set ip_a, ip_b = vlan_str[:2]|int, vlan_str[2:]|int %} + {% if ip_b == 0 %} + {% set ip_a, ip_b = ip_a-1, 100 %} + {% endif %} - pfx: '50.1.1.0/24' - nh: '5.5.10.101' - ifname: 'Vlan3005' + nh: '{{ ip_a }}.{{ ip_b }}.10.101' + ifname: 'Vlan{{ vlan_batch[0] }}' - pfx: '50.2.2.0/24' - nh: '5.5.10.102' - ifname: 'Vlan3005' - - Vnet6_route_list: + nh: '{{ ip_a }}.{{ ip_b }}.10.102' + ifname: 'Vlan{{ vlan_batch[0] }}' + - Vnet{{ vlan_batch[1]-vlan_id_base+1 }}_route_list: + {% set vlan_str = vlan_batch[1]|string %} + {% set ip_a, ip_b = vlan_str[:2]|int, vlan_str[2:]|int %} + {% if ip_b == 0 %} + {% set ip_a, ip_b = ip_a-1, 100 %} + {% endif %} - pfx: '60.1.1.0/24' - nh: '6.6.10.101' - ifname: 'Vlan3006' + nh: '{{ ip_a }}.{{ ip_b }}.10.101' + ifname: 'Vlan{{ vlan_batch[1] }}' +{% endfor %} +{# Vnet - Routes + Route Pfx - A.B.C.D/32; starts 100.1.1.1/32 + (A,B derived from 100+vnet_id; C,D derived from number of routes per vnet) + Route endpoint - A.B.C.D; starts from 200.1.1.1 (C,D derived from number of endpoints) +#} vnet_route_list: - - Vnet1_route_list: - - pfx: '1.1.1.10/32' - end: '100.1.1.10' - - pfx: '1.1.1.11/32' - end: '100.1.1.11' +{% set endpoints_iters = (num_endpoints//(num_routes_iterations*num_vnet_batch))|int %} +{% if 
endpoints_iters == 0 %} {% set endpoints_iters = 1 %} {% endif %} +{% for vnet_batch in range (1, 1 + num_vnet)|batch(8) %} + {% set endpoints_idx = (loop.index0%endpoints_iters)*num_routes_iterations*num_vnet_batch+1 %} + {% set ip_r_a, ip_r_b = (vnet_batch[0]//100)|int+100, (vnet_batch[0]%100)|int %} + {% if ip_r_b == 0 %} {% set ip_r_a, ip_r_b = ip_r_a-1, 100 %} {% endif %} + {% set ip_e_a, ip_e_b = ((endpoints_idx//100)|int)+1, (endpoints_idx%100)|int %} + {% if ip_e_b == 0 %} {% set ip_e_a, ip_e_b = ip_e_a-1, 100 %} {% endif %} + - Vnet{{ vnet_batch[0] }}_route_list: + {% for idx in range (num_routes_iterations) %} + {% set temp = loop.index0*2+1 %} + {% set idx_a, idx_b = ((temp//100)|int)+1, (temp%100)|int %} + - pfx: '{{ ip_r_a }}.{{ ip_r_b }}.{{ idx_a }}.{{ idx_b }}/32' + end: '200.1.{{ ip_e_a }}.{{ (loop.index0*2)+ip_e_b }}' + - pfx: '{{ ip_r_a }}.{{ ip_r_b }}.{{ idx_a }}.{{ idx_b+1 }}/32' + end: '200.1.{{ ip_e_a }}.{{ (loop.index0*2)+ip_e_b+1 }}' mac: '00:00:00:00:01:02' - - Vnet2_route_list: - - pfx: '2.2.2.10/32' - end: '100.1.1.20' + {% endfor %} + - Vnet{{ vnet_batch[1] }}_route_list: + {% for idx in range (num_routes_iterations) %} + {% set idx_a, idx_b = ((loop.index//100)|int)+1, (loop.index%100)|int %} + {% if idx_b == 0 %} + {% set idx_a, idx_b = idx_a-1, 100 %} + {% endif %} + - pfx: '{{ ip_r_a }}.{{ ip_r_b+1 }}.{{ idx_a }}.{{ idx_b }}/32' + end: '200.2.{{ ip_e_a }}.{{ loop.index0+ip_e_b }}' mac: '00:00:00:00:02:05' - - Vnet3_route_list: - - pfx: '1.1.1.10/32' - end: '100.1.1.10' + {% endfor %} + - Vnet{{ vnet_batch[2] }}_route_list: + {% for idx in range (num_routes_iterations) %} + {% set temp = loop.index0*2+1 %} + {% set idx_a, idx_b = ((temp//100)|int)+1, (temp%100)|int %} + - pfx: '{{ ip_r_a }}.{{ ip_r_b }}.{{ idx_a }}.{{ idx_b }}/32' + end: '200.1.{{ ip_e_a }}.{{ (loop.index0*2)+ip_e_b }}' vni: '12345' - - pfx: '1.1.1.11/32' - end: '100.1.1.11' + - pfx: '{{ ip_r_a }}.{{ ip_r_b }}.{{ idx_a }}.{{ idx_b+1 }}/32' + end: '200.1.{{ ip_e_a 
}}.{{ (loop.index0*2)+ip_e_b+1 }}' mac: '00:00:00:00:01:02' - - Vnet4_route_list: - - pfx: '4.4.4.10/32' - end: '100.1.1.40' + {% endfor %} + - Vnet{{ vnet_batch[3] }}_route_list: + {% for idx in range (num_routes_iterations) %} + {% set idx_a, idx_b = ((loop.index//100)|int)+1, (loop.index%100)|int %} + {% if idx_b == 0 %} + {% set idx_a, idx_b = idx_a-1, 100 %} + {% endif %} + - pfx: '{{ ip_r_a }}.{{ ip_r_b+3 }}.{{ idx_a }}.{{ idx_b }}/32' + end: '200.4.{{ ip_e_a }}.{{ loop.index0+ip_e_b }}' mac: '00:00:00:00:02:05' + {% endfor %} {% if ipv6_vxlan_test == true %} - - Vnet5_route_list: - - pfx: '1.1.1.10/32' - end: '2000:1::33' - - pfx: '1.1.1.11/32' - end: '2000:1::34' + - Vnet{{ vnet_batch[4] }}_route_list: + {% for idx in range (num_routes_iterations) %} + {% set temp = loop.index0*2+1 %} + {% set idx_a, idx_b = ((temp//100)|int)+1, (temp%100)|int %} + - pfx: '{{ ip_r_a }}.{{ ip_r_b }}.{{ idx_a }}.{{ idx_b }}/32' + end: 'FC00:1::{{ ip_e_a }}:{{ (loop.index0*2)+ip_e_b }}' + - pfx: '{{ ip_r_a }}.{{ ip_r_b }}.{{ idx_a }}.{{ idx_b+1 }}/32' + end: 'FC00:1::{{ ip_e_a }}:{{ (loop.index0*2)+ip_e_b+1 }}' mac: '00:00:00:00:01:02' - - Vnet6_route_list: - - pfx: '2.2.2.10/32' - end: '2000:1::35' + {% endfor %} + - Vnet{{ vnet_batch[5] }}_route_list: + {% for idx in range (num_routes_iterations) %} + {% set idx_a, idx_b = ((loop.index//100)|int)+1, (loop.index%100)|int %} + {% if idx_b == 0 %} + {% set idx_a, idx_b = idx_a-1, 100 %} + {% endif %} + - pfx: '{{ ip_r_a }}.{{ ip_r_b+1 }}.{{ idx_a }}.{{ idx_b }}/32' + end: 'FC00:2::{{ ip_e_a }}:{{ loop.index0+ip_e_b }}' mac: '00:00:00:00:02:05' - - Vnet7_route_list: - - pfx: '1.1.1.10/32' - end: '2000:1::33' + {% endfor %} + - Vnet{{ vnet_batch[6] }}_route_list: + {% for idx in range (num_routes_iterations) %} + {% set temp = loop.index0*2+1 %} + {% set idx_a, idx_b = ((temp//100)|int)+1, (temp%100)|int %} + - pfx: '{{ ip_r_a }}.{{ ip_r_b }}.{{ idx_a }}.{{ idx_b }}/32' + end: 'FC00:1::{{ ip_e_a }}:{{ (loop.index0*2)+ip_e_b }}' 
vni: '12345' - - pfx: '1.1.1.11/32' - end: '2000:1::34' + - pfx: '{{ ip_r_a }}.{{ ip_r_b }}.{{ idx_a }}.{{ idx_b+1 }}/32' + end: 'FC00:1::{{ ip_e_a }}:{{ (loop.index0*2)+ip_e_b+1 }}' mac: '00:00:00:00:01:02' - - Vnet8_route_list: - - pfx: '4.4.4.10/32' - end: 'FC00:1::36' + {% endfor %} + - Vnet{{ vnet_batch[7] }}_route_list: + {% for idx in range (num_routes_iterations) %} + {% set idx_a, idx_b = ((loop.index//100)|int)+1, (loop.index%100)|int %} + {% if idx_b == 0 %} + {% set idx_a, idx_b = idx_a-1, 100 %} + {% endif %} + - pfx: '{{ ip_r_a }}.{{ ip_r_b+3 }}.{{ idx_a }}.{{ idx_b }}/32' + end: 'FC00:4::{{ ip_e_a }}:{{ loop.index0+ip_e_b }}' mac: '00:00:00:00:02:05' + {% endfor %} {% endif %} +{% endfor %} diff --git a/ansible/roles/test/templates/vnet_routes.j2 b/ansible/roles/test/templates/vnet_routes.j2 index 46c5db59352..004e445c880 100644 --- a/ansible/roles/test/templates/vnet_routes.j2 +++ b/ansible/roles/test/templates/vnet_routes.j2 @@ -77,11 +77,12 @@ {% set outloop = loop %} {% for vnet_intf in vnet_intf_list %} {% if vnet_intf.ifname == vlan_intf.ifname %} +{% set vlan_intf_ip = vlan_intf.ip[:-5] %} {% for peers in vnet_peer_list %} {% for key,peer in peers.items() %} {% if key == vnet_intf.vnet %} { - "VNET_ROUTE_TABLE:{{ peer }}:{{ outloop.index }}.{{ outloop.index }}.10.0/24": { + "VNET_ROUTE_TABLE:{{ peer }}:{{ vlan_intf_ip }}.0/24": { "ifname": "Vlan{{ vlan_intf.vlan_id }}" }, "OP": "{{ op }}" @@ -90,7 +91,7 @@ {% endfor %} {% endfor %} { - "VNET_ROUTE_TABLE:{{ vnet_intf.vnet }}:{{ outloop.index }}.{{ outloop.index }}.10.0/24": { + "VNET_ROUTE_TABLE:{{ vnet_intf.vnet }}:{{ vlan_intf_ip }}.0/24": { "ifname": "Vlan{{ vlan_intf.vlan_id }}" }, "OP": "{{ op }}" diff --git a/ansible/roles/test/templates/vnet_vxlan.j2 b/ansible/roles/test/templates/vnet_vxlan.j2 index c6082eb4bfd..4b53e430c4d 100644 --- a/ansible/roles/test/templates/vnet_vxlan.j2 +++ b/ansible/roles/test/templates/vnet_vxlan.j2 @@ -13,7 +13,7 @@ "VNET": { {% for vnet in vnet_id_list %} 
"{{ vnet }}": { -{% if (ipv6_vxlan_test == false) or (loop.index < vnet_v6_base) %} +{% if (ipv6_vxlan_test == false) or ((loop.index0%num_vnet_batch) < vnet_v6_base) %} "vxlan_tunnel": "tunnel_v4", {% else %} "vxlan_tunnel": "tunnel_v6", From 99cd272407c5afaab9548827a2eff6dac5cd15b4 Mon Sep 17 00:00:00 2001 From: Gord Chen Date: Fri, 16 Aug 2019 07:04:07 +0800 Subject: [PATCH 03/16] [Config Test] swss exit after remove port channel due to SAI not found intf (#1028) When executing the config test case, it has the possibility to cause swss exit. It is due to the intfOrch may be slower than portOrch When the issue not happens, the timeline would be 1. Playbook remove IP from port channel intfOrch want to remove l3 intf, but neighOrch still have reference and the intfOrch will wait to next run 2. neighOrch decrease the reference count neighOrch finish the reference count decrement and then intfOrch remove l3 intf to SAI 3. Playbook remove port channel portOrch remove this port in SAI When the issue happens, the timeline would be 1. Playbook remove IP from port channel intfOrch want to remove l3 intf, but neighOrch still have reference and the intfOrch will wait to next run 2. Playbook remove port channel portOrch remove this port and its l3 intf in SAI 3. neighOrch decrease the reference count neighOrch finish the reference count decrement and then intfOrch remove l3 intf to SAI whcih was removed by step 2. 
Then the swss will exception --- ansible/roles/test/tasks/config.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/ansible/roles/test/tasks/config.yml b/ansible/roles/test/tasks/config.yml index 001a82d475d..c0aa4c3ab31 100644 --- a/ansible/roles/test/tasks/config.yml +++ b/ansible/roles/test/tasks/config.yml @@ -101,12 +101,16 @@ become: yes when: add_tmp_portchannel_ip + - pause: seconds=5 + - name: Remove {{ portchannel_members }} from {{ tmp_portchannel }} shell: config portchannel member del {{ tmp_portchannel }} {{ item }} become: yes when: add_tmp_portchannel_members with_items: "{{portchannel_members}}" + - pause: seconds=5 + - name: Remove {{ tmp_portchannel }} shell: config portchannel del {{ tmp_portchannel }} become: yes From 769e816005e05aa94820cac5940823af5bd1e3f7 Mon Sep 17 00:00:00 2001 From: pavel-shirshov Date: Thu, 15 Aug 2019 18:17:41 -0700 Subject: [PATCH 04/16] Some improvements for vxlan decap test (#1071) 1. Do reboot before the test 2. Send 10 packets instead of 1 packets --- .../roles/test/tasks/common_tasks/reboot_sonic.yml | 9 +++++++-- ansible/roles/test/tasks/vxlan-decap.yml | 12 ++++++++---- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/ansible/roles/test/tasks/common_tasks/reboot_sonic.yml b/ansible/roles/test/tasks/common_tasks/reboot_sonic.yml index 5c844a68723..b241d4b59ac 100644 --- a/ansible/roles/test/tasks/common_tasks/reboot_sonic.yml +++ b/ansible/roles/test/tasks/common_tasks/reboot_sonic.yml @@ -7,6 +7,11 @@ reboot_type: "reboot" when: reboot_type is not defined +- name: set default value for sonic ready timeout + set_fact: + ready_timeout: 180 + when: ready_timeout is not defined + - fail: msg: "Reboot type {{ reboot_type }} is invalid. 
Must be one of {{ reboot_types }}" when: reboot_type not in reboot_types @@ -57,8 +62,8 @@ timeout: 180 changed_when: false -- name: wait for 2 minute for prcesses and interfaces to be stable - pause: seconds=120 +- name: wait for {{ ready_timeout }} seconds for prcesses and interfaces to be stable + pause: seconds={{ ready_timeout }} - name: Wait for warmboot-finalizer service to finish become: true diff --git a/ansible/roles/test/tasks/vxlan-decap.yml b/ansible/roles/test/tasks/vxlan-decap.yml index 70b72c21667..08f7b6ecd2c 100644 --- a/ansible/roles/test/tasks/vxlan-decap.yml +++ b/ansible/roles/test/tasks/vxlan-decap.yml @@ -35,6 +35,11 @@ supervisorctl: state=restarted name=arp_responder delegate_to: "{{ ptf_host }}" + - name: Restart DUT. Wait 240 seconds after SONiC started ssh + include: reboot.yml + vars: + ready_timeout: 240 + - name: Render DUT parameters to json file for the test template: src=vxlan_decap.json.j2 dest=/tmp/vxlan_decap.json delegate_to: "{{ ptf_host }}" @@ -60,7 +65,7 @@ ptf_test_params: - vxlan_enabled=False - config_file='/tmp/vxlan_decap.json' - - repetitions=1 + - count=10 - name: Configure vxlan decap tunnel shell: sonic-cfggen -j /tmp/vxlan_db.tunnel.json --write-to-db @@ -80,7 +85,7 @@ ptf_test_params: - vxlan_enabled=True - config_file='/tmp/vxlan_decap.json' - - count=1 + - count=10 - name: Remove vxlan tunnel map configuration for {{ item }} shell: docker exec -i database redis-cli -n 4 -c DEL "VXLAN_TUNNEL_MAP|tunnelVxlan|map{{ item }}" @@ -100,8 +105,7 @@ ptf_test_params: - vxlan_enabled=False - config_file='/tmp/vxlan_decap.json' - - repetitions=1 - + - count=10 - always: - name: Remove vxlan tunnel map configuration for {{ item }} From 454bed7b97cef5e16053c10fcdfe7e4e36dbdf64 Mon Sep 17 00:00:00 2001 From: Neetha John Date: Mon, 19 Aug 2019 11:20:04 -0700 Subject: [PATCH 05/16] [warm-reboot] Add lag flap check after warm boot (#1066) * Add lag flap check after warm boot Signed-off-by: Neetha John --- 
.../test/files/ptftests/advanced-reboot.py | 23 +++++++++++++++++-- ansible/roles/test/files/ptftests/arista.py | 20 ++++++++++++++++ 2 files changed, 41 insertions(+), 2 deletions(-) diff --git a/ansible/roles/test/files/ptftests/advanced-reboot.py b/ansible/roles/test/files/ptftests/advanced-reboot.py index b13cfaa4463..7a94bc4d4b6 100644 --- a/ansible/roles/test/files/ptftests/advanced-reboot.py +++ b/ansible/roles/test/files/ptftests/advanced-reboot.py @@ -777,8 +777,8 @@ def wait_for_ssh_threads(): if self.reboot_type == 'fast-reboot' and no_cp_replies < 0.95 * self.nr_vl_pkts: self.fails['dut'].add("Dataplane didn't route to all servers, when control-plane was down: %d vs %d" % (no_cp_replies, self.nr_vl_pkts)) - if self.reboot_type == 'warm-reboot' and self.preboot_oper is not None: - if self.pre_handle is not None: + if self.reboot_type == 'warm-reboot': + if self.preboot_oper is not None and self.pre_handle is not None: self.log("Postboot checks:") log_info, fails = self.pre_handle.verify(pre_check=False) self.populate_fail_info(fails) @@ -786,6 +786,10 @@ def wait_for_ssh_threads(): self.log(log) self.log(" ") + else: + # verify there are no interface flaps after warm boot + self.neigh_lag_status_check() + except Exception as e: self.fails['dut'].add(e) finally: @@ -864,6 +868,21 @@ def wait_for_ssh_threads(): self.assertTrue(is_good, errors) + def neigh_lag_status_check(self): + """ + Ensure there are no interface flaps after warm-boot + """ + for neigh in self.ssh_targets: + self.neigh_handle = Arista(neigh, None, self.test_params) + self.neigh_handle.connect() + fails, flap_cnt = self.neigh_handle.verify_neigh_lag_no_flap() + self.neigh_handle.disconnect() + self.fails[neigh] |= fails + if not flap_cnt: + self.log("No LAG flaps seen on %s after warm boot" % neigh) + else: + self.fails[neigh].add("LAG flapped %s times on %s after warm boot" % (flap_cnt, neigh)) + def extract_no_cpu_replies(self, arr): """ This function tries to extract number of replies 
from dataplane, when control plane is non working diff --git a/ansible/roles/test/files/ptftests/arista.py b/ansible/roles/test/files/ptftests/arista.py index fcd4b2ecf8f..04459417849 100644 --- a/ansible/roles/test/files/ptftests/arista.py +++ b/ansible/roles/test/files/ptftests/arista.py @@ -137,6 +137,9 @@ def run(self): sample["po_changetime"] = json.loads(portchannel_output, strict=False)['interfaces']['Port-Channel1']['lastStatusChangeTimestamp'] if not run_once: + # clear Portchannel counters + self.do_cmd("clear counters Port-Channel 1") + self.ipv4_gr_enabled, self.ipv6_gr_enabled, self.gr_timeout = self.parse_bgp_neighbor_once(bgp_neig_output) if self.gr_timeout is not None: log_first_line = "session_begins_%f" % cur_time @@ -423,6 +426,23 @@ def verify_neigh_lag_state(self, lag, state="connected", pre_check=True): self.fails.add('%s: Invalid interface name' % msg_prefix[pre_check]) return self.fails, lag_state + def verify_neigh_lag_no_flap(self): + flap_cnt = sys.maxint + output = self.do_cmd('show interfaces Po1 | json') + if 'Invalid' not in output: + data = '\n'.join(output.split('\r\n')[1:-1]) + obj = json.loads(data) + + if 'interfaces' in obj and 'Port-Channel1' in obj['interfaces']: + intf_cnt_info = obj['interfaces']['Port-Channel1']['interfaceCounters'] + flap_cnt = intf_cnt_info['linkStatusChanges'] + else: + self.fails.add('Object missing in output for Port-Channel1') + return self.fails, flap_cnt + + self.fails.add('Invalid interface name - Po1') + return self.fails, flap_cnt + def check_gr_peer_status(self, output): # [0] True 'ipv4_gr_enabled', [1] doesn't matter 'ipv6_enabled', [2] should be >= 120 if not self.ipv4_gr_enabled: From 900e6d3a89c6173360b399cb98340c7776ff5347 Mon Sep 17 00:00:00 2001 From: Neetha John Date: Mon, 19 Aug 2019 14:12:10 -0700 Subject: [PATCH 06/16] Add sensor data for Arista-7260CX3-Q64 (#1074) Signed-off-by: Neetha John --- ansible/group_vars/sonic/sku-sensors-data.yml | 96 +++++++++++++++++++ 1 file changed, 96 
insertions(+) diff --git a/ansible/group_vars/sonic/sku-sensors-data.yml b/ansible/group_vars/sonic/sku-sensors-data.yml index c913cab864e..cbe42a03d3a 100644 --- a/ansible/group_vars/sonic/sku-sensors-data.yml +++ b/ansible/group_vars/sonic/sku-sensors-data.yml @@ -2279,3 +2279,99 @@ sensors_checks: temp: [] psu_skips: {} + + Arista-7260CX3-Q64: + alarms: + fan: + - pmbus-i2c-3-58/fan1/fan1_alarm + - pmbus-i2c-4-58/fan1/fan1_alarm + - pmbus-i2c-3-58/fan1/fan1_fault + - pmbus-i2c-4-58/fan1/fan1_fault + - la_cpld-i2c-85-60/fan1/fan1_fault + - la_cpld-i2c-85-60/fan2/fan2_fault + - la_cpld-i2c-85-60/fan3/fan3_fault + - la_cpld-i2c-85-60/fan4/fan4_fault + power: + - pmbus-i2c-3-58/iin/curr1_max_alarm + - pmbus-i2c-3-58/iout1/curr2_max_alarm + - pmbus-i2c-3-58/iout1/curr2_crit_alarm + - pmbus-i2c-3-58/iout2/curr3_crit_alarm + - pmbus-i2c-3-58/vin/in1_alarm + - pmbus-i2c-3-58/vout1/in2_lcrit_alarm + - pmbus-i2c-3-58/vout1/in2_crit_alarm + - pmbus-i2c-4-58/iin/curr1_max_alarm + - pmbus-i2c-4-58/iout1/curr2_max_alarm + - pmbus-i2c-4-58/iout1/curr2_crit_alarm + - pmbus-i2c-4-58/iout2/curr3_crit_alarm + - pmbus-i2c-4-58/vin/in1_alarm + - pmbus-i2c-4-58/vout1/in2_lcrit_alarm + - pmbus-i2c-4-58/vout1/in2_crit_alarm + temp: + - coretemp-isa-0000/Physical id 0/temp1_crit_alarm + - coretemp-isa-0000/Core 0/temp2_crit_alarm + - coretemp-isa-0000/Core 1/temp3_crit_alarm + - lm73-i2c-88-48/Front panel temp sensor/temp1_min_alarm + - lm73-i2c-88-48/Front panel temp sensor/temp1_max_alarm + - max6658-i2c-1-4c/Asic temp sensor/temp1_min_alarm + - max6658-i2c-1-4c/Asic temp sensor/temp1_max_alarm + - max6658-i2c-1-4c/Asic temp sensor/temp1_crit_alarm + - max6658-i2c-73-4c/Back panel temp sensor 1/temp1_min_alarm + - max6658-i2c-73-4c/Back panel temp sensor 1/temp1_max_alarm + - max6658-i2c-73-4c/Back panel temp sensor 1/temp1_crit_alarm + - max6658-i2c-73-4c/Back panel temp sensor 2/temp2_min_alarm + - max6658-i2c-73-4c/Back panel temp sensor 2/temp2_max_alarm + - max6658-i2c-73-4c/Back 
panel temp sensor 2/temp2_crit_alarm + - max6658-i2c-73-4c/Back panel temp sensor 2/temp2_fault + - pmbus-i2c-3-58/Power supply 1 exhaust temp sensor/temp3_alarm + - pmbus-i2c-3-58/Power supply 1 inlet temp sensor/temp2_alarm + - pmbus-i2c-3-58/Power supply 1 hotspot sensor/temp1_alarm + - pmbus-i2c-4-58/Power supply 2 exhaust temp sensor/temp3_alarm + - pmbus-i2c-4-58/Power supply 2 inlet temp sensor/temp2_alarm + - pmbus-i2c-4-58/Power supply 2 hotspot sensor/temp1_alarm + + compares: + fan: [] + power: + - - pmbus-i2c-3-58/iin/curr1_input + - pmbus-i2c-3-58/iin/curr1_max + - - pmbus-i2c-3-58/iout1/curr2_input + - pmbus-i2c-3-58/iout1/curr2_max + - - pmbus-i2c-4-58/iin/curr1_input + - pmbus-i2c-4-58/iin/curr1_max + - - pmbus-i2c-4-58/iout1/curr2_input + - pmbus-i2c-4-58/iout1/curr2_max + temp: + - - coretemp-isa-0000/Physical id 0/temp1_input + - coretemp-isa-0000/Physical id 0/temp1_max + - - coretemp-isa-0000/Core 0/temp2_input + - coretemp-isa-0000/Core 0/temp2_max + - - coretemp-isa-0000/Core 1/temp3_input + - coretemp-isa-0000/Core 1/temp3_max + - - lm73-i2c-88-48/Front panel temp sensor/temp1_input + - lm73-i2c-88-48/Front panel temp sensor/temp1_max + - - max6658-i2c-1-4c/Asic temp sensor/temp1_input + - max6658-i2c-1-4c/Asic temp sensor/temp1_max + - - max6658-i2c-73-4c/Back panel temp sensor 1/temp1_input + - max6658-i2c-73-4c/Back panel temp sensor 1/temp1_max + - - max6658-i2c-73-4c/Back panel temp sensor 2/temp2_input + - max6658-i2c-73-4c/Back panel temp sensor 2/temp2_max + + non_zero: + fan: + - pmbus-i2c-3-58/fan1/fan1_input + - pmbus-i2c-4-58/fan1/fan1_input + - la_cpld-i2c-85-60/fan1/fan1_input + - la_cpld-i2c-85-60/fan2/fan2_input + - la_cpld-i2c-85-60/fan3/fan3_input + - la_cpld-i2c-85-60/fan4/fan4_input + power: + - pmbus-i2c-4-58/pin/power1_input + - pmbus-i2c-4-58/pout1/power2_input + - pmbus-i2c-4-58/pout2/power3_input + - pmbus-i2c-3-58/pin/power1_input + - pmbus-i2c-3-58/pout1/power2_input + - pmbus-i2c-3-58/pout2/power3_input + temp: + 
- pch_haswell-virtual-0/temp1/temp1_input + + psu_skips: {} From 703d97fca9a1483eb92c1639eeff7840ebcd5acd Mon Sep 17 00:00:00 2001 From: pavel-shirshov Date: Mon, 19 Aug 2019 19:03:04 -0700 Subject: [PATCH 07/16] [vxlan-decap]: Improvements of the test (#1075) * Output offset of ports in data structures * Warmup DUT before the test * Increase wating time from 0.2 to 0.5 * Quote minigraph_vlans otherwise ansible could remove this variable --- .../roles/test/files/ptftests/vxlan-decap.py | 77 ++++++++++++++----- ansible/roles/test/tasks/vxlan-decap.yml | 10 +-- 2 files changed, 62 insertions(+), 25 deletions(-) diff --git a/ansible/roles/test/files/ptftests/vxlan-decap.py b/ansible/roles/test/files/ptftests/vxlan-decap.py index e20b0a15687..24058f63be7 100644 --- a/ansible/roles/test/files/ptftests/vxlan-decap.py +++ b/ansible/roles/test/files/ptftests/vxlan-decap.py @@ -157,8 +157,36 @@ def setUp(self): def tearDown(self): return - def runTest(self): - print + def warmup(self): + print "Warming up" + err = '' + trace = '' + ret = 0 + try: + for test in self.tests: + if self.vxlan_enabled: + self.Vxlan(test, True) + self.RegularLAGtoVLAN(test, True) + self.RegularVLANtoLAG(test, True) + + except Exception as e: + err = str(e) + trace = traceback.format_exc() + ret = -1 + if ret != 0: + print "The warmup failed" + print + print "Error: %s" % err + print + print trace + else: + print "Warmup successful\n" + sys.stdout.flush() + if ret != 0: + raise AssertionError("Warmup failed") + + def work_test(self): + print "Testing" err = '' trace = '' ret = 0 @@ -194,28 +222,37 @@ def runTest(self): if ret != 0: raise AssertionError(err) - def Vxlan(self, test): - for n in self.net_ports: - for a in test['acc_ports']: + + def runTest(self): + print + # Warm-up first + self.warmup() + # test itself + self.work_test() + + + def Vxlan(self, test, wu = False): + for i, n in enumerate(self.net_ports): + for j, a in enumerate(test['acc_ports']): res, out = self.checkVxlan(a, n, test) 
- if not res: - return False, out + if not res and not wu: + return False, out + " | net_port_rel=%d acc_port_rel=%d" % (i, j) return True, "" - def RegularLAGtoVLAN(self, test): - for n in self.net_ports: - for a in test['acc_ports']: + def RegularLAGtoVLAN(self, test, wu = False): + for i, n in enumerate(self.net_ports): + for j, a in enumerate(test['acc_ports']): res, out = self.checkRegularRegularLAGtoVLAN(a, n, test) - if not res: - return False, out + if not res and not wu: + return False, out + " | net_port_rel=%d acc_port_rel=%d" % (i, j) return True, "" - def RegularVLANtoLAG(self, test): - for dst, ports in self.pc_info: - for a in test['acc_ports']: + def RegularVLANtoLAG(self, test, wu = False): + for i, (dst, ports) in enumerate(self.pc_info): + for j, a in enumerate(test['acc_ports']): res, out = self.checkRegularRegularVLANtoLAG(a, ports, dst, test) - if not res: - return False, out + if not res and not wu: + return False, out + " | pc_info_rel=%d acc_port_rel=%d" % (i, j) return True, "" def checkRegularRegularVLANtoLAG(self, acc_port, pc_ports, dst_ip, test): @@ -242,7 +279,7 @@ def checkRegularRegularVLANtoLAG(self, acc_port, pc_ports, dst_ip, test): for i in xrange(self.nr): testutils.send_packet(self, acc_port, packet) - nr_rcvd = testutils.count_matched_packets_all_ports(self, exp_packet, pc_ports, timeout=0.2) + nr_rcvd = testutils.count_matched_packets_all_ports(self, exp_packet, pc_ports, timeout=0.5) rv = nr_rcvd == self.nr out = "" if not rv: @@ -274,7 +311,7 @@ def checkRegularRegularLAGtoVLAN(self, acc_port, net_port, test): for i in xrange(self.nr): testutils.send_packet(self, net_port, packet) - nr_rcvd = testutils.count_matched_packets(self, exp_packet, acc_port, timeout=0.2) + nr_rcvd = testutils.count_matched_packets(self, exp_packet, acc_port, timeout=0.5) rv = nr_rcvd == self.nr out = "" if not rv: @@ -311,7 +348,7 @@ def checkVxlan(self, acc_port, net_port, test): ) for i in xrange(self.nr): testutils.send_packet(self, net_port, 
packet) - nr_rcvd = testutils.count_matched_packets(self, inpacket, acc_port, timeout=0.2) + nr_rcvd = testutils.count_matched_packets(self, inpacket, acc_port, timeout=0.5) rv = nr_rcvd == self.nr out = "" if not rv: diff --git a/ansible/roles/test/tasks/vxlan-decap.yml b/ansible/roles/test/tasks/vxlan-decap.yml index 08f7b6ecd2c..fcdf0f1e9cf 100644 --- a/ansible/roles/test/tasks/vxlan-decap.yml +++ b/ansible/roles/test/tasks/vxlan-decap.yml @@ -70,7 +70,7 @@ - name: Configure vxlan decap tunnel shell: sonic-cfggen -j /tmp/vxlan_db.tunnel.json --write-to-db - - name: Configure vxlan decap tunnel map for {{ item }} + - name: Configure vxlan decap tunnel maps shell: sonic-cfggen -j /tmp/vxlan_db.maps.{{ item }}.json --write-to-db with_items: minigraph_vlans @@ -87,9 +87,9 @@ - config_file='/tmp/vxlan_decap.json' - count=10 - - name: Remove vxlan tunnel map configuration for {{ item }} + - name: Remove vxlan tunnel maps configuration shell: docker exec -i database redis-cli -n 4 -c DEL "VXLAN_TUNNEL_MAP|tunnelVxlan|map{{ item }}" - with_items: minigraph_vlans + with_items: "{{ minigraph_vlans }}" - name: Remove vxlan tunnel configuration shell: docker exec -i database redis-cli -n 4 -c DEL "VXLAN_TUNNEL|tunnelVxlan" @@ -108,9 +108,9 @@ - count=10 - always: - - name: Remove vxlan tunnel map configuration for {{ item }} + - name: Remove vxlan tunnel maps configuration shell: docker exec -i database redis-cli -n 4 -c DEL "VXLAN_TUNNEL_MAP|tunnelVxlan|map{{ item }}" - with_items: minigraph_vlans + with_items: "{{ minigraph_vlans }}" - name: Remove vxlan tunnel configuration shell: docker exec -i database redis-cli -n 4 -c DEL "VXLAN_TUNNEL|tunnelVxlan" From 5488bf98ec818d104d7ab679915f6844fa965cf9 Mon Sep 17 00:00:00 2001 From: pavel-shirshov Date: Tue, 20 Aug 2019 17:21:12 -0700 Subject: [PATCH 08/16] [vxlan-decap]: Generate mapping between vlan member ports and vlan ip address robustly (#1078) * Use unquoted style of the variable usage * Make the fact cache valid for a 
day * Rewrite mapping between vlan ports and vlan ip addresses --- ansible/ansible.cfg | 2 +- .../roles/test/files/ptftests/vxlan-decap.py | 58 +++++++++++++------ ansible/roles/test/tasks/vxlan-decap.yml | 15 +---- 3 files changed, 44 insertions(+), 31 deletions(-) diff --git a/ansible/ansible.cfg b/ansible/ansible.cfg index 990331eb5b9..37ac0f14b5d 100644 --- a/ansible/ansible.cfg +++ b/ansible/ansible.cfg @@ -157,7 +157,7 @@ callback_whitelist = profile_tasks # current IP information. fact_caching = jsonfile fact_caching_connection = ~/.ansible/cache -fact_caching_timeout = 1200 +fact_caching_timeout = 86400 # retry files diff --git a/ansible/roles/test/files/ptftests/vxlan-decap.py b/ansible/roles/test/files/ptftests/vxlan-decap.py index 24058f63be7..4a86e274322 100644 --- a/ansible/roles/test/files/ptftests/vxlan-decap.py +++ b/ansible/roles/test/files/ptftests/vxlan-decap.py @@ -25,6 +25,8 @@ import datetime import subprocess import traceback +import socket +import struct from pprint import pprint from pprint import pformat @@ -57,14 +59,40 @@ def readMacs(self): def generate_ArpResponderConfig(self): config = {} for test in self.tests: - for port in test['acc_ports']: - config['eth%d' % port] = [test['vlan_ip_prefix'] % port] + for port, ip in test['vlan_ip_prefixes'].items(): + config['eth%d' % port] = [ip] with open('/tmp/vxlan_arpresponder.conf', 'w') as fp: json.dump(config, fp) return + def generate_VlanPrefixes(self, gw, prefixlen, acc_ports): + res = {} + n_hosts = 2**(32 - prefixlen) - 3 + nr_of_dataplane_ports = len(self.dataplane.ports) + + if nr_of_dataplane_ports > n_hosts: + raise Exception("The prefix len size is too small for the test") + + gw_addr_n = struct.unpack(">I", socket.inet_aton(gw))[0] + mask = (2**32 - 1) ^ (2**(32 - prefixlen) - 1) + net_addr_n = gw_addr_n & mask + + addr = 1 + for port in acc_ports: + while True: + host_addr_n = net_addr_n + addr + host_ip = socket.inet_ntoa(struct.pack(">I", host_addr_n)) + if host_ip != gw: + 
break + else: + addr += 1 # skip gw + res[port] = host_ip + addr += 1 + + return res + def setUp(self): self.dataplane = ptf.dataplane_instance @@ -117,20 +145,13 @@ def setUp(self): for d in graph['minigraph_vlan_interfaces']: if d['attachto'] == name: gw = d['addr'] - prefixlen = d['prefixlen'] + prefixlen = int(d['prefixlen']) break else: raise Exception("Vlan '%s' is not found" % name) test['vlan_gw'] = gw - - number_of_dataplane_ports = len(self.dataplane.ports) - if number_of_dataplane_ports > 256: - raise Exception("Too much dataplane ports for the test") - if prefixlen > 24: - raise Exception("The prefix len size is too small for the test") - - test['vlan_ip_prefix'] = '.'.join(gw.split('.')[0:3])+".%d" + test['vlan_ip_prefixes'] = self.generate_VlanPrefixes(gw, prefixlen, test['acc_ports']) self.tests.append(test) @@ -150,11 +171,14 @@ def setUp(self): self.generate_ArpResponderConfig() + self.cmd(["supervisorctl", "restart", "arp_responder"]) + self.dataplane.flush() return def tearDown(self): + self.cmd(["supervisorctl", "stop", "arp_responder"]) return def warmup(self): @@ -258,7 +282,7 @@ def RegularVLANtoLAG(self, test, wu = False): def checkRegularRegularVLANtoLAG(self, acc_port, pc_ports, dst_ip, test): src_mac = self.ptf_mac_addrs['eth%d' % acc_port] dst_mac = self.dut_mac - src_ip = test['vlan_ip_prefix'] % acc_port + src_ip = test['vlan_ip_prefixes'][acc_port] packet = simple_tcp_packet( eth_dst=dst_mac, @@ -279,7 +303,7 @@ def checkRegularRegularVLANtoLAG(self, acc_port, pc_ports, dst_ip, test): for i in xrange(self.nr): testutils.send_packet(self, acc_port, packet) - nr_rcvd = testutils.count_matched_packets_all_ports(self, exp_packet, pc_ports, timeout=0.5) + nr_rcvd = testutils.count_matched_packets_all_ports(self, exp_packet, pc_ports, timeout=0.2) rv = nr_rcvd == self.nr out = "" if not rv: @@ -292,7 +316,7 @@ def checkRegularRegularLAGtoVLAN(self, acc_port, net_port, test): src_mac = self.random_mac dst_mac = self.dut_mac src_ip = 
test['src_ip'] - dst_ip = test['vlan_ip_prefix'] % acc_port + dst_ip = test['vlan_ip_prefixes'][acc_port] packet = simple_tcp_packet( eth_dst=dst_mac, @@ -311,7 +335,7 @@ def checkRegularRegularLAGtoVLAN(self, acc_port, net_port, test): for i in xrange(self.nr): testutils.send_packet(self, net_port, packet) - nr_rcvd = testutils.count_matched_packets(self, exp_packet, acc_port, timeout=0.5) + nr_rcvd = testutils.count_matched_packets(self, exp_packet, acc_port, timeout=0.2) rv = nr_rcvd == self.nr out = "" if not rv: @@ -323,7 +347,7 @@ def checkVxlan(self, acc_port, net_port, test): inner_dst_mac = self.ptf_mac_addrs['eth%d' % acc_port] inner_src_mac = self.dut_mac inner_src_ip = test['vlan_gw'] - inner_dst_ip = test['vlan_ip_prefix'] % acc_port + inner_dst_ip = test['vlan_ip_prefixes'][acc_port] dst_mac = self.dut_mac src_mac = self.random_mac ip_dst = self.loopback_ip @@ -348,7 +372,7 @@ def checkVxlan(self, acc_port, net_port, test): ) for i in xrange(self.nr): testutils.send_packet(self, net_port, packet) - nr_rcvd = testutils.count_matched_packets(self, inpacket, acc_port, timeout=0.5) + nr_rcvd = testutils.count_matched_packets(self, inpacket, acc_port, timeout=0.2) rv = nr_rcvd == self.nr out = "" if not rv: diff --git a/ansible/roles/test/tasks/vxlan-decap.yml b/ansible/roles/test/tasks/vxlan-decap.yml index fcdf0f1e9cf..f2ac0d23370 100644 --- a/ansible/roles/test/tasks/vxlan-decap.yml +++ b/ansible/roles/test/tasks/vxlan-decap.yml @@ -31,10 +31,6 @@ vars: supervisor_host: "{{ ptf_host }}" - - name: Start arpresponder - supervisorctl: state=restarted name=arp_responder - delegate_to: "{{ ptf_host }}" - - name: Restart DUT. 
Wait 240 seconds after SONiC started ssh include: reboot.yml vars: @@ -51,9 +47,6 @@ template: src=vxlan_db.maps.json.j2 dest=/tmp/vxlan_db.maps.{{ item }}.json with_items: minigraph_vlans - - name: Wait for some time until arp cache is ready - pause: seconds=50 - - include: ptf_runner.yml vars: ptf_test_name: Vxlan decap test - No vxlan configuration @@ -89,7 +82,7 @@ - name: Remove vxlan tunnel maps configuration shell: docker exec -i database redis-cli -n 4 -c DEL "VXLAN_TUNNEL_MAP|tunnelVxlan|map{{ item }}" - with_items: "{{ minigraph_vlans }}" + with_items: minigraph_vlans - name: Remove vxlan tunnel configuration shell: docker exec -i database redis-cli -n 4 -c DEL "VXLAN_TUNNEL|tunnelVxlan" @@ -110,11 +103,7 @@ - always: - name: Remove vxlan tunnel maps configuration shell: docker exec -i database redis-cli -n 4 -c DEL "VXLAN_TUNNEL_MAP|tunnelVxlan|map{{ item }}" - with_items: "{{ minigraph_vlans }}" + with_items: minigraph_vlans - name: Remove vxlan tunnel configuration shell: docker exec -i database redis-cli -n 4 -c DEL "VXLAN_TUNNEL|tunnelVxlan" - - - name: Stop arpresponder - supervisorctl: state=stopped name=arp_responder - delegate_to: "{{ ptf_host }}" From 1583737ac9fcefbb11ed66a6bbba20915990c87f Mon Sep 17 00:00:00 2001 From: pavel-shirshov Date: Tue, 20 Aug 2019 17:25:20 -0700 Subject: [PATCH 09/16] [vm-set]: Add setting of RCVBUF default parameter (#1076) * Add setting of RCVBUF default parameter * MAke default smaller - 31Mb --- ansible/roles/vm_set/tasks/main.yml | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/ansible/roles/vm_set/tasks/main.yml b/ansible/roles/vm_set/tasks/main.yml index 22b502b8881..3a66c6d0729 100644 --- a/ansible/roles/vm_set/tasks/main.yml +++ b/ansible/roles/vm_set/tasks/main.yml @@ -96,13 +96,20 @@ - net.bridge.bridge-nf-call-ip6tables - net.bridge.bridge-nf-call-iptables -- name: Set sysctl RCVBUF parameter for testbed +- name: Set sysctl RCVBUF max parameter for testbed sysctl: name: 
"net.core.rmem_max" value: 509430500 sysctl_set: yes become: yes +- name: Set sysctl RCVBUF default parameter for testbed + sysctl: + name: "net.core.rmem_default" + value: 31457280 + sysctl_set: yes + become: yes + - name: Setup external front port include: external_port.yml when: external_port is defined From cfd3995bfdf8198f3afefb2a911e95d7e3ef1e8b Mon Sep 17 00:00:00 2001 From: Shuotian Cheng Date: Wed, 21 Aug 2019 10:32:52 -0700 Subject: [PATCH 10/16] [everflow]: Add Mellanox support due to specific GRE packet format (#1077) Mallnox uses a different GRE protocol 0x8949 and crafts extra information in the packet that we need to take care of specifically. Signed-off-by: Shu0T1an ChenG --- .../files/acstests/everflow_policer_test.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/ansible/roles/test/files/acstests/everflow_policer_test.py b/ansible/roles/test/files/acstests/everflow_policer_test.py index b09e30a74e5..31249a244db 100644 --- a/ansible/roles/test/files/acstests/everflow_policer_test.py +++ b/ansible/roles/test/files/acstests/everflow_policer_test.py @@ -100,7 +100,16 @@ def checkMirroredFlow(self): """ @summary: Send traffic & check how many mirrored packets are received @return: count: number of mirrored packets received + + Note: + Mellanox crafts the GRE packets with extra information: + That is: 22 bytes extra information after the GRE header """ + payload = self.base_pkt + if self.asic_type in ["mellanox"]: + import binascii + payload = binascii.unhexlify("0"*44) + str(payload) # Add the padding + exp_pkt = testutils.simple_gre_packet( eth_src = self.router_mac, ip_src = self.session_src_ip, @@ -109,15 +118,21 @@ def checkMirroredFlow(self): ip_id = 0, #ip_flags = 0x10, # need to upgrade ptf version to support it ip_ttl = self.session_ttl, - inner_frame = self.base_pkt) + inner_frame = payload) - exp_pkt['GRE'].proto = 0x88be + if self.asic_type in ["mellanox"]: + exp_pkt['GRE'].proto = 0x8949 # Mellanox 
specific + else: + exp_pkt['GRE'].proto = 0x88be masked_exp_pkt = Mask(exp_pkt) masked_exp_pkt.set_do_not_care_scapy(scapy.Ether, "dst") masked_exp_pkt.set_do_not_care_scapy(scapy.IP, "flags") masked_exp_pkt.set_do_not_care_scapy(scapy.IP, "chksum") + if self.asic_type in ["mellanox"]: + masked_exp_pkt.set_do_not_care(304, 176) # Mask the Mellanox specific inner header + self.dataplane.flush() count = 0 From 55f35f9c315d2d85279e9ed0b084e7035b32c325 Mon Sep 17 00:00:00 2001 From: Xin Wang Date: Thu, 22 Aug 2019 23:06:36 +0800 Subject: [PATCH 11/16] [pytest] Improve infra and update the platform scripts to use the new infra (#1072) * [pytest] Improve infra and update the platform scripts to use the new infra Improvements: * Add testbed_devices fixture * Add common folder for reusable libraries * Use OO design to abstract and encapsulate behavior for various devices in testbed * Improve code reuse by the OO design and common libraries * Add support to run ansible module asynchronously * In devices.AnsibleHostBase, use the improvement of module_ignore_errors by dawnbeauty * Update the platform scripts to use the improved pytest infrastructure * Add a few Mellanox specific testing scripts for completeness and demonstration * Correct the assert statement * Use pre-gathered hwsku facts * Correct mellanox switch profile data * Skip checking fanX_status when fan module is not hot swappable --- tests/common/__init__.py | 0 tests/common/devices.py | 195 ++++++++++++++++++ tests/common/errors.py | 8 + tests/common/mellanox_data.py | 142 +++++++++++++ tests/{platform => common}/utilities.py | 11 + tests/conftest.py | 36 +++- tests/platform/check_critical_services.py | 64 +----- .../mellanox/check_hw_mgmt_service.py | 20 +- tests/platform/mellanox/check_sysfs.py | 48 +++-- .../mellanox/test_check_sfp_using_ethtool.py | 40 ++++ tests/platform/mellanox/test_check_sysfs.py | 27 +++ .../mellanox/test_hw_management_service.py | 15 ++ tests/platform/platform_fixtures.py | 11 + 
tests/platform/test_platform_info.py | 33 ++- tests/platform/test_reboot.py | 63 +++--- tests/platform/test_reload_config.py | 16 +- tests/platform/test_sequential_restart.py | 34 ++- tests/platform/test_sfp.py | 25 +-- tests/platform/test_xcvr_info_in_db.py | 45 +--- 19 files changed, 599 insertions(+), 234 deletions(-) create mode 100644 tests/common/__init__.py create mode 100644 tests/common/devices.py create mode 100644 tests/common/errors.py create mode 100644 tests/common/mellanox_data.py rename tests/{platform => common}/utilities.py (85%) create mode 100644 tests/platform/mellanox/test_check_sfp_using_ethtool.py create mode 100644 tests/platform/mellanox/test_check_sysfs.py create mode 100644 tests/platform/mellanox/test_hw_management_service.py create mode 100644 tests/platform/platform_fixtures.py diff --git a/tests/common/__init__.py b/tests/common/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/common/devices.py b/tests/common/devices.py new file mode 100644 index 00000000000..9aac450cbb0 --- /dev/null +++ b/tests/common/devices.py @@ -0,0 +1,195 @@ +""" +Classes for various devices that may be used in testing. + +There are other options for interacting with the devices used in testing, for example netmiko, fabric. +We have a big number of customized ansible modules in the sonic-mgmt/ansible/library folder. To reused these +modules, we have no other choice, at least for interacting with SONiC, localhost and PTF. + +We can consider using netmiko for interacting with the VMs used in testing. +""" +import json +import logging +from multiprocessing import Process, Queue + +from errors import RunAnsibleModuleFail +from errors import UnsupportedAnsibleModule + +class AnsibleHostBase(object): + """ + @summary: The base class for various objects. + + This class filters an object from the ansible_adhoc fixture by hostname. The object can be considered as an + ansible host object although it is not under the hood. 
Anyway, we can use this object to run ansible module + on the host. + """ + + def __init__(self, ansible_adhoc, hostname): + if hostname == 'localhost': + self.host = ansible_adhoc(inventory='localhost', connection='local', host_pattern=hostname)[hostname] + else: + self.host = ansible_adhoc(become=True)[hostname] + self.hostname = hostname + + def __getattr__(self, item): + if self.host.has_module(item): + self.module_name = item + self.module = getattr(self.host, item) + + return self._run + else: + raise UnsupportedAnsibleModule("Unsupported module") + + def _run(self, *module_args, **complex_args): + module_ignore_errors = complex_args.pop('module_ignore_errors', False) + module_async = complex_args.pop('module_async', False) + + if module_async: + q = Queue() + def run_module(queue, module_args, complex_args): + res = self.module(*module_args, **complex_args) + q.put(res[self.hostname]) + p = Process(target=run_module, args=(q, module_args, complex_args)) + p.start() + return p, q + + res = self.module(*module_args, **complex_args)[self.hostname] + if res.is_failed and not module_ignore_errors: + raise RunAnsibleModuleFail("run module {} failed, errmsg {}".format(self.module_name, res)) + + return res + + +class Localhost(AnsibleHostBase): + """ + @summary: Class for localhost + + For running ansible module on localhost + """ + def __init__(self, ansible_adhoc): + AnsibleHostBase.__init__(self, ansible_adhoc, "localhost") + + +class PTFHost(AnsibleHostBase): + """ + @summary: Class for PTF + + Instance of this class can run ansible modules on the PTF host. 
+ """ + def __init__(self, ansible_adhoc, hostname): + AnsibleHostBase.__init__(self, ansible_adhoc, hostname) + + # TODO: Add a method for running PTF script + + +class SonicHost(AnsibleHostBase): + """ + @summary: Class for SONiC switch + + For running ansible module on the SONiC switch + """ + CRITICAL_SERVICES = ["swss", "syncd", "database", "teamd", "bgp", "pmon", "lldp"] + + def __init__(self, ansible_adhoc, hostname, gather_facts=False): + AnsibleHostBase.__init__(self, ansible_adhoc, hostname) + if gather_facts: + self.gather_facts() + + def _platform_info(self): + platform_info = self.command("show platform summary")["stdout_lines"] + for line in platform_info: + if line.startswith("Platform:"): + self.facts["platform"] = line.split(":")[1].strip() + elif line.startswith("HwSKU:"): + self.facts["hwsku"] = line.split(":")[1].strip() + elif line.startswith("ASIC:"): + self.facts["asic_type"] = line.split(":")[1].strip() + + def gather_facts(self): + """ + @summary: Gather facts of the SONiC switch and store the gathered facts in the dict type 'facts' attribute. + """ + self.facts = {} + self._platform_info() + logging.debug("SonicHost facts: %s" % json.dumps(self.facts)) + + def get_service_props(self, service, props=["ActiveState", "SubState"]): + """ + @summary: Use 'systemctl show' command to get detailed properties of a service. By default, only get + ActiveState and SubState of the service. + @param service: Service name. + @param props: Properties of the service to be shown. 
+ @return: Returns a dictionary containing properties of the specified service, for example: + { + "ActivateState": "active", + "SubState": "running" + } + """ + props = " ".join(["-p %s" % prop for prop in props]) + output = self.command("systemctl %s show %s" % (props, service)) + result = {} + for line in output["stdout_lines"]: + fields = line.split("=") + if len(fields) >= 2: + result[fields[0]] = fields[1] + return result + + def is_service_fully_started(self, service): + """ + @summary: Check whether a SONiC specific service is fully started. + + The last step in the starting script of all SONiC services is to run "docker wait ". This command + will not exit unless the docker container of the service is stopped. We use this trick to determine whether + a SONiC service has completed starting. + + @param service: Name of the SONiC service + """ + try: + output = self.command('pgrep -f "docker wait %s"' % service) + if output["stdout_lines"]: + return True + else: + return False + except: + return False + + def critical_services_fully_started(self): + """ + @summary: Check whether all the SONiC critical services have started + """ + result = {} + for service in self.CRITICAL_SERVICES: + result[service] = self.is_service_fully_started(service) + + logging.debug("Status of critical services: %s" % str(result)) + return all(result.values()) + + + def get_crm_resources(self): + """ + @summary: Run the "crm show resources all" command and parse its output + """ + result = {"main_resources": {}, "acl_resources": [], "table_resources": []} + output = self.command("crm show resources all")["stdout_lines"] + current_table = 0 # Totally 3 tables in the command output + for line in output: + if len(line.strip()) == 0: + continue + if "---" in line: + current_table += 1 + continue + if current_table == 1: # content of first table, main resources + fields = line.split() + if len(fields) == 3: + result["main_resources"][fields[0]] = {"used": int(fields[1]), "available": 
int(fields[2])} + if current_table == 2: # content of the second table, acl resources + fields = line.split() + if len(fields) == 5: + result["acl_resources"].append({"stage": fields[0], "bind_point": fields[1], + "resource_name": fields[2], "used_count": int(fields[3]), "available_count": int(fields[4])}) + if current_table == 3: # content of the third table, table resources + fields = line.split() + if len(fields) == 4: + result["table_resources"].append({"table_id": fields[0], "resource_name": fields[1], + "used_count": int(fields[2]), "available_count": int(fields[3])}) + + return result diff --git a/tests/common/errors.py b/tests/common/errors.py new file mode 100644 index 00000000000..25a2397a6df --- /dev/null +++ b/tests/common/errors.py @@ -0,0 +1,8 @@ +""" +Customize exceptions +""" +class UnsupportedAnsibleModule(Exception): + pass + +class RunAnsibleModuleFail(Exception): + pass diff --git a/tests/common/mellanox_data.py b/tests/common/mellanox_data.py new file mode 100644 index 00000000000..7ef9aa424bd --- /dev/null +++ b/tests/common/mellanox_data.py @@ -0,0 +1,142 @@ + +SPC1_HWSKUS = ["ACS-MSN2700", "Mellanox-SN2700", "ACS-MSN2740", "ACS-MSN2100", "ACS-MSN2410", "ACS-MSN2010"] +SPC2_HWSKUS = ["ACS-MSN3700", "ACS-MSN3700C", "ACS-MSN3800"] +SWITCH_HWSKUS = SPC1_HWSKUS + SPC2_HWSKUS + +SWITCH_MODELS = { + "ACS-MSN2700": { + "reboot": { + "cold_reboot": True, + "fast_reboot": True, + "warm_reboot": True + }, + "fans": { + "number": 4, + "hot_swappable": True + }, + "psus": { + "number": 2, + "hot_swappable": True + } + }, + "ACS-MSN2740": { + "reboot": { + "cold_reboot": True, + "fast_reboot": True, + "warm_reboot": False + }, + "fans": { + "number": 4, + "hot_swappable": True + }, + "psus": { + "number": 2, + "hot_swappable": True + } + }, + "ACS-MSN2410": { + "reboot": { + "cold_reboot": True, + "fast_reboot": True, + "warm_reboot": True + }, + "fans": { + "number": 4, + "hot_swappable": True + }, + "psus": { + "number": 2, + "hot_swappable": True + } + 
}, + "ACS-MSN2010": { + "reboot": { + "cold_reboot": True, + "fast_reboot": True, + "warm_reboot": False + }, + "fans": { + "number": 4, + "hot_swappable": False + }, + "psus": { + "number": 2, + "hot_swappable": False + } + }, + "ACS-MSN2100": { + "reboot": { + "cold_reboot": True, + "fast_reboot": True, + "warm_reboot": False + }, + "fans": { + "number": 4, + "hot_swappable": False + }, + "psus": { + "number": 2, + "hot_swappable": False + } + }, + "ACS-MSN3800": { + "reboot": { + "cold_reboot": True, + "fast_reboot": True, + "warm_reboot": False + }, + "fans": { + "number": 3, + "hot_swappable": True + }, + "psus": { + "number": 2, + "hot_swappable": True + } + }, + "ACS-MSN3700": { + "reboot": { + "cold_reboot": True, + "fast_reboot": True, + "warm_reboot": False + }, + "fans": { + "number": 6, + "hot_swappable": True + }, + "psus": { + "number": 2, + "hot_swappable": True + } + }, + "ACS-MSN3700C": { + "reboot": { + "cold_reboot": True, + "fast_reboot": True, + "warm_reboot": False + }, + "fans": { + "number": 4, + "hot_swappable": True + }, + "psus": { + "number": 2, + "hot_swappable": True + } + }, + "ACS-MSN3510": { + "reboot": { + "cold_reboot": True, + "fast_reboot": True, + "warm_reboot": False + }, + "fans": { + "number": 6, + "hot_swappable": True + }, + "psus": { + "number": 2, + "hot_swappable": True + } + } +} diff --git a/tests/platform/utilities.py b/tests/common/utilities.py similarity index 85% rename from tests/platform/utilities.py rename to tests/common/utilities.py index 6ec3a349d7c..85a32204352 100644 --- a/tests/platform/utilities.py +++ b/tests/common/utilities.py @@ -4,6 +4,17 @@ import time import logging + +def wait(seconds, msg=""): + """ + @summary: Pause specified number of seconds + @param seconds: Number of seconds to pause + @param msg: Optional extra message for pause reason + """ + logging.debug("Pause %d seconds, reason: %s" % (seconds, msg)) + time.sleep(seconds) + + def wait_until(timeout, interval, condition, *args, 
**kwargs): """ @summary: Wait until the specified condition is True or timeout. diff --git a/tests/conftest.py b/tests/conftest.py index e5380c840a6..2701fdf06fd 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,3 +1,6 @@ +import sys +import os + import pytest import csv import yaml @@ -5,9 +8,13 @@ from ansible_host import AnsibleHost - pytest_plugins = ('ptf_fixtures', 'ansible_fixtures') +# Add the tests folder to sys.path, for importing the lib package +_current_file_dir = os.path.dirname(os.path.realpath(__file__)) +if _current_file_dir not in sys.path: + sys.path.append(current_file_dir) + class TestbedInfo(object): """ @@ -61,6 +68,33 @@ def testbed(request): return tbinfo.testbed_topo[tbname] +@pytest.fixture(scope="module") +def testbed_devices(ansible_adhoc, testbed): + """ + @summary: Fixture for creating dut, localhost and other necessary objects for testing. These objects provide + interfaces for interacting with the devices used in testing. + @param ansible_adhoc: Fixture provided by the pytest-ansible package. Source of the various device objects. It is + mandatory argument for the class constructors. + @param testbed: Fixture for parsing testbed configuration file. + @return: Return the created device objects in a dictionary + """ + from common.devices import SonicHost, Localhost + + devices = {} + devices["localhost"] = Localhost(ansible_adhoc) + devices["dut"] = SonicHost(ansible_adhoc, testbed["dut"], gather_facts=True) + if "ptf" in testbed: + devices["ptf"] = PTFHost(ansible_adhoc, testbed["ptf"]) + + # In the future, we can implement more classes for interacting with other testbed devices in the lib.devices + # module. Then, in this fixture, we can initialize more instance of the classes and store the objects in the + # devices dict here. 
For example, we could have + # from common.devices import FanoutHost + # devices["fanout"] = FanoutHost(ansible_adhoc, testbed["dut"]) + + return devices + + @pytest.fixture(scope="module") def duthost(ansible_adhoc, testbed): """ diff --git a/tests/platform/check_critical_services.py b/tests/platform/check_critical_services.py index 30d0e6393ca..28162d0ca85 100644 --- a/tests/platform/check_critical_services.py +++ b/tests/platform/check_critical_services.py @@ -6,63 +6,7 @@ import time import logging -from utilities import wait_until - -critical_services = ["swss", "syncd", "database", "teamd", "bgp", "pmon", "lldp"] - - -def get_service_status(dut, service): - """ - @summary: Get the ActiveState and SubState of a service. This function uses the systemctl tool to get the - ActiveState and SubState of specified service. - @param dut: The AnsibleHost object of DUT. For interacting with DUT. - @param service: Service name. - @return: Returns a dictionary containing ActiveState and SubState of the specified service, for example: - { - "ActivateState": "active", - "SubState": "running" - } - """ - output = dut.command("systemctl -p ActiveState -p SubState show %s" % service) - result = {} - for line in output["stdout_lines"]: - fields = line.split("=") - if len(fields) >= 2: - result[fields[0]] = fields[1] - return result - - -def service_fully_started(dut, service): - """ - @summary: Check whether the specified service is fully started on DUT. According to the SONiC design, the last - instruction in service starting script is to run "docker wait ". This function take advantage - of this design to check whether a service has been fully started. The trick is to check whether - "docker wait " exists in current running processes. - @param dut: The AnsibleHost object of DUT. For interacting with DUT. - @param service: Service name. - @return: Return True if the specified service is fully started. Otherwise return False. 
- """ - try: - output = dut.command('pgrep -f "docker wait %s"' % service) - if output["stdout_lines"]: - return True - else: - return False - except: - return False - - -def critical_services_fully_started(dut): - """ - @summary: Check whether all the critical service have been fully started. - @param dut: The AnsibleHost object of DUT. For interacting with DUT. - @return: Return True if all the critical services have been fully started. Otherwise return False. - """ - result = {} - for service in critical_services: - result[service] = service_fully_started(dut, service) - logging.debug("Status of critical services: %s" % str(result)) - return all(result.values()) +from common.utilities import wait_until def check_critical_services(dut): @@ -72,11 +16,11 @@ def check_critical_services(dut): @param dut: The AnsibleHost object of DUT. For interacting with DUT. """ logging.info("Wait until all critical services are fully started") - assert wait_until(300, 20, critical_services_fully_started, dut), "Not all critical services are fully started" + assert wait_until(300, 20, dut.critical_services_fully_started), "Not all critical services are fully started" logging.info("Check critical service status") - for service in critical_services: - status = get_service_status(dut, service) + for service in dut.CRITICAL_SERVICES: + status = dut.get_service_props(service) assert status["ActiveState"] == "active", \ "ActiveState of %s is %s, expected: active" % (service, status["ActiveState"]) assert status["SubState"] == "running", \ diff --git a/tests/platform/mellanox/check_hw_mgmt_service.py b/tests/platform/mellanox/check_hw_mgmt_service.py index 1b012c74027..c7fdb199929 100644 --- a/tests/platform/mellanox/check_hw_mgmt_service.py +++ b/tests/platform/mellanox/check_hw_mgmt_service.py @@ -4,7 +4,7 @@ import logging import re -from utilities import wait_until +from common.utilities import wait_until def fan_speed_set_to_default(dut): @@ -12,17 +12,21 @@ def 
fan_speed_set_to_default(dut): return fan_speed_setting == "153" -def wait_until_fan_speed_set_to_default(dut): - wait_until(300, 10, fan_speed_set_to_default, dut) +def wait_until_fan_speed_set_to_default(dut, timeout=300, interval=10): + wait_until(timeout, interval, fan_speed_set_to_default, dut) def check_hw_management_service(dut): """This function is to check the hw management service and related settings. """ + logging.info("Check fan speed setting") + assert not wait_until_fan_speed_set_to_default(dut), \ + "Fan speed is not default to 60 percent in 5 minutes. 153/255=60%" + logging.info("Check service status using systemctl") - hw_mgmt_service_state = dut.command("systemctl -p ActiveState -p SubState show hw-management") - assert hw_mgmt_service_state["stdout"].find("ActiveState=active") >= 0, "The hw-management service is not active" - assert hw_mgmt_service_state["stdout"].find("SubState=exited") >= 0, "The hw-management service is not exited" + hw_mgmt_service_state = dut.get_service_props("hw-management") + assert hw_mgmt_service_state["ActiveState"] == "active", "The hw-management service is not active" + assert hw_mgmt_service_state["SubState"] == "exited", "The hw-management service is not exited" logging.info("Check the thermal control process") tc_pid = dut.command("pgrep -f /usr/bin/hw-management-thermal-control.sh") @@ -32,10 +36,6 @@ def check_hw_management_service(dut): tc_suspend = dut.command("cat /var/run/hw-management/config/suspend") assert tc_suspend["stdout"] == "1", "Thermal control is not suspended" - logging.info("Check fan speed setting") - fan_speed_setting = dut.command("cat /var/run/hw-management/thermal/pwm1") - assert fan_speed_setting["stdout"] == "153", "Fan speed is not default to 60%. 
153/255=60%" - logging.info("Check dmesg") dmesg = dut.command("sudo dmesg") error_keywords = ["crash", "Out of memory", "Call Trace", "Exception", "panic"] diff --git a/tests/platform/mellanox/check_sysfs.py b/tests/platform/mellanox/check_sysfs.py index 199446832a5..a273f881ace 100644 --- a/tests/platform/mellanox/check_sysfs.py +++ b/tests/platform/mellanox/check_sysfs.py @@ -5,6 +5,7 @@ """ import logging +from check_hw_mgmt_service import wait_until_fan_speed_set_to_default def check_sysfs(dut): """ @@ -17,57 +18,62 @@ def check_sysfs(dut): logging.info("Check content of some key files") + assert not wait_until_fan_speed_set_to_default(dut, timeout=120), \ + "Content of /var/run/hw-management/thermal/pwm1 should be 153" + file_suspend = dut.command("cat /var/run/hw-management/config/suspend") assert file_suspend["stdout"] == "1", "Content of /var/run/hw-management/config/suspend should be 1" - file_pwm1 = dut.command("cat /var/run/hw-management/thermal/pwm1") - assert file_pwm1["stdout"] == "153", "Content of /var/run/hw-management/thermal/pwm1 should be 153" - file_asic = dut.command("cat /var/run/hw-management/thermal/asic") try: asic_temp = float(file_asic["stdout"]) / 1000 assert asic_temp > 0 and asic_temp < 85, "Abnormal ASIC temperature: %s" % file_asic["stdout"] except: - assert "Bad content in /var/run/hw-management/thermal/asic: %s" % file_asic["stdout"] + assert False, "Bad content in /var/run/hw-management/thermal/asic: %s" % file_asic["stdout"] + + dut_hwsku = dut.facts["hwsku"] + from common.mellanox_data import SWITCH_MODELS + fan_count = SWITCH_MODELS[dut_hwsku]["fans"]["number"] - fan_status_list = dut.command("find /var/run/hw-management/thermal -name fan*_status") - for fan_status in fan_status_list["stdout_lines"]: - fan_status_content = dut.command("cat %s" % fan_status) - assert fan_status_content["stdout"] == "1", "Content of %s is not 1" % fan_status + if SWITCH_MODELS[dut_hwsku]["fans"]["hot_swappable"]: + fan_status_list = 
["/var/run/hw-management/thermal/fan%d_status" % fan_id for fan_id in range(1, fan_count + 1)] + for fan_status in fan_status_list: + fan_status_content = dut.command("cat %s" % fan_status) + assert fan_status_content["stdout"] == "1", "Content of %s is not 1" % fan_status - fan_fault_list = dut.command("find /var/run/hw-management/thermal -name fan*_fault") - for fan_fault in fan_fault_list["stdout_lines"]: + fan_fault_list = ["/var/run/hw-management/thermal/fan%d_fault" % fan_id for fan_id in range(1, fan_count + 1)] + for fan_fault in fan_fault_list: fan_fault_content = dut.command("cat %s" % fan_fault) assert fan_fault_content["stdout"] == "0", "Content of %s is not 0" % fan_fault - fan_min_list = dut.command("find /var/run/hw-management/thermal -name fan*_min") - for fan_min in fan_min_list["stdout_lines"]: + fan_min_list = ["/var/run/hw-management/thermal/fan%d_min" % fan_id for fan_id in range(1, fan_count + 1)] + for fan_min in fan_min_list: try: fan_min_content = dut.command("cat %s" % fan_min) fan_min_speed = int(fan_min_content["stdout"]) assert fan_min_speed > 0, "Bad fan minimum speed: %s" % str(fan_min_speed) except Exception as e: - assert "Get content from %s failed, exception: %s" % (fan_min, repr(e)) + assert False, "Get content from %s failed, exception: %s" % (fan_min, repr(e)) - fan_max_list = dut.command("find /var/run/hw-management/thermal -name fan*_max") - for fan_max in fan_max_list["stdout_lines"]: + fan_max_list = ["/var/run/hw-management/thermal/fan%d_max" % fan_id for fan_id in range(1, fan_count + 1)] + for fan_max in fan_max_list: try: fan_max_content = dut.command("cat %s" % fan_max) fan_max_speed = int(fan_max_content["stdout"]) assert fan_max_speed > 10000, "Bad fan maximum speed: %s" % str(fan_max_speed) except Exception as e: - assert "Get content from %s failed, exception: %s" % (fan_max, repr(e)) + assert False, "Get content from %s failed, exception: %s" % (fan_max, repr(e)) - fan_speed_get_list = dut.command("find 
/var/run/hw-management/thermal -name fan*_speed_get") - for fan_speed_get in fan_speed_get_list["stdout_lines"]: + fan_speed_get_list = ["/var/run/hw-management/thermal/fan%d_speed_get" % fan_id for fan_id in range(1, fan_count + 1)] + for fan_speed_get in fan_speed_get_list: try: fan_speed_get_content = dut.command("cat %s" % fan_speed_get) fan_speed = int(fan_speed_get_content["stdout"]) assert fan_speed > 1000, "Bad fan speed: %s" % str(fan_speed) except Exception as e: - assert "Get content from %s failed, exception: %s" % (fan_speed_get, repr(e)) + assert False, "Get content from %s failed, exception: %s" % (fan_speed_get, repr(e)) - fan_speed_set_list = dut.command("find /var/run/hw-management/thermal -name fan*_speed_set") - for fan_speed_set in fan_speed_set_list["stdout_lines"]: + fan_speed_set_list = ["/var/run/hw-management/thermal/fan%d_speed_set" % fan_id for fan_id in range(1, fan_count + 1)] + for fan_speed_set in fan_speed_set_list: fan_speed_set_content = dut.command("cat %s" % fan_speed_set) assert fan_speed_set_content["stdout"] == "153", "Fan speed should be set to 60%, 153/255" diff --git a/tests/platform/mellanox/test_check_sfp_using_ethtool.py b/tests/platform/mellanox/test_check_sfp_using_ethtool.py new file mode 100644 index 00000000000..5f1e1dc2684 --- /dev/null +++ b/tests/platform/mellanox/test_check_sfp_using_ethtool.py @@ -0,0 +1,40 @@ +""" +Check SFP using ethtool + +This script covers the test case 'Check SFP using ethtool' in the SONiC platform test plan: +https://github.com/Azure/SONiC/blob/master/doc/pmon/sonic_platform_test_plan.md +""" +import logging +import os +import json + +from platform_fixtures import conn_graph_facts +from check_hw_mgmt_service import check_hw_management_service + + +def test_check_sfp_using_ethtool(testbed_devices, conn_graph_facts): + """This test case is to check SFP using the ethtool. 
+ """ + ans_host = testbed_devices["dut"] + ports_config = json.loads(ans_host.command("sudo sonic-cfggen -d --var-json PORT")["stdout"]) + + logging.info("Use the ethtool to check SFP information") + for intf in conn_graph_facts["device_conn"]: + intf_lanes = ports_config[intf]["lanes"] + sfp_id = int(intf_lanes.split(",")[0])/4 + 1 + + ethtool_sfp_output = ans_host.command("sudo ethtool -m sfp%s" % str(sfp_id)) + assert ethtool_sfp_output["rc"] == 0, "Failed to read eeprom of sfp%s using ethtool" % str(sfp_id) + assert len(ethtool_sfp_output["stdout_lines"]) >= 5, \ + "Does the ethtool output look normal? " + str(ethtool_sfp_output["stdout_lines"]) + for line in ethtool_sfp_output["stdout_lines"]: + assert len(line.split(":")) >= 2, \ + "Unexpected line %s in %s" % (line, str(ethtool_sfp_output["stdout_lines"])) + + logging.info("Check interface status") + mg_facts = ans_host.minigraph_facts(host=ans_host.hostname)["ansible_facts"] + intf_facts = ans_host.interface_facts(up_ports=mg_facts["minigraph_ports"])["ansible_facts"] + assert len(intf_facts["ansible_interface_link_down_ports"]) == 0, \ + "Some interfaces are down: %s" % str(intf_facts["ansible_interface_link_down_ports"]) + + check_hw_management_service(ans_host) diff --git a/tests/platform/mellanox/test_check_sysfs.py b/tests/platform/mellanox/test_check_sysfs.py new file mode 100644 index 00000000000..a6973a492e6 --- /dev/null +++ b/tests/platform/mellanox/test_check_sysfs.py @@ -0,0 +1,27 @@ +""" +Check SYSFS + +This script covers the test case 'Check SYSFS' in the SONiC platform test plan: +https://github.com/Azure/SONiC/blob/master/doc/pmon/sonic_platform_test_plan.md +""" +import logging + +from check_sysfs import check_sysfs + + +def test_check_hw_mgmt_sysfs(testbed_devices): + """This test case is to check the symbolic links under /var/run/hw-management + """ + ans_host = testbed_devices["dut"] + check_sysfs(ans_host) + + +def test_hw_mgmt_sysfs_mapped_to_pmon(testbed_devices): + 
"""This test case is to verify that the /var/run/hw-management folder is mapped to pmon container + """ + ans_host = testbed_devices["dut"] + + logging.info("Verify that the /var/run/hw-management folder is mapped to the pmon container") + files_under_dut = set(ans_host.command("find /var/run/hw-management")["stdout_lines"]) + files_under_pmon = set(ans_host.command("docker exec pmon find /var/run/hw-management")["stdout_lines"]) + assert files_under_dut == files_under_pmon, "Folder /var/run/hw-management is not mapped to pmon" diff --git a/tests/platform/mellanox/test_hw_management_service.py b/tests/platform/mellanox/test_hw_management_service.py new file mode 100644 index 00000000000..f9f91ca4fb5 --- /dev/null +++ b/tests/platform/mellanox/test_hw_management_service.py @@ -0,0 +1,15 @@ +""" +Verify that the hw-management service is running properly + +This script covers test case 'Ensure that the hw-management service is running properly' in the SONiC platform test +plan: https://github.com/Azure/SONiC/blob/master/doc/pmon/sonic_platform_test_plan.md +""" + +from check_hw_mgmt_service import check_hw_management_service + + +def test_hw_management_service_status(testbed_devices): + """This test case is to verify that the hw-management service is running properly + """ + ans_host = testbed_devices["dut"] + check_hw_management_service(ans_host) diff --git a/tests/platform/platform_fixtures.py b/tests/platform/platform_fixtures.py new file mode 100644 index 00000000000..0b73940db3f --- /dev/null +++ b/tests/platform/platform_fixtures.py @@ -0,0 +1,11 @@ +import pytest + +@pytest.fixture(scope="module") +def conn_graph_facts(testbed_devices): + dut = testbed_devices["dut"] + localhost = testbed_devices["localhost"] + + base_path = os.path.dirname(os.path.realpath(__file__)) + lab_conn_graph_file = os.path.join(base_path, "../../ansible/files/lab_connection_graph.xml") + conn_graph_facts = localhost.conn_graph_facts(host=dut.hostname, 
filename=lab_conn_graph_file)['ansible_facts'] + return conn_graph_facts diff --git a/tests/platform/test_platform_info.py b/tests/platform/test_platform_info.py index 73e0edb4ecc..67eb65676a5 100644 --- a/tests/platform/test_platform_info.py +++ b/tests/platform/test_platform_info.py @@ -10,7 +10,6 @@ import pytest -from ansible_host import AnsibleHost from psu_controller import psu_controller @@ -19,12 +18,11 @@ CMD_PLATFORM_SYSEEPROM = "show platform syseeprom" -def test_show_platform_summary(localhost, ansible_adhoc, testbed): +def test_show_platform_summary(testbed_devices): """ @summary: Check output of 'show platform summary' """ - hostname = testbed['dut'] - ans_host = AnsibleHost(ansible_adhoc, hostname) + ans_host = testbed_devices["dut"] logging.info("Check output of '%s'" % CMD_PLATFORM_SUMMARY) platform_summary = ans_host.command(CMD_PLATFORM_SUMMARY) @@ -39,27 +37,24 @@ def test_show_platform_summary(localhost, ansible_adhoc, testbed): "Unexpected output fields, actual=%s, expected=%s" % (str(actual_fields), str(expected_fields)) -def test_show_platform_psustatus(localhost, ansible_adhoc, testbed): +def test_show_platform_psustatus(testbed_devices): """ @summary: Check output of 'show platform psustatus' """ - hostname = testbed['dut'] - ans_host = AnsibleHost(ansible_adhoc, hostname) + ans_host = testbed_devices["dut"] - logging.info("Check PSU status using '%s', hostname: %s" % (CMD_PLATFORM_PSUSTATUS, hostname)) + logging.info("Check PSU status using '%s', hostname: %s" % (CMD_PLATFORM_PSUSTATUS, ans_host.hostname)) psu_status = ans_host.command(CMD_PLATFORM_PSUSTATUS) psu_line_pattern = re.compile(r"PSU\s+\d+\s+(OK|NOT OK)") for line in psu_status["stdout_lines"][2:]: assert psu_line_pattern.match(line), "Unexpected PSU status output" -def test_turn_on_off_psu_and_check_psustatus(localhost, ansible_adhoc, testbed, psu_controller): +def test_turn_on_off_psu_and_check_psustatus(testbed_devices, psu_controller): """ @summary: Turn off/on PSU and 
check PSU status using 'show platform psustatus' """ - hostname = testbed['dut'] - ans_host = AnsibleHost(ansible_adhoc, hostname) - platform_info = parse_platform_summary(ans_host.command(CMD_PLATFORM_SUMMARY)["stdout_lines"]) + ans_host = testbed_devices["dut"] psu_line_pattern = re.compile(r"PSU\s+\d+\s+(OK|NOT OK|NOT PRESENT)") cmd_num_psu = "sudo psuutil numpsus" @@ -75,9 +70,9 @@ def test_turn_on_off_psu_and_check_psustatus(localhost, ansible_adhoc, testbed, pytest.skip("At least 2 PSUs required for rest of the testing in this case") logging.info("Create PSU controller for testing") - psu_ctrl = psu_controller(hostname, platform_info["asic"]) + psu_ctrl = psu_controller(ans_host.hostname, ans_host.facts["asic_type"]) if psu_ctrl is None: - pytest.skip("No PSU controller for %s, skip rest of the testing in this case" % hostname) + pytest.skip("No PSU controller for %s, skip rest of the testing in this case" % ans_host.hostname) logging.info("To avoid DUT losing power, need to turn on PSUs that are not powered") all_psu_status = psu_ctrl.get_psu_status() @@ -146,18 +141,16 @@ def parse_platform_summary(raw_input_lines): return res -def test_show_platform_syseeprom(localhost, ansible_adhoc, testbed): +def test_show_platform_syseeprom(testbed_devices): """ @summary: Check output of 'show platform syseeprom' """ - hostname = testbed['dut'] - ans_host = AnsibleHost(ansible_adhoc, hostname) + ans_host = testbed_devices["dut"] logging.info("Check output of '%s'" % CMD_PLATFORM_SYSEEPROM) - platform_info = parse_platform_summary(ans_host.command(CMD_PLATFORM_SUMMARY)["stdout_lines"]) show_output = ans_host.command(CMD_PLATFORM_SYSEEPROM) assert show_output["rc"] == 0, "Run command '%s' failed" % CMD_PLATFORM_SYSEEPROM - if platform_info["asic"] in ["mellanox"]: + if ans_host.facts["asic_type"] in ["mellanox"]: expected_fields = [ "Product Name", "Part Number", @@ -172,7 +165,7 @@ def test_show_platform_syseeprom(localhost, ansible_adhoc, testbed): "CRC-32"] 
utility_cmd = "sudo python -c \"import imp; \ m = imp.load_source('eeprom', '/usr/share/sonic/device/%s/plugins/eeprom.py'); \ - t = m.board('board', '', '', ''); e = t.read_eeprom(); t.decode_eeprom(e)\"" % platform_info["platform"] + t = m.board('board', '', '', ''); e = t.read_eeprom(); t.decode_eeprom(e)\"" % ans_host.facts["platform"] utility_cmd_output = ans_host.command(utility_cmd) for field in expected_fields: diff --git a/tests/platform/test_reboot.py b/tests/platform/test_reboot.py index a402b408acf..f266876b11e 100644 --- a/tests/platform/test_reboot.py +++ b/tests/platform/test_reboot.py @@ -15,42 +15,41 @@ import pytest -from ansible_host import AnsibleHost -from utilities import wait_until +from platform_fixtures import conn_graph_facts +from common.utilities import wait_until from check_critical_services import check_critical_services from check_interface_status import check_interface_status from check_transceiver_status import check_transceiver_basic from check_transceiver_status import all_transceivers_detected -def reboot_and_check(localhost, dut, reboot_type="cold"): +def reboot_and_check(localhost, dut, interfaces, reboot_type="cold"): """ Perform the specified type of reboot and check platform status. 
""" - dut.command("show platform summary") - lab_conn_graph_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), \ - "../../ansible/files/lab_connection_graph.xml") - conn_graph_facts = localhost.conn_graph_facts(host=dut.hostname, filename=lab_conn_graph_file).\ - contacted['localhost']['ansible_facts'] - interfaces = conn_graph_facts["device_conn"] - asic_type = dut.shell("show platform summary | awk '/ASIC: / {print$2}'")["stdout"].strip() - logging.info("Run %s reboot on DUT" % reboot_type) if reboot_type == "cold": - reboot_cmd = "sudo reboot &" + reboot_cmd = "reboot" reboot_timeout = 300 elif reboot_type == "fast": - reboot_cmd = "sudo fast-reboot &" + reboot_cmd = "fast-reboot" reboot_timeout = 180 elif reboot_type == "warm": - reboot_cmd = "sudo warm-reboot &" + reboot_cmd = "warm-reboot" reboot_timeout = 180 else: assert False, "Reboot type %s is not supported" % reboot_type - dut.shell(reboot_cmd) + process, queue = dut.command(reboot_cmd, module_async=True) logging.info("Wait for DUT to go down") - localhost.wait_for(host=dut.hostname, port=22, state="stopped", delay=10, timeout=120) + res = localhost.wait_for(host=dut.hostname, port=22, state="stopped", delay=10, timeout=120, + module_ignore_errors=True) + if "failed" in res: + if process.is_alive(): + logging.error("Command '%s' is not completed" % reboot_cmd) + process.terminate() + logging.error("reboot result %s" % str(queue.get())) + assert False, "DUT did not go down" logging.info("Wait for DUT to come back") localhost.wait_for(host=dut.hostname, port=22, state="started", delay=10, timeout=reboot_timeout) @@ -68,19 +67,15 @@ def reboot_and_check(localhost, dut, reboot_type="cold"): logging.info("Check transceiver status") check_transceiver_basic(dut, interfaces) - if asic_type in ["mellanox"]: + if dut.facts["asic_type"] in ["mellanox"]: current_file_dir = os.path.dirname(os.path.realpath(__file__)) sub_folder_dir = os.path.join(current_file_dir, "mellanox") if sub_folder_dir not in 
sys.path: sys.path.append(sub_folder_dir) from check_hw_mgmt_service import check_hw_management_service - from check_hw_mgmt_service import wait_until_fan_speed_set_to_default from check_sysfs import check_sysfs - logging.info("Wait until fan speed is set to default") - wait_until_fan_speed_set_to_default(dut) - logging.info("Check the hw-management service") check_hw_management_service(dut) @@ -88,37 +83,37 @@ def reboot_and_check(localhost, dut, reboot_type="cold"): check_sysfs(dut) -def test_cold_reboot(localhost, ansible_adhoc, testbed): +def test_cold_reboot(testbed_devices, conn_graph_facts): """ @summary: This test case is to perform cold reboot and check platform status """ - hostname = testbed['dut'] - ans_host = AnsibleHost(ansible_adhoc, hostname) + ans_host = testbed_devices["dut"] + localhost = testbed_devices["localhost"] - reboot_and_check(localhost, ans_host, reboot_type="cold") + reboot_and_check(localhost, ans_host, conn_graph_facts["device_conn"], reboot_type="cold") -def test_fast_reboot(localhost, ansible_adhoc, testbed): +def test_fast_reboot(testbed_devices, conn_graph_facts): """ @summary: This test case is to perform cold reboot and check platform status """ - hostname = testbed['dut'] - ans_host = AnsibleHost(ansible_adhoc, hostname) + ans_host = testbed_devices["dut"] + localhost = testbed_devices["localhost"] - reboot_and_check(localhost, ans_host, reboot_type="fast") + reboot_and_check(localhost, ans_host, conn_graph_facts["device_conn"], reboot_type="fast") -def test_warm_reboot(localhost, ansible_adhoc, testbed): +def test_warm_reboot(testbed_devices, conn_graph_facts): """ @summary: This test case is to perform cold reboot and check platform status """ - hostname = testbed['dut'] - ans_host = AnsibleHost(ansible_adhoc, hostname) - asic_type = ans_host.shell("show platform summary | awk '/ASIC: / {print$2}'")["stdout"].strip() + ans_host = testbed_devices["dut"] + localhost = testbed_devices["localhost"] + asic_type = 
ans_host.facts["asic_type"] if asic_type in ["mellanox"]: issu_capability = ans_host.command("show platform mlnx issu")["stdout"] if "disabled" in issu_capability: pytest.skip("ISSU is not supported on this DUT, skip this test case") - reboot_and_check(localhost, ans_host, reboot_type="warm") + reboot_and_check(localhost, ans_host, conn_graph_facts["device_conn"], reboot_type="warm") diff --git a/tests/platform/test_reload_config.py b/tests/platform/test_reload_config.py index 9b3bb583a80..3937317b5ec 100644 --- a/tests/platform/test_reload_config.py +++ b/tests/platform/test_reload_config.py @@ -10,27 +10,21 @@ import time import sys -from ansible_host import AnsibleHost -from utilities import wait_until +from platform_fixtures import conn_graph_facts +from common.utilities import wait_until from check_critical_services import check_critical_services from check_interface_status import check_interface_status from check_transceiver_status import check_transceiver_basic from check_transceiver_status import all_transceivers_detected -def test_reload_configuration(localhost, ansible_adhoc, testbed): +def test_reload_configuration(testbed_devices, conn_graph_facts): """ @summary: This test case is to reload the configuration and check platform status """ - hostname = testbed['dut'] - ans_host = AnsibleHost(ansible_adhoc, hostname) - ans_host.command("show platform summary") - lab_conn_graph_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), \ - "../../ansible/files/lab_connection_graph.xml") - conn_graph_facts = localhost.conn_graph_facts(host=hostname, filename=lab_conn_graph_file).\ - contacted['localhost']['ansible_facts'] + ans_host = testbed_devices["dut"] interfaces = conn_graph_facts["device_conn"] - asic_type = ans_host.shell("show platform summary | awk '/ASIC: / {print$2}'")["stdout"].strip() + asic_type = ans_host.facts["asic_type"] logging.info("Reload configuration") ans_host.command("sudo config reload -y") diff --git 
a/tests/platform/test_sequential_restart.py b/tests/platform/test_sequential_restart.py index 8e715708a25..29ed98e702b 100644 --- a/tests/platform/test_sequential_restart.py +++ b/tests/platform/test_sequential_restart.py @@ -10,25 +10,20 @@ import time import sys -from ansible_host import AnsibleHost -from utilities import wait_until +import pytest + +from platform_fixtures import conn_graph_facts +from common.utilities import wait_until from check_critical_services import check_critical_services from check_interface_status import check_interface_status from check_transceiver_status import check_transceiver_basic from check_transceiver_status import all_transceivers_detected -def restart_service_and_check(localhost, dut, service): +def restart_service_and_check(localhost, dut, service, interfaces): """ Restart specified service and check platform status """ - dut.command("show platform summary") - lab_conn_graph_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), \ - "../../ansible/files/lab_connection_graph.xml") - conn_graph_facts = localhost.conn_graph_facts(host=dut.hostname, filename=lab_conn_graph_file).\ - contacted['localhost']['ansible_facts'] - interfaces = conn_graph_facts["device_conn"] - asic_type = dut.shell("show platform summary | awk '/ASIC: / {print$2}'")["stdout"].strip() logging.info("Restart the %s service" % service) dut.command("sudo systemctl restart %s" % service) @@ -47,7 +42,7 @@ def restart_service_and_check(localhost, dut, service): logging.info("Check transceiver status") check_transceiver_basic(dut, interfaces) - if asic_type in ["mellanox"]: + if dut.facts["asic_type"] in ["mellanox"]: current_file_dir = os.path.dirname(os.path.realpath(__file__)) sub_folder_dir = os.path.join(current_file_dir, "mellanox") @@ -63,19 +58,20 @@ def restart_service_and_check(localhost, dut, service): check_sysfs(dut) -def test_restart_swss(localhost, ansible_adhoc, testbed): +def test_restart_swss(testbed_devices, conn_graph_facts): """ 
@summary: This test case is to restart the swss service and check platform status """ - hostname = testbed['dut'] - ans_host = AnsibleHost(ansible_adhoc, hostname) - restart_service_and_check(localhost, ans_host, "swss") + dut = testbed_devices["dut"] + localhost = testbed_devices["localhost"] + restart_service_and_check(localhost, dut, "swss", conn_graph_facts["device_conn"]) -def test_restart_syncd(localhost, ansible_adhoc, testbed): +@pytest.mark.skip(reason="Restarting syncd is not supported yet") +def test_restart_syncd(testbed_devices, conn_graph_facts): """ @summary: This test case is to restart the syncd service and check platform status """ - hostname = testbed['dut'] - ans_host = AnsibleHost(ansible_adhoc, hostname) - restart_service_and_check(localhost, ans_host, "syncd") + dut = testbed_devices["dut"] + localhost = testbed_devices["localhost"] + restart_service_and_check(localhost, dut, "syncd", conn_graph_facts["device_conn"]) diff --git a/tests/platform/test_sfp.py b/tests/platform/test_sfp.py index ea2c871c469..bba52ad5473 100644 --- a/tests/platform/test_sfp.py +++ b/tests/platform/test_sfp.py @@ -10,7 +10,9 @@ import time import copy -from ansible_host import AnsibleHost +import pytest + +from platform_fixtures import conn_graph_facts def parse_output(output_lines): @@ -42,7 +44,7 @@ def parse_eeprom(output_lines): return res -def test_check_sfp_status_and_configure_sfp(localhost, ansible_adhoc, testbed): +def test_check_sfp_status_and_configure_sfp(testbed_devices, conn_graph_facts): """ @summary: Check SFP status and configure SFP @@ -54,13 +56,8 @@ def test_check_sfp_status_and_configure_sfp(localhost, ansible_adhoc, testbed): * show interface transceiver eeprom * sfputil reset """ - hostname = testbed['dut'] - ans_host = AnsibleHost(ansible_adhoc, hostname) - localhost.command("who") - lab_conn_graph_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), \ - "../../ansible/files/lab_connection_graph.xml") - conn_graph_facts = 
localhost.conn_graph_facts(host=hostname, filename=lab_conn_graph_file).\ - contacted['localhost']['ansible_facts'] + + ans_host = testbed_devices["dut"] cmd_sfp_presence = "sudo sfputil show presence" cmd_sfp_eeprom = "sudo sfputil show eeprom" @@ -110,7 +107,7 @@ def test_check_sfp_status_and_configure_sfp(localhost, ansible_adhoc, testbed): assert parsed_presence[intf] == "Present", "Interface presence is not 'Present'" -def test_check_sfp_low_power_mode(localhost, ansible_adhoc, testbed): +def test_check_sfp_low_power_mode(testbed_devices, conn_graph_facts): """ @summary: Check SFP low power mode @@ -119,13 +116,7 @@ def test_check_sfp_low_power_mode(localhost, ansible_adhoc, testbed): * sfputil lpmode off * sfputil lpmode on """ - hostname = testbed['dut'] - ans_host = AnsibleHost(ansible_adhoc, hostname) - localhost.command("who") - lab_conn_graph_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), \ - "../../ansible/files/lab_connection_graph.xml") - conn_graph_facts = localhost.conn_graph_facts(host=hostname, filename=lab_conn_graph_file).\ - contacted['localhost']['ansible_facts'] + ans_host = testbed_devices["dut"] cmd_sfp_presence = "sudo sfputil show presence" cmd_sfp_show_lpmode = "sudo sfputil show lpmode" diff --git a/tests/platform/test_xcvr_info_in_db.py b/tests/platform/test_xcvr_info_in_db.py index 0abedb83844..264a0e78859 100644 --- a/tests/platform/test_xcvr_info_in_db.py +++ b/tests/platform/test_xcvr_info_in_db.py @@ -8,52 +8,15 @@ import re import os -from ansible_host import AnsibleHost from check_transceiver_status import check_transceiver_status +from platform_fixtures import conn_graph_facts -def parse_transceiver_info(output_lines): - """ - @summary: Parse the list of transceiver from DB table TRANSCEIVER_INFO content - @param output_lines: DB table TRANSCEIVER_INFO content output by 'redis' command - @return: Return parsed transceivers in a list - """ - res = [] - p = re.compile(r"TRANSCEIVER_INFO\|(Ethernet\d+)") - for 
line in output_lines: - m = p.match(line) - assert m, "Unexpected line %s" % line - res.append(m.group(1)) - return res - - -def parse_transceiver_dom_sensor(output_lines): - """ - @summary: Parse the list of transceiver from DB table TRANSCEIVER_DOM_SENSOR content - @param output_lines: DB table TRANSCEIVER_DOM_SENSOR content output by 'redis' command - @return: Return parsed transceivers in a list - """ - res = [] - p = re.compile(r"TRANSCEIVER_DOM_SENSOR\|(Ethernet\d+)") - for line in output_lines: - m = p.match(line) - assert m, "Unexpected line %s" % line - res.append(m.group(1)) - return res - - -def test_xcvr_info_in_db(localhost, ansible_adhoc, testbed): +def test_xcvr_info_in_db(testbed_devices, conn_graph_facts): """ @summary: This test case is to verify that xcvrd works as expected by checking transceiver information in DB """ - hostname = testbed['dut'] - ans_host = AnsibleHost(ansible_adhoc, hostname) - localhost.command("who") - lab_conn_graph_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), \ - "../../ansible/files/lab_connection_graph.xml") - conn_graph_facts = localhost.conn_graph_facts(host=hostname, filename=lab_conn_graph_file).\ - contacted['localhost']['ansible_facts'] - interfaces = conn_graph_facts["device_conn"] + dut = testbed_devices["dut"] logging.info("Check transceiver status") - check_transceiver_status(ans_host, interfaces) + check_transceiver_status(dut, conn_graph_facts["device_conn"]) From 8afd98e2d67747274bd774767f8b296ebafab503 Mon Sep 17 00:00:00 2001 From: Xin Wang Date: Fri, 23 Aug 2019 07:28:18 +0800 Subject: [PATCH 12/16] Make the VM reboot command async and ignore its errors (#1073) The command for rebooting VM was executed synchronously. Sometimes it could fail because the VM closed the SSH connection before ansible finishes the reboot command. The fix is to make the command async and ignore its errors. 
Signed-off-by: Xin Wang --- ansible/roles/eos/handlers/common_handlers/update_state.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ansible/roles/eos/handlers/common_handlers/update_state.yml b/ansible/roles/eos/handlers/common_handlers/update_state.yml index fce6785b84d..d84085a058f 100755 --- a/ansible/roles/eos/handlers/common_handlers/update_state.yml +++ b/ansible/roles/eos/handlers/common_handlers/update_state.yml @@ -1,5 +1,8 @@ - name: Reboot the VM command: /sbin/shutdown -r now "Ansible updates triggered" + async: 300 + poll: 0 + ignore_errors: true - name: Wait for VM to shutdown wait_for: From 6ea63371c3c84b9e4c87b0db7430efd5eda33b86 Mon Sep 17 00:00:00 2001 From: Shuotian Cheng Date: Mon, 26 Aug 2019 15:35:47 -0700 Subject: [PATCH 13/16] [fib]: Add IPv6 default route check (#1063) IPv6 default route is learned and has the same behavior as the IPv4 default route. Signed-off-by: Shu0T1an ChenG --- ansible/roles/test/files/ptftests/fib.py | 1 - ansible/roles/test/templates/fib.j2 | 14 ++++---------- 2 files changed, 4 insertions(+), 11 deletions(-) diff --git a/ansible/roles/test/files/ptftests/fib.py b/ansible/roles/test/files/ptftests/fib.py index 25af9f6a214..423430ee747 100644 --- a/ansible/roles/test/files/ptftests/fib.py +++ b/ansible/roles/test/files/ptftests/fib.py @@ -17,7 +17,6 @@ ] EXCLUDE_IPV6_PREFIXES = [ - '::/0', # Currently no IPv6 default route '::/128', # Unspecified RFC 4291 '::1/128', # Loopback RFC 4291 'ff00::/8' # Multicast RFC 4291 diff --git a/ansible/roles/test/templates/fib.j2 b/ansible/roles/test/templates/fib.j2 index 865e018e781..8648527d3d9 100644 --- a/ansible/roles/test/templates/fib.j2 +++ b/ansible/roles/test/templates/fib.j2 @@ -1,13 +1,17 @@ {# defualt route#} {% if testbed_type == 't1' %} 0.0.0.0/0 {% for ifname, v in minigraph_neighbors.iteritems() %}{% if "T2" in v.name %}{{ '[%d]' % minigraph_port_indices[ifname]}}{% if not loop.last %} {% endif %}{% endif %}{% endfor %} +::/0 {% for ifname, v in 
minigraph_neighbors.iteritems() %}{% if "T2" in v.name %}{{ '[%d]' % minigraph_port_indices[ifname]}}{% if not loop.last %} {% endif %}{% endif %}{% endfor %} {% elif testbed_type == 't0' or testbed_type == 't0-64' or testbed_type == 't1-lag' or testbed_type == 't0-64-32' %} 0.0.0.0/0 {% for portchannel, v in minigraph_portchannels.iteritems() %} +::/0 {% for portchannel, v in minigraph_portchannels.iteritems() %} [{% for member in v.members %}{{ '%d' % minigraph_port_indices[member]}}{% if not loop.last %} {% endif %}{% endfor %}]{% if not loop.last %} {% endif %}{% endfor %} {% elif testbed_type == 't1-64-lag' %} 0.0.0.0/0 [0 1] [4 5] [16 17] [20 21] +::/0 [0 1] [4 5] [16 17] [20 21] {% elif testbed_type == 't0-116' %} 0.0.0.0/0 [24 25] [26 27] [28 29] [30 31] +::/0 [24 25] [26 27] [28 29] [30 31] {% endif %} {#routes to uplink#} @@ -17,21 +21,15 @@ {% for subnet in range(0, props.tor_subnet_number) %} {% if testbed_type == 't1' %} 192.168.{{ podset }}.{{ tor * 16 + subnet }}/32 {% for ifname, v in minigraph_neighbors.iteritems() %}{% if "T2" in v.name %}{{ '[%d]' % minigraph_port_indices[ifname]}}{% if not loop.last %} {% endif %}{% endif %}{% endfor %} - 20C0:A8{{ '%02X' % podset }}:0:{{ '%02X' % (tor * 16 + subnet)}}::/64 {% for ifname, v in minigraph_neighbors.iteritems() %}{% if "T2" in v.name %}{{ '[%d]' % minigraph_port_indices[ifname]}}{% if not loop.last %} {% endif %}{% endif %}{% endfor %} - {% elif testbed_type == 't1-lag' %} 192.168.{{ podset }}.{{ tor * 16 + subnet }}/32 {% for portchannel, v in minigraph_portchannels.iteritems() %} [{% for member in v.members %}{{ '%d' % minigraph_port_indices[member]}}{% if not loop.last %} {% endif %}{% endfor %}]{% if not loop.last %} {% endif %}{% endfor %} - 20C0:A8{{ '%02X' % podset }}:0:{{ '%02X' % (tor * 16 + subnet)}}::/64 {% for portchannel, v in minigraph_portchannels.iteritems() %} [{% for member in v.members %}{{ '%d' % minigraph_port_indices[member]}}{% if not loop.last %} {% endif %}{% endfor %}]{% 
if not loop.last %} {% endif %}{% endfor %} - {% elif testbed_type == 't1-64-lag' %} 192.168.{{ podset }}.{{ tor * 16 + subnet }}/32 [0 1] [4 5] [16 17] [20 21] - 20C0:A8{{ '%02X' % podset }}:0:{{ '%02X' % (tor * 16 + subnet)}}::/64 [0 1] [4 5] [16 17] [20 21] - {% elif testbed_type == 't0' or testbed_type == 't0-64' or testbed_type == 't0-64-32' %} {% set suffix = ( (podset * props.tor_number * props.max_tor_subnet_number * props.tor_subnet_size) + (tor * props.max_tor_subnet_number * props.tor_subnet_size) + @@ -45,9 +43,7 @@ {# Skip 192.168.0.0 as it is in Vlan1000 subnet #} {% if octet2 != 168 and octet3 != 0 and octet4 != 0 %} {{ octet1 }}.{{ octet2 }}.{{ octet3 }}.{{ octet4 }}/{{ prefixlen_v4 }} {% for portchannel, v in minigraph_portchannels.iteritems() %}[{% for member in v.members %}{{ '%d' % minigraph_port_indices[member]}}{% if not loop.last %} {% endif %}{% endfor %}]{% if not loop.last %} {% endif %}{% endfor %} - {{ '20%02x' % octet1 }}:{{ '%02X%02X' % (octet2, octet3) }}:0:{{ '%02X' % octet4 }}::/64 {% for portchannel, v in minigraph_portchannels.iteritems() %}[{% for member in v.members %}{{ '%d' % minigraph_port_indices[member]}}{% if not loop.last %} {% endif %}{% endfor %}]{% if not loop.last %} {% endif %}{% endfor %} - {% endif %} {% elif testbed_type == 't0-116' %} {% set suffix = ( (podset * props.tor_number * props.max_tor_subnet_number * props.tor_subnet_size) + @@ -74,9 +70,7 @@ {% if "T0" in v.name %} {% for subnet in range(0, props_tor.tor_subnet_number) %} 172.16.{{ v.name|replace("ARISTA", "")|replace("T0", "")|int }}.{{ subnet }}/32 {{ '[%d]' % minigraph_port_indices[ifname]}}{% if not loop.last %} {% endif %} - 20AC:10{{ '%02X' % v.name|replace("ARISTA", "")|replace("T0", "")|int }}:0:{{ '%02X' % subnet }}::/64 {{ '[%d] ' % minigraph_port_indices[ifname]}}{% if not loop.last %} {% endif %} - {% endfor %} {% endif %} {% endfor %} From 7e320a4af18e0fe4a5739bf61db0850fc380af78 Mon Sep 17 00:00:00 2001 From: Mykola F 
<37578614+mykolaf@users.noreply.github.com> Date: Tue, 27 Aug 2019 23:30:24 +0300 Subject: [PATCH 14/16] [lag2] enhance lag2-minlink test (smart timeout) (#950) * [lag2] enhance lag2-minlink test Change-Id: I165d6ae8bc5ab2100b9d26c1d8c7ad02f0252cd8 Signed-off-by: Mykola Faryma * [lag2][minlnk] add option to specify max waiting time Change-Id: Ia0331f6cb78accbbb7019f30ec8b98f371cd8d98 Signed-off-by: Mykola Faryma --- ansible/roles/test/tasks/lag_minlink.yml | 33 +++++++++++++++++--- ansible/roles/test/tasks/single_lag_test.yml | 12 +++++-- 2 files changed, 39 insertions(+), 6 deletions(-) diff --git a/ansible/roles/test/tasks/lag_minlink.yml b/ansible/roles/test/tasks/lag_minlink.yml index e7af57c9b9f..11866fe2a18 100644 --- a/ansible/roles/test/tasks/lag_minlink.yml +++ b/ansible/roles/test/tasks/lag_minlink.yml @@ -12,8 +12,27 @@ login: "{{switch_login[hwsku_map[peer_hwsku]]}}" connection: switch - - pause: - seconds: "{{ wait_down_time }}" + - name: Set delay + set_fact: + delay: 5 + + - name: Set retries + set_fact: + retries: "{{ (wait_down_time | int / delay | float) | round(0, 'ceil') }}" + + - name: Let portchannel react to neighbor interface shutdown + pause: + seconds: "{{ deselect_time }}" + + - name: "Verify PortChannel interfaces are up correctly" + shell: bash -c "teamdctl {{ po }} state dump" | python -c "import sys, json; print json.load(sys.stdin)['ports']['{{ item }}']['runner']['selected']" + register: out + until: out.stdout == "True" + with_items: "{{ po_interfaces.keys() }}" + when: item != "{{ flap_intf }}" + become: "yes" + retries: "{{ retries | int }}" + delay: "{{ delay }}" - lag_facts: host={{ inventory_hostname }} @@ -44,8 +63,14 @@ login: "{{switch_login[hwsku_map[peer_hwsku]]}}" connection: switch - - pause: - seconds: 35 + - name: "Verify PortChannel interfaces are up correctly" + shell: bash -c "teamdctl {{ po }} state dump" | python -c "import sys, json; print json.load(sys.stdin)['ports']['{{ item }}']['link']['up']" + register: 
out + until: out.stdout == "True" + with_items: "{{ po_interfaces.keys() }}" + become: "yes" + retries: "{{ retries | int }}" + delay: "{{ delay }}" - lag_facts: host={{ inventory_hostname }} diff --git a/ansible/roles/test/tasks/single_lag_test.yml b/ansible/roles/test/tasks/single_lag_test.yml index 852eb5734cb..b9f2765954e 100644 --- a/ansible/roles/test/tasks/single_lag_test.yml +++ b/ansible/roles/test/tasks/single_lag_test.yml @@ -2,6 +2,12 @@ ### This playbook test one single port channel minimum link feature of one member interface shutdown ### and portchannel member interfaces sending ACP DU rate +# Set maximum value of "smart" timeout to be the same as before, +# user can now set own value outside the test, for example by passing '-e wait_timeout=5' +- set_fact: + wait_timeout: 30 + when: "wait_timeout is not defined" + # Gather information of port channel ports, minimum links and total interface member numbers - set_fact: po: "{{ item }}" @@ -31,7 +37,8 @@ - name: test fanout interface (physical) flap and lacp keep correct po status follow minimum links requirement include: lag_minlink.yml vars: - wait_down_time: 35 + deselect_time: 5 + wait_down_time: "{{ wait_timeout | int }}" ### Now figure out remote VM and interface info for the flapping lag member and run minlink test - set_fact: @@ -45,4 +52,5 @@ - name: test vm interface flap (no physical port down, more like remote port lock) that lag interface can change to correct po status follow minimum links requirement include: lag_minlink.yml vars: - wait_down_time: 120 + deselect_time: 95 + wait_down_time: "{{ wait_timeout | int }}" From 095b4664cb605964cf6e059eff44abdc63596d8f Mon Sep 17 00:00:00 2001 From: yvolynets-mlnx <50697593+yvolynets-mlnx@users.noreply.github.com> Date: Wed, 28 Aug 2019 21:27:24 +0300 Subject: [PATCH 15/16] Added loganalyzer for pytest (#1048) * Added loganalyzer for pytest. Updated existed loganalyzer. * Added loganalyzer for pytest. Updated existed loganalyzer. 
* Added fixes based on comments * Update README.md * Update README.md * Added minor fixes based on comments. Signed-off-by: Yuriy Volynets * Added minor fix. Signed-off-by: Yuriy Volynets * Added 'disable_loganalyzer' marker --- ansible/library/extract_log.py | 8 +- .../files/tools/loganalyzer/loganalyzer.py | 78 +++--- .../loganalyzer/loganalyzer_common_match.txt | 4 +- tests/conftest.py | 18 ++ tests/loganalyzer/README.md | 135 ++++++++++ tests/loganalyzer/__init__.py | 1 + tests/loganalyzer/loganalyzer.py | 232 ++++++++++++++++++ .../loganalyzer/loganalyzer_common_expect.txt | 1 + .../loganalyzer/loganalyzer_common_ignore.txt | 1 + .../loganalyzer/loganalyzer_common_match.txt | 1 + tests/loganalyzer/system_msg_handler.py | 1 + tests/platform/test_reboot.py | 2 + 12 files changed, 449 insertions(+), 33 deletions(-) create mode 100755 tests/loganalyzer/README.md create mode 100755 tests/loganalyzer/__init__.py create mode 100755 tests/loganalyzer/loganalyzer.py create mode 120000 tests/loganalyzer/loganalyzer_common_expect.txt create mode 120000 tests/loganalyzer/loganalyzer_common_ignore.txt create mode 120000 tests/loganalyzer/loganalyzer_common_match.txt create mode 120000 tests/loganalyzer/system_msg_handler.py diff --git a/ansible/library/extract_log.py b/ansible/library/extract_log.py index 032771ef51f..9cf8c1cf5ea 100644 --- a/ansible/library/extract_log.py +++ b/ansible/library/extract_log.py @@ -80,8 +80,6 @@ from datetime import datetime from ansible.module_utils.basic import * -from pprint import pprint - def extract_lines(directory, filename, target_string): path = os.path.join(directory, filename) @@ -100,6 +98,7 @@ def extract_lines(directory, filename, target_string): return result + def extract_number(s): """Extracts number from string, if not number found returns 0""" ns = re.findall(r'\d+', s) @@ -112,6 +111,10 @@ def extract_number(s): def convert_date(s): dt = None re_result = re.findall(r'^\S{3}\s{1,2}\d{1,2} \d{2}:\d{2}:\d{2}\.?\d*', s) + # 
Workaround for pytest-ansible + loc = locale.getlocale() + locale.setlocale(locale.LC_ALL, (None, None)) + if len(re_result) > 0: str_date = re_result[0] try: @@ -122,6 +125,7 @@ def convert_date(s): re_result = re.findall(r'^\d{4}-\d{2}-\d{2}\.\d{2}:\d{2}:\d{2}\.\d{6}', s) str_date = re_result[0] dt = datetime.strptime(str_date, '%Y-%m-%d.%X.%f') + locale.setlocale(locale.LC_ALL, loc) return dt diff --git a/ansible/roles/test/files/tools/loganalyzer/loganalyzer.py b/ansible/roles/test/files/tools/loganalyzer/loganalyzer.py index 8875e874b47..0d963610681 100644 --- a/ansible/roles/test/files/tools/loganalyzer/loganalyzer.py +++ b/ansible/roles/test/files/tools/loganalyzer/loganalyzer.py @@ -32,7 +32,7 @@ comment_key = '#' system_log_file = '/var/log/syslog' -#-- List of ERROR codes to be returned by LogAnalyzer +#-- List of ERROR codes to be returned by AnsibleLogAnalyzer err_duplicate_start_marker = -1 err_duplicate_end_marker = -2 err_no_end_marker = -3 @@ -40,7 +40,7 @@ err_invalid_string_format = -5 err_invalid_input = -6 -class LogAnalyzer: +class AnsibleLogAnalyzer: ''' @summary: Overview of functionality @@ -52,10 +52,10 @@ class LogAnalyzer: AND will not match set of 'ignore' regex expressions, will be considered a 'match' and will be reported. - LogAnalyzer will be called initially before any test has ran, and will be + AnsibleLogAnalyzer will be called initially before any test has ran, and will be instructed to place 'start' marker into all log files to be analyzed. - When tests have ran, LogAnalyzer will be instructed to place end-marker - into the log files. After this, LogAnalyzer will be invoked to perform the + When tests have ran, AnsibleLogAnalyzer will be instructed to place end-marker + into the log files. After this, AnsibleLogAnalyzer will be invoked to perform the analysis of logs. The analysis will be performed on specified log files. For each log file only the content between start/end markers will be analyzed. 
@@ -105,25 +105,41 @@ def create_end_marker(self): return self.end_marker_prefix + "-" + self.run_id #--------------------------------------------------------------------- - def place_marker(self, log_file_list, marker): + def place_marker_to_file(self, log_file, marker): ''' @summary: Place marker into each log file specified. + @param log_file : File path, to be applied with marker. + @param marker: Marker to be placed into log files. + ''' + if not len(log_file) or self.is_filename_stdin(log_file): + self.print_diagnostic_message('Log file {} not found. Skip adding marker.'.format(log_file)) + self.print_diagnostic_message('log file:{}, place marker {}'.format(log_file, marker)) + with open(log_file, 'a') as file: + file.write(marker) + file.write('\n') + file.flush() + + def place_marker_to_syslog(self, marker): + ''' + @summary: Place marker into '/dev/log'. + @param marker: Marker to be placed into syslog. + ''' + + syslogger = self.init_sys_logger() + syslogger.info(marker) + syslogger.info('\n') + + def place_marker(self, log_file_list, marker): + ''' + @summary: Place marker into '/dev/log' and each log file specified. @param log_file_list : List of file paths, to be applied with marker. @param marker: Marker to be placed into log files. 
''' for log_file in log_file_list: - if not len(log_file) or self.is_filename_stdin(log_file): - continue - self.print_diagnostic_message('log file:%s, place marker %s'%(log_file, marker)) - with open(log_file, 'a') as file: - file.write(marker) - file.write('\n') - file.flush() + self.place_marker_to_file(log_file, marker) - syslogger = self.init_sys_logger() - syslogger.info(marker) - syslogger.info('\n') + self.place_marker_to_syslog(marker) return #--------------------------------------------------------------------- @@ -183,11 +199,15 @@ def create_msg_regex(self, file_lsit): skipinitialspace=True) for index, row in enumerate(csvreader): + row = [item for item in row if item != ""] self.print_diagnostic_message('[diagnostic]:processing row:%d' % index) self.print_diagnostic_message('row:%s'% row) try: - #-- Ignore commented Lines and Empty Lines - if (not row or row[0].startswith(comment_key)): + #-- Ignore Empty Lines + if not row: + continue + #-- Ignore commented Lines + if row[0].startswith(comment_key): self.print_diagnostic_message('[diagnostic]:skipping row[0]:%s' % row[0]) continue @@ -202,13 +222,10 @@ def create_msg_regex(self, file_lsit): 'must be \'s\'(string) or \'r\'(regex)' %(filename,index)) - #-- One error message per line - error_string = row[1] - if (is_regex): - messages_regex.append(error_string) + messages_regex.extend(row[1:]) else: - messages_regex.append(self.error_to_regx(error_string)) + messages_regex.append(self.error_to_regx(row[1:])) except Exception as e: print 'ERROR: line %d is formatted incorrectly in file %s. Skipping line' % (index, filename) @@ -393,10 +410,11 @@ def usage(): print ' init - initialize analysis by placing start-marker' print ' to all log files specified in --logs parameter.' print ' analyze - perform log analysis of files specified in --logs parameter.' + print ' add_end_marker - add end marker to all log files specified in --logs parameter.' 
print '--out_dir path Directory path where to place output files, ' print ' must be present when --action == analyze' print '--logs path{,path} List of full paths to log files to be analyzed.' - print ' Implicetly system log file will be also processed' + print ' Implicitly system log file will be also processed' print '--run_id string String passed to loganalyzer, uniquely identifying ' print ' analysis session. Used to construct start/end markers. ' print '--match_files_in path{,path} List of paths to files containing strings. A string from log file' @@ -426,6 +444,8 @@ def check_action(action, log_files_in, out_dir, match_files_in, ignore_files_in, if (action == 'init'): ret_code = True + elif (action == 'add_end_marker'): + ret_code = True elif (action == 'analyze'): if out_dir is None or len(out_dir) == 0: print 'ERROR: missing required out_dir for analyze action' @@ -500,12 +520,10 @@ def write_result_file(run_id, out_dir, analysis_result_per_file, messages_regex_ out_file.write('Total matches:%d\n' % match_cnt) # Find unused regex matches for regex in messages_regex_e: - regex_used = False for line in expected_lines_total: if re.search(regex, line): - regex_used = True break - if not regex_used: + else: unused_regex_messages.append(regex) out_file.write('Total expected and found matches:%d\n' % expected_cnt) @@ -515,7 +533,6 @@ def write_result_file(run_id, out_dir, analysis_result_per_file, messages_regex_ out_file.write("\n-------------------------------------------------\n\n") out_file.flush() - #--------------------------------------------------------------------- def write_summary_file(run_id, out_dir, analysis_result_per_file, unused_regex_messages): @@ -607,7 +624,7 @@ def main(argv): usage() sys.exit(err_invalid_input) - analyzer = LogAnalyzer(run_id, verbose) + analyzer = AnsibleLogAnalyzer(run_id, verbose) log_file_list = filter(None, log_files_in.split(tokenizer)) @@ -635,6 +652,9 @@ def main(argv): unused_regex_messages = [] 
write_result_file(run_id, out_dir, result, messages_regex_e, unused_regex_messages) write_summary_file(run_id, out_dir, result, unused_regex_messages) + elif (action == "add_end_marker"): + analyzer.place_marker(log_file_list, analyzer.create_end_marker()) + return 0 else: print 'Unknown action:%s specified' % action diff --git a/ansible/roles/test/files/tools/loganalyzer/loganalyzer_common_match.txt b/ansible/roles/test/files/tools/loganalyzer/loganalyzer_common_match.txt index d17c702965f..23c2870e4a9 100644 --- a/ansible/roles/test/files/tools/loganalyzer/loganalyzer_common_match.txt +++ b/ansible/roles/test/files/tools/loganalyzer/loganalyzer_common_match.txt @@ -1,6 +1,6 @@ r, "\.ERR", "\.WARN", "crash" r, "kernel:.*Oops", "kernel:.*hung", "kernel.*oom\s" r, "kernel:.*scheduling", "kernel:.*atomic", "kernel:.*panic" -r, "kernel:.*\serr", "kernel:.*allocation", "kernel:.*kill", -r, "kernel:.*kmemleak.*","kernel:.* Err:" +r, "kernel:.*\serr", "kernel:.*allocation", "kernel:.*kill" +r, "kernel:.*kmemleak.*", "kernel:.* Err:" s, "ERR" diff --git a/tests/conftest.py b/tests/conftest.py index 2701fdf06fd..edd251c4042 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -7,6 +7,7 @@ import ipaddr as ipaddress from ansible_host import AnsibleHost +from loganalyzer import LogAnalyzer pytest_plugins = ('ptf_fixtures', 'ansible_fixtures') @@ -52,6 +53,7 @@ def __init__(self, testbed_file): def pytest_addoption(parser): parser.addoption("--testbed", action="store", default=None, help="testbed name") parser.addoption("--testbed_file", action="store", default=None, help="testbed file name") + parser.addoption("--disable_loganalyzer", action="store_true", default=False, help="disable loganalyzer analysis for 'loganalyzer' fixture") @pytest.fixture(scope="session") @@ -121,3 +123,19 @@ def eos(): with open('eos/eos.yml') as stream: eos = yaml.safe_load(stream) return eos + +@pytest.fixture(autouse=True) +def loganalyzer(duthost, request): + loganalyzer = 
LogAnalyzer(ansible_host=duthost, marker_prefix=request.node.name) + # Add start marker into DUT syslog + marker = loganalyzer.init() + yield loganalyzer + if not request.config.getoption("--disable_loganalyzer") and "disable_loganalyzer" not in request.keywords: + # Read existing common regular expressions located with the legacy loganalyzer module + loganalyzer.load_common_config() + # Parse syslog and process result. Raise "LogAnalyzerError" exception if: total match or expected missing match is not equal to zero + loganalyzer.analyze(marker) + else: + # Add end marker into DUT syslog + loganalyzer._add_end_marker(marker) + diff --git a/tests/loganalyzer/README.md b/tests/loganalyzer/README.md new file mode 100755 index 00000000000..db9cc80fa38 --- /dev/null +++ b/tests/loganalyzer/README.md @@ -0,0 +1,135 @@ +#### Loganalyzer API usage example + +Below we describe how the loganalyzer fixture/module can be used. + +##### Loganalyzer fixture +The root conftest implements a "loganalyzer" pytest fixture, which starts automatically for all test cases. +Fixture main flow: +- loganalyzer will add a start marker before the test case starts +- loganalyzer will add a stop marker after the test case finishes +- if loganalyzer analysis is not disabled for the current test case, it will analyze the DUT syslog and display results. +If loganalyzer finds messages that correspond to the defined regular expressions, it will display the found messages and pytest will generate an 'error'. + +#### To skip loganalyzer analysis for: +- all test cases - use pytest command line option ```--disable_loganalyzer``` +- specific test case: mark test case with ```@pytest.mark.disable_loganalyzer``` decorator. Example is shown below. + + +#### Notes: +loganalyzer.init() - can be called several times without calling "loganalyzer.analyze(marker)" between calls. Each call returns its unique marker, which is used for the "analyze" phase - loganalyzer.analyze(marker).
+ + +### Loganalyzer usage example + +#### Example calling loganalyzer init/analyze methods automatically by using with statement +```python + # Read existed common regular expressions located with legacy loganalyzer module + loganalyzer.load_common_config() + # Analyze syslog for code executed inside with statement + with loganalyzer as analyzer: + logging.debug("============== Test steps ===================") + # Add test code here ... + time.sleep(1) + + # Separately analyze syslog for code executed inside each with statement + with loganalyzer as analyzer: + # Clear current regexp match list if there is a need to have clear configuration + loganalyzer.match_regex = [] + # Load regular expressions from the specified file + reg_exp = loganalyzer.parse_regexp_file(src=COMMON_MATCH) + # Extend currently configured match criteria (regular expressions) with data read from "COMMON_MATCH" file + loganalyzer.match_regex.extend(reg_exp) + # Add test code here ... + # Here will be executed syslog analysis on context manager __exit__ + time.sleep(1) + with loganalyzer as analyzer: + # Clear current regexp match list if there is a need to have clear configuration + loganalyzer.match_regex = [] + # Set match criteria (regular expression) to custom regexp - "test:.*Error" + loganalyzer.match_regex.extend(["test:.*Error"]) + # Add test code here ... + # Here will be executed syslog analysis on context manager __exit__ + time.sleep(1) + with loganalyzer as analyzer: + # Add test code here ... + # Here will be executed syslog analysis on context manager __exit__ + time.sleep(1) +``` + +#### Example calling loganalyzer init/analyze methods directly in test case +```python + # Example 1 + # Read existed common regular expressions located with legacy loganalyzer module + loganalyzer.load_common_config() + # Add start marker to the DUT syslog + marker = loganalyzer.init() + # PERFORM TEST CASE STEPS ... + # Verify that error messages were not found in DUT syslog. 
Exception will be raised if in DUT syslog will be found messages which fits regexp defined in COMMON_MATCH + loganalyzer.analyze(marker) + + # Example 2 + # Read existed common regular expressions located with legacy loganalyzer module + loganalyzer.load_common_config() + # Add start marker to the DUT syslog + marker = loganalyzer.init() + # PERFORM TEST CASE STEPS ... + # Get summary of analyzed DUT syslog + result = loganalyzer.analyze(marker, fail=False) + # Verify that specific amount of error messages found in syslog # Negative test case + assert result["total"]["match"] == 2, "Not found expected errors: {}".format(result) + + # Example 3 + # Download extracted syslog file from DUT to the local host + loganalyzer.save_extracted_log(dest="/tmp/log/syslog") + + # Example 4 + # Update previously configured marker + # Now start marker will have new prefix - test_bgp + loganalyzer.update_marker_prefix("test_bgp") + + def get_platform_info(dut): + """ + Example callback which gets DUT platform information and returns obtained string + """ + return dut.command("show platform summary") + + # Example 5 + # Execute specific function and analyze logs during function execution + run_cmd_result = loganalyzer.run_cmd(get_platform_info, ans_host) + # Process result of "get_platform_info" callback + assert all(item in run_cmd_result["stdout"] for item in ["Platform", "HwSKU", "ASIC"]) is True, "Unexpected output returned after command execution: {}".format(run_cmd_result) + + # Example 6 + # Clear current regexp match list + loganalyzer.match_regex = [] + # Load regular expressions from the specified file defined in COMMON_MATCH variable + reg_exp = loganalyzer.parse_regexp_file(src=COMMON_MATCH) + # Extend currently configured match criteria (regular expressions) with data read from "COMMON_MATCH" file + loganalyzer.match_regex.extend(reg_exp) + marker = loganalyzer.init() + # PERFORM TEST CASE STEPS ... + # Verify that error messages were not found in DUT syslog. 
Exception will be raised if in DUT syslog will be found messages which fits regexp defined in COMMON_MATCH + loganalyzer.analyze(marker) + + # Example 7 + loganalyzer.expect_regex = [] + # Add specific EXPECTED regular expression + # Means that in the DUT syslog loganalyzer will search for message which matches with "kernel:.*Oops" regular expression + # If such message will not be present in DUT syslog, it will raise exception + loganalyzer.expect_regex.append("kernel:.*Oops") + # Add start marker to the DUT syslog + marker = loganalyzer.init() + # PERFORM TEST CASE STEPS ... + # Verify that expected error messages WERE FOUND in DUT syslog. Exception will be raised if in DUT syslog will NOT be found messages which fits to "kernel:.*Oops" regular expression + loganalyzer.analyze(marker) + + # Example 8 + loganalyzer.expect_regex = [] + # Add specific EXPECTED regular expression + # Means that in the DUT syslog loganalyzer will search for message which matches with "kernel:.*Oops" regular expression + # If such message will not be present in DUT syslog, it will raise exception + loganalyzer.expect_regex.append("kernel:.*Oops") + # PERFORM TEST CASE STEPS ... + # Verify that expected error messages WERE FOUND in DUT syslog. 
Exception will be raised if in DUT syslog will NOT be found messages which fits to "kernel:.*Oops" regular expression + loganalyzer.run_cmd(ans_host.command, "echo '---------- kernel: says Oops --------------' >> /var/log/syslog") +``` diff --git a/tests/loganalyzer/__init__.py b/tests/loganalyzer/__init__.py new file mode 100755 index 00000000000..e32a3567489 --- /dev/null +++ b/tests/loganalyzer/__init__.py @@ -0,0 +1 @@ +from .loganalyzer import LogAnalyzer, COMMON_MATCH, COMMON_IGNORE, COMMON_EXPECT, LogAnalyzerError diff --git a/tests/loganalyzer/loganalyzer.py b/tests/loganalyzer/loganalyzer.py new file mode 100755 index 00000000000..8d9b8b1e663 --- /dev/null +++ b/tests/loganalyzer/loganalyzer.py @@ -0,0 +1,232 @@ +import sys +import logging +import os +import re +import time +import pprint +import system_msg_handler + +from system_msg_handler import AnsibleLogAnalyzer as ansible_loganalyzer +from os.path import join, split +from os.path import normpath + +ANSIBLE_LOGANALYZER_MODULE = system_msg_handler.__file__.replace(r".pyc", ".py") +COMMON_MATCH = join(split(__file__)[0], "loganalyzer_common_match.txt") +COMMON_IGNORE = join(split(__file__)[0], "loganalyzer_common_ignore.txt") +COMMON_EXPECT = join(split(__file__)[0], "loganalyzer_common_expect.txt") +SYSLOG_TMP_FOLDER = "/tmp/pytest-run/syslog" + + +class LogAnalyzerError(Exception): + """Raised when loganalyzer found matches during analysis phase.""" + def __repr__(self): + return pprint.pformat(self.message) + + +class LogAnalyzer: + def __init__(self, ansible_host, marker_prefix, dut_run_dir="/tmp"): + self.ansible_host = ansible_host + self.dut_run_dir = dut_run_dir + self.extracted_syslog = os.path.join(self.dut_run_dir, "syslog") + self.marker_prefix = marker_prefix + self.ansible_loganalyzer = ansible_loganalyzer(self.marker_prefix, False) + + self.match_regex = [] + self.expect_regex = [] + self.ignore_regex = [] + self._markers = [] + + def _add_end_marker(self, marker): + """ + @summary: Add 
stop marker into syslog on the DUT. + + @return: True for successfull execution False otherwise + """ + self.ansible_host.copy(src=ANSIBLE_LOGANALYZER_MODULE, dest=os.path.join(self.dut_run_dir, "loganalyzer.py")) + + cmd = "python {run_dir}/loganalyzer.py --action add_end_marker --run_id {marker}".format(run_dir=self.dut_run_dir, marker=marker) + + logging.debug("Adding end marker '{}'".format(marker)) + self.ansible_host.command(cmd) + + def __enter__(self): + """ + Store start markers which are used in analyze phase. + """ + self._markers.append(self.init()) + + def __exit__(self, *args): + """ + Analyze syslog messages. + """ + self.analyze(self._markers.pop()) + + def _verify_log(self, result): + """ + Verify that total match and expected missing match equals to zero or raise exception otherwise. + Verify that expected_match is not equal to zero when there is configured expected regexp in self.expect_regex list + """ + if not result: + raise LogAnalyzerError("Log analyzer failed - no result.") + if result["total"]["match"] != 0 or result["total"]["expected_missing_match"] != 0: + raise LogAnalyzerError(result) + + # Check for negative case + if self.expect_regex and result["total"]["expected_match"] == 0: + raise LogAnalyzerError(result) + + def update_marker_prefix(self, marker_prefix): + """ + @summary: Update configured marker prefix + """ + self.marker_prefix = marker_prefix + + def load_common_config(self): + """ + @summary: Load regular expressions from common files, which are localted in folder with legacy loganalyzer. + Loaded regular expressions are used by "analyze" method to match expected text in the downloaded log file. 
+ """ + self.match_regex = self.ansible_loganalyzer.create_msg_regex([COMMON_MATCH])[1] + self.ignore_regex = self.ansible_loganalyzer.create_msg_regex([COMMON_IGNORE])[1] + self.expect_regex = self.ansible_loganalyzer.create_msg_regex([COMMON_EXPECT])[1] + + def parse_regexp_file(self, src): + """ + @summary: Get regular expressions defined in src file. + """ + return self.ansible_loganalyzer.create_msg_regex([src])[1] + + def run_cmd(self, callback, *args, **kwargs): + """ + @summary: Initialize loganalyzer, execute function and analyze syslog. + + @param callback: Python callable or function to be executed. + @param args: Input arguments for callback function. + @param kwargs: Input key value arguments for callback function. + + @return: Callback execution result + """ + marker = self.init() + try: + call_result = callback(*args, **kwargs) + except Exception as err: + logging.error("Error during callback execution:\n{}".format(err)) + logging.debug("Log analysis result\n".format(self.analyze(marker))) + raise err + self.analyze(marker) + + return call_result + + def init(self): + """ + @summary: Add start marker into syslog on the DUT. + + @return: True for successfull execution False otherwise + """ + logging.debug("Loganalyzer init") + + self.ansible_host.copy(src=ANSIBLE_LOGANALYZER_MODULE, dest=os.path.join(self.dut_run_dir, "loganalyzer.py")) + + start_marker = ".".join((self.marker_prefix, time.strftime("%Y-%m-%d-%H:%M:%S", time.gmtime()))) + cmd = "python {run_dir}/loganalyzer.py --action init --run_id {start_marker}".format(run_dir=self.dut_run_dir, start_marker=start_marker) + + logging.debug("Adding start marker '{}'".format(start_marker)) + self.ansible_host.command(cmd) + return start_marker + + def analyze(self, marker, fail=True): + """ + @summary: Extract syslog logs based on the start/stop markers and compose one file. Download composed file, analyze file based on defined regular expressions. + + @param marker: Marker obtained from "init" method. 
+ @param fail: Flag to enable/disable raising exception when loganalyzer find error messages. + + @return: If "fail" is False - return dictionary of parsed syslog summary, if dictionary can't be parsed - return empty dictionary. If "fail" is True and if found match messages - raise exception. + """ + logging.debug("Loganalyzer analyze") + analyzer_summary = {"total": {"match": 0, "expected_match": 0, "expected_missing_match": 0}, + "match_files": {}, + "match_messages": {}, + "expect_messages": {}, + "unused_expected_regexp": [] + } + tmp_folder = ".".join((SYSLOG_TMP_FOLDER, time.strftime("%Y-%m-%d-%H:%M:%S", time.gmtime()))) + self.ansible_loganalyzer.run_id = marker + + # Add end marker into DUT syslog + self._add_end_marker(marker) + + try: + # Disable logrotate cron task + self.ansible_host.command("sed -i 's/^/#/g' /etc/cron.d/logrotate") + + logging.debug("Waiting for logrotate from previous cron task run to finish") + # Wait for logrotate from previous cron task run to finish + end = time.time() + 60 + while time.time() < end: + # Verify for exception because self.ansible_host automatically handle command return codes and raise exception for none zero code + try: + self.ansible_host.command("pgrep -f logrotate") + except Exception: + break + else: + time.sleep(5) + continue + else: + logging.error("Logrotate from previous task was not finished during 60 seconds") + + # On DUT extract syslog files from /var/log/ and create one file by location - /tmp/syslog + self.ansible_host.extract_log(directory='/var/log', file_prefix='syslog', start_string='start-LogAnalyzer-{}'.format(marker), target_filename=self.extracted_syslog) + finally: + # Enable logrotate cron task back + self.ansible_host.command("sed -i 's/^#//g' /etc/cron.d/logrotate") + + # Download extracted logs from the DUT to the temporal folder defined in SYSLOG_TMP_FOLDER + self.save_extracted_log(dest=tmp_folder) + + match_messages_regex = re.compile('|'.join(self.match_regex)) if 
len(self.match_regex) else None + ignore_messages_regex = re.compile('|'.join(self.ignore_regex)) if len(self.ignore_regex) else None + expect_messages_regex = re.compile('|'.join(self.expect_regex)) if len(self.expect_regex) else None + + analyzer_parse_result = self.ansible_loganalyzer.analyze_file_list([tmp_folder], match_messages_regex, ignore_messages_regex, expect_messages_regex) + # Print syslog file content and remove the file + with open(tmp_folder) as fo: + logging.debug("Syslog content:\n\n{}".format(fo.read())) + os.remove(tmp_folder) + + total_match_cnt = 0 + total_expect_cnt = 0 + expected_lines_total = [] + unused_regex_messages = [] + + for key, value in analyzer_parse_result.iteritems(): + matching_lines, expecting_lines = value + analyzer_summary["total"]["match"] += len(matching_lines) + analyzer_summary["total"]["expected_match"] += len(expecting_lines) + analyzer_summary["match_files"][key] = {"match": len(matching_lines), "expected_match": len(expecting_lines)} + analyzer_summary["match_messages"][key] = matching_lines + analyzer_summary["expect_messages"][key] = expecting_lines + expected_lines_total.extend(expecting_lines) + + # Find unused regex matches + for regex in self.expect_regex: + for line in expected_lines_total: + if re.search(regex, line): + break + else: + unused_regex_messages.append(regex) + analyzer_summary["total"]["expected_missing_match"] = len(unused_regex_messages) + analyzer_summary["unused_expected_regexp"] = unused_regex_messages + + if fail: + self._verify_log(analyzer_summary) + else: + return analyzer_summary + + def save_extracted_log(self, dest): + """ + @summary: Download extracted syslog log file to the ansible host. + + @param dest: File path to store downloaded log file. 
+ """ + self.ansible_host.fetch(dest=dest, src=self.extracted_syslog, flat="yes") diff --git a/tests/loganalyzer/loganalyzer_common_expect.txt b/tests/loganalyzer/loganalyzer_common_expect.txt new file mode 120000 index 00000000000..2ae3246b088 --- /dev/null +++ b/tests/loganalyzer/loganalyzer_common_expect.txt @@ -0,0 +1 @@ +../../ansible/roles/test/files/tools/loganalyzer/loganalyzer_common_expect.txt \ No newline at end of file diff --git a/tests/loganalyzer/loganalyzer_common_ignore.txt b/tests/loganalyzer/loganalyzer_common_ignore.txt new file mode 120000 index 00000000000..ad2643fb427 --- /dev/null +++ b/tests/loganalyzer/loganalyzer_common_ignore.txt @@ -0,0 +1 @@ +../../ansible/roles/test/files/tools/loganalyzer/loganalyzer_common_ignore.txt \ No newline at end of file diff --git a/tests/loganalyzer/loganalyzer_common_match.txt b/tests/loganalyzer/loganalyzer_common_match.txt new file mode 120000 index 00000000000..1f865571b62 --- /dev/null +++ b/tests/loganalyzer/loganalyzer_common_match.txt @@ -0,0 +1 @@ +../../ansible/roles/test/files/tools/loganalyzer/loganalyzer_common_match.txt \ No newline at end of file diff --git a/tests/loganalyzer/system_msg_handler.py b/tests/loganalyzer/system_msg_handler.py new file mode 120000 index 00000000000..4fd54f816fc --- /dev/null +++ b/tests/loganalyzer/system_msg_handler.py @@ -0,0 +1 @@ +../../ansible/roles/test/files/tools/loganalyzer/loganalyzer.py \ No newline at end of file diff --git a/tests/platform/test_reboot.py b/tests/platform/test_reboot.py index f266876b11e..d593ec46745 100644 --- a/tests/platform/test_reboot.py +++ b/tests/platform/test_reboot.py @@ -22,6 +22,8 @@ from check_transceiver_status import check_transceiver_basic from check_transceiver_status import all_transceivers_detected +pytestmark = [pytest.mark.disable_loganalyzer] + def reboot_and_check(localhost, dut, interfaces, reboot_type="cold"): """ From 3f8edd3fa98628817083a239639674f3cdfe8d8b Mon Sep 17 00:00:00 2001 From: Neetha John Date: 
Wed, 28 Aug 2019 15:22:41 -0700 Subject: [PATCH 16/16] [warm-reboot] Preboot sad path automation for n lag members (#1036) * Preboot sad path automation for n lag members Signed-off-by: Neetha John --- .../test/files/ptftests/advanced-reboot.py | 38 ++++-- ansible/roles/test/files/ptftests/arista.py | 15 ++- ansible/roles/test/files/ptftests/sad_path.py | 119 +++++++++++++----- .../advanced_reboot/validate_preboot_list.yml | 2 +- .../test/tasks/warm-reboot-multi-sad.yml | 13 +- 5 files changed, 142 insertions(+), 45 deletions(-) diff --git a/ansible/roles/test/files/ptftests/advanced-reboot.py b/ansible/roles/test/files/ptftests/advanced-reboot.py index 7a94bc4d4b6..9a015aa09ce 100644 --- a/ansible/roles/test/files/ptftests/advanced-reboot.py +++ b/ansible/roles/test/files/ptftests/advanced-reboot.py @@ -319,7 +319,7 @@ def get_portchannel_info(self): for member in content[key]['members']: for vm_key in self.vm_dut_map.keys(): if member in self.vm_dut_map[vm_key]['dut_ports']: - self.vm_dut_map[vm_key]['dut_portchannel'] = key + self.vm_dut_map[vm_key]['dut_portchannel'] = str(key) self.vm_dut_map[vm_key]['neigh_portchannel'] = 'Port-Channel1' break @@ -327,8 +327,8 @@ def get_neigh_port_info(self): content = self.read_json('neigh_port_info') for key in content.keys(): if content[key]['name'] in self.vm_dut_map.keys(): - self.vm_dut_map[content[key]['name']]['dut_ports'].append(key) - self.vm_dut_map[content[key]['name']]['neigh_ports'].append(content[key]['port']) + self.vm_dut_map[content[key]['name']]['dut_ports'].append(str(key)) + self.vm_dut_map[content[key]['name']]['neigh_ports'].append(str(content[key]['port'])) self.vm_dut_map[content[key]['name']]['ptf_ports'].append(self.port_indices[key]) def build_peer_mapping(self): @@ -355,6 +355,30 @@ def populate_fail_info(self, fails): self.fails[key] = set() self.fails[key] |= fails[key] + def get_preboot_info(self): + ''' + Prepares the msg string to log when a preboot_oper is defined. 
+ preboot_oper can be represented in the following ways + eg. 'preboot_oper' - a single VM will be selected and preboot_oper will be applied to it + 'neigh_bgp_down:2' - 2 VMs will be selected and preboot_oper will be applied to the selected 2 VMs + 'neigh_lag_member_down:3:1' - this case is used for lag member down operation only. This indicates that + 3 VMs will be selected and 1 of the lag members in the porchannel will be brought down + ''' + msg = '' + if self.preboot_oper: + msg = 'Preboot oper: %s ' % self.preboot_oper + if ':' in self.preboot_oper: + oper_list = self.preboot_oper.split(':') + msg = 'Preboot oper: %s ' % oper_list[0] # extract the preboot oper_type + if len(oper_list) > 2: + # extract the number of VMs and the number of LAG members. preboot_oper will be of the form oper:no of VMS:no of lag members + msg += 'Number of sad path VMs: %s Lag member down in a portchannel: %s' % (oper_list[-2], oper_list[-1]) + else: + # extract the number of VMs. preboot_oper will be of the form oper:no of VMS + msg += 'Number of sad path VMs: %s' % oper_list[-1] + + return msg + def setUp(self): self.fails['dut'] = set() self.port_indices = self.read_port_indices() @@ -427,13 +451,7 @@ def setUp(self): self.generate_arp_ping_packet() if self.reboot_type == 'warm-reboot': - # get the number of members down for sad path - if self.preboot_oper: - if ':' in self.preboot_oper: - oper_type, cnt = self.preboot_oper.split(':') - else: - oper_type, cnt = self.preboot_oper, 1 - self.log("Preboot Oper: %s Number down: %s" % (oper_type, cnt)) + self.log(self.get_preboot_info()) # Pre-generate list of packets to be sent in send_in_background method. 
generate_start = datetime.datetime.now() diff --git a/ansible/roles/test/files/ptftests/arista.py b/ansible/roles/test/files/ptftests/arista.py index 04459417849..db967eb0bda 100644 --- a/ansible/roles/test/files/ptftests/arista.py +++ b/ansible/roles/test/files/ptftests/arista.py @@ -396,18 +396,23 @@ def verify_bgp_neigh_state(self, dut=None, state="Active"): self.fails.add('Verify BGP %s neighbor: Object missing in output' % ver) return self.fails, bgp_state - def change_neigh_lag_state(self, lag, is_up=True): + def change_neigh_lag_state(self, intf, is_up=True): state = ['shut', 'no shut'] self.do_cmd('configure') - is_match = re.match('(Port-Channel|Ethernet)\d+', lag) + is_match = re.match('(Port-Channel|Ethernet)\d+', intf) if is_match: - output = self.do_cmd('interface %s' % lag) + output = self.do_cmd('interface %s' % intf) if 'Invalid' not in output: self.do_cmd(state[is_up]) self.do_cmd('exit') - self.do_cmd('exit') + self.do_cmd('exit') + + def change_neigh_intfs_state(self, intfs, is_up=True): + for intf in intfs: + self.change_neigh_lag_state(intf, is_up=is_up) def verify_neigh_lag_state(self, lag, state="connected", pre_check=True): + states = state.split(',') lag_state = False msg_prefix = ['Postboot', 'Preboot'] is_match = re.match('(Port-Channel|Ethernet)\d+', lag) @@ -418,7 +423,7 @@ def verify_neigh_lag_state(self, lag, state="connected", pre_check=True): obj = json.loads(data) if 'interfaces' in obj and lag in obj['interfaces']: - lag_state = (obj['interfaces'][lag]['interfaceStatus'] == state) + lag_state = (obj['interfaces'][lag]['interfaceStatus'] in states) else: self.fails.add('%s: Verify LAG %s: Object missing in output' % (msg_prefix[pre_check], lag)) return self.fails, lag_state diff --git a/ansible/roles/test/files/ptftests/sad_path.py b/ansible/roles/test/files/ptftests/sad_path.py index 958e4be2e58..bf722d917f5 100644 --- a/ansible/roles/test/files/ptftests/sad_path.py +++ b/ansible/roles/test/files/ptftests/sad_path.py @@ -36,8 
+36,9 @@ def revert(self): class SadPath(object): def __init__(self, oper_type, vm_list, portchannel_ports, vm_dut_map, test_args): - (self.oper_type, self.cnt) = oper_type.split(':') if ':' in oper_type else (oper_type, 1) - self.cnt = int(self.cnt) + self.oper_type = '' + self.cnt = 1 + self.memb_cnt = 0 self.vm_list = vm_list self.portchannel_ports = portchannel_ports self.vm_dut_map = vm_dut_map @@ -50,6 +51,21 @@ def __init__(self, oper_type, vm_list, portchannel_ports, vm_dut_map, test_args) self.log = [] self.fails = dict() self.fails['dut'] = set() + self.tot_memb_cnt = 0 + self.memb_index = 0 + self.extract_oper_info(oper_type) + + def extract_oper_info(self, oper_type): + if oper_type and ':' in oper_type: + temp = oper_type.split(':') + self.oper_type = temp[0] + # get number of VMs where the sad pass oper needs to be done + self.cnt = int(temp[1]) + if len(temp) > 2: + # get the number of lag members in a portchannel that should be brought down + self.memb_cnt = int(temp[-1]) + else: + self.oper_type = oper_type def cmd(self, cmds): process = subprocess.Popen(cmds, @@ -74,7 +90,7 @@ def select_vm(self): else: self.neigh_vms.extend(self.vm_list[vm_index:]) self.neigh_vms.extend(self.vm_list[0:exceed_len]) - self.vm_list = self.vm_list[exceed_len:vm_len - self.cnt] + self.vm_list = self.vm_list[exceed_len:exceed_len + vm_len - self.cnt] def get_neigh_name(self): for key in self.vm_dut_map: @@ -101,11 +117,25 @@ def vm_disconnect(self): for vm in self.vm_handles: self.vm_handles[vm].disconnect() + def select_member(self): + # select index of lag member to put down + if self.tot_memb_cnt != 0: + self.memb_index = datetime.datetime.now().day % self.tot_memb_cnt + def setup(self): self.select_vm() self.get_neigh_name() self.down_neigh_port() self.vm_connect() + + # decide if its all member down or few members down for lag member oper type + if 'member' in self.oper_type: + self.tot_memb_cnt = len(self.vm_dut_map[self.neigh_names.values()[0]]['dut_ports']) + 
if self.memb_cnt == 0: + self.memb_cnt = self.tot_memb_cnt + if self.tot_memb_cnt != self.memb_cnt: + self.select_member() + for vm in self.vm_handles: self.neigh_bgps[vm], self.dut_bgps[vm] = self.vm_handles[vm].get_bgp_info() self.fails[vm] = set() @@ -128,9 +158,11 @@ def __init__(self, oper_type, vm_list, portchannel_ports, vm_dut_map, test_args, self.dut_ssh = dut_ssh self.dut_needed = dict() self.lag_members_down = dict() + self.neigh_lag_members_down = dict() self.neigh_lag_state = None self.po_neigh_map = dict() self.msg_prefix = ['Postboot', 'Preboot'] + self.memb_str = 'member' if 'member' in self.oper_type else '' def populate_bgp_state(self): [self.dut_needed.setdefault(vm, self.dut_bgps[vm]) for vm in self.neigh_vms] @@ -141,11 +173,11 @@ def populate_bgp_state(self): elif self.oper_type == 'dut_bgp_down': self.neigh_bgps['changed_state'] = 'Active' self.dut_bgps['changed_state'] = 'Idle' - elif self.oper_type == 'neigh_lag_down': + elif 'neigh_lag' in self.oper_type: # on the DUT side, bgp states are different pre and post boot. 
hence passing multiple values self.neigh_bgps['changed_state'] = 'Idle' self.dut_bgps['changed_state'] = 'Connect,Active,Idle' - elif self.oper_type == 'dut_lag_down': + elif 'dut_lag' in self.oper_type: self.neigh_bgps['changed_state'] = 'Idle' self.dut_bgps['changed_state'] = 'Active,Connect,Idle' @@ -169,13 +201,22 @@ def sad_setup(self, is_up=True): time.sleep(30) elif 'lag' in self.oper_type: - self.log.append('LAG state change will be for %s' % ", ".join(self.neigh_vms)) - if self.oper_type == 'neigh_lag_down': + self.log.append('LAG %s state change will be for %s' % (self.memb_str, ", ".join(self.neigh_vms))) + if 'neigh_lag' in self.oper_type: for vm in self.neigh_vms: - self.log.append('Changing state of LAG %s to shut' % self.vm_dut_map[self.neigh_names[vm]]['neigh_portchannel']) - self.vm_handles[vm].change_neigh_lag_state(self.vm_dut_map[self.neigh_names[vm]]['neigh_portchannel'], is_up=is_up) - elif self.oper_type == 'dut_lag_down': + + # populate entity to be brought down on neigh end (portchannel/portchannel members) + if 'member' in self.oper_type: + down_intfs = self.neigh_lag_members_down[self.neigh_names[vm]] + else: + down_intfs = [self.vm_dut_map[self.neigh_names[vm]]['neigh_portchannel']] + + self.log.append('Changing state of LAG %s %s to shut' % (self.memb_str, ", ".join(down_intfs))) + self.vm_handles[vm].change_neigh_intfs_state(down_intfs, is_up=is_up) + + elif 'dut_lag' in self.oper_type: self.change_dut_lag_state(is_up=is_up) + # wait for sometime for lag members state to sync time.sleep(120) @@ -234,30 +275,47 @@ def sad_bgp_verify(self): else: self.fails['dut'].add('BGP state not down on DUT') + def populate_lag_member_down(self, neigh_name): + po_name = self.vm_dut_map[neigh_name]['dut_portchannel'] + # build DUT portchannel to down members mapping and neigh name to down members mapping + # if only single member is down, extract the member and convert it into list otherwise assign the list directly + if self.tot_memb_cnt != 
self.memb_cnt: + self.lag_members_down[po_name] = [self.vm_dut_map[neigh_name]['dut_ports'][self.memb_index]] + self.neigh_lag_members_down[neigh_name] = [self.vm_dut_map[neigh_name]['neigh_ports'][self.memb_index]] + else: + self.lag_members_down[po_name] = self.vm_dut_map[neigh_name]['dut_ports'] + self.neigh_lag_members_down[neigh_name] = self.vm_dut_map[neigh_name]['neigh_ports'] + def populate_lag_state(self): - if self.oper_type == 'neigh_lag_down': - self.neigh_lag_state = 'disabled' - elif self.oper_type == 'dut_lag_down': + if 'neigh_lag' in self.oper_type: + self.neigh_lag_state = 'disabled,notconnect' + elif 'dut_lag' in self.oper_type: self.neigh_lag_state = 'notconnect' for neigh_name in self.neigh_names.values(): - # build portchannel to down members mapping - po_name = self.vm_dut_map[neigh_name]['dut_portchannel'] - self.lag_members_down[po_name] = self.vm_dut_map[neigh_name]['dut_ports'] + self.populate_lag_member_down(neigh_name) def change_dut_lag_state(self, is_up=True): state = ['shutdown', 'startup'] for neigh_name in self.neigh_names.values(): dut_portchannel = self.vm_dut_map[neigh_name]['dut_portchannel'] - if not re.match('(PortChannel|Ethernet)\d+', dut_portchannel): continue - self.log.append('Changing state of %s from DUT side to %s' % (dut_portchannel, state[is_up])) - stdout, stderr, return_code = self.cmd(['ssh', '-oStrictHostKeyChecking=no', self.dut_ssh, 'sudo config interface %s %s' % (state[is_up], dut_portchannel)]) - if return_code != 0: - self.fails['dut'].add('%s: State change not successful from DUT side for %s' % (self.msg_prefix[1 - is_up], dut_portchannel)) - self.fails['dut'].add('%s: Return code: %d' % (self.msg_prefix[1 - is_up], return_code)) - self.fails['dut'].add('%s: Stderr: %s' % (self.msg_prefix[1 - is_up], stderr)) + + # populate the entity that needs to be brought down (portchannel or portchannel member) + if 'member' in self.oper_type: + down_intfs = self.lag_members_down[dut_portchannel] else: - 
self.log.append('%s: State change successful on DUT for %s' % (self.msg_prefix[1 - is_up], dut_portchannel)) + down_intfs = [dut_portchannel] + + for intf in down_intfs: + if not re.match('(PortChannel|Ethernet)\d+', intf): continue + self.log.append('Changing state of %s from DUT side to %s' % (intf, state[is_up])) + stdout, stderr, return_code = self.cmd(['ssh', '-oStrictHostKeyChecking=no', self.dut_ssh, 'sudo config interface %s %s' % (state[is_up], intf)]) + if return_code != 0: + self.fails['dut'].add('%s: State change not successful from DUT side for %s' % (self.msg_prefix[1 - is_up], intf)) + self.fails['dut'].add('%s: Return code: %d' % (self.msg_prefix[1 - is_up], return_code)) + self.fails['dut'].add('%s: Stderr: %s' % (self.msg_prefix[1 - is_up], stderr)) + else: + self.log.append('State change successful on DUT for %s' % intf) def verify_dut_lag_member_state(self, match, pre_check=True): success = True @@ -265,10 +323,15 @@ def verify_dut_lag_member_state(self, match, pre_check=True): lag_memb_output = match.group(2) neigh_name = self.po_neigh_map[po_name] for member in self.vm_dut_map[neigh_name]['dut_ports']: - if po_name in self.lag_members_down and member in self.lag_members_down[po_name]: - search_str = '%s(D)' % member - else: - search_str = '%s(S)' % member + # default state for the lag member + search_str = '%s(S)' % member + + if po_name in self.lag_members_down: + if member in self.lag_members_down[po_name]: + search_str = '%s(D)' % member + # single member case. 
state of non down member of the down portchannel + elif self.tot_memb_cnt != self.memb_cnt: + search_str = '%s(S*)' % member if lag_memb_output.find(search_str) != -1: self.log.append('Lag member %s state as expected' % member) diff --git a/ansible/roles/test/tasks/advanced_reboot/validate_preboot_list.yml b/ansible/roles/test/tasks/advanced_reboot/validate_preboot_list.yml index bf6f88f113d..5262b0b3172 100644 --- a/ansible/roles/test/tasks/advanced_reboot/validate_preboot_list.yml +++ b/ansible/roles/test/tasks/advanced_reboot/validate_preboot_list.yml @@ -1,5 +1,5 @@ - set_fact: - item_cnt: "{{ item.split(':')[1]|int }}" + item_cnt: "{{ item.split(':')[-1]|int }}" host_max_len: "{{ vm_hosts|length - 1 }}" member_max_cnt: "{{ minigraph_portchannels.values()[0]['members']|length }}" diff --git a/ansible/roles/test/tasks/warm-reboot-multi-sad.yml b/ansible/roles/test/tasks/warm-reboot-multi-sad.yml index 9555da8ca35..292a5684a94 100644 --- a/ansible/roles/test/tasks/warm-reboot-multi-sad.yml +++ b/ansible/roles/test/tasks/warm-reboot-multi-sad.yml @@ -3,9 +3,20 @@ reboot_limit: 1 when: reboot_limit is not defined +# preboot_list format is 'preboot oper type:number of VMS down:number of lag members down'. 
for non lag member cases, this parameter will be skipped +- name: Set vars + set_fact: + pre_list: ['neigh_bgp_down:2', 'dut_bgp_down:3', 'dut_lag_down:2', 'neigh_lag_down:3', 'dut_lag_member_down:3:1', 'neigh_lag_member_down:2:1'] + lag_memb_cnt: "{{ minigraph_portchannels.values()[0]['members']|length }}" + +- name: Add all lag member down case + set_fact: + pre_list: "{{ pre_list + ['dut_lag_member_down:2:{{ lag_memb_cnt }}', 'neigh_lag_member_down:3:{{ lag_memb_cnt }}']}}" + when: testbed_type in ['t0-64', 't0-116', 't0-64-32'] + - name: Warm-reboot test include: advanced-reboot.yml vars: reboot_type: warm-reboot - preboot_list: ['neigh_bgp_down:2', 'dut_bgp_down:3', 'dut_lag_down:2', 'neigh_lag_down:3'] + preboot_list: "{{ pre_list }}" preboot_files: "peer_dev_info,neigh_port_info"