From bdfa48308f58e7f8420d876ac5f8e5ff631cef2d Mon Sep 17 00:00:00 2001 From: chunangli Date: Sat, 12 Oct 2024 09:32:34 +0800 Subject: [PATCH 01/12] fix az login token expiration issue of azure pipelines polling test plan status Signed-off-by: Chun'ang Li --- .../run-test-elastictest-template.yml | 17 +---- .azure-pipelines/test_plan.py | 62 +++++++++++++------ 2 files changed, 46 insertions(+), 33 deletions(-) diff --git a/.azure-pipelines/run-test-elastictest-template.yml b/.azure-pipelines/run-test-elastictest-template.yml index 1c846d3fb12..595a6cb3136 100644 --- a/.azure-pipelines/run-test-elastictest-template.yml +++ b/.azure-pipelines/run-test-elastictest-template.yml @@ -331,12 +331,10 @@ steps: echo -n "$(FRONTEND_URL)/scheduler/testplan/$TEST_PLAN_ID " echo -e "\033[33mfor detailed test plan progress \033[0m" # When "EXECUTING" finish, it changes into "KVMDUMP", "FAILED", "CANCELLED" or "FINISHED" - echo "[test_plan.py] poll EXECUTING status, timeout 22 hours" - python ./.azure-pipelines/test_plan.py poll -i $TEST_PLAN_ID --expected-state EXECUTING --expected-result ${{ parameters.EXPECTED_RESULT }} --timeout 79200 + echo "[test_plan.py] poll EXECUTING status" + python ./.azure-pipelines/test_plan.py poll -i $TEST_PLAN_ID --expected-state EXECUTING --expected-result ${{ parameters.EXPECTED_RESULT }} RET=$? - # RC==2 means polling test plan timeout, do not consider it as failure so far - if [ $RET -ne 0 ] && [ $RET -ne 2 ]; then - echo "Test plan $TEST_PLAN_ID failed with RC $RET" + if [ $RET -ne 0 ]; then ((failure_count++)) fi done @@ -382,15 +380,6 @@ steps: inlineScript: | set -e echo "Try to cancel test plan $TEST_PLAN_ID, cancelling finished test plan has no effect." 
- - # If TEST_PLAN_TYPE is NIGHTLY, skip the cancel step - test_plan_type=${{ parameters.TEST_PLAN_TYPE }} - echo "TEST_PLAN_TYPE is $test_plan_type" - if [ "$test_plan_type" == "NIGHTLY" ]; then - echo "TEST_PLAN_TYPE is NIGHTLY, skip the cancel step as a dirty workaround for az login timeout issue" - exit 0 - fi - IFS=',' read -ra TEST_PLAN_ID_LIST <<< "$TEST_PLAN_ID_LIST_STRING" for TEST_PLAN_ID in "${TEST_PLAN_ID_LIST[@]}" do diff --git a/.azure-pipelines/test_plan.py b/.azure-pipelines/test_plan.py index 1d32bc5ec15..06de1215ad9 100644 --- a/.azure-pipelines/test_plan.py +++ b/.azure-pipelines/test_plan.py @@ -152,8 +152,9 @@ def parse_list_from_str(s): class TestPlanManager(object): - def __init__(self, url, frontend_url, client_id=None): - self.url = url + def __init__(self, scheduler_url, community_url, frontend_url, client_id=None): + self.scheduler_url = scheduler_url + self.community_url = community_url, self.frontend_url = frontend_url self.client_id = client_id self.with_auth = False @@ -216,7 +217,7 @@ def get_token(self): def create(self, topology, test_plan_name="my_test_plan", deploy_mg_extra_params="", kvm_build_id="", min_worker=None, max_worker=None, pr_id="unknown", output=None, common_extra_params="", **kwargs): - tp_url = "{}/test_plan".format(self.url) + tp_url = "{}/test_plan".format(self.scheduler_url) testbed_name = parse_list_from_str(kwargs.get("testbed_name", None)) image_url = kwargs.get("image_url", None) hwsku = kwargs.get("hwsku", None) @@ -348,7 +349,7 @@ def create(self, topology, test_plan_name="my_test_plan", deploy_mg_extra_params def cancel(self, test_plan_id): - tp_url = "{}/test_plan/{}".format(self.url, test_plan_id) + tp_url = "{}/test_plan/{}".format(self.scheduler_url, test_plan_id) cancel_url = "{}/cancel".format(tp_url) print("Cancelling test plan at {}".format(cancel_url)) @@ -378,27 +379,49 @@ def poll(self, test_plan_id, interval=60, timeout=-1, expected_state="", expecte .format(self.frontend_url, test_plan_id)) 
print("Polling interval: {} seconds".format(interval)) - poll_url = "{}/test_plan/{}".format(self.url, test_plan_id) + poll_url = "{}/test_plan/{}".format(self.scheduler_url, test_plan_id) + poll_url_no_auth = "{}/test_plan/{}".format(self.community_url, test_plan_id) headers = { "Content-Type": "application/json" } start_time = time.time() http_exception_times = 0 + http_exception_times_no_auth = 0 + failed_poll_auth_url = False while timeout < 0 or (time.time() - start_time) < timeout: - try: - if self.with_auth: - headers["Authorization"] = "Bearer {}".format(self.get_token()) - resp = requests.get(poll_url, headers=headers, timeout=10).json() - except Exception as exception: - print("HTTP execute failure, url: {}, raw_resp: {}, exception: {}".format(poll_url, resp, - str(exception))) - http_exception_times = http_exception_times + 1 - if http_exception_times >= TOLERATE_HTTP_EXCEPTION_TIMES: - raise Exception("HTTP execute failure, url: {}, raw_resp: {}, exception: {}" - .format(poll_url, resp, str(exception))) - else: - time.sleep(interval) - continue + # To make the transition smoother, first try to access the original API + if not failed_poll_auth_url: + try: + if self.with_auth: + headers["Authorization"] = "Bearer {}".format(self.get_token()) + resp = requests.get(poll_url, headers=headers, timeout=10).json() + except Exception as exception: + print("HTTP execute failure, url: {}, raw_resp: {}, exception: {}".format(poll_url, resp, + str(exception))) + http_exception_times = http_exception_times + 1 + if http_exception_times >= TOLERATE_HTTP_EXCEPTION_TIMES: + failed_poll_auth_url = True + else: + time.sleep(interval) + continue + + # If failed on poll auth url(most likely token has expired), try with no-auth url + else: + try: + resp = requests.get(poll_url_no_auth, headers={"Content-Type": "application/json"}, + timeout=10).json() + except Exception as e: + print("HTTP execute failure, url: {}, raw_resp: {}, exception: {}".format(poll_url_no_auth, resp, 
+ repr(e))) + http_exception_times_no_auth = http_exception_times_no_auth + 1 + if http_exception_times_no_auth >= TOLERATE_HTTP_EXCEPTION_TIMES: + raise Exception( + "HTTP execute failure, url: {}, raw_resp: {}, exception: {}".format(poll_url_no_auth, resp, + repr(e))) + else: + time.sleep(interval) + continue + if not resp["success"]: raise Exception("Query test plan at {} failed with error: {}".format(poll_url, resp["errmsg"])) @@ -911,6 +934,7 @@ def poll(self, test_plan_id, interval=60, timeout=-1, expected_state="", expecte env = { "elastictest_scheduler_backend_url": os.environ.get("ELASTICTEST_SCHEDULER_BACKEND_URL"), + "elastictest_community_url": os.environ.get("ELASTICTEST_COMMUNITY_URL"), "client_id": os.environ.get("ELASTICTEST_MSAL_CLIENT_ID"), "frontend_url": os.environ.get("ELASTICTEST_FRONTEND_URL", "https://elastictest.org"), } From 31990bd6f91ba989dd48418c3df095f36080fb42 Mon Sep 17 00:00:00 2001 From: chunangli Date: Sat, 12 Oct 2024 10:45:48 +0800 Subject: [PATCH 02/12] improve test plan progress digit display Signed-off-by: Chun'ang Li --- .azure-pipelines/test_plan.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.azure-pipelines/test_plan.py b/.azure-pipelines/test_plan.py index 06de1215ad9..cb5e40b074a 100644 --- a/.azure-pipelines/test_plan.py +++ b/.azure-pipelines/test_plan.py @@ -112,7 +112,7 @@ def __init__(self): super(ExecutingStatus, self).__init__(TestPlanStatus.EXECUTING) def print_logs(self, test_plan_id, resp_data, start_time): - print("Test plan id: {}, status: {}, progress: {}%, elapsed: {:.0f} seconds" + print("Test plan id: {}, status: {}, progress: {:.2f}%, elapsed: {:.0f} seconds" .format(test_plan_id, resp_data.get("status", None), resp_data.get("progress", 0) * 100, time.time() - start_time)) From 7b255eca83e4c19a26b207cba8bf7224dccca694 Mon Sep 17 00:00:00 2001 From: chunangli Date: Sat, 12 Oct 2024 15:17:14 +0800 Subject: [PATCH 03/12] fix bug: pass community URL to TestPlanManager Signed-off-by: Chun'ang Li --- 
.azure-pipelines/test_plan.py | 1 + 1 file changed, 1 insertion(+) diff --git a/.azure-pipelines/test_plan.py b/.azure-pipelines/test_plan.py index cb5e40b074a..03d585643aa 100644 --- a/.azure-pipelines/test_plan.py +++ b/.azure-pipelines/test_plan.py @@ -946,6 +946,7 @@ def poll(self, test_plan_id, interval=60, timeout=-1, expected_state="", expecte try: tp = TestPlanManager( env["elastictest_scheduler_backend_url"], + env["elastictest_community_url"], env["frontend_url"], env["client_id"]) From 4730ba2e51ae05c061750d57d0ba10917c71bb87 Mon Sep 17 00:00:00 2001 From: chunangli Date: Mon, 14 Oct 2024 11:42:15 +0800 Subject: [PATCH 04/12] fix bug: community_url stored as a tuple and missing poll response not handled Signed-off-by: Chun'ang Li --- .azure-pipelines/test_plan.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/.azure-pipelines/test_plan.py b/.azure-pipelines/test_plan.py index 03d585643aa..18cec9e09ea 100644 --- a/.azure-pipelines/test_plan.py +++ b/.azure-pipelines/test_plan.py @@ -154,7 +154,7 @@ class TestPlanManager(object): def __init__(self, scheduler_url, community_url, frontend_url, client_id=None): self.scheduler_url = scheduler_url - self.community_url = community_url, + self.community_url = community_url self.frontend_url = frontend_url self.client_id = client_id self.with_auth = False @@ -193,7 +193,7 @@ def get_token(self): cmd = 'az account get-access-token --resource {}'.format(self.client_id) attempt = 0 - while (attempt < MAX_GET_TOKEN_RETRY_TIMES): + while attempt < MAX_GET_TOKEN_RETRY_TIMES: try: stdout, _, _ = self.az_run(cmd) @@ -389,6 +389,7 @@ def poll(self, test_plan_id, interval=60, timeout=-1, expected_state="", expecte http_exception_times_no_auth = 0 failed_poll_auth_url = False while timeout < 0 or (time.time() - start_time) < timeout: + resp = None # To make the transition smoother, first try to access the original API if not failed_poll_auth_url: try: @@ -407,6 +408,7 @@ def poll(self, test_plan_id, interval=60, timeout=-1, expected_state="", expecte # If failed on 
poll auth url(most likely token has expired), try with no-auth url else: + print("Polling test plan status failed with auth url, try with no-auth url.") try: resp = requests.get(poll_url_no_auth, headers={"Content-Type": "application/json"}, timeout=10).json() @@ -422,6 +424,9 @@ def poll(self, test_plan_id, interval=60, timeout=-1, expected_state="", expecte time.sleep(interval) continue + if not resp: + raise Exception("Poll test plan status failed with request error, no response!") + if not resp["success"]: raise Exception("Query test plan at {} failed with error: {}".format(poll_url, resp["errmsg"])) From 163f8a7e83ac9852794b6623eaf3cfbfafa91cdc Mon Sep 17 00:00:00 2001 From: chunangli Date: Tue, 15 Oct 2024 12:21:14 +0800 Subject: [PATCH 05/12] fix bug: correct indentation of continue in poll auth retry path Signed-off-by: Chun'ang Li --- .azure-pipelines/test_plan.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.azure-pipelines/test_plan.py b/.azure-pipelines/test_plan.py index 18cec9e09ea..77075414d57 100644 --- a/.azure-pipelines/test_plan.py +++ b/.azure-pipelines/test_plan.py @@ -404,7 +404,7 @@ def poll(self, test_plan_id, interval=60, timeout=-1, expected_state="", expecte failed_poll_auth_url = True else: time.sleep(interval) - continue + continue # If failed on poll auth url(most likely token has expired), try with no-auth url else: From 6c5c5000af04bc7bbb8490793f865ce0c125a517 Mon Sep 17 00:00:00 2001 From: chunangli Date: Tue, 15 Oct 2024 12:43:52 +0800 Subject: [PATCH 06/12] fix bug: poll the get_test_plan_status endpoints Signed-off-by: Chun'ang Li --- .azure-pipelines/test_plan.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.azure-pipelines/test_plan.py b/.azure-pipelines/test_plan.py index 77075414d57..f4b07bb2d18 100644 --- a/.azure-pipelines/test_plan.py +++ b/.azure-pipelines/test_plan.py @@ -379,8 +379,8 @@ def poll(self, test_plan_id, interval=60, timeout=-1, expected_state="", expecte .format(self.frontend_url, test_plan_id)) print("Polling interval: {} seconds".format(interval)) - 
poll_url = "{}/test_plan/{}".format(self.scheduler_url, test_plan_id) - poll_url_no_auth = "{}/test_plan/{}".format(self.community_url, test_plan_id) + poll_url = "{}/test_plan/{}/get_test_plan_status".format(self.scheduler_url, test_plan_id) + poll_url_no_auth = "{}/get_test_plan_status/{}".format(self.community_url, test_plan_id) headers = { "Content-Type": "application/json" } From 71f8ef9d8b20ef73d389ca8b3f4c58ca1cd03187 Mon Sep 17 00:00:00 2001 From: chunangli Date: Wed, 16 Oct 2024 16:32:11 +0800 Subject: [PATCH 07/12] debug Signed-off-by: Chun'ang Li --- .../run-test-elastictest-template.yml | 16 +- .azure-pipelines/test_plan.py | 17 +- azure-pipelines.yml | 291 +++++++++--------- 3 files changed, 168 insertions(+), 156 deletions(-) diff --git a/.azure-pipelines/run-test-elastictest-template.yml b/.azure-pipelines/run-test-elastictest-template.yml index 595a6cb3136..78dae979a13 100644 --- a/.azure-pipelines/run-test-elastictest-template.yml +++ b/.azure-pipelines/run-test-elastictest-template.yml @@ -132,7 +132,7 @@ parameters: - name: MAX_RUN_TEST_MINUTES type: number - default: 480 + default: 1800 - name: KVM_IMAGE_BRANCH type: string @@ -166,13 +166,13 @@ steps: curl -u :$(MSSONIC-TOKEN) "${{ parameters.MGMT_URL }}&commitOrBranch=${{ parameters.MGMT_BRANCH }}&api-version=5.0-preview.1&path=.azure-pipelines%2Fpr_test_scripts.yaml" -o ./.azure-pipelines/pr_test_scripts.yaml fi displayName: "Download pr script" - - ${{ else }}: - - ${{ if ne(parameters.MGMT_BRANCH, 'master') }}: - - script: | - # Else, sonic-mgmt repo, if not master branch, need to download test_plan.py - set -ex - curl "https://raw.githubusercontent.com/sonic-net/sonic-mgmt/master/.azure-pipelines/test_plan.py" -o ./.azure-pipelines/test_plan.py - displayName: "Download test plan script" +# - ${{ else }}: +# - ${{ if ne(parameters.MGMT_BRANCH, 'master') }}: +# - script: | +# # Else, sonic-mgmt repo, if not master branch, need to download test_plan.py +# set -ex +# curl 
"https://raw.githubusercontent.com/sonic-net/sonic-mgmt/master/.azure-pipelines/test_plan.py" -o ./.azure-pipelines/test_plan.py +# displayName: "Download test plan script" - script: | # Check if azure cli is installed. If not, try to install it diff --git a/.azure-pipelines/test_plan.py b/.azure-pipelines/test_plan.py index f4b07bb2d18..9ce8f26ed23 100644 --- a/.azure-pipelines/test_plan.py +++ b/.azure-pipelines/test_plan.py @@ -307,7 +307,7 @@ def create(self, topology, test_plan_name="my_test_plan", deploy_mg_extra_params "specific_param": kwargs.get("specific_param", []), "affinity": affinity, "deploy_mg_param": deploy_mg_extra_params, - "max_execute_seconds": kwargs.get("max_execute_seconds", None), + "max_execute_seconds": 108000, "dump_kvm_if_fail": kwargs.get("dump_kvm_if_fail", False), }, "type": test_plan_type, @@ -374,7 +374,7 @@ def cancel(self, test_plan_id): print("Result of cancelling test plan at {}:".format(tp_url)) print(str(resp["data"])) - def poll(self, test_plan_id, interval=60, timeout=-1, expected_state="", expected_result=None): + def poll(self, test_plan_id, interval=1800, timeout=-1, expected_state="", expected_result=None): print("Polling progress and status of test plan at {}/scheduler/testplan/{}" .format(self.frontend_url, test_plan_id)) print("Polling interval: {} seconds".format(interval)) @@ -396,6 +396,8 @@ def poll(self, test_plan_id, interval=60, timeout=-1, expected_state="", expecte if self.with_auth: headers["Authorization"] = "Bearer {}".format(self.get_token()) resp = requests.get(poll_url, headers=headers, timeout=10).json() + print("request url: ", poll_url) + print("response: ", resp) except Exception as exception: print("HTTP execute failure, url: {}, raw_resp: {}, exception: {}".format(poll_url, resp, str(exception))) @@ -412,6 +414,8 @@ def poll(self, test_plan_id, interval=60, timeout=-1, expected_state="", expecte try: resp = requests.get(poll_url_no_auth, headers={"Content-Type": "application/json"}, 
timeout=10).json() + print("request url: ", poll_url_no_auth) + print("response: ", resp) except Exception as e: print("HTTP execute failure, url: {}, raw_resp: {}, exception: {}".format(poll_url_no_auth, resp, repr(e))) @@ -499,7 +503,14 @@ def poll(self, test_plan_id, interval=60, timeout=-1, expected_state="", expecte test_plan_id)) print("Current step status is {}".format(step_status)) - return + # Check if the run test step has been running for more than 24 hours + # Make run test to hit 24h token issue + if expected_state == "EXECUTING": + if time.time() - start_time > 24 * 3600: # 24 hours in seconds + print("Run test has been running for more than 24 hours.") + return + else: + return else: print("Current test plan state is {}, waiting for the expected state {}".format(current_tp_status, expected_state)) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 90917d89198..fb95cd3f59f 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -59,160 +59,161 @@ stages: jobs: - job: t0_elastictest displayName: "kvmtest-t0 by Elastictest" - timeoutInMinutes: 240 + timeoutInMinutes: 1800 continueOnError: false pool: sonic-ubuntu-1c steps: - template: .azure-pipelines/run-test-elastictest-template.yml parameters: TOPOLOGY: t0 - MIN_WORKER: $(T0_INSTANCE_NUM) - MAX_WORKER: $(T0_INSTANCE_NUM) + MIN_WORKER: 1 + MAX_WORKER: 1 KVM_IMAGE_BRANCH: $(BUILD_BRANCH) MGMT_BRANCH: $(BUILD_BRANCH) + MAX_RUN_TEST_MINUTES: 1800 - - job: t0_2vlans_elastictest - displayName: "kvmtest-t0-2vlans by Elastictest" - timeoutInMinutes: 240 - continueOnError: false - pool: sonic-ubuntu-1c - steps: - - template: .azure-pipelines/run-test-elastictest-template.yml - parameters: - TOPOLOGY: t0 - TEST_SET: t0-2vlans - MIN_WORKER: $(T0_2VLANS_INSTANCE_NUM) - MAX_WORKER: $(T0_2VLANS_INSTANCE_NUM) - DEPLOY_MG_EXTRA_PARAMS: "-e vlan_config=two_vlan_a" - KVM_IMAGE_BRANCH: $(BUILD_BRANCH) - MGMT_BRANCH: $(BUILD_BRANCH) - - - job: t1_lag_elastictest - displayName: "kvmtest-t1-lag by 
Elastictest" - timeoutInMinutes: 240 - continueOnError: false - pool: sonic-ubuntu-1c - steps: - - template: .azure-pipelines/run-test-elastictest-template.yml - parameters: - TOPOLOGY: t1-lag - MIN_WORKER: $(T1_LAG_INSTANCE_NUM) - MAX_WORKER: $(T1_LAG_INSTANCE_NUM) - KVM_IMAGE_BRANCH: $(BUILD_BRANCH) - MGMT_BRANCH: $(BUILD_BRANCH) - - - job: dualtor_elastictest - displayName: "kvmtest-dualtor-t0 by Elastictest" - timeoutInMinutes: 240 - continueOnError: false - pool: sonic-ubuntu-1c - steps: - - template: .azure-pipelines/run-test-elastictest-template.yml - parameters: - TOPOLOGY: dualtor - MIN_WORKER: $(T0_DUALTOR_INSTANCE_NUM) - MAX_WORKER: $(T0_DUALTOR_INSTANCE_NUM) - COMMON_EXTRA_PARAMS: "--disable_loganalyzer " - KVM_IMAGE_BRANCH: $(BUILD_BRANCH) - MGMT_BRANCH: $(BUILD_BRANCH) - - - job: multi_asic_elastictest - displayName: "kvmtest-multi-asic-t1-lag by Elastictest" - timeoutInMinutes: 240 - continueOnError: false - pool: sonic-ubuntu-1c - steps: - - template: .azure-pipelines/run-test-elastictest-template.yml - parameters: - TOPOLOGY: t1-8-lag - TEST_SET: multi-asic-t1-lag - MIN_WORKER: $(MULTI_ASIC_INSTANCE_NUM) - MAX_WORKER: $(MULTI_ASIC_INSTANCE_NUM) - NUM_ASIC: 4 - KVM_IMAGE_BRANCH: $(BUILD_BRANCH) - MGMT_BRANCH: $(BUILD_BRANCH) - - - job: sonic_t0_elastictest - displayName: "kvmtest-t0-sonic by Elastictest" - timeoutInMinutes: 240 - continueOnError: false - pool: sonic-ubuntu-1c - steps: - - template: .azure-pipelines/run-test-elastictest-template.yml - parameters: - TOPOLOGY: t0-64-32 - MIN_WORKER: $(T0_SONIC_INSTANCE_NUM) - MAX_WORKER: $(T0_SONIC_INSTANCE_NUM) - TEST_SET: t0-sonic - COMMON_EXTRA_PARAMS: "--neighbor_type=sonic " - VM_TYPE: vsonic - KVM_IMAGE_BRANCH: $(BUILD_BRANCH) - MGMT_BRANCH: $(BUILD_BRANCH) - - - job: dpu_elastictest - displayName: "kvmtest-dpu by Elastictest" - timeoutInMinutes: 240 - continueOnError: false - pool: sonic-ubuntu-1c - steps: - - template: .azure-pipelines/run-test-elastictest-template.yml - parameters: - TOPOLOGY: 
dpu - MIN_WORKER: $(T0_SONIC_INSTANCE_NUM) - MAX_WORKER: $(T0_SONIC_INSTANCE_NUM) - KVM_IMAGE_BRANCH: $(BUILD_BRANCH) - MGMT_BRANCH: $(BUILD_BRANCH) - - - job: onboarding_elastictest_t0 - displayName: "onboarding t0 testcases by Elastictest - optional" - timeoutInMinutes: 240 - continueOnError: true - pool: sonic-ubuntu-1c - steps: - - template: .azure-pipelines/run-test-elastictest-template.yml - parameters: - TOPOLOGY: t0 - STOP_ON_FAILURE: "False" - RETRY_TIMES: 0 - MIN_WORKER: $(T0_ONBOARDING_SONIC_INSTANCE_NUM) - MAX_WORKER: $(T0_ONBOARDING_SONIC_INSTANCE_NUM) - KVM_IMAGE_BRANCH: $(BUILD_BRANCH) - MGMT_BRANCH: $(BUILD_BRANCH) - TEST_SET: onboarding_t0 - - - job: onboarding_elastictest_t1 - displayName: "onboarding t1 testcases by Elastictest - optional" - timeoutInMinutes: 240 - continueOnError: true - pool: sonic-ubuntu-1c - steps: - - template: .azure-pipelines/run-test-elastictest-template.yml - parameters: - TOPOLOGY: t1-lag - STOP_ON_FAILURE: "False" - RETRY_TIMES: 0 - MIN_WORKER: $(T1_LAG_ONBOARDING_INSTANCE_NUM) - MAX_WORKER: $(T1_LAG_ONBOARDING_INSTANCE_NUM) - KVM_IMAGE_BRANCH: $(BUILD_BRANCH) - MGMT_BRANCH: $(BUILD_BRANCH) - TEST_SET: onboarding_t1 - - - job: onboarding_elastictest_dualtor - displayName: "onboarding dualtor testcases by Elastictest - optional" - timeoutInMinutes: 240 - continueOnError: true - pool: sonic-ubuntu-1c - steps: - - template: .azure-pipelines/run-test-elastictest-template.yml - parameters: - TOPOLOGY: dualtor - STOP_ON_FAILURE: "False" - RETRY_TIMES: 0 - MIN_WORKER: $(T0_DUALTOR_INSTANCE_NUM) - MAX_WORKER: $(T0_DUALTOR_INSTANCE_NUM) - KVM_IMAGE_BRANCH: $(BUILD_BRANCH) - MGMT_BRANCH: $(BUILD_BRANCH) - TEST_SET: onboarding_dualtor +# - job: t0_2vlans_elastictest +# displayName: "kvmtest-t0-2vlans by Elastictest" +# timeoutInMinutes: 240 +# continueOnError: false +# pool: sonic-ubuntu-1c +# steps: +# - template: .azure-pipelines/run-test-elastictest-template.yml +# parameters: +# TOPOLOGY: t0 +# TEST_SET: t0-2vlans +# 
MIN_WORKER: $(T0_2VLANS_INSTANCE_NUM) +# MAX_WORKER: $(T0_2VLANS_INSTANCE_NUM) +# DEPLOY_MG_EXTRA_PARAMS: "-e vlan_config=two_vlan_a" +# KVM_IMAGE_BRANCH: $(BUILD_BRANCH) +# MGMT_BRANCH: $(BUILD_BRANCH) +# +# - job: t1_lag_elastictest +# displayName: "kvmtest-t1-lag by Elastictest" +# timeoutInMinutes: 240 +# continueOnError: false +# pool: sonic-ubuntu-1c +# steps: +# - template: .azure-pipelines/run-test-elastictest-template.yml +# parameters: +# TOPOLOGY: t1-lag +# MIN_WORKER: $(T1_LAG_INSTANCE_NUM) +# MAX_WORKER: $(T1_LAG_INSTANCE_NUM) +# KVM_IMAGE_BRANCH: $(BUILD_BRANCH) +# MGMT_BRANCH: $(BUILD_BRANCH) +# +# - job: dualtor_elastictest +# displayName: "kvmtest-dualtor-t0 by Elastictest" +# timeoutInMinutes: 240 +# continueOnError: false +# pool: sonic-ubuntu-1c +# steps: +# - template: .azure-pipelines/run-test-elastictest-template.yml +# parameters: +# TOPOLOGY: dualtor +# MIN_WORKER: $(T0_DUALTOR_INSTANCE_NUM) +# MAX_WORKER: $(T0_DUALTOR_INSTANCE_NUM) +# COMMON_EXTRA_PARAMS: "--disable_loganalyzer " +# KVM_IMAGE_BRANCH: $(BUILD_BRANCH) +# MGMT_BRANCH: $(BUILD_BRANCH) +# +# - job: multi_asic_elastictest +# displayName: "kvmtest-multi-asic-t1-lag by Elastictest" +# timeoutInMinutes: 240 +# continueOnError: false +# pool: sonic-ubuntu-1c +# steps: +# - template: .azure-pipelines/run-test-elastictest-template.yml +# parameters: +# TOPOLOGY: t1-8-lag +# TEST_SET: multi-asic-t1-lag +# MIN_WORKER: $(MULTI_ASIC_INSTANCE_NUM) +# MAX_WORKER: $(MULTI_ASIC_INSTANCE_NUM) +# NUM_ASIC: 4 +# KVM_IMAGE_BRANCH: $(BUILD_BRANCH) +# MGMT_BRANCH: $(BUILD_BRANCH) +# +# - job: sonic_t0_elastictest +# displayName: "kvmtest-t0-sonic by Elastictest" +# timeoutInMinutes: 240 +# continueOnError: false +# pool: sonic-ubuntu-1c +# steps: +# - template: .azure-pipelines/run-test-elastictest-template.yml +# parameters: +# TOPOLOGY: t0-64-32 +# MIN_WORKER: $(T0_SONIC_INSTANCE_NUM) +# MAX_WORKER: $(T0_SONIC_INSTANCE_NUM) +# TEST_SET: t0-sonic +# COMMON_EXTRA_PARAMS: "--neighbor_type=sonic " +# 
VM_TYPE: vsonic +# KVM_IMAGE_BRANCH: $(BUILD_BRANCH) +# MGMT_BRANCH: $(BUILD_BRANCH) +# +# - job: dpu_elastictest +# displayName: "kvmtest-dpu by Elastictest" +# timeoutInMinutes: 240 +# continueOnError: false +# pool: sonic-ubuntu-1c +# steps: +# - template: .azure-pipelines/run-test-elastictest-template.yml +# parameters: +# TOPOLOGY: dpu +# MIN_WORKER: $(T0_SONIC_INSTANCE_NUM) +# MAX_WORKER: $(T0_SONIC_INSTANCE_NUM) +# KVM_IMAGE_BRANCH: $(BUILD_BRANCH) +# MGMT_BRANCH: $(BUILD_BRANCH) +# +# - job: onboarding_elastictest_t0 +# displayName: "onboarding t0 testcases by Elastictest - optional" +# timeoutInMinutes: 240 +# continueOnError: true +# pool: sonic-ubuntu-1c +# steps: +# - template: .azure-pipelines/run-test-elastictest-template.yml +# parameters: +# TOPOLOGY: t0 +# STOP_ON_FAILURE: "False" +# RETRY_TIMES: 0 +# MIN_WORKER: $(T0_ONBOARDING_SONIC_INSTANCE_NUM) +# MAX_WORKER: $(T0_ONBOARDING_SONIC_INSTANCE_NUM) +# KVM_IMAGE_BRANCH: $(BUILD_BRANCH) +# MGMT_BRANCH: $(BUILD_BRANCH) +# TEST_SET: onboarding_t0 +# +# - job: onboarding_elastictest_t1 +# displayName: "onboarding t1 testcases by Elastictest - optional" +# timeoutInMinutes: 240 +# continueOnError: true +# pool: sonic-ubuntu-1c +# steps: +# - template: .azure-pipelines/run-test-elastictest-template.yml +# parameters: +# TOPOLOGY: t1-lag +# STOP_ON_FAILURE: "False" +# RETRY_TIMES: 0 +# MIN_WORKER: $(T1_LAG_ONBOARDING_INSTANCE_NUM) +# MAX_WORKER: $(T1_LAG_ONBOARDING_INSTANCE_NUM) +# KVM_IMAGE_BRANCH: $(BUILD_BRANCH) +# MGMT_BRANCH: $(BUILD_BRANCH) +# TEST_SET: onboarding_t1 +# +# - job: onboarding_elastictest_dualtor +# displayName: "onboarding dualtor testcases by Elastictest - optional" +# timeoutInMinutes: 240 +# continueOnError: true +# pool: sonic-ubuntu-1c +# steps: +# - template: .azure-pipelines/run-test-elastictest-template.yml +# parameters: +# TOPOLOGY: dualtor +# STOP_ON_FAILURE: "False" +# RETRY_TIMES: 0 +# MIN_WORKER: $(T0_DUALTOR_INSTANCE_NUM) +# MAX_WORKER: $(T0_DUALTOR_INSTANCE_NUM) +# 
KVM_IMAGE_BRANCH: $(BUILD_BRANCH) +# MGMT_BRANCH: $(BUILD_BRANCH) +# TEST_SET: onboarding_dualtor # - job: wan_elastictest # displayName: "kvmtest-wan by Elastictest" From 5158f8a59a2e72715c5ebf2e40b7789a55c2256d Mon Sep 17 00:00:00 2001 From: chunangli Date: Wed, 16 Oct 2024 16:49:49 +0800 Subject: [PATCH 08/12] debug Signed-off-by: Chun'ang Li --- azure-pipelines.yml | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index fb95cd3f59f..9d1212b2638 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -23,27 +23,27 @@ trigger: none name: $(TeamProject)_$(Build.DefinitionName)_$(SourceBranchName)_$(Date:yyyyMMdd)$(Rev:.r) stages: -- stage: Pre_test - jobs: - - job: static_analysis - displayName: "Static Analysis" - timeoutInMinutes: 10 - continueOnError: false - pool: sonic-ubuntu-1c - steps: - - template: .azure-pipelines/pre-commit-check.yml - - - job: validate_test_cases - displayName: "Validate Test Cases" - timeoutInMinutes: 20 - continueOnError: false - pool: sonic-common - steps: - - template: .azure-pipelines/pytest-collect-only.yml +#- stage: Pre_test +# jobs: +# - job: static_analysis +# displayName: "Static Analysis" +# timeoutInMinutes: 10 +# continueOnError: false +# pool: sonic-ubuntu-1c +# steps: +# - template: .azure-pipelines/pre-commit-check.yml +# +# - job: validate_test_cases +# displayName: "Validate Test Cases" +# timeoutInMinutes: 20 +# continueOnError: false +# pool: sonic-common +# steps: +# - template: .azure-pipelines/pytest-collect-only.yml - stage: Test - dependsOn: Pre_test - condition: and(succeeded(), in(dependencies.Pre_test.result, 'Succeeded')) +# dependsOn: Pre_test +# condition: and(succeeded(), in(dependencies.Pre_test.result, 'Succeeded')) variables: - group: SONiC-Elastictest - name: inventory From b85dfb0b03bbd720d1a6ae6ca2e8ac0d9a122a4e Mon Sep 17 00:00:00 2001 From: chunangli Date: Wed, 16 Oct 2024 17:20:06 +0800 
Subject: [PATCH 09/12] debug Signed-off-by: Chun'ang Li --- .azure-pipelines/test_plan.py | 2 +- azure-pipelines.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.azure-pipelines/test_plan.py b/.azure-pipelines/test_plan.py index 9ce8f26ed23..cda4731bd87 100644 --- a/.azure-pipelines/test_plan.py +++ b/.azure-pipelines/test_plan.py @@ -259,7 +259,7 @@ def create(self, topology, test_plan_name="my_test_plan", deploy_mg_extra_params # If triggered by mgmt repo, use pull request id as the code base sonic_mgmt_pull_request_id = "" if MGMT_REPO_FLAG in kwargs.get("source_repo"): - sonic_mgmt_pull_request_id = pr_id + sonic_mgmt_pull_request_id = 15016 # If triggered by buildimage repo, use image built from the buildId kvm_image_build_id = kvm_build_id diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 9d1212b2638..79ee497a093 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -60,7 +60,7 @@ stages: - job: t0_elastictest displayName: "kvmtest-t0 by Elastictest" timeoutInMinutes: 1800 - continueOnError: false + continueOnError: true pool: sonic-ubuntu-1c steps: - template: .azure-pipelines/run-test-elastictest-template.yml From ce0118bc9b518faf43a3421659de7feb14c15e84 Mon Sep 17 00:00:00 2001 From: chunangli Date: Wed, 16 Oct 2024 17:27:16 +0800 Subject: [PATCH 10/12] debug Signed-off-by: Chun'ang Li --- .azure-pipelines/test_plan.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.azure-pipelines/test_plan.py b/.azure-pipelines/test_plan.py index cda4731bd87..eb72e2533b0 100644 --- a/.azure-pipelines/test_plan.py +++ b/.azure-pipelines/test_plan.py @@ -267,6 +267,8 @@ def create(self, topology, test_plan_name="my_test_plan", deploy_mg_extra_params if BUILDIMAGE_REPO_FLAG in kwargs.get("source_repo"): kvm_image_build_id = build_id kvm_image_branch = "" + + print(kvm_image_branch) affinity = json.loads(kwargs.get("affinity", "[]")) payload = { "name": test_plan_name, @@ -296,7 +298,7 @@ def create(self, 
topology, test_plan_name="my_test_plan", deploy_mg_extra_params "upgrade_image_param": kwargs.get("upgrade_image_param", None), "release": "", "kvm_image_build_id": kvm_image_build_id, - "kvm_image_branch": kvm_image_branch + "kvm_image_branch": "master" }, "sonic_mgmt": { "repo_url": sonic_mgmt_repo_url, From 56b20797e1b62c2a32a2e82f3d69e0da3875dffc Mon Sep 17 00:00:00 2001 From: chunangli Date: Wed, 16 Oct 2024 21:40:07 +0800 Subject: [PATCH 11/12] debug Signed-off-by: Chun'ang Li --- .azure-pipelines/test_plan.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.azure-pipelines/test_plan.py b/.azure-pipelines/test_plan.py index eb72e2533b0..9fca6254d37 100644 --- a/.azure-pipelines/test_plan.py +++ b/.azure-pipelines/test_plan.py @@ -284,7 +284,7 @@ def create(self, topology, test_plan_name="my_test_plan", deploy_mg_extra_params "lock_wait_timeout_seconds": kwargs.get("lock_wait_timeout_seconds", None), }, "test_option": { - "stop_on_failure": kwargs.get("stop_on_failure", True), + "stop_on_failure": False, "retry_times": kwargs.get("retry_times", 2), "test_cases": { "features": features, From da52927de7c1872b2def4c14cb8adc377e8089a2 Mon Sep 17 00:00:00 2001 From: chunangli Date: Wed, 16 Oct 2024 21:47:51 +0800 Subject: [PATCH 12/12] debug Signed-off-by: Chun'ang Li --- .azure-pipelines/pr_test_scripts.yaml | 1 + .azure-pipelines/test_plan.py | 2 +- tests/autorestart/test_sleep_2_hours.py | 14 ++++++++++++++ 3 files changed, 16 insertions(+), 1 deletion(-) create mode 100644 tests/autorestart/test_sleep_2_hours.py diff --git a/.azure-pipelines/pr_test_scripts.yaml b/.azure-pipelines/pr_test_scripts.yaml index 560a39edb15..5e1203b553e 100644 --- a/.azure-pipelines/pr_test_scripts.yaml +++ b/.azure-pipelines/pr_test_scripts.yaml @@ -11,6 +11,7 @@ t0: - arp/test_wr_arp.py - arp/test_unknown_mac.py - autorestart/test_container_autorestart.py + - autorestart/test_sleep_2_hours.py - bgp/test_bgp_dual_asn.py - bgp/test_bgp_fact.py - 
bgp/test_bgp_gr_helper.py diff --git a/.azure-pipelines/test_plan.py b/.azure-pipelines/test_plan.py index 9fca6254d37..2a0ee43d51e 100644 --- a/.azure-pipelines/test_plan.py +++ b/.azure-pipelines/test_plan.py @@ -919,7 +919,7 @@ def poll(self, test_plan_id, interval=1800, timeout=-1, expected_state="", expec "--interval", type=int, required=False, - default=60, + default=600, dest="interval", help="Polling interval. Default 60 seconds." ) diff --git a/tests/autorestart/test_sleep_2_hours.py b/tests/autorestart/test_sleep_2_hours.py new file mode 100644 index 00000000000..87f58e929ac --- /dev/null +++ b/tests/autorestart/test_sleep_2_hours.py @@ -0,0 +1,14 @@ +import time + +import pytest + +pytestmark = [ + pytest.mark.topology('any'), + pytest.mark.device_type('vs') +] + + +def test_bgp_facts(): + print("start to sleep 2 hours...") + time.sleep(2 * 60 * 60) + print("end to sleep 2 hours.")