From 9801c0f57736290186b3a87d8e3cde11a2b9fff6 Mon Sep 17 00:00:00 2001 From: chunangli Date: Thu, 31 Oct 2024 14:53:07 +0800 Subject: [PATCH 1/7] enhance elastictest template, use bash script instead of azcli task, improve and fix azlogin and get token when requesting APIs Signed-off-by: Chun'ang Li --- .../run-test-elastictest-template.yml | 336 ++++++++---------- .azure-pipelines/test_plan.py | 319 ++++++++--------- 2 files changed, 302 insertions(+), 353 deletions(-) diff --git a/.azure-pipelines/run-test-elastictest-template.yml b/.azure-pipelines/run-test-elastictest-template.yml index 595a6cb3136..ebd09be86b2 100644 --- a/.azure-pipelines/run-test-elastictest-template.yml +++ b/.azure-pipelines/run-test-elastictest-template.yml @@ -184,206 +184,176 @@ steps: fi displayName: "Install azure-cli" - - task: AzureCLI@2 - inputs: - azureSubscription: "SONiC-Automation" - scriptType: 'bash' - scriptLocation: 'inlineScript' - inlineScript: | - set -e - - pip install PyYAML - - rm -f new_test_plan_id.txt - - python ./.azure-pipelines/test_plan.py create \ - -t ${{ parameters.TOPOLOGY }} \ - -o new_test_plan_id.txt \ - --min-worker ${{ parameters.MIN_WORKER }} \ - --max-worker ${{ parameters.MAX_WORKER }} \ - --lock-wait-timeout-seconds ${{ parameters.LOCK_WAIT_TIMEOUT_SECONDS }} \ - --test-set ${{ parameters.TEST_SET }} \ - --kvm-build-id $(KVM_BUILD_ID) \ - --kvm-image-branch "${{ parameters.KVM_IMAGE_BRANCH }}" \ - --deploy-mg-extra-params="${{ parameters.DEPLOY_MG_EXTRA_PARAMS }}" \ - --common-extra-params="${{ parameters.COMMON_EXTRA_PARAMS }}" \ - --vm-type ${{ parameters.VM_TYPE }} --num-asic ${{ parameters.NUM_ASIC }} \ - --ptf_image_tag ${{ parameters.PTF_IMAGE_TAG }} \ - --image_url ${{ parameters.IMAGE_URL }} \ - --upgrade-image-param="${{ parameters.UPGRADE_IMAGE_PARAM }}" \ - --hwsku ${{ parameters.HWSKU }} \ - --test-plan-type ${{ parameters.TEST_PLAN_TYPE }} \ - --platform ${{ parameters.PLATFORM }} \ - --testbed-name "${{ parameters.TESTBED_NAME }}" \ - 
--scripts "${{ parameters.SCRIPTS }}" \ - --features "${{ parameters.FEATURES }}" \ - --scripts-exclude "${{ parameters.SCRIPTS_EXCLUDE }}" \ - --features-exclude "${{ parameters.FEATURES_EXCLUDE }}" \ - --specific-param='${{ parameters.SPECIFIC_PARAM }}' \ - --affinity='${{ parameters.AFFINITY }}' \ - --build-reason ${{ parameters.BUILD_REASON }} \ - --repo-name ${{ parameters.REPO_NAME }} \ - --mgmt-branch ${{ parameters.MGMT_BRANCH }} \ - --stop-on-failure ${{ parameters.STOP_ON_FAILURE }} \ - --retry-times ${{ parameters.RETRY_TIMES }} \ - --dump-kvm-if-fail ${{ parameters.DUMP_KVM_IF_FAIL }} \ - --requester "${{ parameters.REQUESTER }}" \ - --max-execute-seconds $((${{ parameters.MAX_RUN_TEST_MINUTES }} * 60)) \ - --test-plan-num ${{ parameters.TEST_PLAN_NUM }} - - TEST_PLAN_ID_LIST=( $(cat new_test_plan_id.txt) ) - echo -e "\033[33mSONiC PR system-level test is powered by SONiC Elastictest, for any issue, please send email to sonicelastictest@microsoft.com \033[0m" - for TEST_PLAN_ID in "${TEST_PLAN_ID_LIST[@]}" - do - echo "Created test plan $TEST_PLAN_ID" - echo -e -n "\033[33mPlease visit Elastictest page \033[0m" - echo -n "$(FRONTEND_URL)/scheduler/testplan/$TEST_PLAN_ID " - echo -e "\033[33mfor detailed test plan progress \033[0m" - done - TEST_PLAN_ID_LIST_STRING=$(printf "%s," "${TEST_PLAN_ID_LIST[@]}") - TEST_PLAN_ID_LIST_STRING=${TEST_PLAN_ID_LIST_STRING%,} - echo "##vso[task.setvariable variable=TEST_PLAN_ID_LIST_STRING]$TEST_PLAN_ID_LIST_STRING" + - script: | + set -e + + pip install PyYAML + + rm -f new_test_plan_id.txt + + python ./.azure-pipelines/test_plan.py create \ + -t ${{ parameters.TOPOLOGY }} \ + -o new_test_plan_id.txt \ + --min-worker ${{ parameters.MIN_WORKER }} \ + --max-worker ${{ parameters.MAX_WORKER }} \ + --lock-wait-timeout-seconds ${{ parameters.LOCK_WAIT_TIMEOUT_SECONDS }} \ + --test-set ${{ parameters.TEST_SET }} \ + --kvm-build-id $(KVM_BUILD_ID) \ + --kvm-image-branch "${{ parameters.KVM_IMAGE_BRANCH }}" \ + 
--deploy-mg-extra-params="${{ parameters.DEPLOY_MG_EXTRA_PARAMS }}" \ + --common-extra-params="${{ parameters.COMMON_EXTRA_PARAMS }}" \ + --vm-type ${{ parameters.VM_TYPE }} --num-asic ${{ parameters.NUM_ASIC }} \ + --ptf_image_tag ${{ parameters.PTF_IMAGE_TAG }} \ + --image_url ${{ parameters.IMAGE_URL }} \ + --upgrade-image-param="${{ parameters.UPGRADE_IMAGE_PARAM }}" \ + --hwsku ${{ parameters.HWSKU }} \ + --test-plan-type ${{ parameters.TEST_PLAN_TYPE }} \ + --platform ${{ parameters.PLATFORM }} \ + --testbed-name "${{ parameters.TESTBED_NAME }}" \ + --scripts "${{ parameters.SCRIPTS }}" \ + --features "${{ parameters.FEATURES }}" \ + --scripts-exclude "${{ parameters.SCRIPTS_EXCLUDE }}" \ + --features-exclude "${{ parameters.FEATURES_EXCLUDE }}" \ + --specific-param='${{ parameters.SPECIFIC_PARAM }}' \ + --affinity='${{ parameters.AFFINITY }}' \ + --build-reason ${{ parameters.BUILD_REASON }} \ + --repo-name ${{ parameters.REPO_NAME }} \ + --mgmt-branch ${{ parameters.MGMT_BRANCH }} \ + --stop-on-failure ${{ parameters.STOP_ON_FAILURE }} \ + --retry-times ${{ parameters.RETRY_TIMES }} \ + --dump-kvm-if-fail ${{ parameters.DUMP_KVM_IF_FAIL }} \ + --requester "${{ parameters.REQUESTER }}" \ + --max-execute-seconds $((${{ parameters.MAX_RUN_TEST_MINUTES }} * 60)) \ + --test-plan-num ${{ parameters.TEST_PLAN_NUM }} + + TEST_PLAN_ID_LIST=( $(cat new_test_plan_id.txt) ) + echo -e "\033[33mSONiC PR system-level test is powered by SONiC Elastictest, for any issue, please send email to sonicelastictest@microsoft.com \033[0m" + for TEST_PLAN_ID in "${TEST_PLAN_ID_LIST[@]}" + do + echo "Created test plan $TEST_PLAN_ID" + echo -e -n "\033[33mPlease visit Elastictest page \033[0m" + echo -n "$(ELASTICTEST_FRONTEND_URL)/scheduler/testplan/$TEST_PLAN_ID " + echo -e "\033[33mfor detailed test plan progress \033[0m" + done + TEST_PLAN_ID_LIST_STRING=$(printf "%s," "${TEST_PLAN_ID_LIST[@]}") + TEST_PLAN_ID_LIST_STRING=${TEST_PLAN_ID_LIST_STRING%,} + echo 
"##vso[task.setvariable variable=TEST_PLAN_ID_LIST_STRING]$TEST_PLAN_ID_LIST_STRING" displayName: "Trigger test" - - task: AzureCLI@2 - inputs: - azureSubscription: "SONiC-Automation" - scriptType: 'bash' - scriptLocation: 'inlineScript' - inlineScript: | - set -o - echo "Lock testbed" - - echo -e "\033[33mSONiC PR system-level test is powered by SONiC Elastictest, for any issue, please send email to sonicelastictest@microsoft.com \033[0m" - IFS=',' read -ra TEST_PLAN_ID_LIST <<< "$TEST_PLAN_ID_LIST_STRING" - failure_count=0 - for TEST_PLAN_ID in "${TEST_PLAN_ID_LIST[@]}" - do - echo -e -n "\033[33mPlease visit Elastictest page \033[0m" - echo -n "$(FRONTEND_URL)/scheduler/testplan/$TEST_PLAN_ID " - echo -e "\033[33mfor detailed test plan progress \033[0m" - # When "LOCK_TESTBED" finish, it changes into "PREPARE_TESTBED" - echo "[test_plan.py] poll LOCK_TESTBED status" - python ./.azure-pipelines/test_plan.py poll -i $TEST_PLAN_ID --expected-state LOCK_TESTBED - RET=$? - if [ $RET -ne 0 ]; then - ((failure_count++)) - fi - done - - if [ $failure_count -eq ${#TEST_PLAN_ID_LIST[@]} ]; then - echo "All testplan failed, cancel following steps" - exit 3 - fi + - script: | + set -o + echo "Lock testbed" + + echo -e "\033[33mSONiC PR system-level test is powered by SONiC Elastictest, for any issue, please send email to sonicelastictest@microsoft.com \033[0m" + IFS=',' read -ra TEST_PLAN_ID_LIST <<< "$TEST_PLAN_ID_LIST_STRING" + failure_count=0 + for TEST_PLAN_ID in "${TEST_PLAN_ID_LIST[@]}" + do + echo -e -n "\033[33mPlease visit Elastictest page \033[0m" + echo -n "$(ELASTICTEST_FRONTEND_URL)/scheduler/testplan/$TEST_PLAN_ID " + echo -e "\033[33mfor detailed test plan progress \033[0m" + # When "LOCK_TESTBED" finish, it changes into "PREPARE_TESTBED" + echo "[test_plan.py] poll LOCK_TESTBED status" + python ./.azure-pipelines/test_plan.py poll -i $TEST_PLAN_ID --expected-state LOCK_TESTBED + RET=$? 
+ if [ $RET -ne 0 ]; then + ((failure_count++)) + fi + done + + if [ $failure_count -eq ${#TEST_PLAN_ID_LIST[@]} ]; then + echo "All testplan failed, cancel following steps" + exit 3 + fi displayName: "Lock testbed" - - task: AzureCLI@2 - inputs: - azureSubscription: "SONiC-Automation" - scriptType: 'bash' - scriptLocation: 'inlineScript' - inlineScript: | - set -o - echo "Prepare testbed" - echo "Preparing the testbed(add-topo, deploy-mg) may take 15-30 minutes. Before the testbed is ready, the progress of the test plan keeps displayed as 0, please be patient" - - echo -e "\033[33mSONiC PR system-level test is powered by SONiC Elastictest, for any issue, please send email to sonicelastictest@microsoft.com \033[0m" - IFS=',' read -ra TEST_PLAN_ID_LIST <<< "$TEST_PLAN_ID_LIST_STRING" - failure_count=0 - for TEST_PLAN_ID in "${TEST_PLAN_ID_LIST[@]}" - do - echo -e -n "\033[33mPlease visit Elastictest page \033[0m" - echo -n "$(FRONTEND_URL)/scheduler/testplan/$TEST_PLAN_ID " - echo -e "\033[33mfor detailed test plan progress \033[0m" - # When "PREPARE_TESTBED" finish, it changes into "EXECUTING" - echo "[test_plan.py] poll PREPARE_TESTBED status" - python ./.azure-pipelines/test_plan.py poll -i $TEST_PLAN_ID --expected-state PREPARE_TESTBED - RET=$? - if [ $RET -ne 0 ]; then - ((failure_count++)) - fi - done - - if [ "$failure_count" -eq ${#TEST_PLAN_ID_LIST[@]} ]; then - echo "All testplan failed, cancel following steps" - exit 3 - fi + - script: | + set -o + echo "Prepare testbed" + echo "Preparing the testbed(add-topo, deploy-mg) may take 15-30 minutes. 
Before the testbed is ready, the progress of the test plan keeps displayed as 0, please be patient" + + echo -e "\033[33mSONiC PR system-level test is powered by SONiC Elastictest, for any issue, please send email to sonicelastictest@microsoft.com \033[0m" + IFS=',' read -ra TEST_PLAN_ID_LIST <<< "$TEST_PLAN_ID_LIST_STRING" + failure_count=0 + for TEST_PLAN_ID in "${TEST_PLAN_ID_LIST[@]}" + do + echo -e -n "\033[33mPlease visit Elastictest page \033[0m" + echo -n "$(ELASTICTEST_FRONTEND_URL)/scheduler/testplan/$TEST_PLAN_ID " + echo -e "\033[33mfor detailed test plan progress \033[0m" + # When "PREPARE_TESTBED" finish, it changes into "EXECUTING" + echo "[test_plan.py] poll PREPARE_TESTBED status" + python ./.azure-pipelines/test_plan.py poll -i $TEST_PLAN_ID --expected-state PREPARE_TESTBED + RET=$? + if [ $RET -ne 0 ]; then + ((failure_count++)) + fi + done + + if [ "$failure_count" -eq ${#TEST_PLAN_ID_LIST[@]} ]; then + echo "All testplan failed, cancel following steps" + exit 3 + fi displayName: "Prepare testbed" - - task: AzureCLI@2 - inputs: - azureSubscription: "SONiC-Automation" - scriptType: 'bash' - scriptLocation: 'inlineScript' - inlineScript: | - set -o - echo "Run test" - - echo -e "\033[33mSONiC PR system-level test is powered by SONiC Elastictest, for any issue, please send email to sonicelastictest@microsoft.com \033[0m" - IFS=',' read -ra TEST_PLAN_ID_LIST <<< "$TEST_PLAN_ID_LIST_STRING" - failure_count=0 - for TEST_PLAN_ID in "${TEST_PLAN_ID_LIST[@]}" - do - echo -e -n "\033[33mPlease visit Elastictest page \033[0m" - echo -n "$(FRONTEND_URL)/scheduler/testplan/$TEST_PLAN_ID " - echo -e "\033[33mfor detailed test plan progress \033[0m" - # When "EXECUTING" finish, it changes into "KVMDUMP", "FAILED", "CANCELLED" or "FINISHED" - echo "[test_plan.py] poll EXECUTING status" - python ./.azure-pipelines/test_plan.py poll -i $TEST_PLAN_ID --expected-state EXECUTING --expected-result ${{ parameters.EXPECTED_RESULT }} - RET=$? 
- if [ $RET -ne 0 ]; then - ((failure_count++)) - fi - done - - if [ $failure_count -eq ${#TEST_PLAN_ID_LIST[@]} ]; then - echo "All testplan failed, cancel following steps" - exit 3 - fi + - script: | + set -o + echo "Run test" + + echo -e "\033[33mSONiC PR system-level test is powered by SONiC Elastictest, for any issue, please send email to sonicelastictest@microsoft.com \033[0m" + IFS=',' read -ra TEST_PLAN_ID_LIST <<< "$TEST_PLAN_ID_LIST_STRING" + failure_count=0 + for TEST_PLAN_ID in "${TEST_PLAN_ID_LIST[@]}" + do + echo -e -n "\033[33mPlease visit Elastictest page \033[0m" + echo -n "$(ELASTICTEST_FRONTEND_URL)/scheduler/testplan/$TEST_PLAN_ID " + echo -e "\033[33mfor detailed test plan progress \033[0m" + # When "EXECUTING" finish, it changes into "KVMDUMP", "FAILED", "CANCELLED" or "FINISHED" + echo "[test_plan.py] poll EXECUTING status" + python ./.azure-pipelines/test_plan.py poll -i $TEST_PLAN_ID --expected-state EXECUTING --expected-result ${{ parameters.EXPECTED_RESULT }} + RET=$? 
+ if [ $RET -ne 0 ]; then + ((failure_count++)) + fi + done + + if [ $failure_count -eq ${#TEST_PLAN_ID_LIST[@]} ]; then + echo "All testplan failed, cancel following steps" + exit 3 + fi displayName: "Run test" timeoutInMinutes: ${{ parameters.MAX_RUN_TEST_MINUTES }} - ${{ if eq(parameters.DUMP_KVM_IF_FAIL, 'True') }}: - - task: AzureCLI@2 - inputs: - azureSubscription: "SONiC-Automation" - scriptType: 'bash' - scriptLocation: 'inlineScript' - inlineScript: | - set -e - echo "KVM dump" - - echo -e "\033[33mSONiC PR system-level test is powered by SONiC Elastictest, for any issue, please send email to sonicelastictest@microsoft.com \033[0m" - IFS=',' read -ra TEST_PLAN_ID_LIST <<< "$TEST_PLAN_ID_LIST_STRING" - for TEST_PLAN_ID in "${TEST_PLAN_ID_LIST[@]}" - do - echo -e -n "\033[33mPlease visit Elastictest page \033[0m" - echo -n "$(FRONTEND_URL)/scheduler/testplan/$TEST_PLAN_ID " - echo -e "\033[33mfor detailed test plan progress \033[0m" - # When "KVMDUMP" finish, it changes into "FAILED", "CANCELLED" or "FINISHED" - echo "##[group][test_plan.py] poll KVMDUMP status" - python ./.azure-pipelines/test_plan.py poll -i $TEST_PLAN_ID --expected-state KVMDUMP - done + - script: | + set -e + echo "KVM dump" + + echo -e "\033[33mSONiC PR system-level test is powered by SONiC Elastictest, for any issue, please send email to sonicelastictest@microsoft.com \033[0m" + IFS=',' read -ra TEST_PLAN_ID_LIST <<< "$TEST_PLAN_ID_LIST_STRING" + for TEST_PLAN_ID in "${TEST_PLAN_ID_LIST[@]}" + do + echo -e -n "\033[33mPlease visit Elastictest page \033[0m" + echo -n "$(ELASTICTEST_FRONTEND_URL)/scheduler/testplan/$TEST_PLAN_ID " + echo -e "\033[33mfor detailed test plan progress \033[0m" + # When "KVMDUMP" finish, it changes into "FAILED", "CANCELLED" or "FINISHED" + echo "##[group][test_plan.py] poll KVMDUMP status" + python ./.azure-pipelines/test_plan.py poll -i $TEST_PLAN_ID --expected-state KVMDUMP + done condition: succeededOrFailed() displayName: "KVM dump" - - task: AzureCLI@2 
- inputs: - azureSubscription: "SONiC-Automation" - scriptType: 'bash' - scriptLocation: 'inlineScript' - inlineScript: | - set -e - echo "Try to cancel test plan $TEST_PLAN_ID, cancelling finished test plan has no effect." - IFS=',' read -ra TEST_PLAN_ID_LIST <<< "$TEST_PLAN_ID_LIST_STRING" - for TEST_PLAN_ID in "${TEST_PLAN_ID_LIST[@]}" - do - python ./.azure-pipelines/test_plan.py cancel -i $TEST_PLAN_ID - done + - script: | + set -e + echo "Try to cancel test plan $TEST_PLAN_ID, cancelling finished test plan has no effect." + IFS=',' read -ra TEST_PLAN_ID_LIST <<< "$TEST_PLAN_ID_LIST_STRING" + for TEST_PLAN_ID in "${TEST_PLAN_ID_LIST[@]}" + do + python ./.azure-pipelines/test_plan.py cancel -i $TEST_PLAN_ID + done condition: always() displayName: "Finalize running test plan" diff --git a/.azure-pipelines/test_plan.py b/.azure-pipelines/test_plan.py index f4b07bb2d18..7ec72614d10 100644 --- a/.azure-pipelines/test_plan.py +++ b/.azure-pipelines/test_plan.py @@ -8,7 +8,7 @@ import subprocess import copy import time -from datetime import datetime, timedelta +from datetime import datetime, timezone import requests import yaml @@ -22,8 +22,7 @@ INTERNAL_SONIC_MGMT_REPO = "https://dev.azure.com/mssonic/internal/_git/sonic-mgmt-int" PR_TEST_SCRIPTS_FILE = "pr_test_scripts.yaml" SPECIFIC_PARAM_KEYWORD = "specific_param" -TOLERATE_HTTP_EXCEPTION_TIMES = 20 -TOKEN_EXPIRE_HOURS = 1 +MAX_POLL_RETRY_TIMES = 10 MAX_GET_TOKEN_RETRY_TIMES = 3 TEST_PLAN_STATUS_UNSUCCESSFUL_FINISHED = ["FAILED", "CANCELLED"] TEST_PLAN_STEP_STATUS_UNFINISHED = ["EXECUTING", None] @@ -83,13 +82,15 @@ def __init__(self, status): def get_status(self): return self.status.value - def print_logs(self, test_plan_id, resp_data, start_time): + def print_logs(self, test_plan_id, resp_data, expected_status, start_time): status = resp_data.get("status", None) current_status = test_plan_status_factory(status).get_status() if current_status == self.get_status(): - print("Test plan id: {}, status: {}, elapsed: 
{:.0f} seconds" - .format(test_plan_id, resp_data.get("status", None), time.time() - start_time)) + print( + f"Test plan id: {test_plan_id}, status: {resp_data.get('status', None)}, " + f"expected_status: {expected_status}, elapsed: {time.time() - start_time:.0f} seconds" + ) class InitStatus(AbstractStatus): @@ -111,10 +112,12 @@ class ExecutingStatus(AbstractStatus): def __init__(self): super(ExecutingStatus, self).__init__(TestPlanStatus.EXECUTING) - def print_logs(self, test_plan_id, resp_data, start_time): - print("Test plan id: {}, status: {}, progress: {:.2f}%, elapsed: {:.0f} seconds" - .format(test_plan_id, resp_data.get("status", None), - resp_data.get("progress", 0) * 100, time.time() - start_time)) + def print_logs(self, test_plan_id, resp_data, expected_status, start_time): + print( + f"Test plan id: {test_plan_id}, status: {resp_data.get('status', None)}, " + f"expected_status: {expected_status}, progress: {resp_data.get('progress', 0) * 100:.2f}%, " + f"elapsed: {time.time() - start_time:.0f} seconds" + ) class KvmDumpStatus(AbstractStatus): @@ -150,74 +153,81 @@ def parse_list_from_str(s): if single_str.strip()] +def run_cmd(cmd): + process = subprocess.Popen( + cmd.split(), + shell=False, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE + ) + stdout, stderr = process.communicate() + return_code = process.returncode + + if return_code != 0: + raise Exception(f'Command {cmd} execution failed, rc={return_code}, error={stderr}') + return stdout, stderr, return_code + + class TestPlanManager(object): - def __init__(self, scheduler_url, community_url, frontend_url, client_id=None): + def __init__(self, scheduler_url, frontend_url, elastictest_msal_client_id, sonic_automation_umi): self.scheduler_url = scheduler_url - self.community_url = community_url self.frontend_url = frontend_url - self.client_id = client_id - self.with_auth = False - self._token = None - self._token_expires_on = None - if self.client_id: - self.with_auth = True - self.get_token() 
- - def cmd(self, cmds): - process = subprocess.Popen( - cmds, - shell=False, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE - ) - stdout, stderr = process.communicate() - return_code = process.returncode - - return stdout, stderr, return_code - - def az_run(self, cmd): - stdout, stderr, retcode = self.cmd(cmd.split()) - if retcode != 0: - raise Exception(f'Command {cmd} execution failed, rc={retcode}, error={stderr}') - return stdout, stderr, retcode + self.elastictest_msal_client_id = elastictest_msal_client_id + self.sonic_automation_umi = sonic_automation_umi def get_token(self): - token_is_valid = \ - self._token_expires_on is not None and \ - (self._token_expires_on - datetime.now()) > timedelta(hours=TOKEN_EXPIRE_HOURS) + # 1. Run az login with re-try + az_login_cmd = f"az login --identity --username {self.sonic_automation_umi}" + az_login_attempts = 0 + while az_login_attempts < MAX_GET_TOKEN_RETRY_TIMES: + try: + stdout, _, _ = run_cmd(az_login_cmd) + print(f"Az login successfully. Login time: {datetime.now(timezone.utc)}") + break + except Exception as exception: + az_login_attempts += 1 + print( + f"Failed to az login with exception: {repr(exception)}. " + f"Retry {MAX_GET_TOKEN_RETRY_TIMES - az_login_attempts} times to login." + ) - if self._token is not None and token_is_valid: - return self._token + # If az login failed, return with exception + if az_login_attempts >= MAX_GET_TOKEN_RETRY_TIMES: + raise Exception(f"Failed to az login after {MAX_GET_TOKEN_RETRY_TIMES} attempts.") - cmd = 'az account get-access-token --resource {}'.format(self.client_id) - attempt = 0 - while attempt < MAX_GET_TOKEN_RETRY_TIMES: + # 2. 
Get access token with re-try + get_token_cmd = f"az account get-access-token --resource {self.elastictest_msal_client_id}" + get_token_attempts = 0 + while get_token_attempts < MAX_GET_TOKEN_RETRY_TIMES: try: - stdout, _, _ = self.az_run(cmd) + stdout, _, _ = run_cmd(get_token_cmd) token = json.loads(stdout.decode("utf-8")) - self._token = token.get("accessToken", None) - if not self._token: - raise Exception("Parse token from stdout failed") + access_token = token.get("accessToken", None) + if not access_token: + raise Exception("Parse token from stdout failed, accessToken is None.") # Parse token expires time from string token_expires_on = token.get("expiresOn", "") - self._token_expires_on = datetime.strptime(token_expires_on, "%Y-%m-%d %H:%M:%S.%f") - print("Get token successfully.") - return self._token + if token_expires_on: + print(f"Get token successfully. Token will expire on {token_expires_on}.") + + return access_token except Exception as exception: - attempt += 1 - print("Failed to get token with exception: {}".format(repr(exception))) + get_token_attempts += 1 + print(f"Failed to get token with exception: {repr(exception)}.") - raise Exception("Failed to get token after {} attempts".format(MAX_GET_TOKEN_RETRY_TIMES)) + # If az get token failed, return with exception + if get_token_attempts >= MAX_GET_TOKEN_RETRY_TIMES: + raise Exception(f"Failed to get token after {MAX_GET_TOKEN_RETRY_TIMES} attempts") def create(self, topology, test_plan_name="my_test_plan", deploy_mg_extra_params="", kvm_build_id="", min_worker=None, max_worker=None, pr_id="unknown", output=None, common_extra_params="", **kwargs): - tp_url = "{}/test_plan".format(self.scheduler_url) + tp_url = f"{self.scheduler_url}/test_plan" testbed_name = parse_list_from_str(kwargs.get("testbed_name", None)) image_url = kwargs.get("image_url", None) hwsku = kwargs.get("hwsku", None) @@ -229,8 +239,10 @@ def create(self, topology, test_plan_name="my_test_plan", deploy_mg_extra_params 
features_exclude = parse_list_from_str(kwargs.get("features_exclude", None)) ptf_image_tag = kwargs.get("ptf_image_tag", None) - print("Creating test plan, topology: {}, name: {}, build info:{} {} {}".format(topology, test_plan_name, - repo_name, pr_id, build_id)) + print( + f"Creating test plan, topology: {topology}, name: {test_plan_name}, " + f"build info:{repo_name} {pr_id} {build_id}" + ) print("Test scripts to be covered in this test plan:") print(json.dumps(scripts, indent=4)) @@ -320,10 +332,9 @@ def create(self, topology, test_plan_name="my_test_plan", deploy_mg_extra_params "extra_params": {}, "priority": 10 } - print('Creating test plan with payload:\n{}'.format(json.dumps(payload, indent=4))) + print(f"Creating test plan with payload:\n{json.dumps(payload, indent=4)}") headers = { - "Authorization": "Bearer {}".format(self.get_token()), - "scheduler-site": "PRTest", + "Authorization": f"Bearer {self.get_token()}", "Content-Type": "application/json" } raw_resp = {} @@ -331,17 +342,16 @@ def create(self, topology, test_plan_name="my_test_plan", deploy_mg_extra_params raw_resp = requests.post(tp_url, headers=headers, data=json.dumps(payload), timeout=10) resp = raw_resp.json() except Exception as exception: - raise Exception("HTTP execute failure, url: {}, raw_resp: {}, exception: {}" - .format(tp_url, str(raw_resp), str(exception))) + raise Exception(f"HTTP execute failure, url: {tp_url}, raw_resp: {raw_resp}, exception: {str(exception)}") if not resp["data"]: - raise Exception("Pre deploy action failed with error: {}".format(resp["errmsg"])) + raise Exception(f"Create test plan failed with error: {resp['errmsg']}") if not resp["success"]: - raise Exception("Create test plan failed with error: {}".format(resp["errmsg"])) + raise Exception(f"Create test plan failed with error: {resp['errmsg']}") - print("Result of creating test plan: {}".format(str(resp["data"]))) + print(f"Result of creating test plan: {str(resp['data'])}") if output: - print("Store new 
test plan id to file {}".format(output)) + print(f"Store new test plan id to file {output}") with open(output, "a") as f: f.write(str(resp["data"]) + "\n") @@ -349,15 +359,14 @@ def create(self, topology, test_plan_name="my_test_plan", deploy_mg_extra_params def cancel(self, test_plan_id): - tp_url = "{}/test_plan/{}".format(self.scheduler_url, test_plan_id) - cancel_url = "{}/cancel".format(tp_url) + tp_url = f"{self.scheduler_url}/test_plan/{test_plan_id}" + cancel_url = f"{tp_url}/cancel" - print("Cancelling test plan at {}".format(cancel_url)) + print(f"Cancelling test plan at {cancel_url}") payload = json.dumps({}) headers = { - "Authorization": "Bearer {}".format(self.get_token()), - "scheduler-site": "PRTest", + "Authorization": f"Bearer {self.get_token()}", "Content-Type": "application/json" } @@ -366,73 +375,57 @@ def cancel(self, test_plan_id): raw_resp = requests.post(cancel_url, headers=headers, data=payload, timeout=10) resp = raw_resp.json() except Exception as exception: - raise Exception("HTTP execute failure, url: {}, raw_resp: {}, exception: {}" - .format(cancel_url, str(raw_resp), str(exception))) + raise Exception(f"HTTP execute failure, url: {cancel_url}, raw_resp: {str(raw_resp)}, " + f"exception: {str(exception)}") if not resp["success"]: - raise Exception("Cancel test plan failed with error: {}".format(resp["errmsg"])) + raise Exception(f"Cancel test plan failed with error: {resp['errmsg']}") - print("Result of cancelling test plan at {}:".format(tp_url)) + print(f"Result of cancelling test plan at {tp_url}:") print(str(resp["data"])) def poll(self, test_plan_id, interval=60, timeout=-1, expected_state="", expected_result=None): - print("Polling progress and status of test plan at {}/scheduler/testplan/{}" - .format(self.frontend_url, test_plan_id)) - print("Polling interval: {} seconds".format(interval)) + print(f"Polling progress and status of test plan at {self.frontend_url}/scheduler/testplan/{test_plan_id}") + print(f"Polling interval: 
{interval} seconds") - poll_url = "{}/test_plan/{}/get_test_plan_status".format(self.scheduler_url, test_plan_id) - poll_url_no_auth = "{}/get_test_plan_status/{}".format(self.community_url, test_plan_id) + poll_url = f"{self.scheduler_url}/test_plan/{test_plan_id}/get_test_plan_status" + # In current polling task, initialize headers one time to avoid frequent token accessing + # For some tasks running over 24h, then token may expire, need a fresh headers = { + "Authorization": f"Bearer {self.get_token()}", "Content-Type": "application/json" } start_time = time.time() - http_exception_times = 0 - http_exception_times_no_auth = 0 - failed_poll_auth_url = False + poll_retry_times = 0 while timeout < 0 or (time.time() - start_time) < timeout: resp = None - # To make the transition smoother, first try to access the original API - if not failed_poll_auth_url: - try: - if self.with_auth: - headers["Authorization"] = "Bearer {}".format(self.get_token()) - resp = requests.get(poll_url, headers=headers, timeout=10).json() - except Exception as exception: - print("HTTP execute failure, url: {}, raw_resp: {}, exception: {}".format(poll_url, resp, - str(exception))) - http_exception_times = http_exception_times + 1 - if http_exception_times >= TOLERATE_HTTP_EXCEPTION_TIMES: - failed_poll_auth_url = True - else: - time.sleep(interval) - continue - - # If failed on poll auth url(most likely token has expired), try with no-auth url - else: - print("Polling test plan status failed with auth url, try with no-auth url.") - try: - resp = requests.get(poll_url_no_auth, headers={"Content-Type": "application/json"}, - timeout=10).json() - except Exception as e: - print("HTTP execute failure, url: {}, raw_resp: {}, exception: {}".format(poll_url_no_auth, resp, - repr(e))) - http_exception_times_no_auth = http_exception_times_no_auth + 1 - if http_exception_times_no_auth >= TOLERATE_HTTP_EXCEPTION_TIMES: - raise Exception( - "HTTP execute failure, url: {}, raw_resp: {}, exception: 
{}".format(poll_url_no_auth, resp, - repr(e))) - else: - time.sleep(interval) - continue + try: + resp = requests.get(poll_url, headers=headers, timeout=10).json() + + if not resp: + raise Exception("Poll test plan status failed with request error, no response!") + + if not resp["success"]: + raise Exception(f"Get test plan status failed with error: {resp['errmsg']}") + + resp_data = resp.get("data", None) + if not resp_data: + raise Exception("No valid data in response.") - if not resp: - raise Exception("Poll test plan status failed with request error, no response!") + except Exception as exception: + print(f"Failed to get valid response, url: {poll_url}, raw_resp: {resp}, exception: {str(exception)}") - if not resp["success"]: - raise Exception("Query test plan at {} failed with error: {}".format(poll_url, resp["errmsg"])) + # Refresh headers token to address token expiration issue + headers = { + "Authorization": f"Bearer {self.get_token()}", + "Content-Type": "application/json" + } - resp_data = resp.get("data", None) - if not resp_data: - raise Exception("No valid data in response: {}".format(str(resp))) + poll_retry_times = poll_retry_times + 1 + if poll_retry_times >= MAX_POLL_RETRY_TIMES: + raise Exception("Poll test plan status failed, exceeded the maximum number of retries.") + else: + time.sleep(interval) + continue current_tp_status = resp_data.get("status", None) current_tp_result = resp_data.get("result", None) @@ -441,11 +434,10 @@ def poll(self, test_plan_id, interval=60, timeout=-1, expected_state="", expecte current_status = test_plan_status_factory(current_tp_status) expected_status = test_plan_status_factory(expected_state) - print("current test plan status: {}, expected status: {}".format(current_tp_status, expected_state)) + current_status.print_logs(test_plan_id, resp_data, expected_state, start_time) - if expected_status.get_status() == current_status.get_status(): - current_status.print_logs(test_plan_id, resp_data, start_time) - elif 
expected_status.get_status() < current_status.get_status(): + # If test plan has finished current step, its now status will behind the expected status + if expected_status.get_status() < current_status.get_status(): steps = None step_status = None runtime = resp_data.get("runtime", None) @@ -460,7 +452,7 @@ def poll(self, test_plan_id, interval=60, timeout=-1, expected_state="", expecte # Print test summary test_summary = resp_data.get("runtime", {}).get("test_summary", None) if test_summary: - print("Test summary:\n{}".format(json.dumps(test_summary, indent=4))) + print(f"Test summary:\n{json.dumps(test_summary, indent=4)}") """ In below scenarios, need to return false to pipeline. @@ -477,38 +469,34 @@ def poll(self, test_plan_id, interval=60, timeout=-1, expected_state="", expecte # Print error type and message err_code = resp_data.get("runtime", {}).get("err_code", None) if err_code: - print("Error type: {}".format(err_code)) + print(f"Error type: {err_code}") err_msg = resp_data.get("runtime", {}).get("message", None) if err_msg: - print("Error message: {}".format(err_msg)) + print(f"Error message: {err_msg}") - raise Exception("Test plan id: {}, status: {}, result: {}, Elapsed {:.0f} seconds. " - "Check {}/scheduler/testplan/{} for test plan status" - .format(test_plan_id, step_status, current_tp_result, time.time() - start_time, - self.frontend_url, - test_plan_id)) + raise Exception( + f"Test plan id: {test_plan_id}, status: {step_status}, " + f"result: {current_tp_result}, Elapsed {time.time() - start_time:.0f} seconds. " + f"Check {self.frontend_url}/scheduler/testplan/{test_plan_id} for test plan status" + ) if expected_result: if current_tp_result != expected_result: - raise Exception("Test plan id: {}, status: {}, result: {} not match expected result: {}, " - "Elapsed {:.0f} seconds. 
" - "Check {}/scheduler/testplan/{} for test plan status" - .format(test_plan_id, step_status, current_tp_result, - expected_result, time.time() - start_time, - self.frontend_url, - test_plan_id)) - - print("Current step status is {}".format(step_status)) + raise Exception( + f"Test plan id: {test_plan_id}, status: {step_status}, " + f"result: {current_tp_result} not match expected result: {expected_result}, " + f"Elapsed {time.time() - start_time:.0f} seconds. " + f"Check {self.frontend_url}/scheduler/testplan/{test_plan_id} for test plan status" + ) + + print(f"Current step status is {step_status}.") return - else: - print("Current test plan state is {}, waiting for the expected state {}".format(current_tp_status, - expected_state)) time.sleep(interval) else: raise PollTimeoutException( - "Max polling time reached, test plan at {} is not successfully finished or cancelled".format(poll_url) + f"Max polling time reached, test plan at {poll_url} is not successfully finished or cancelled" ) @@ -930,30 +918,28 @@ def poll(self, test_plan_id, interval=60, timeout=-1, expected_state="", expecte # https://github.com/microsoft/azure-pipelines-tasks/issues/10331 args.test_plan_id = args.test_plan_id.replace("'", "") - print("Test plan utils parameters: {}".format(args)) - auth_env = ["CLIENT_ID"] - required_env = ["ELASTICTEST_SCHEDULER_BACKEND_URL"] + print(f"Test plan utils parameters: {args}") - if args.action in ["create", "cancel"]: - required_env.extend(auth_env) + required_env = ["ELASTICTEST_SCHEDULER_BACKEND_URL", "ELASTICTEST_MSAL_CLIENT_ID", "SONIC_AUTOMATION_UMI"] env = { "elastictest_scheduler_backend_url": os.environ.get("ELASTICTEST_SCHEDULER_BACKEND_URL"), - "elastictest_community_url": os.environ.get("ELASTICTEST_COMMUNITY_URL"), - "client_id": os.environ.get("ELASTICTEST_MSAL_CLIENT_ID"), + "elastictest_msal_client_id": os.environ.get("ELASTICTEST_MSAL_CLIENT_ID"), "frontend_url": os.environ.get("ELASTICTEST_FRONTEND_URL", "https://elastictest.org"), + 
"sonic_automation_umi": os.environ.get("SONIC_AUTOMATION_UMI"), } env_missing = [k.upper() for k, v in env.items() if k.upper() in required_env and not v] if env_missing: - print("Missing required environment variables: {}".format(env_missing)) + print(f"Missing required environment variables: {env_missing}") sys.exit(1) try: tp = TestPlanManager( env["elastictest_scheduler_backend_url"], - env["elastictest_community_url"], env["frontend_url"], - env["client_id"]) + env["elastictest_msal_client_id"], + env["sonic_automation_umi"] + ) if args.action == "create": pr_id = os.environ.get("SYSTEM_PULLREQUEST_PULLREQUESTNUMBER") or os.environ.get( @@ -964,14 +950,7 @@ def poll(self, test_plan_id, interval=60, timeout=-1, expected_state="", expecte job_name = os.environ.get("SYSTEM_JOBDISPLAYNAME") repo_name = args.repo_name if args.repo_name else os.environ.get("BUILD_REPOSITORY_NAME") - test_plan_prefix = "{repo}_{reason}_PR_{pr_id}_BUILD_{build_id}_JOB_{job_name}" \ - .format( - repo=repo, - reason=reason, - pr_id=pr_id, - build_id=build_id, - job_name=job_name - ).replace(' ', '_') + test_plan_prefix = f"{repo}_{reason}_PR_{pr_id}_BUILD_{build_id}_JOB_{job_name}".replace(' ', '_') scripts = args.scripts specific_param = json.loads(args.specific_param) @@ -989,7 +968,7 @@ def poll(self, test_plan_id, interval=60, timeout=-1, expected_state="", expecte for num in range(args.test_plan_num): test_plan_name = copy.copy(test_plan_prefix) if args.test_plan_num > 1: - test_plan_name = "{}_{}".format(test_plan_name, num + 1) + test_plan_name = f"{test_plan_name}_{num + 1}" tp.create( args.topology, @@ -1033,8 +1012,8 @@ def poll(self, test_plan_id, interval=60, timeout=-1, expected_state="", expecte tp.cancel(args.test_plan_id) sys.exit(0) except PollTimeoutException as e: - print("Polling test plan failed with exception: {}".format(repr(e))) + print(f"Polling test plan failed with exception: {repr(e)}") sys.exit(2) except Exception as e: - print("Operation failed with 
exception: {}".format(repr(e))) + print(f"Operation failed with exception: {repr(e)}") sys.exit(3) From 15d3eed205e438a4b6712e626513a2581283ae2f Mon Sep 17 00:00:00 2001 From: chunangli Date: Thu, 31 Oct 2024 15:55:25 +0800 Subject: [PATCH 2/7] refined Signed-off-by: Chun'ang Li --- .azure-pipelines/test_plan.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/.azure-pipelines/test_plan.py b/.azure-pipelines/test_plan.py index 7ec72614d10..961a8730a28 100644 --- a/.azure-pipelines/test_plan.py +++ b/.azure-pipelines/test_plan.py @@ -170,16 +170,16 @@ def run_cmd(cmd): class TestPlanManager(object): - def __init__(self, scheduler_url, frontend_url, elastictest_msal_client_id, sonic_automation_umi): + def __init__(self, scheduler_url, frontend_url, client_id, managed_identity_id): self.scheduler_url = scheduler_url self.frontend_url = frontend_url - self.elastictest_msal_client_id = elastictest_msal_client_id - self.sonic_automation_umi = sonic_automation_umi + self.client_id = client_id + self.managed_identity_id = managed_identity_id def get_token(self): # 1. Run az login with re-try - az_login_cmd = f"az login --identity --username {self.sonic_automation_umi}" + az_login_cmd = f"az login --identity --username {self.managed_identity_id}" az_login_attempts = 0 while az_login_attempts < MAX_GET_TOKEN_RETRY_TIMES: try: @@ -198,7 +198,7 @@ def get_token(self): raise Exception(f"Failed to az login after {MAX_GET_TOKEN_RETRY_TIMES} attempts.") # 2. 
Get access token with re-try - get_token_cmd = f"az account get-access-token --resource {self.elastictest_msal_client_id}" + get_token_cmd = f"az account get-access-token --resource {self.client_id}" get_token_attempts = 0 while get_token_attempts < MAX_GET_TOKEN_RETRY_TIMES: try: @@ -920,13 +920,13 @@ def poll(self, test_plan_id, interval=60, timeout=-1, expected_state="", expecte print(f"Test plan utils parameters: {args}") - required_env = ["ELASTICTEST_SCHEDULER_BACKEND_URL", "ELASTICTEST_MSAL_CLIENT_ID", "SONIC_AUTOMATION_UMI"] + required_env = ["ELASTICTEST_SCHEDULER_BACKEND_URL", "CLIENT_ID", "MANAGED_IDENTITY_ID"] env = { "elastictest_scheduler_backend_url": os.environ.get("ELASTICTEST_SCHEDULER_BACKEND_URL"), - "elastictest_msal_client_id": os.environ.get("ELASTICTEST_MSAL_CLIENT_ID"), + "client_id": os.environ.get("ELASTICTEST_MSAL_CLIENT_ID"), "frontend_url": os.environ.get("ELASTICTEST_FRONTEND_URL", "https://elastictest.org"), - "sonic_automation_umi": os.environ.get("SONIC_AUTOMATION_UMI"), + "managed_identity_id": os.environ.get("SONIC_AUTOMATION_UMI"), } env_missing = [k.upper() for k, v in env.items() if k.upper() in required_env and not v] if env_missing: @@ -937,8 +937,8 @@ def poll(self, test_plan_id, interval=60, timeout=-1, expected_state="", expecte tp = TestPlanManager( env["elastictest_scheduler_backend_url"], env["frontend_url"], - env["elastictest_msal_client_id"], - env["sonic_automation_umi"] + env["client_id"], + env["managed_identity_id"] ) if args.action == "create": From 2f9d177b11415acbf620691ed3e6c8e9e789f58a Mon Sep 17 00:00:00 2001 From: chunangli Date: Mon, 4 Nov 2024 14:37:25 +0800 Subject: [PATCH 3/7] fix and refined Signed-off-by: Chun'ang Li --- .azure-pipelines/test_plan.py | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/.azure-pipelines/test_plan.py b/.azure-pipelines/test_plan.py index 961a8730a28..77371ba6b11 100644 --- a/.azure-pipelines/test_plan.py +++ 
b/.azure-pipelines/test_plan.py @@ -1,3 +1,10 @@ +""" +This script manages the creation, polling, and cancellation of test plans on multiple pipelines. + +Important!!! +- Any updates to this script must be tested on all dependent pipelines to ensure compatibility and prevent disruptions. +""" + from __future__ import print_function, division import argparse @@ -920,25 +927,25 @@ def poll(self, test_plan_id, interval=60, timeout=-1, expected_state="", expecte print(f"Test plan utils parameters: {args}") - required_env = ["ELASTICTEST_SCHEDULER_BACKEND_URL", "CLIENT_ID", "MANAGED_IDENTITY_ID"] + required_env = ["ELASTICTEST_SCHEDULER_BACKEND_URL", "CLIENT_ID", "SONIC_AUTOMATION_UMI"] env = { - "elastictest_scheduler_backend_url": os.environ.get("ELASTICTEST_SCHEDULER_BACKEND_URL"), - "client_id": os.environ.get("ELASTICTEST_MSAL_CLIENT_ID"), - "frontend_url": os.environ.get("ELASTICTEST_FRONTEND_URL", "https://elastictest.org"), - "managed_identity_id": os.environ.get("SONIC_AUTOMATION_UMI"), + "ELASTICTEST_SCHEDULER_BACKEND_URL": os.environ.get("ELASTICTEST_SCHEDULER_BACKEND_URL"), + "CLIENT_ID": os.environ.get("ELASTICTEST_MSAL_CLIENT_ID"), + "FRONTEND_URL": os.environ.get("ELASTICTEST_FRONTEND_URL", "https://elastictest.org"), + "SONIC_AUTOMATION_UMI": os.environ.get("SONIC_AUTOMATION_UMI"), } env_missing = [k.upper() for k, v in env.items() if k.upper() in required_env and not v] if env_missing: - print(f"Missing required environment variables: {env_missing}") + print(f"Missing required environment variables: {env_missing}.") sys.exit(1) try: tp = TestPlanManager( - env["elastictest_scheduler_backend_url"], - env["frontend_url"], - env["client_id"], - env["managed_identity_id"] + env["ELASTICTEST_SCHEDULER_BACKEND_URL"], + env["FRONTEND_URL"], + env["CLIENT_ID"], + env["SONIC_AUTOMATION_UMI"] ) if args.action == "create": From 220e420e95314aa9960fe800c9bf0f1c446cfef5 Mon Sep 17 00:00:00 2001 From: chunangli Date: Mon, 4 Nov 2024 14:58:20 +0800 Subject: [PATCH 
4/7] refine description Signed-off-by: Chun'ang Li --- .azure-pipelines/run-test-elastictest-template.yml | 7 +++++++ .azure-pipelines/test_plan.py | 8 +++++--- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/.azure-pipelines/run-test-elastictest-template.yml b/.azure-pipelines/run-test-elastictest-template.yml index ebd09be86b2..7dff58bb846 100644 --- a/.azure-pipelines/run-test-elastictest-template.yml +++ b/.azure-pipelines/run-test-elastictest-template.yml @@ -1,3 +1,10 @@ +# Description: +# - This template manages the entire life cycle of the Elastictest test plan, from creation to completion. +# +# Important!!!: +# - This template is referenced in multiple pipelines. +# - Any updates to this file must be tested on all dependent pipelines to ensure compatibility and prevent disruptions. + parameters: - name: TOPOLOGY type: string diff --git a/.azure-pipelines/test_plan.py b/.azure-pipelines/test_plan.py index 77371ba6b11..1cc48fdbd31 100644 --- a/.azure-pipelines/test_plan.py +++ b/.azure-pipelines/test_plan.py @@ -1,8 +1,10 @@ """ -This script manages the creation, polling, and cancellation of test plans on multiple pipelines. +Description: +- This script provides access to Elastictest test plan API, including creating, canceling, and polling status. -Important!!! -- Any updates to this script must be tested on all dependent pipelines to ensure compatibility and prevent disruptions. +Important!!!: +- This script is downloaded in multiple pipelines. +- Any updates to this file must be tested on all dependent pipelines to ensure compatibility and prevent disruptions. 
""" from __future__ import print_function, division From 9e1b27c023d5dd2ac1ffe120757f0f5744ed9242 Mon Sep 17 00:00:00 2001 From: chunangli Date: Mon, 4 Nov 2024 15:01:02 +0800 Subject: [PATCH 5/7] refine description Signed-off-by: Chun'ang Li --- .azure-pipelines/run-test-elastictest-template.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.azure-pipelines/run-test-elastictest-template.yml b/.azure-pipelines/run-test-elastictest-template.yml index 7dff58bb846..882ab9ce6b9 100644 --- a/.azure-pipelines/run-test-elastictest-template.yml +++ b/.azure-pipelines/run-test-elastictest-template.yml @@ -1,5 +1,5 @@ # Description: -# - This template manages the entire life cycle of the Elastictest test plan, from creation to completion. +# - This template manages the entire life cycle of the Elastictest test plan in test pipelines. # # Important!!!: # - This template is referenced in multiple pipelines. From 8fd21d52981fe7a890e9ff772e3bd9cb36217e6d Mon Sep 17 00:00:00 2001 From: chunangli Date: Mon, 4 Nov 2024 15:34:09 +0800 Subject: [PATCH 6/7] Directly specify the value of MGMT_BRANCH as master. Because dynamic assignment does not take effect immediately for the conditional statement of pipeline yaml, the expected value of MGMT_BRANCH cannot be obtained, and the locally updated testplan.py cannot be used. 
Signed-off-by: Chun'ang Li --- azure-pipelines.yml | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 5ffbf11de09..7f00e940ce0 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -69,7 +69,7 @@ stages: MIN_WORKER: $(T0_INSTANCE_NUM) MAX_WORKER: $(T0_INSTANCE_NUM) KVM_IMAGE_BRANCH: $(BUILD_BRANCH) - MGMT_BRANCH: $(BUILD_BRANCH) + MGMT_BRANCH: "master" - job: t0_2vlans_elastictest displayName: "kvmtest-t0-2vlans by Elastictest" @@ -85,7 +85,7 @@ stages: MAX_WORKER: $(T0_2VLANS_INSTANCE_NUM) DEPLOY_MG_EXTRA_PARAMS: "-e vlan_config=two_vlan_a" KVM_IMAGE_BRANCH: $(BUILD_BRANCH) - MGMT_BRANCH: $(BUILD_BRANCH) + MGMT_BRANCH: "master" - job: t1_lag_elastictest displayName: "kvmtest-t1-lag by Elastictest" @@ -99,7 +99,7 @@ stages: MIN_WORKER: $(T1_LAG_INSTANCE_NUM) MAX_WORKER: $(T1_LAG_INSTANCE_NUM) KVM_IMAGE_BRANCH: $(BUILD_BRANCH) - MGMT_BRANCH: $(BUILD_BRANCH) + MGMT_BRANCH: "master" - job: dualtor_elastictest displayName: "kvmtest-dualtor-t0 by Elastictest" @@ -114,7 +114,7 @@ stages: MAX_WORKER: $(T0_DUALTOR_INSTANCE_NUM) COMMON_EXTRA_PARAMS: "--disable_loganalyzer " KVM_IMAGE_BRANCH: $(BUILD_BRANCH) - MGMT_BRANCH: $(BUILD_BRANCH) + MGMT_BRANCH: "master" - job: multi_asic_elastictest displayName: "kvmtest-multi-asic-t1-lag by Elastictest" @@ -130,7 +130,7 @@ stages: MAX_WORKER: $(MULTI_ASIC_INSTANCE_NUM) NUM_ASIC: 4 KVM_IMAGE_BRANCH: $(BUILD_BRANCH) - MGMT_BRANCH: $(BUILD_BRANCH) + MGMT_BRANCH: "master" - job: sonic_t0_elastictest displayName: "kvmtest-t0-sonic by Elastictest" @@ -147,7 +147,7 @@ stages: COMMON_EXTRA_PARAMS: "--neighbor_type=sonic " VM_TYPE: vsonic KVM_IMAGE_BRANCH: $(BUILD_BRANCH) - MGMT_BRANCH: $(BUILD_BRANCH) + MGMT_BRANCH: "master" - job: dpu_elastictest displayName: "kvmtest-dpu by Elastictest" @@ -161,7 +161,7 @@ stages: MIN_WORKER: $(T0_SONIC_INSTANCE_NUM) MAX_WORKER: $(T0_SONIC_INSTANCE_NUM) KVM_IMAGE_BRANCH: $(BUILD_BRANCH) - MGMT_BRANCH: 
$(BUILD_BRANCH) + MGMT_BRANCH: "master" - job: onboarding_elastictest_t0 displayName: "onboarding t0 testcases by Elastictest - optional" @@ -177,7 +177,7 @@ stages: MIN_WORKER: $(T0_ONBOARDING_SONIC_INSTANCE_NUM) MAX_WORKER: $(T0_ONBOARDING_SONIC_INSTANCE_NUM) KVM_IMAGE_BRANCH: $(BUILD_BRANCH) - MGMT_BRANCH: $(BUILD_BRANCH) + MGMT_BRANCH: "master" TEST_SET: onboarding_t0 - job: onboarding_elastictest_t1 @@ -194,7 +194,7 @@ stages: MIN_WORKER: $(T1_LAG_ONBOARDING_INSTANCE_NUM) MAX_WORKER: $(T1_LAG_ONBOARDING_INSTANCE_NUM) KVM_IMAGE_BRANCH: $(BUILD_BRANCH) - MGMT_BRANCH: $(BUILD_BRANCH) + MGMT_BRANCH: "master" TEST_SET: onboarding_t1 # - job: onboarding_elastictest_dualtor @@ -211,7 +211,7 @@ stages: # MIN_WORKER: $(T0_DUALTOR_INSTANCE_NUM) # MAX_WORKER: $(T0_DUALTOR_INSTANCE_NUM) # KVM_IMAGE_BRANCH: $(BUILD_BRANCH) -# MGMT_BRANCH: $(BUILD_BRANCH) +# MGMT_BRANCH: "master" # TEST_SET: onboarding_dualtor # - job: wan_elastictest From 37889da6f27fff4e4794708eb663968be7cc1cee Mon Sep 17 00:00:00 2001 From: chunangli Date: Tue, 19 Nov 2024 11:04:28 +0800 Subject: [PATCH 7/7] remove unused dump_kvm param Signed-off-by: Chun'ang Li --- .../run-test-elastictest-template.yml | 28 ------------------- .azure-pipelines/test_plan.py | 13 --------- 2 files changed, 41 deletions(-) diff --git a/.azure-pipelines/run-test-elastictest-template.yml b/.azure-pipelines/run-test-elastictest-template.yml index 0d1e74835f9..64a1922edf2 100644 --- a/.azure-pipelines/run-test-elastictest-template.yml +++ b/.azure-pipelines/run-test-elastictest-template.yml @@ -126,13 +126,6 @@ parameters: type: string default: "" - - name: DUMP_KVM_IF_FAIL - type: string - default: "False" # KVM dump has beed deleted - values: - - "True" - - "False" - - name: REQUESTER type: string default: "" @@ -250,7 +243,6 @@ steps: --mgmt-branch ${{ parameters.MGMT_BRANCH }} \ --stop-on-failure ${{ parameters.STOP_ON_FAILURE }} \ --retry-times ${{ parameters.RETRY_TIMES }} \ - --dump-kvm-if-fail ${{ 
parameters.DUMP_KVM_IF_FAIL }} \ --requester "${{ parameters.REQUESTER }}" \ --max-execute-seconds $((${{ parameters.MAX_RUN_TEST_MINUTES }} * 60)) \ --test-plan-num ${{ parameters.TEST_PLAN_NUM }} @@ -356,26 +348,6 @@ steps: displayName: "Run test" timeoutInMinutes: ${{ parameters.MAX_RUN_TEST_MINUTES }} - - ${{ if eq(parameters.DUMP_KVM_IF_FAIL, 'True') }}: - - script: | - set -e - echo "KVM dump" - - echo -e "\033[33mSONiC PR system-level test is powered by SONiC Elastictest, for any issue, please send email to sonicelastictest@microsoft.com \033[0m" - IFS=',' read -ra TEST_PLAN_ID_LIST <<< "$TEST_PLAN_ID_LIST_STRING" - for TEST_PLAN_ID in "${TEST_PLAN_ID_LIST[@]}" - do - echo -e -n "\033[33mPlease visit Elastictest page \033[0m" - echo -n "$(ELASTICTEST_FRONTEND_URL)/scheduler/testplan/$TEST_PLAN_ID " - echo -e "\033[33mfor detailed test plan progress \033[0m" - # When "KVMDUMP" finish, it changes into "FAILED", "CANCELLED" or "FINISHED" - echo "##[group][test_plan.py] poll KVMDUMP status" - python ./.azure-pipelines/test_plan.py poll -i $TEST_PLAN_ID --expected-state KVMDUMP - done - - condition: succeededOrFailed() - displayName: "KVM dump" - - script: | set -e echo "Try to cancel test plan $TEST_PLAN_ID, cancelling finished test plan has no effect." diff --git a/.azure-pipelines/test_plan.py b/.azure-pipelines/test_plan.py index 1cc48fdbd31..7753f6d6e64 100644 --- a/.azure-pipelines/test_plan.py +++ b/.azure-pipelines/test_plan.py @@ -329,7 +329,6 @@ def create(self, topology, test_plan_name="my_test_plan", deploy_mg_extra_params "affinity": affinity, "deploy_mg_param": deploy_mg_extra_params, "max_execute_seconds": kwargs.get("max_execute_seconds", None), - "dump_kvm_if_fail": kwargs.get("dump_kvm_if_fail", False), }, "type": test_plan_type, "trigger": { @@ -826,17 +825,6 @@ def poll(self, test_plan_id, interval=60, timeout=-1, expected_state="", expecte required=False, help="Retry times after tests failed." 
) - parser_create.add_argument( - "--dump-kvm-if-fail", - type=ast.literal_eval, - dest="dump_kvm_if_fail", - nargs='?', - const='True', - default='True', - required=False, - choices=[True, False], - help="Dump KVM DUT if test plan failed, only supports KVM test plan." - ) parser_create.add_argument( "--requester", type=str, @@ -1010,7 +998,6 @@ def poll(self, test_plan_id, interval=60, timeout=-1, expected_state="", expecte platform=args.platform, stop_on_failure=args.stop_on_failure, retry_times=args.retry_times, - dump_kvm_if_fail=args.dump_kvm_if_fail, requester=args.requester, max_execute_seconds=args.max_execute_seconds, lock_wait_timeout_seconds=args.lock_wait_timeout_seconds,