Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .azure-pipelines/pr_test_scripts.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ t0:
- arp/test_wr_arp.py
- arp/test_unknown_mac.py
- autorestart/test_container_autorestart.py
- autorestart/test_sleep_2_hours.py
- bgp/test_bgp_dual_asn.py
- bgp/test_bgp_fact.py
- bgp/test_bgp_gr_helper.py
Expand Down
33 changes: 11 additions & 22 deletions .azure-pipelines/run-test-elastictest-template.yml
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ parameters:

- name: MAX_RUN_TEST_MINUTES
type: number
default: 480
default: 1800

- name: KVM_IMAGE_BRANCH
type: string
Expand Down Expand Up @@ -166,13 +166,13 @@ steps:
curl -u :$(MSSONIC-TOKEN) "${{ parameters.MGMT_URL }}&commitOrBranch=${{ parameters.MGMT_BRANCH }}&api-version=5.0-preview.1&path=.azure-pipelines%2Fpr_test_scripts.yaml" -o ./.azure-pipelines/pr_test_scripts.yaml
fi
displayName: "Download pr script"
- ${{ else }}:
- ${{ if ne(parameters.MGMT_BRANCH, 'master') }}:
- script: |
# Else, sonic-mgmt repo, if not master branch, need to download test_plan.py
set -ex
curl "https://raw.githubusercontent.com/sonic-net/sonic-mgmt/master/.azure-pipelines/test_plan.py" -o ./.azure-pipelines/test_plan.py
displayName: "Download test plan script"
# - ${{ else }}:
# - ${{ if ne(parameters.MGMT_BRANCH, 'master') }}:
# - script: |
# # Else, sonic-mgmt repo, if not master branch, need to download test_plan.py
# set -ex
# curl "https://raw.githubusercontent.com/sonic-net/sonic-mgmt/master/.azure-pipelines/test_plan.py" -o ./.azure-pipelines/test_plan.py
# displayName: "Download test plan script"

- script: |
# Check if azure cli is installed. If not, try to install it
Expand Down Expand Up @@ -331,12 +331,10 @@ steps:
echo -n "$(FRONTEND_URL)/scheduler/testplan/$TEST_PLAN_ID "
echo -e "\033[33mfor detailed test plan progress \033[0m"
# When "EXECUTING" finish, it changes into "KVMDUMP", "FAILED", "CANCELLED" or "FINISHED"
echo "[test_plan.py] poll EXECUTING status, timeout 22 hours"
python ./.azure-pipelines/test_plan.py poll -i $TEST_PLAN_ID --expected-state EXECUTING --expected-result ${{ parameters.EXPECTED_RESULT }} --timeout 79200
echo "[test_plan.py] poll EXECUTING status"
python ./.azure-pipelines/test_plan.py poll -i $TEST_PLAN_ID --expected-state EXECUTING --expected-result ${{ parameters.EXPECTED_RESULT }}
RET=$?
# RC==2 means polling test plan timeout, do not consider it as failure so far
if [ $RET -ne 0 ] && [ $RET -ne 2 ]; then
echo "Test plan $TEST_PLAN_ID failed with RC $RET"
if [ $RET -ne 0 ]; then
((failure_count++))
fi
done
Expand Down Expand Up @@ -382,15 +380,6 @@ steps:
inlineScript: |
set -e
echo "Try to cancel test plan $TEST_PLAN_ID, cancelling finished test plan has no effect."

# If TEST_PLAN_TYPE is NIGHTLY, skip the cancel step
test_plan_type=${{ parameters.TEST_PLAN_TYPE }}
echo "TEST_PLAN_TYPE is $test_plan_type"
if [ "$test_plan_type" == "NIGHTLY" ]; then
echo "TEST_PLAN_TYPE is NIGHTLY, skip the cancel step as a dirty workaround for az login timeout issue"
exit 0
fi

IFS=',' read -ra TEST_PLAN_ID_LIST <<< "$TEST_PLAN_ID_LIST_STRING"
for TEST_PLAN_ID in "${TEST_PLAN_ID_LIST[@]}"
do
Expand Down
97 changes: 70 additions & 27 deletions .azure-pipelines/test_plan.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ def __init__(self):
super(ExecutingStatus, self).__init__(TestPlanStatus.EXECUTING)

def print_logs(self, test_plan_id, resp_data, start_time):
print("Test plan id: {}, status: {}, progress: {}%, elapsed: {:.0f} seconds"
print("Test plan id: {}, status: {}, progress: {:.2f}%, elapsed: {:.0f} seconds"
.format(test_plan_id, resp_data.get("status", None),
resp_data.get("progress", 0) * 100, time.time() - start_time))

Expand Down Expand Up @@ -152,8 +152,9 @@ def parse_list_from_str(s):

class TestPlanManager(object):

def __init__(self, url, frontend_url, client_id=None):
self.url = url
def __init__(self, scheduler_url, community_url, frontend_url, client_id=None):
self.scheduler_url = scheduler_url
self.community_url = community_url
self.frontend_url = frontend_url
self.client_id = client_id
self.with_auth = False
Expand Down Expand Up @@ -192,7 +193,7 @@ def get_token(self):

cmd = 'az account get-access-token --resource {}'.format(self.client_id)
attempt = 0
while (attempt < MAX_GET_TOKEN_RETRY_TIMES):
while attempt < MAX_GET_TOKEN_RETRY_TIMES:
try:
stdout, _, _ = self.az_run(cmd)

Expand All @@ -216,7 +217,7 @@ def get_token(self):
def create(self, topology, test_plan_name="my_test_plan", deploy_mg_extra_params="", kvm_build_id="",
min_worker=None, max_worker=None, pr_id="unknown", output=None,
common_extra_params="", **kwargs):
tp_url = "{}/test_plan".format(self.url)
tp_url = "{}/test_plan".format(self.scheduler_url)
testbed_name = parse_list_from_str(kwargs.get("testbed_name", None))
image_url = kwargs.get("image_url", None)
hwsku = kwargs.get("hwsku", None)
Expand Down Expand Up @@ -258,14 +259,16 @@ def create(self, topology, test_plan_name="my_test_plan", deploy_mg_extra_params
# If triggered by mgmt repo, use pull request id as the code base
sonic_mgmt_pull_request_id = ""
if MGMT_REPO_FLAG in kwargs.get("source_repo"):
sonic_mgmt_pull_request_id = pr_id
sonic_mgmt_pull_request_id = 15016

# If triggered by buildimage repo, use image built from the buildId
kvm_image_build_id = kvm_build_id
kvm_image_branch = kwargs.get("kvm_image_branch", "")
if BUILDIMAGE_REPO_FLAG in kwargs.get("source_repo"):
kvm_image_build_id = build_id
kvm_image_branch = ""

print(kvm_image_branch)
affinity = json.loads(kwargs.get("affinity", "[]"))
payload = {
"name": test_plan_name,
Expand All @@ -281,7 +284,7 @@ def create(self, topology, test_plan_name="my_test_plan", deploy_mg_extra_params
"lock_wait_timeout_seconds": kwargs.get("lock_wait_timeout_seconds", None),
},
"test_option": {
"stop_on_failure": kwargs.get("stop_on_failure", True),
"stop_on_failure": False,
"retry_times": kwargs.get("retry_times", 2),
"test_cases": {
"features": features,
Expand All @@ -295,7 +298,7 @@ def create(self, topology, test_plan_name="my_test_plan", deploy_mg_extra_params
"upgrade_image_param": kwargs.get("upgrade_image_param", None),
"release": "",
"kvm_image_build_id": kvm_image_build_id,
"kvm_image_branch": kvm_image_branch
"kvm_image_branch": "master"
},
"sonic_mgmt": {
"repo_url": sonic_mgmt_repo_url,
Expand All @@ -306,7 +309,7 @@ def create(self, topology, test_plan_name="my_test_plan", deploy_mg_extra_params
"specific_param": kwargs.get("specific_param", []),
"affinity": affinity,
"deploy_mg_param": deploy_mg_extra_params,
"max_execute_seconds": kwargs.get("max_execute_seconds", None),
"max_execute_seconds": 108000,
"dump_kvm_if_fail": kwargs.get("dump_kvm_if_fail", False),
},
"type": test_plan_type,
Expand Down Expand Up @@ -348,7 +351,7 @@ def create(self, topology, test_plan_name="my_test_plan", deploy_mg_extra_params

def cancel(self, test_plan_id):

tp_url = "{}/test_plan/{}".format(self.url, test_plan_id)
tp_url = "{}/test_plan/{}".format(self.scheduler_url, test_plan_id)
cancel_url = "{}/cancel".format(tp_url)

print("Cancelling test plan at {}".format(cancel_url))
Expand All @@ -373,32 +376,63 @@ def cancel(self, test_plan_id):
print("Result of cancelling test plan at {}:".format(tp_url))
print(str(resp["data"]))

def poll(self, test_plan_id, interval=60, timeout=-1, expected_state="", expected_result=None):
def poll(self, test_plan_id, interval=1800, timeout=-1, expected_state="", expected_result=None):
print("Polling progress and status of test plan at {}/scheduler/testplan/{}"
.format(self.frontend_url, test_plan_id))
print("Polling interval: {} seconds".format(interval))

poll_url = "{}/test_plan/{}".format(self.url, test_plan_id)
poll_url = "{}/test_plan/{}/get_test_plan_status".format(self.scheduler_url, test_plan_id)
poll_url_no_auth = "{}/get_test_plan_status/{}".format(self.community_url, test_plan_id)
headers = {
"Content-Type": "application/json"
}
start_time = time.time()
http_exception_times = 0
http_exception_times_no_auth = 0
failed_poll_auth_url = False
while timeout < 0 or (time.time() - start_time) < timeout:
try:
if self.with_auth:
headers["Authorization"] = "Bearer {}".format(self.get_token())
resp = requests.get(poll_url, headers=headers, timeout=10).json()
except Exception as exception:
print("HTTP execute failure, url: {}, raw_resp: {}, exception: {}".format(poll_url, resp,
str(exception)))
http_exception_times = http_exception_times + 1
if http_exception_times >= TOLERATE_HTTP_EXCEPTION_TIMES:
raise Exception("HTTP execute failure, url: {}, raw_resp: {}, exception: {}"
.format(poll_url, resp, str(exception)))
else:
time.sleep(interval)
resp = None
# To make the transition smoother, first try to access the original API
if not failed_poll_auth_url:
try:
if self.with_auth:
headers["Authorization"] = "Bearer {}".format(self.get_token())
resp = requests.get(poll_url, headers=headers, timeout=10).json()
print("request url: ", poll_url)
print("response: ", resp)
except Exception as exception:
print("HTTP execute failure, url: {}, raw_resp: {}, exception: {}".format(poll_url, resp,
str(exception)))
http_exception_times = http_exception_times + 1
if http_exception_times >= TOLERATE_HTTP_EXCEPTION_TIMES:
failed_poll_auth_url = True
else:
time.sleep(interval)
continue

# If polling the auth URL failed (most likely because the token has expired), fall back to the no-auth URL
else:
print("Polling test plan status failed with auth url, try with no-auth url.")
try:
resp = requests.get(poll_url_no_auth, headers={"Content-Type": "application/json"},
timeout=10).json()
print("request url: ", poll_url_no_auth)
print("response: ", resp)
except Exception as e:
print("HTTP execute failure, url: {}, raw_resp: {}, exception: {}".format(poll_url_no_auth, resp,
repr(e)))
http_exception_times_no_auth = http_exception_times_no_auth + 1
if http_exception_times_no_auth >= TOLERATE_HTTP_EXCEPTION_TIMES:
raise Exception(
"HTTP execute failure, url: {}, raw_resp: {}, exception: {}".format(poll_url_no_auth, resp,
repr(e)))
else:
time.sleep(interval)
continue

if not resp:
raise Exception("Poll test plan status failed with request error, no response!")

if not resp["success"]:
raise Exception("Query test plan at {} failed with error: {}".format(poll_url, resp["errmsg"]))

Expand Down Expand Up @@ -471,7 +505,14 @@ def poll(self, test_plan_id, interval=60, timeout=-1, expected_state="", expecte
test_plan_id))

print("Current step status is {}".format(step_status))
return
# Check whether the run test step has been running for more than 24 hours.
# The goal is to make the run test step hit the 24-hour token issue.
if expected_state == "EXECUTING":
if time.time() - start_time > 24 * 3600: # 24 hours in seconds
print("Run test has been running for more than 24 hours.")
return
else:
return
else:
print("Current test plan state is {}, waiting for the expected state {}".format(current_tp_status,
expected_state))
Expand Down Expand Up @@ -878,7 +919,7 @@ def poll(self, test_plan_id, interval=60, timeout=-1, expected_state="", expecte
"--interval",
type=int,
required=False,
default=60,
default=600,
dest="interval",
help="Polling interval. Default 60 seconds."
)
Expand Down Expand Up @@ -911,6 +952,7 @@ def poll(self, test_plan_id, interval=60, timeout=-1, expected_state="", expecte

env = {
"elastictest_scheduler_backend_url": os.environ.get("ELASTICTEST_SCHEDULER_BACKEND_URL"),
"elastictest_community_url": os.environ.get("ELASTICTEST_COMMUNITY_URL"),
"client_id": os.environ.get("ELASTICTEST_MSAL_CLIENT_ID"),
"frontend_url": os.environ.get("ELASTICTEST_FRONTEND_URL", "https://elastictest.org"),
}
Expand All @@ -922,6 +964,7 @@ def poll(self, test_plan_id, interval=60, timeout=-1, expected_state="", expecte
try:
tp = TestPlanManager(
env["elastictest_scheduler_backend_url"],
env["elastictest_community_url"],
env["frontend_url"],
env["client_id"])

Expand Down
Loading