diff --git a/.artifactignore b/.artifactignore new file mode 100644 index 00000000000..9f327601071 --- /dev/null +++ b/.artifactignore @@ -0,0 +1,4 @@ +**/* +!target/*.bin +!target/*.log +!target/*.img.gz diff --git a/azure-pipelines.yml b/azure-pipelines.yml index ac078ef24b7..6a700f98ecb 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -6,14 +6,128 @@ trigger: - main -pool: - vmImage: 'ubuntu-latest' +pr: +- master -steps: -- script: echo Hello, world! - displayName: 'Run a one-line script' +name: $(TeamProject)_$(Build.DefinitionName)_$(SourceBranchName)_$(Date:yyyyMMdd)$(Rev:.r) -- script: | - echo Add other tasks to build, test, and deploy your project. - echo See https://aka.ms/yaml - displayName: 'Run a multi-line script' +resources: + repositories: + - repository: sonic-mgmt + type: github + name: Azure/sonic-mgmt + endpoint: build + +stages: +- stage: Build + pool: sonicbld + + jobs: + - job: + displayName: "broadcom" + timeoutInMinutes: 3600 + steps: + - checkout: self + submodules: recursive + displayName: 'Checkout code' + + - script: | + sudo modprobe overlay + CACHE_OPTIONS="SONIC_DPKG_CACHE_METHOD=rcache SONIC_DPKG_CACHE_SOURCE=/nfs/dpkg_cache/broadcom" + ENABLE_DOCKER_BASE_PULL=y make configure PLATFORM=broadcom + make USERNAME=admin SONIC_BUILD_JOBS=$(nproc) $CACHE_OPTIONS target/sonic-broadcom.bin + displayName: 'Build sonic image' + - script: | + sudo rm -rf fsroot + displayName: 'Clean up build artifacts' + - publish: $(System.DefaultWorkingDirectory)/ + artifact: sonic-buildimage.broadcom + displayName: "Archive sonic image" + + - job: + displayName: "kvm" + timeoutInMinutes: 3600 + steps: + - checkout: self + submodules: recursive + displayName: 'Checkout code' + + - script: | + echo $(Build.BuildNumber) + sudo modprobe overlay + CACHE_OPTIONS="SONIC_DPKG_CACHE_METHOD=rcache SONIC_DPKG_CACHE_SOURCE=/nfs/dpkg_cache/vs" + ENABLE_DOCKER_BASE_PULL=y make configure PLATFORM=vs + make USERNAME=admin SONIC_BUILD_JOBS=$(nproc) 
$CACHE_OPTIONS target/sonic-vs.img.gz + sudo cp target/sonic-vs.img.gz /nfs/azpl/kvmimage/sonic-vs.$(Build.BuildNumber).img.gz + displayName: 'Build sonic image' + - script: | + sudo rm -rf fsroot + displayName: 'Clean up build artifacts' + - publish: $(System.DefaultWorkingDirectory)/ + artifact: sonic-buildimage.kvm + displayName: "Archive sonic image" + +- stage: Test + pool: sonictest + variables: + - name: dut + value: vlab-01 + - name: tbname + value: vms-kvm-t0 + - name: inventory + value: veos_vtb + - name: testbed_file + value: vtestbed.csv + - name: ptf_name + value: ptf_vms6-1 + + jobs: + - job: + displayName: "kvmtest" + timeoutInMinutes: 240 + steps: + - script: | + sudo mkdir -p /data/sonic-vm/images + sudo cp -v /nfs/azpl/kvmimage/sonic-vs.$(Build.BuildNumber).img.gz /data/sonic-vm/images/sonic-vs.img.gz + sudo gzip -fd /data/sonic-vm/images/sonic-vs.img.gz + username=$(id -un) + sudo chown -R $username.$username /data/sonic-vm + pushd /data/sonic-mgmt + git remote update + git reset --hard origin/master + sed -i s/use_own_value/${username}/ ansible/veos_vtb + echo aaa > ansible/password.txt + docker exec sonic-mgmt bash -c "pushd /data/sonic-mgmt/ansible;./testbed-cli.sh -d /data/sonic-vm -m $(inventory) -t $(testbed_file) refresh-dut $(tbname) password.txt" && sleep 180 + displayName: "Setup T0 testbed" + - script: | + pwd + username=$(id -un) + + docker exec sonic-mgmt bash -c "/data/sonic-mgmt/tests/kvmtest.sh -n $(tbname) $(dut)" + + # save dut state if test fails + if [ $? 
!= 0 ]; then + virsh_version=$(virsh --version) + if [ $virsh_version == "6.0.0" ]; then + rm -rf kvmdump + mkdir -p kvmdump + virsh -c qemu:///system list + virsh -c qemu:///system save $(dut) kvmdump/$(dut).memdmp + virsh -c qemu:///system dumpxml $(dut) > kvmdump/$(dut).xml + img=$(virsh -c qemu:///system domblklist $(dut) | grep vda | awk '{print $2}') + cp $img kvmdump/$(dut).img + sudo chown -R $username.$username kvmdump + virsh -c qemu:///system undefine $(dut) + fi + + rm -rf ptfdump + mkdir -p ptfdump + docker commit $ptf_name docker-ptf:$(Build.BuildNumber) + docker save docker-ptf:$(Build.BuildNumber) | gzip -c > ptfdump/docker-ptf-dump.gz + docker rmi docker-ptf:$(Build.BuildNumber) + + exit 2 + else + sudo rm /nfs/azpl/kvmimage/sonic-vs.$(Build.BuildNumber).img.gz + fi + displayName: "Run T0 tests" diff --git a/files/build_templates/sonic_debian_extension.j2 b/files/build_templates/sonic_debian_extension.j2 index eafea6a4ec1..fd5a9735bfa 100644 --- a/files/build_templates/sonic_debian_extension.j2 +++ b/files/build_templates/sonic_debian_extension.j2 @@ -306,6 +306,9 @@ sudo cp $IMAGE_CONFIGS/monit/conf.d/* $FILESYSTEM_ROOT/etc/monit/conf.d/ sudo chmod 600 $FILESYSTEM_ROOT/etc/monit/conf.d/* sudo cp $IMAGE_CONFIGS/monit/process_checker $FILESYSTEM_ROOT/usr/bin/ sudo chmod 755 $FILESYSTEM_ROOT/usr/bin/process_checker +sudo cp $IMAGE_CONFIGS/monit/container_checker $FILESYSTEM_ROOT/usr/bin/ +sudo chmod 755 $FILESYSTEM_ROOT/usr/bin/container_checker + # Install custom-built openssh sshd sudo dpkg --root=$FILESYSTEM_ROOT -i $debs_path/openssh-server_*.deb diff --git a/files/image_config/monit/conf.d/sonic-host b/files/image_config/monit/conf.d/sonic-host index 202c49f8d7b..17d7c64af7e 100644 --- a/files/image_config/monit/conf.d/sonic-host +++ b/files/image_config/monit/conf.d/sonic-host @@ -31,3 +31,5 @@ check program routeCheck with path "/usr/local/bin/route_check.py" every 5 cycles if status != 0 for 3 cycle then alert repeat every 1 cycles +check 
#!/usr/bin/env python3

"""
container_checker

This script is intended to be run by Monit. It will write an alerting message into
syslog if it found containers which were expected to run but were not running. At
the same time, if some containers were unexpected to run, it also writes an alerting
syslog message. Note that if a print(...) statement in this script was executed, the
string in it will be appended to Monit syslog messages.

The following is an example in a Monit configuration file to show how Monit will run
this script:

check program container_checker with path "/usr/bin/container_checker"
    if status != 0 for 5 times within 5 cycles then alert repeat every 1 cycles
"""

import subprocess
import sys

import swsssdk
from sonic_py_common import multi_asic


def get_command_result(command):
    """Execute `command` in a shell and return its stdout as a list of lines.

    Exits the whole script on failure (status 1 for a non-zero return code,
    status 2 for an OSError/ValueError) so that Monit observes a non-zero
    program status and raises an alert.

    Args:
        command: Shell command string to run.

    Returns:
        A list of strings: the command's stdout with the trailing newline
        stripped, split on newlines.
    """
    command_stdout = ""

    try:
        proc_instance = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                                         shell=True, universal_newlines=True)
        command_stdout, command_stderr = proc_instance.communicate()
        if proc_instance.returncode != 0:
            print("Failed to execute the command '{}'. Return code: '{}'".format(
                command, proc_instance.returncode))
            sys.exit(1)
    except (OSError, ValueError) as err:
        print("Failed to execute the command '{}'. Error: '{}'".format(command, err))
        sys.exit(2)

    return command_stdout.rstrip().split("\n")


def get_expected_running_containers():
    """Build the set of container names expected to be running.

    A container listed in the CONFIG_DB 'FEATURE' table is expected to run
    whenever its 'state' field is not 'disabled'. On a multi-ASIC device the
    'has_global_scope' and 'has_per_asic_scope' fields decide whether the
    plain container name and/or one per-ASIC instance (name + ASIC index) is
    expected; on a single-ASIC device the plain container name is expected.

    Returns:
        A set which contains the expected running containers.
    """
    config_db = swsssdk.ConfigDBConnector()
    config_db.connect()
    feature_table = config_db.get_table("FEATURE")

    expected_running_containers = set()

    for container_name, feature_info in feature_table.items():
        # Use .get() so a FEATURE entry missing a field degrades to "not
        # expected" instead of crashing the checker with a KeyError.
        if feature_info.get("state", "disabled") == "disabled":
            continue
        if multi_asic.is_multi_asic():
            if feature_info.get("has_global_scope") == "True":
                expected_running_containers.add(container_name)
            if feature_info.get("has_per_asic_scope") == "True":
                for asic_id in range(multi_asic.get_num_asics()):
                    expected_running_containers.add(container_name + str(asic_id))
        else:
            expected_running_containers.add(container_name)

    return expected_running_containers


def get_current_running_containers():
    """Return the set of currently running container names.

    Parses the output of `docker ps`: the container name is the last
    whitespace-separated column (NAMES) of each row after the header.

    Returns:
        A set which contains the current running containers.
    """
    running_containers = set()

    command_stdout = get_command_result("docker ps")
    # Skip the header row; guard against blank lines so split()[-1]
    # cannot raise IndexError.
    for line in command_stdout[1:]:
        fields = line.split()
        if fields:
            running_containers.add(fields[-1].strip())

    return running_containers


def main():
    """Compare expected vs. currently running containers; alert via exit code.

    Monit treats any non-zero program status as a failure:
        3 -- at least one expected container is not running
        4 -- at least one unexpected container is running
    The offending container names are printed so Monit appends them to its
    syslog alert message.
    """
    expected_running_containers = get_expected_running_containers()
    current_running_containers = get_current_running_containers()

    not_running_containers = expected_running_containers.difference(current_running_containers)
    if not_running_containers:
        print("Expected containers not running: " + ", ".join(not_running_containers))
        sys.exit(3)

    unexpected_running_containers = current_running_containers.difference(expected_running_containers)
    if unexpected_running_containers:
        print("Unexpected running containers: " + ", ".join(unexpected_running_containers))
        sys.exit(4)


if __name__ == "__main__":
    main()